This section introduces some basic, commonly used tricks for getting good performance in Julia.
x = [2, 3, 4, 5, 6, 2, 3, 4, 5]
y = [32, 32, 5, 42, 6, 17, 19, 20, 24]
9-element Array{Int64,1}: 32 32 5 42 6 17 19 20 24
n = length(x)
x̄ = sum(x) / n
ȳ = sum(y) / n
21.88888888888889
x̂ = x .- x̄
ŷ = y .- ȳ
9-element Array{Float64,1}: 10.11111111111111 10.11111111111111 -16.88888888888889 20.11111111111111 -15.88888888888889 -4.888888888888889 -2.8888888888888893 -1.8888888888888893 2.1111111111111107
ρ = sum(x̂ .* ŷ) / (sqrt(sum(x̂.^2))*sqrt(sum(ŷ.^2)))
-0.20034374130204088
"""
    correlation()

Return the Pearson correlation coefficient of the global vectors `x` and `y`.

This zero-argument method reads `x` and `y` from global scope instead of taking them as
arguments. Throws an `AssertionError` when the two globals differ in length.
"""
function correlation()
    len = length(x)
    @assert length(y) == len "Not matched sample size"
    # Deviations of each sample from its mean.
    dx = x .- sum(x) / len
    dy = y .- sum(y) / len
    covariance = sum(dx .* dy)
    spread = sqrt(sum(dx.^2)) * sqrt(sum(dy.^2))
    return covariance / spread
end
correlation (generic function with 1 method)
using BenchmarkTools
correlation()
-0.20034374130204088
@btime correlation()
3.127 μs (28 allocations: 1.19 KiB)
-0.20034374130204088
@benchmark correlation()
BenchmarkTools.Trial: memory estimate: 1.19 KiB allocs estimate: 28 -------------- minimum time: 3.155 μs (0.00% GC) median time: 3.286 μs (0.00% GC) mean time: 4.314 μs (18.39% GC) maximum time: 6.021 ms (99.88% GC) -------------- samples: 10000 evals/sample: 8
"""
    correlation(x, y)

Return the Pearson correlation coefficient of the samples `x` and `y`.

The data are passed as arguments rather than read from global variables.
Throws an `AssertionError` when `x` and `y` differ in length.
"""
function correlation(x, y)
    len = length(x)
    @assert length(y) == len "Not matched sample size"
    # Deviations of each sample from its mean.
    dx = x .- sum(x) / len
    dy = y .- sum(y) / len
    covariance = sum(dx .* dy)
    spread = sqrt(sum(dx.^2)) * sqrt(sum(dy.^2))
    return covariance / spread
end
correlation (generic function with 2 methods)
correlation(x, y)
-0.20034374130204088
@btime correlation(x, y)
244.085 ns (6 allocations: 816 bytes)
-0.20034374130204088
@benchmark correlation(x, y)
BenchmarkTools.Trial: memory estimate: 816 bytes allocs estimate: 6 -------------- minimum time: 242.258 ns (0.00% GC) median time: 258.422 ns (0.00% GC) mean time: 309.224 ns (12.24% GC) maximum time: 113.919 μs (99.70% GC) -------------- samples: 10000 evals/sample: 434
x[2:8]
7-element Array{Int64,1}: 3 4 5 6 2 3 4
@btime x[2:8]
56.149 ns (1 allocation: 144 bytes)
7-element Array{Int64,1}: 3 4 5 6 2 3 4
x_ = 5x .+ 1
9-element Array{Int64,1}: 11 16 21 26 31 11 16 21 26
@btime x_ = 5x .+ 1
382.456 ns (4 allocations: 368 bytes)
9-element Array{Int64,1}: 11 16 21 26 31 11 16 21 26
@btime x[2:8];
54.950 ns (1 allocation: 144 bytes)
@btime view(x, 2:8);
26.640 ns (1 allocation: 48 bytes)
largeX = rand(100_000)
@btime largeX[10:10_010]; # get 10_001 elements (the range is inclusive on both ends)
5.209 μs (2 allocations: 78.27 KiB)
@btime view(largeX, 10:10_010);
25.960 ns (1 allocation: 48 bytes)
X1 = [[1, 2, 3], [4, 5]]
2-element Array{Array{Int64,1},1}: [1, 2, 3] [4, 5]
X2 = copy(X1) # shallow copy
2-element Array{Array{Int64,1},1}: [1, 2, 3] [4, 5]
X2[1] === X1[1]
true
X3 = deepcopy(X1)
2-element Array{Array{Int64,1},1}: [1, 2, 3] [4, 5]
X3[1] === X1[1]
false
@btime X2 = copy(X1);
44.176 ns (1 allocation: 96 bytes)
@btime X3 = deepcopy(X1);
474.327 ns (5 allocations: 672 bytes)
"""
    correlation(x, y)

Return the Pearson correlation coefficient of the samples `x` and `y`.

Only three temporary arrays are materialized: the element-wise products of the
deviations, and the squared deviations of each sample.
Throws an `AssertionError` when `x` and `y` differ in length.
"""
function correlation(x, y)
    len = length(x)
    @assert length(y) == len "Not matched sample size"
    μx, μy = sum(x) / len, sum(y) / len
    # Each fused broadcast below allocates exactly one result array.
    cross = @. (x - μx) * (y - μy)
    sqx = @. (x - μx)^2
    sqy = @. (y - μy)^2
    return sum(cross) / (sqrt(sum(sqx)) * sqrt(sum(sqy)))
end
correlation (generic function with 2 methods)
correlation(x, y)
@benchmark correlation(x, y)
BenchmarkTools.Trial: memory estimate: 496 bytes allocs estimate: 4 -------------- minimum time: 178.035 ns (0.00% GC) median time: 184.145 ns (0.00% GC) mean time: 225.122 ns (11.98% GC) maximum time: 77.747 μs (99.59% GC) -------------- samples: 10000 evals/sample: 690
"""
    prealloc(X)

Allocate a 10×10 buffer with `similar`, so it takes its element type from `X`, and
return it. The buffer's contents are not initialized here.
"""
function prealloc(X)
    # Other ways to obtain a buffer up front:
    #   Array{SOMETYPE}(undef, 10, 10)
    #   fill(π, (2, 3, 4))
    buffer = similar(X, 10, 10)
    # do something with `buffer` here
    return buffer
end
prealloc (generic function with 1 method)
"""
    row_major(X)

Sum all entries of the matrix `X` into a `Float64`, walking along each row in turn
(the column index varies fastest).

Julia stores arrays column by column, so this order jumps through memory; it is the
slow counterpart of a column-by-column traversal.
"""
function row_major(X)
    total = 0.0
    # In a multi-range `for`, the last range is the innermost loop: i outer, j inner.
    for i in 1:size(X, 1), j in 1:size(X, 2)
        total += X[i, j]
    end
    return total
end
row_major (generic function with 1 method)
X = rand(10_000, 10_000)
row_major(X)
@btime row_major(X)
1.822 s (1 allocation: 16 bytes)
4.9999165264804e7
"""
    col_major(X)

Sum all entries of the matrix `X` into a `Float64`, walking down each column in turn
(the row index varies fastest), which matches Julia's column-major array layout.
"""
function col_major(X)
    total = 0.0
    # In a multi-range `for`, the last range is the innermost loop: j outer, i inner.
    for j in 1:size(X, 2), i in 1:size(X, 1)
        total += X[i, j]
    end
    return total
end
col_major (generic function with 1 method)
col_major(X)
@btime col_major(X)
99.203 ms (1 allocation: 16 bytes)
4.999916526480132e7
"""
    type_instable(x)

Return the `Int` value `5` when `x > 1`, and the `Float64` value `5.0` otherwise.

The return type depends on the *value* of `x`, not just on its type, so the inferred
return type is `Union{Float64, Int64}`. This function exists to demonstrate that
type instability.
"""
type_instable(x) = x > 1 ? 5 : 5.0
type_instable (generic function with 1 method)
@code_warntype type_instable(4)
Variables #self#::Core.Compiler.Const(type_instable, false) x::Int64 Body::Union{Float64, Int64} 1 ─ %1 = (x > 1)::Bool └── goto #3 if not %1 2 ─ return 5 3 ─ return 5.0
ret1 = function1(args...)
ret2 = function2(ret1, ...)
ret3 = function3(ret1, ret2, ...)
Any
or Union{...}
ret1 = function1(args...) # ret1 isa Union{A,B}
if ret1 isa A
ret2 = function2_specialized_for_A(ret1, ...)
ret3 = function3_specialized_for_A(ret1, ret2, ...)
...
else
ret2 = function2_specialized_for_B(ret1, ...)
ret3 = function3_specialized_for_B(ret1, ret2, ...)
...
end
Ref: Union splitting
@code_warntype correlation(x, y)
Variables #self#::Core.Compiler.Const(correlation, false) x::Array{Int64,1} y::Array{Int64,1} n::Int64 x̄::Float64 ȳ::Float64 x̂ŷ::Array{Float64,1} x̂::Array{Float64,1} ŷ::Array{Float64,1} ρ::Float64 Body::Float64 1 ─ Core.NewvarNode(:(x̄)) │ Core.NewvarNode(:(ȳ)) │ Core.NewvarNode(:(x̂ŷ)) │ Core.NewvarNode(:(x̂)) │ Core.NewvarNode(:(ŷ)) │ Core.NewvarNode(:(ρ)) │ (n = Main.length(x)) │ %8 = Main.length(y)::Int64 │ %9 = (%8 == n)::Bool └── goto #3 if not %9 2 ─ goto #4 3 ─ %12 = Base.AssertionError("Not matched sample size")::AssertionError └── Base.throw(%12) 4 ┄ %14 = Main.sum(x)::Int64 │ (x̄ = %14 / n) │ %16 = Main.sum(y)::Int64 │ (ȳ = %16 / n) │ %18 = Base.broadcasted(Main.:-, x, x̄)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(-),Tuple{Array{Int64,1},Float64}} │ %19 = Base.broadcasted(Main.:-, y, ȳ)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(-),Tuple{Array{Int64,1},Float64}} │ %20 = Base.broadcasted(Main.:*, %18, %19)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(*),Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(-),Tuple{Array{Int64,1},Float64}},Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(-),Tuple{Array{Int64,1},Float64}}}} │ (x̂ŷ = Base.materialize(%20)) │ %22 = Base.broadcasted(Main.:-, x, x̄)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(-),Tuple{Array{Int64,1},Float64}} │ %23 = Core.apply_type(Base.Val, 2)::Core.Compiler.Const(Val{2}, false) │ %24 = (%23)()::Core.Compiler.Const(Val{2}(), false) │ %25 = Base.broadcasted(Base.literal_pow, Main.:^, %22, %24)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(Base.literal_pow),Tuple{Base.RefValue{typeof(^)},Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(-),Tuple{Array{Int64,1},Float64}},Base.RefValue{Val{2}}}} │ (x̂ = Base.materialize(%25)) │ %27 = 
Base.broadcasted(Main.:-, y, ȳ)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(-),Tuple{Array{Int64,1},Float64}} │ %28 = Core.apply_type(Base.Val, 2)::Core.Compiler.Const(Val{2}, false) │ %29 = (%28)()::Core.Compiler.Const(Val{2}(), false) │ %30 = Base.broadcasted(Base.literal_pow, Main.:^, %27, %29)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(Base.literal_pow),Tuple{Base.RefValue{typeof(^)},Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(-),Tuple{Array{Int64,1},Float64}},Base.RefValue{Val{2}}}} │ (ŷ = Base.materialize(%30)) │ %32 = Main.sum(x̂ŷ)::Float64 │ %33 = Main.sum(x̂)::Float64 │ %34 = Main.sqrt(%33)::Float64 │ %35 = Main.sum(ŷ)::Float64 │ %36 = Main.sqrt(%35)::Float64 │ %37 = (%34 * %36)::Float64 │ (ρ = %32 / %37) └── return ρ
There are some convenient functions to get type information.
X = rand(3, 4)
eltype(X)
Float64
Type information can also be obtained from the type parameters of a parametric method:
"""
    foo(A::Array{T}) where {T}

Return the additive zero of the element type `T` of `A`.

The element type is taken from the method's type parameter, so the contents of `A`
are never read.
"""
foo(A::Array{T}) where {T} = zero(T)
foo (generic function with 1 method)
foo(rand(3,4))
0.0
There are some ways to generate objects with known type information.
zero(Float64)
0.0
one(Int64)
1
zero(Bool)
false
@btime col_major(X)
33.016 ns (1 allocation: 16 bytes)
6.069340471885451
"""
    col_major2(X)

Sum all entries of the matrix `X` into a `Float64`, column by column, with bounds
checking switched off for the inner loop via `@inbounds`.
"""
function col_major2(X)
    nrows = size(X, 1)
    ncols = size(X, 2)
    total = 0.0
    for j in 1:ncols
        # Indices come straight from `size`, so the bounds check is skipped here.
        @inbounds for i in 1:nrows
            total += X[i, j]
        end
    end
    return total
end
col_major2 (generic function with 1 method)
col_major2(X)
@btime col_major2(X)
27.140 ns (1 allocation: 16 bytes)
6.069340471885451
"""
    matrix_multiply(X, Y)

Return the matrix product of `X` and `Y` as a new `size(X, 1)` × `size(Y, 2)` array.

The element type of the result is the promotion of the element types of `X` and `Y`,
so mixed inputs such as `Int` × `Float64` are supported.

Throws an `AssertionError` when `size(X, 2) != size(Y, 1)`.
"""
function matrix_multiply(X, Y) # untyped arguments: "generic", but not in the useful sense
    n = size(X, 2)
    @assert size(Y, 1) == n "Dimension dismatched!"
    # Allocate for the promoted element type so every product fits in the output.
    T = promote_type(eltype(X), eltype(Y))
    Z = similar(X, T, size(X, 1), size(Y, 2))
    # Column index outermost, so writes to Z follow the column-major memory layout.
    for j in 1:size(Z, 2), i in 1:size(Z, 1)
        # Accumulate the dot product of row i of X and column j of Y explicitly:
        # `X[i, :] * Y[:, j]` is vector * vector, for which `*` has no method
        # (and the slices would allocate copies).
        acc = zero(T)
        for k in 1:n
            acc += X[i, k] * Y[k, j]
        end
        Z[i, j] = acc
    end
    return Z
end
matrix_multiply (generic function with 1 method)
"""
    matrix_multiply(X::Array, Y::Array)

Return the matrix product of the arrays `X` and `Y` as a new
`size(X, 1)` × `size(Y, 2)` array.

The element type of the result is the promotion of the element types of `X` and `Y`,
so mixed inputs such as `Int` × `Float64` are supported.

Throws an `AssertionError` when `size(X, 2) != size(Y, 1)`.
"""
function matrix_multiply(X::Array, Y::Array) # restricted to `Array`: good? still generic?
    n = size(X, 2)
    @assert size(Y, 1) == n "Dimension dismatched!"
    # Allocate for the promoted element type so every product fits in the output.
    T = promote_type(eltype(X), eltype(Y))
    Z = similar(X, T, size(X, 1), size(Y, 2))
    # Column index outermost, so writes to Z follow the column-major memory layout.
    for j in 1:size(Z, 2), i in 1:size(Z, 1)
        # Accumulate the dot product of row i of X and column j of Y explicitly:
        # `X[i, :] * Y[:, j]` is vector * vector, for which `*` has no method
        # (and the slices would allocate copies).
        acc = zero(T)
        for k in 1:n
            acc += X[i, k] * Y[k, j]
        end
        Z[i, j] = acc
    end
    return Z
end
matrix_multiply (generic function with 2 methods)
"""
    matrix_multiply(X::AbstractArray{T,2}, Y::AbstractArray{T,2}) where {T}

Return the matrix product of the two-dimensional arrays `X` and `Y`, which share the
element type `T`, as a new `size(X, 1)` × `size(Y, 2)` array obtained from `similar(X, ...)`.

Throws an `AssertionError` when `size(X, 2) != size(Y, 1)`.
"""
function matrix_multiply(X::AbstractArray{T,2}, Y::AbstractArray{T,2}) where {T} # generic
    n = size(X, 2)
    @assert size(Y, 1) == n "Dimension dismatched!"
    Z = similar(X, size(X, 1), size(Y, 2))
    # Column index outermost, so writes to Z follow the column-major memory layout.
    for j in 1:size(Z, 2), i in 1:size(Z, 1)
        # Accumulate the dot product of row i of X and column j of Y explicitly:
        # `X[i, :] * Y[:, j]` is vector * vector, for which `*` has no method
        # (and the slices would allocate copies).
        acc = zero(T)
        for k in 1:n
            acc += X[i, k] * Y[k, j]
        end
        Z[i, j] = acc
    end
    return Z
end
matrix_multiply (generic function with 3 methods)
"""
    Point(x, y)

A pair of coordinates whose fields carry no type restriction: each field is declared
as `Any`, so any value can be stored in it.
"""
struct Point
    x::Any
    y::Any
end
# A point whose coordinates are annotated with the abstract type `Number`.
# Every instance has the single type `Point`, whatever kinds of numbers it stores,
# so the concrete type of a field cannot be read off the type of the struct.
struct Point
x::Number  # abstract field type: any Number is accepted
y::Number  # abstract field type: any Number is accepted
end
x = Point(2, 3.4)
Point(2, 3.4)
typeof(x)
Point
typeof(x).uid
50365
y = Point(2., 3.4)
Point(2.0, 3.4)
typeof(x).uid
50365
# A point parameterized by its coordinate type `T`, which must be a subtype of `Number`.
# Both fields share the same `T`, and the parameter is part of the type itself:
# `Point{Int64}` and `Point{Float64}` are distinct types.
struct Point{T<:Number}
x::T  # field type is fixed by the parameter T
y::T  # same T as x
end
x = Point(2, 3)
Point{Int64}(2, 3)
y = Point(2., 3.)
Point{Float64}(2.0, 3.0)
typeof(x)
Point{Int64}
typeof(x).uid
47142
typeof(y)
Point{Float64}
typeof(y).uid
47289