using BenchmarkTools
dispdiff(fn) = display("text/markdown", "```diff\n"*read(fn, String)*"\n```")
dispdiff (generic function with 1 method)
"""https://qiita.com//ishigaki/items/1168b5c5adb099535d1c のコード"""
module A
function main()
##パラメータ
##Fの大きさで挙動が変わる
F = 8
N = 40
dt = 0.01
tend = 146
nstep = Int64(tend/dt)
xn, t = progress(dt, F, N, nstep)
end
# dx/dt=f(x)
# Lorentz96の方程式
function f(x, F, N)
g = fill(0.0, N)
for i=3:N-1
g[i] = (x[i+1]-x[i-2])x[i-1] - x[i] + F
end
# 周期境界
g[1] = (x[2]-x[N-1])x[N] - x[1] + F
g[2] = (x[3]-x[N])x[1] - x[2] + F
g[N] = (x[1]-x[N-2])x[N-1] - x[N] + F
return g
end
# L96をRK4で解く
function RK4(xold, dt, F, N)
k1 = f(xold, F, N)
k2 = f(xold + k1*dt/2., F, N)
k3 = f(xold + k2*dt/2., F, N)
k4 = f(xold + k3*dt, F, N)
xnew = xold + dt/6.0*(k1 + 2.0k2 + 2.0k3 + k4)
end
# 真値を全ステップ計算
function progress(deltat, F, N, nstep)
xn = zeros(Float64, N, nstep)
t = zeros(Float64, nstep)
#X = fill(F,N)+randn(N)
X = fill(Float64(F), N)
# 初期擾乱はj=20の点にFの値の0.1%の値を与える。
X[20] = 1.001*F
for j=1:nstep
X = RK4(X, deltat, F, N)
for i=1:N
xn[i, j] = X[i]
end
t[j] = deltat*j
end
return xn, t
end
end
Main.A
"""https://qiita.com//ishigaki/items/1168b5c5adb099535d1c のコードの改良版1"""
module B
function main()
##パラメータ
##Fの大きさで挙動が変わる
F = 8
N = 40
dt = 0.01
tend = 146
nstep = Int64(tend/dt)
xn, t = progress(dt, F, N, nstep)
end
# dx/dt=f(x)
# Lorentz96の方程式
function f!(g, x, F, N)
for i=3:N-1
g[i] = (x[i+1]-x[i-2])x[i-1] - x[i] + F
end
# 周期境界
g[1] = (x[2]-x[N-1])x[N] - x[1] + F
g[2] = (x[3]-x[N])x[1] - x[2] + F
g[N] = (x[1]-x[N-2])x[N-1] - x[N] + F
return g
end
# L96をRK4で解く
function RK4!(x, xtmp, g, k1, k2, k3, k4, dt, F, N)
f!(k1, x, F, N)
@. xtmp = x + k1*dt/2
f!(k2, xtmp, F, N)
@. xtmp = x + k2*dt/2
f!(k3, xtmp, F, N)
@. xtmp = x + k3*dt
f!(k4, xtmp, F, N)
@. x = x + dt/6*(k1 + 2k2 + 2k3 + k4)
end
# 真値を全ステップ計算
function progress(deltat, F, N, nstep)
xn = zeros(Float64, N, nstep)
t = zeros(Float64, nstep)
#X = fill(F,N)+randn(N)
X = fill(Float64(F), N)
xtmp = similar(X)
g = similar(X)
k1 = similar(X)
k2 = similar(X)
k3 = similar(X)
k4 = similar(X)
# 初期擾乱はj=20の点にFの値の0.1%の値を与える。
X[20] = 1.001*F
for j=1:nstep
RK4!(X, xtmp, g, k1, k2, k3, k4, deltat, F, N)
for i=1:N
xn[i, j] = X[i]
end
t[j] = deltat*j
end
return xn, t
end
end
dispdiff("Lorenz96 Part 2/Lorenz96-A-to-B.diff")
--- Lorenz96-A.jl 2020-10-06 10:52:57.876232100 +0900
+++ Lorenz96-B.jl 2020-10-06 10:58:50.977413200 +0900
@@ -13,8 +13,7 @@
# dx/dt=f(x)
# Lorentz96の方程式
-function f(x, F, N)
- g = fill(0.0, N)
+function f!(g, x, F, N)
for i=3:N-1
g[i] = (x[i+1]-x[i-2])x[i-1] - x[i] + F
end
@@ -28,13 +27,16 @@
end
# L96をRK4で解く
-function RK4(xold, dt, F, N)
- k1 = f(xold, F, N)
- k2 = f(xold + k1*dt/2., F, N)
- k3 = f(xold + k2*dt/2., F, N)
- k4 = f(xold + k3*dt, F, N)
+function RK4!(x, xtmp, g, k1, k2, k3, k4, dt, F, N)
+ f!(k1, x, F, N)
+ @. xtmp = x + k1*dt/2
+ f!(k2, xtmp, F, N)
+ @. xtmp = x + k2*dt/2
+ f!(k3, xtmp, F, N)
+ @. xtmp = x + k3*dt
+ f!(k4, xtmp, F, N)
- xnew = xold + dt/6.0*(k1 + 2.0k2 + 2.0k3 + k4)
+ @. x = x + dt/6*(k1 + 2k2 + 2k3 + k4)
end
# 真値を全ステップ計算
@@ -44,11 +46,17 @@
#X = fill(F,N)+randn(N)
X = fill(Float64(F), N)
+ xtmp = similar(X)
+ g = similar(X)
+ k1 = similar(X)
+ k2 = similar(X)
+ k3 = similar(X)
+ k4 = similar(X)
# 初期擾乱はj=20の点にFの値の0.1%の値を与える。
X[20] = 1.001*F
for j=1:nstep
- X = RK4(X, deltat, F, N)
+ RK4!(X, xtmp, g, k1, k2, k3, k4, deltat, F, N)
for i=1:N
xn[i, j] = X[i]
end
"""https://qiita.com//ishigaki/items/1168b5c5adb099535d1c のコードの改良版2"""
module C
function main()
##パラメータ
##Fの大きさで挙動が変わる
F = 8
N = 40
dt = 0.01
tend = 146
nstep = Int64(tend/dt)
xn, t = progress(dt, F, N, nstep)
end
# dx/dt=f(x)
# Lorentz96の方程式
function f!(g, x, F, N)
@inbounds begin
for i=3:N-1
g[i] = (x[i+1]-x[i-2])x[i-1] - x[i] + F
end
# 周期境界
g[1] = (x[2]-x[N-1])x[N] - x[1] + F
g[2] = (x[3]-x[N])x[1] - x[2] + F
g[N] = (x[1]-x[N-2])x[N-1] - x[N] + F
end
return g
end
# L96をRK4で解く
function RK4!(x, xtmp, g, k1, k2, k3, k4, dt, F, N)
f!(k1, x, F, N)
@. xtmp = x + k1*dt/2
f!(k2, xtmp, F, N)
@. xtmp = x + k2*dt/2
f!(k3, xtmp, F, N)
@. xtmp = x + k3*dt
f!(k4, xtmp, F, N)
@. x = x + dt/6*(k1 + 2k2 + 2k3 + k4)
end
# 真値を全ステップ計算
function progress(deltat, F, N, nstep)
xn = zeros(Float64, N, nstep)
t = zeros(Float64, nstep)
#X = fill(F,N)+randn(N)
X = fill(Float64(F), N)
xtmp = similar(X)
g = similar(X)
k1 = similar(X)
k2 = similar(X)
k3 = similar(X)
k4 = similar(X)
# 初期擾乱はj=20の点にFの値の0.1%の値を与える。
X[20] = 1.001*F
for j=1:nstep
RK4!(X, xtmp, g, k1, k2, k3, k4, deltat, F, N)
@inbounds for i=1:N
xn[i, j] = X[i]
end
t[j] = deltat*j
end
return xn, t
end
end
dispdiff("Lorenz96 Part 2/Lorenz96-B-to-C.diff")
--- Lorenz96-B.jl 2020-10-06 10:58:50.977413200 +0900
+++ Lorenz96-C.jl 2020-10-06 18:33:57.652248100 +0900
@@ -14,6 +14,7 @@
# dx/dt=f(x)
# Lorentz96の方程式
function f!(g, x, F, N)
+ @inbounds begin
for i=3:N-1
g[i] = (x[i+1]-x[i-2])x[i-1] - x[i] + F
end
@@ -22,6 +23,7 @@
g[1] = (x[2]-x[N-1])x[N] - x[1] + F
g[2] = (x[3]-x[N])x[1] - x[2] + F
g[N] = (x[1]-x[N-2])x[N-1] - x[N] + F
+ end
return g
end
@@ -57,7 +59,7 @@
for j=1:nstep
RK4!(X, xtmp, g, k1, k2, k3, k4, deltat, F, N)
- for i=1:N
+ @inbounds for i=1:N
xn[i, j] = X[i]
end
t[j] = deltat*j
"""https://qiita.com//ishigaki/items/1168b5c5adb099535d1c のコードの改良版4"""
module D
using Parameters
function main()
xn, t = progress(Param())
end
@with_kw struct Param{TF<:AbstractFloat, TN, Tnstep}
##パラメータ
##Fの大きさで挙動が変わる
F::TF = 8.0
N::TN = 40
dt::TF = 0.01
tend::TF = 146.0
nstep::Tnstep = round(Int, tend/dt)
end
# dx/dt=f(x)
# Lorentz96の方程式
function f!(param::Param, g, x)
@unpack N, F = param
@inbounds begin
for i=3:N-1
g[i] = (x[i+1]-x[i-2])x[i-1] - x[i] + F
end
# 周期境界
g[1] = (x[2]-x[N-1])x[N] - x[1] + F
g[2] = (x[3]-x[N])x[1] - x[2] + F
g[N] = (x[1]-x[N-2])x[N-1] - x[N] + F
end
return g
end
# L96をRK4で解く
struct RK4Func{T, P<:Param} param::P; xtmp::T; k1::T; k2::T; k3::T; k4::T end
function RK4Func(param::Param{TF}) where TF
@unpack N = param
xtmp = Vector{TF}(undef, N)
k1, k2, k3, k4 = similar(xtmp), similar(xtmp), similar(xtmp), similar(xtmp)
RK4Func(param, xtmp, k1, k2, k3, k4)
end
function (RK4!::RK4Func)(x)
@unpack param, xtmp, k1, k2, k3, k4 = RK4!
@unpack dt = param
f!(param, k1, x)
@. xtmp = x + k1*dt/2
f!(param, k2, xtmp)
@. xtmp = x + k2*dt/2
f!(param, k3, xtmp)
@. xtmp = x + k3*dt
f!(param, k4, xtmp)
@. x = x + dt/6*(k1 + 2k2 + 2k3 + k4)
end
# 真値を全ステップ計算
function progress(param::Param{TF}) where TF
@unpack dt, F, N, nstep = param
RK4! = RK4Func(param)
xn = zeros(TF, N, nstep)
t = zeros(TF, nstep)
#X = fill(F,N)+randn(N)
X = fill(F, N)
# 初期擾乱はj=20の点にFの値の0.1%の値を与える。
X[20] = 1.001*F
for j=1:nstep
RK4!(X)
@inbounds for i=1:N
xn[i, j] = X[i]
end
t[j] = dt*j
end
return xn, t
end
end
dispdiff("Lorenz96 Part 2/Lorenz96-C-to-D.diff")
--- Lorenz96-C.jl 2020-10-06 18:33:57.652248100 +0900
+++ Lorenz96-D.jl 2020-10-06 18:51:02.247214800 +0900
@@ -1,19 +1,22 @@
+using Parameters
+
function main()
+ xn, t = progress(Param())
+end
+@with_kw struct Param{TF<:AbstractFloat, TN, Tnstep}
##パラメータ
##Fの大きさで挙動が変わる
- F = 8
- N = 40
- dt = 0.01
- tend = 146
- nstep = Int64(tend/dt)
-
- xn, t = progress(dt, F, N, nstep)
-
+ F::TF = 8.0
+ N::TN = 40
+ dt::TF = 0.01
+ tend::TF = 146.0
+ nstep::Tnstep = round(Int, tend/dt)
end
# dx/dt=f(x)
# Lorentz96の方程式
-function f!(g, x, F, N)
+function f!(param::Param, g, x)
+ @unpack N, F = param
@inbounds begin
for i=3:N-1
g[i] = (x[i+1]-x[i-2])x[i-1] - x[i] + F
@@ -29,40 +32,47 @@
end
# L96をRK4で解く
-function RK4!(x, xtmp, g, k1, k2, k3, k4, dt, F, N)
- f!(k1, x, F, N)
+struct RK4Func{T, P<:Param} param::P; xtmp::T; k1::T; k2::T; k3::T; k4::T end
+function RK4Func(param::Param{TF}) where TF
+ @unpack N = param
+ xtmp = Vector{TF}(undef, N)
+ k1, k2, k3, k4 = similar(xtmp), similar(xtmp), similar(xtmp), similar(xtmp)
+ RK4Func(param, xtmp, k1, k2, k3, k4)
+end
+function (RK4!::RK4Func)(x)
+ @unpack param, xtmp, k1, k2, k3, k4 = RK4!
+ @unpack dt = param
+
+ f!(param, k1, x)
@. xtmp = x + k1*dt/2
- f!(k2, xtmp, F, N)
+ f!(param, k2, xtmp)
@. xtmp = x + k2*dt/2
- f!(k3, xtmp, F, N)
+ f!(param, k3, xtmp)
@. xtmp = x + k3*dt
- f!(k4, xtmp, F, N)
+ f!(param, k4, xtmp)
@. x = x + dt/6*(k1 + 2k2 + 2k3 + k4)
end
+
# 真値を全ステップ計算
-function progress(deltat, F, N, nstep)
- xn = zeros(Float64, N, nstep)
- t = zeros(Float64, nstep)
+function progress(param::Param{TF}) where TF
+ @unpack dt, F, N, nstep = param
+ RK4! = RK4Func(param)
+ xn = zeros(TF, N, nstep)
+ t = zeros(TF, nstep)
#X = fill(F,N)+randn(N)
- X = fill(Float64(F), N)
- xtmp = similar(X)
- g = similar(X)
- k1 = similar(X)
- k2 = similar(X)
- k3 = similar(X)
- k4 = similar(X)
+ X = fill(F, N)
# 初期擾乱はj=20の点にFの値の0.1%の値を与える。
X[20] = 1.001*F
for j=1:nstep
- RK4!(X, xtmp, g, k1, k2, k3, k4, deltat, F, N)
+ RK4!(X)
@inbounds for i=1:N
xn[i, j] = X[i]
end
- t[j] = deltat*j
+ t[j] = dt*j
end
return xn, t
A.main() == B.main() == C.main() == D.main() # 結果は全部同じ
true
@btime A.main()
@btime B.main()
@btime C.main()
@btime D.main()
;
23.488 ms (248205 allocations: 99.25 MiB) 10.406 ms (11 allocations: 4.57 MiB) 5.923 ms (11 allocations: 4.57 MiB) 5.886 ms (10 allocations: 4.57 MiB)
module A に引用したのはオリジナルのコードである.
module B では, 公式ドキュメントの Performance Tips の Pre-allocating outputs の節に従うことによって, メモリアロケーションを大幅に減らしている.
そのおかげでオリジナルの module A よりも改良版の module B では 2.2倍以上高速化 されている.
さらに, module C では Performance Annotations にしたがって @inbounds
を追加した.
そのおかげでオリジナルの module A よりも改良版の module C では 4倍程度高速化 されている.
しかし, 作業用配列変数が函数に引数としてすべて渡されているので煩雑になっている. その点は
の In[10] のコードと本質的に同じ module D では改善されている.