# Benchmark script: sum 10^7 Float64 values with C (via gcc + ccall), Python
# (built-in sum, NumPy, and a hand-written loop, all through PyCall) and several
# Julia implementations, comparing each result against Julia's built-in `sum`.

# C implementation of a plain summation loop. <stddef.h> provides `size_t`.
C_code = """
#include <stddef.h>
double c_sum(size_t n, double *X) {
    double s = 0.0;
    for (size_t i = 0; i < n; ++i) {
        s += X[i];
    }
    return s;
}
"""

# compile to a shared library by piping C_code to gcc:
# (only works if you have gcc installed)

# Base path of the compiled library; gcc writes to this path plus the platform's
# shared-library extension, while ccall below is given the extension-less path.
const Clib = tempname()

using Libdl
open(`gcc -fPIC -O3 -msse3 -xc -shared -o $(Clib * "." * Libdl.dlext) -`, "w") do f
    print(f, C_code)
end

"""
    c_sum(X::Array{Float64})

Sum the elements of `X` by calling the compiled C function `c_sum` from the
shared library at `Clib`, passing `length(X)` and a pointer to the data.
"""
c_sum(X::Array{Float64}) = ccall(("c_sum", Clib), Float64, (Csize_t, Ptr{Float64}), length(X), X)

# define a function to compute the relative (fractional) error |x-y| / mean(|x|,|y|)
relerr(x, y) = abs(x - y) * 2 / (abs(x) + abs(y))

a = rand(10^7) # array of random numbers in [0,1)

# sanity check: the C result should agree with Julia's built-in sum
relerr(c_sum(a), sum(a))

using BenchmarkTools

# `$a` interpolates the array into the benchmark expression
c_bench = @btime c_sum($a)

using PyCall

PyCall.pyversion

# call a low-level PyCall function to get a Python list, because
# by default PyCall will convert to a NumPy array instead (we benchmark NumPy below):
apy_list = PyCall.array2py(a, 1, 1)

# get the Python built-in "sum" function:
pysum = pybuiltin("sum")

relerr(pysum(apy_list), sum(a))

py_list_bench = @btime $pysum($apy_list)

# NumPy's sum applied to a NumPy array
numpy_sum = pyimport("numpy")["sum"]
apy_numpy = PyObject(a) # converts to a numpy array by default

py_numpy_bench = @btime $numpy_sum($apy_numpy)

# hand-written summation loop in pure Python
py"""
def mysum(a):
    s = 0.0
    for x in a:
        s = s + x
    return s
"""

mysum_py = py"mysum"

relerr(mysum_py(apy_list), sum(a))

@btime $mysum_py($apy_list)

@btime $mysum_py($apy_numpy)

# Julia's built-in sum on a concretely-typed array
j_bench = @btime sum($a)

typeof(a)

# same values stored in a Vector{Any}, i.e. an array with an abstract element type
a_any = Vector{Any}(a)

j_bench_any = @btime sum($a_any)

"""
    mysum1(A)

Sum the elements of `A` with a plain loop, starting from `zero(eltype(A))`.
"""
function mysum1(A)
    s = zero(eltype(A)) # the correct type of zero for A
    for a in A
        s += a
    end
    return s
end

relerr(mysum1(a), sum(a))

j2_bench = @btime mysum1($a)

"""
    mysum(A)

Same loop as `mysum1`, but with the loop annotated with `@simd`.
"""
function mysum(A)
    s = zero(eltype(A))
    @simd for a in A
        s += a
    end
    return s
end

relerr(mysum(a), sum(a))

j3_bench = @btime mysum($a)

# the same generic function applied to an array of complex numbers
z = rand(Complex{Float64}, length(a));

@btime mysum($z)