1.5e7 # a floating-point value 1.5 × 10⁷

x = 1/49 # division of two integers produces a floating-point value
x * 49
1 - x * 49

2.0^-52, eps(), eps(1.0), eps(Float64) # these are all the same thing

setprecision(BigFloat, 256)
big(1)/big(49)
49 * (big(1)/big(49)) - 1

3//2
typeof(3//2)
dump(3//2) # dump lets us see how the underlying data of a Rational is stored: as 2 integers

# 1.5 *is* exactly represented in binary floating point:
big(1.5) == 3//2, 1.5 == 3//2

1/49 == 1//49

# 0.1 is *not* exactly represented:
big(0.1), 0.1 == 1//10
big"0.1" # parse 0.1 directly in BigFloat precision, rather than converting the Float64 0.1

# 1e10 *is* in 𝔽 for Float64, since it is a "small" (about 15 decimal digits) integer:
big(1e10)

# 1e100 is also *not* exactly represented in Float64 precision,
# since it is not a "small" (≈15 digit) integer times a power of two,
# but it *is* exactly represented in 256-bit BigFloat:
big(1e100)  # rounds 1e100 to Float64, then extends to BigFloat
big"1e100"  # exact in 256-bit BigFloat

1.0 + 1.0 == 2.0

1e-100
setprecision(4096) do # even the default 256 bits isn't enough to display all the digits: temporarily increase the BigFloat precision
    big(1.0e-100)
end

(1.0 + -1.0) + 1e-100
1.0 + (-1.0 + 1e-100)
big"1.0" + (big"-1.0" + big"1e-100")

1e300 # okay: 10³⁰⁰ is in the representable range
(1e300)^2 # overflows
-Inf
1 / Inf
floatmax(Float64), floatmax(Float32)

1e-300 # okay
(1e-300)^2 # underflows to +0
floatmin(Float64), floatmin(Float32)
-1e-300 * 1e-300 # underflows to -0

+0.0 == -0.0
1 / +0.0, 1 / -0.0
signbit(+0.0), signbit(-0.0)
1 / (1 / -Inf)

0 * Inf, Inf / Inf, 0 / 0, 0 * NaN
NaN == NaN
isinf(2.5), isinf(Inf), isinf(-Inf), isinf(NaN)
isnan(2.5), isnan(Inf), isnan(-Inf), isnan(NaN)
isfinite(2.5), isfinite(Inf), isfinite(-Inf), isfinite(NaN)

sqrt(-1.0)       # DomainError: real sqrt of a negative number
sqrt(-1.0 + 0im) # complex sqrt is fine

x = 2.0^-60
@show x
@show exp(x)
@show exp(x) - 1 # naive algorithm: catastrophic cancellation

# naive algorithm computed in BigFloat precision, then rounded back to Float64:
Float64(exp(big(x)) - 1)

x + x^2/2 + x^3/6 # 3 terms of the Taylor series are more than enough for x ≈ 8.7e-19
x # in fact, just one term is enough
expm1(x)

function my_cumsum(x)
    y = similar(x) # allocate an array of the same type and size as x
    y[1] = x[1]
    for i = 2:length(x)
        y[i] = y[i-1] + x[i]
    end
    return y
end

eps(Float32), eps(Float64)

x = rand(Float32, 10^7) # 10^7 single-precision values, uniform in [0,1)
@time y = my_cumsum(x)
yexact = my_cumsum(Float64.(x)) # same thing in double precision
err = abs.(y .- yexact) ./ abs.(yexact) # relative error in y

using PyPlot
n = 1:10:length(err) # downsample by 10 for plotting speed
loglog(n, err[n])
ylabel("relative error")
xlabel("# summands")
# plot a √n line for comparison
loglog([1,length(err)], sqrt.([1,length(err)]) * 1e-7, "k--")
text(1e3, 1e-5, L"\sim \sqrt{n}")
title("naive cumsum implementation")

@time y2 = cumsum(x)
err2 = abs.(y2 .- yexact) ./ abs.(yexact)
loglog(n, err2[n])
ylabel("relative error")
xlabel("# summands")
title("built-in cumsum function")
# plot a √(log n) line for comparison
loglog(n, sqrt.(log.(n)) * 1e-7, "k--")
text(1e2, 3.3e-7, L"\sim \sqrt{\log n}")

rounding(Float32)

# ] add SetRounding   (install the SetRounding package from the Pkg REPL)
using SetRounding

errup = setrounding(Float32, RoundUp) do # error in the single-precision (Float32) sum, with rounding temporarily set to RoundUp
    abs.(my_cumsum(x) .- yexact) ./ abs.(yexact) # relative error in y
end
loglog(n, errup[n])
ylabel("relative error")
xlabel("# summands")
# plot an O(n) line for comparison
loglog([1,length(errup)], [1,length(errup)] * 1e-7, "k--")
text(1e3, 4e-4, L"\sim n")
title("naive cumsum implementation, rounded up")
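
# Aside (an illustrative sketch, not one of the cells above): another way to beat the
# √n error growth of the naive loop is compensated (Kahan) summation, which carries a
# running correction term for the low-order bits lost in each addition.  The function
# name my_cumsum_kahan and the comparison below are hypothetical additions for
# illustration; they reuse x, yexact, and n from the cells above.
function my_cumsum_kahan(x)
    y = similar(x)
    s = zero(eltype(x)) # running sum
    c = zero(eltype(x)) # running compensation for lost low-order bits
    for i in eachindex(x)
        t = x[i] - c        # corrected summand
        snew = s + t        # low-order bits of t may be lost in this addition
        c = (snew - s) - t  # recover the lost part, to subtract from the next summand
        s = snew
        y[i] = s
    end
    return y
end

errkahan = abs.(my_cumsum_kahan(x) .- yexact) ./ abs.(yexact) # relative error in the compensated sum
loglog(n, errkahan[n]) # expect roughly constant error of a few ulps, nearly independent of n
ylabel("relative error")
xlabel("# summands")
title("compensated (Kahan) cumsum sketch")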