💥💻💥 A data-parallel functional programming language http://futhark-lang.org / http://github.com/hiperfit/futhark
%load_ext futhark
The %%futhark magic cell
Use the gpu option to compile and run with pyopencl:
%%futhark gpu
-- Sliding-window product-sum of `kernel` over `M`.
-- Output element i is the sum over j of kernel[j] * M[i+j]; the kernel is
-- applied as given (it is not reversed). `k` and `m` are the lengths of
-- `kernel` and `M`, named in the parameter types so the body can use them.
-- One element is produced per window start, m-k+1 in total.
let conv1d(kernel: [#k]f32)(M: [#m]f32): []f32 =
map (\i -> reduce (+) 0f32 (map (*) kernel M[i:i+k]))
(iota (m-k+1))
-- Entry point: returns 6.28*x/360.
-- NOTE(review): presumably an approximate degrees-to-radians conversion (6.28 ~ 2*pi) -- confirm.
entry f(x: f32): f32 = 6.28f32*x/360f32
-- Runs conv1d with the kernel [1.5, 0, 1] over the integers 0..99 converted to f32.
let main(): []f32 = conv1d [1.5f32, 0f32, 1f32] (map f32 (iota 100))
Warning: Device limits tile size to 22 (setting was 32)
Entry points and main are available in the Notebook namespace:
print(f(12))
# Call the Futhark-defined `main` from Python and print the array it returns.
print(main())
0.209333 [ 2. 4.5 7. 9.5 12. 14.5 17. 19.5 22. 24.5 27. 29.5 32. 34.5 37. 39.5 42. 44.5 47. 49.5 52. 54.5 57. 59.5 62. 64.5 67. 69.5 72. 74.5 77. 79.5 82. 84.5 87. 89.5 92. 94.5 97. 99.5 102. 104.5 107. 109.5 112. 114.5 117. 119.5 122. 124.5 127. 129.5 132. 134.5 137. 139.5 142. 144.5 147. 149.5 152. 154.5 157. 159.5 162. 164.5 167. 169.5 172. 174.5 177. 179.5 182. 184.5 187. 189.5 192. 194.5 197. 199.5 202. 204.5 207. 209.5 212. 214.5 217. 219.5 222. 224.5 227. 229.5 232. 234.5 237. 239.5 242. 244.5]
%%futhark
entry main(): f32 =
42 -- NOTE(review): the bare literal is typed i32 (see the compiler error that follows), conflicting with the declared f32; this cell appears to exist to show that error
In: main(): f32 =
Declaration of function main at tmpElpeLL.fut:2:7-2:7 declares return type f32, but body has type i32
If you find this error message confusing, uninformative, or wrong, please open an issue at https://github.com/HIPERFIT/futhark/issues.
View how Futhark's compiler internalizes a program by using the `inspect`
option. It defaults to the CPU compilation pipeline; use the `gpu`
option to view GPU-optimized output.
%%futhark inspect
-- Sliding-window product-sum of `kernel` over `M`.
-- Output element i is the sum over j of kernel[j] * M[i+j]; the kernel is
-- applied as given (it is not reversed). `k` and `m` are the lengths of
-- `kernel` and `M`, named in the parameter types so the body can use them.
-- One element is produced per window start, m-k+1 in total.
let conv1d(kernel: [#k]f32)(M: [#m]f32): []f32 =
map (\i -> reduce (+) 0f32 (map (*) kernel M[i:i+k]))
(iota (m-k+1))
-- Entry point: returns 6.28*x/360.
-- NOTE(review): presumably an approximate degrees-to-radians conversion (6.28 ~ 2*pi) -- confirm.
entry f(x: f32): f32 = 6.28f32*x/360f32
-- Runs conv1d with the kernel [1.5, 0, 1] over the integers 0..99 converted to f32.
let main(): []f32 = conv1d [1.5f32, 0f32, 1f32] (map f32 (iota 100))
entry [f32] f(f32 x_475) = let {f32 x_476} = fmul32(6.28f32, x_475) let {f32 res_477} = fdiv32(x_476, 360.0f32) in {res_477} entry [[?1]f32@2] main() = let {mem(12i64) mem_524} = alloc(12i64) -- arg_478 : [3i32]f32@mem_524->Direct(3i32) let {[3i32]f32 arg_478} = [1.5f32, 0.0f32, 1.0f32] let {mem(400i64) mem_527} = alloc(400i64) -- result_505 : [100i32]f32@mem_527->Direct(100i32) let {[100i32]f32 result_505} = scratch(f32, 100i32) -- arg_480 : [100i32]f32@mem_527->Direct(100i32) let {[100i32]f32 arg_480} = -- Consumes result_505 -- map_outarr_506 : *[100i32]f32@mem_527->Direct(100i32) loop {*[100i32]f32 map_outarr_506} = {result_505} for i_504:i32 < 100i32 do { let {f32 res_482} = sitofp i32 i_504 to f32 -- lw_dest_507 : [100i32]f32@mem_527->Direct(100i32) let {([100i32]f32 lw_dest_507 <- map_outarr_506)[i_504]} = -- Consumes map_outarr_506 res_482 in {lw_dest_507} } let {mem(392i64) mem_530} = alloc(392i64) -- result_515 : [98i32]f32@mem_530->Direct(98i32) let {[98i32]f32 result_515} = scratch(f32, 98i32) -- res_484 : [98i32]f32@mem_530->Direct(98i32) let {[98i32]f32 res_484} = -- Consumes result_515 -- map_outarr_516 : *[98i32]f32@mem_530->Direct(98i32) loop {*[98i32]f32 map_outarr_516} = {result_515} for i_514:i32 < 98i32 do { let {f32 res_491} = loop {f32 acc_511} = {0.0f32} for i_510:i32 < 3i32 do { let {f32 binop_param_x_496} = arg_478[i_510] let {i32 j_p_i_t_s_521} = add32(i_514, i_510) let {f32 binop_param_y_497} = arg_480[j_p_i_t_s_521] let {f32 res_498} = fmul32(binop_param_x_496, binop_param_y_497) let {f32 res_499} = fadd32(acc_511, res_498) in {res_499} } -- lw_dest_517 : [98i32]f32@mem_530->Direct(98i32) let {([98i32]f32 lw_dest_517 <- map_outarr_516)[i_514]} = -- Consumes map_outarr_516 res_491 in {lw_dest_517} } in {res_484}