From 18250e716ff7651d846a946f3a9221a550e04b11 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Thu, 21 Apr 2022 01:09:10 -0400 Subject: [PATCH] Ah, the remaining calls were to =. Added 'inlining' the = and comp_helper loop into repeated calls to comp_helper_helper, eliminating the param array overhead. Now fib only allocates 10 times (instead of 4 million), and runs in .107s, finally beating Python handily and becoming about 2x as slow as Chez. Feels like a decent spot for now, and that was most all of the low hanging fruit. The only thing left now is inlining of user functions to get fib_let performing as well - it looks glacial now at .4s because of the 2 remaining closure calls that the let expands to --- partial_eval.scm | 40 ++++++++++++++++++++++++++++++++++++---- table.md | 20 ++++++++++---------- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/partial_eval.scm b/partial_eval.scm index 442b540..0e1dddd 100644 --- a/partial_eval.scm +++ b/partial_eval.scm @@ -4566,12 +4566,12 @@ ;; Test for the function being a constant to inline ;; Namely, vcond (also veval!) 
- (single_num_type_check (lambda (code) (concat (local.set '$type_check_tmp code) + (single_num_type_check (lambda (code) (concat (local.set '$prim_tmp_a code) (_if '$not_num - (i64.ne (i64.const 0) (i64.and (i64.const 1) (local.get '$type_check_tmp))) + (i64.ne (i64.const 0) (i64.and (i64.const 1) (local.get '$prim_tmp_a))) (then (unreachable)) ) - (local.get '$type_check_tmp)))) + (local.get '$prim_tmp_a)))) (gen_numeric_impl (lambda (operation) (dlet (((param_codes err ctx) (compile_params false ctx params))) (mif err (array nil nil (str err " from function params in call to comb " (str_strip c)) ctx) @@ -4580,6 +4580,31 @@ (single_num_type_check (idx param_codes 0)) (slice param_codes 1 -1)) nil ctx))) )) + (gen_cmp_impl (lambda (lt_case eq_case gt_case) + (dlet (((param_codes err ctx) (compile_params false ctx params))) + (mif err (array nil nil (str err " from function params in call to comb " (str_strip c)) ctx) + (array nil + (concat + (apply concat param_codes) + (i64.const true_val) + (flat_map (lambda (i) (concat + (local.set '$prim_tmp_a) + (local.set '$prim_tmp_b) + (local.set '$prim_tmp_c) + (call '$comp_helper_helper (local.get '$prim_tmp_c) + (local.get '$prim_tmp_b) + (i64.const lt_case) + (i64.const eq_case) + (i64.const gt_case)) + (local.set '$prim_tmp_a (i64.and (local.get '$prim_tmp_a))) + (local.get '$prim_tmp_c) + (local.get '$prim_tmp_a) + )) + (range 1 num_params)) + (_drop) (_drop) (local.get '$prim_tmp_a) + ) + nil ctx))) + )) ) (cond ((and (prim_comb? func_value) (= (.prim_comb_sym func_value) 'veval)) (dlet ( @@ -4619,6 +4644,7 @@ ((and (prim_comb? func_value) (= (.prim_comb_sym func_value) '+)) (gen_numeric_impl i64.add)) ((and (prim_comb? func_value) (= (.prim_comb_sym func_value) '-)) (gen_numeric_impl i64.sub)) + ((and (prim_comb? 
func_value) (= (.prim_comb_sym func_value) '=)) (gen_cmp_impl false_val true_val false_val)) (true (dlet ( @@ -4665,6 +4691,9 @@ ;params (mif (= #b0 (band (>> func_val 35) #b1)) (concat + ;(dlet ( (_ (true_print "WIRED " (>> func_val 35) " " (true_str_strip c))) ) nil) + ;(call '$print (i64.const (<< (>> func_val 35) 1))) + ;(call '$print (i64.const newline_msg_val)) (dlet ((wrap_level (>> (band func_val #x10) 4))) (cond ((= 0 wrap_level) wrap_0_inner_code) ((= 1 wrap_level) wrap_1_inner_code) @@ -4676,6 +4705,9 @@ (array)) ) (concat + ;(call '$print (i64.const (<< (>> func_val 35) 1))) + ;(call '$print (i64.const newline_msg_val)) + (dlet ( (_ (mif (= 29 (>> func_val 35)) (true_print "TIRED " (>> func_val 35) " " (true_str_strip c)))) ) nil) (dlet ((wrap_level (>> (band func_val #x10) 4))) (cond ((= 0 wrap_level) wrap_0_param_code) ((= 1 wrap_level) wrap_1_param_code) @@ -4945,7 +4977,7 @@ (call '$drop (local.get '$d_env))) (local.get '$outer_s_env)) )) - (our_func (apply func (concat (array '$userfunc) parameter_symbols (array '(param $outer_s_env i64) '(result i64) '(local $param_ptr i32) '(local $s_env i64) '(local $tmp_ptr i32) '(local $tmp i64) '(local $type_check_tmp i64) + (our_func (apply func (concat (array '$userfunc) parameter_symbols (array '(param $outer_s_env i64) '(result i64) '(local $param_ptr i32) '(local $s_env i64) '(local $tmp_ptr i32) '(local $tmp i64) '(local $prim_tmp_a i64) '(local $prim_tmp_b i64) '(local $prim_tmp_c i64) (local.set '$s_env (i64.const nil_val)) diff --git a/table.md b/table.md index d2eb6ae..9df0f69 100644 --- a/table.md +++ b/table.md @@ -1,12 +1,12 @@ | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | |:---|---:|---:|---:|---:| -| `echo 30 \| wasmtime ./fib_compiled.wasm` | 281.3 ± 4.3 | 274.3 | 289.5 | 24.35 ± 1.25 | -| `echo 30 \| wasmtime ./fib_compiled_let.wasm` | 716.4 ± 52.4 | 692.3 | 862.6 | 62.00 ± 5.46 | -| `echo 30 \| wasmtime ./builtin_fib.wasm` | 11.6 ± 0.6 | 10.3 | 13.2 | 1.00 | -| `echo 30 \| 
wasmtime ./fib_compiled_manual.wasm` | 468.8 ± 4.1 | 462.5 | 477.2 | 40.57 ± 2.03 | -| `scheme --script ./fib.scm 30` | 53.4 ± 0.8 | 52.2 | 57.4 | 4.62 ± 0.24 | -| `scheme --script ./fib_let.scm 30` | 53.6 ± 0.9 | 52.4 | 56.7 | 4.64 ± 0.24 | -| `python3 ./fib.py 30` | 284.7 ± 5.6 | 276.9 | 292.7 | 24.64 ± 1.30 | -| `python3 ./fib_let.py 30` | 299.8 ± 5.3 | 291.5 | 304.7 | 25.94 ± 1.36 | -| `echo 30 \| wasmtime ./rust_fib/target/wasm32-wasi/debug/rust_let.wasm` | 29.8 ± 0.7 | 28.7 | 32.5 | 2.58 ± 0.14 | -| `echo 30 \| wasmtime ./rust_fib/target/wasm32-wasi/release/rust_let.wasm` | 18.5 ± 0.6 | 17.4 | 20.4 | 1.60 ± 0.09 | +| `echo 30 \| wasmtime ./fib_compiled.wasm` | 107.5 ± 2.2 | 105.6 | 116.4 | 9.33 ± 0.51 | +| `echo 30 \| wasmtime ./fib_compiled_let.wasm` | 469.3 ± 3.1 | 464.3 | 474.2 | 40.74 ± 2.08 | +| `echo 30 \| wasmtime ./builtin_fib.wasm` | 11.5 ± 0.6 | 10.4 | 14.3 | 1.00 | +| `echo 30 \| wasmtime ./fib_compiled_manual.wasm` | 292.5 ± 5.7 | 287.5 | 308.1 | 25.39 ± 1.38 | +| `scheme --script ./fib.scm 30` | 54.6 ± 1.4 | 52.8 | 60.1 | 4.74 ± 0.27 | +| `scheme --script ./fib_let.scm 30` | 53.7 ± 0.9 | 52.2 | 55.9 | 4.67 ± 0.25 | +| `python3 ./fib.py 30` | 291.7 ± 3.7 | 286.0 | 296.9 | 25.32 ± 1.32 | +| `python3 ./fib_let.py 30` | 303.7 ± 4.2 | 293.7 | 308.0 | 26.36 ± 1.38 | +| `echo 30 \| wasmtime ./rust_fib/target/wasm32-wasi/debug/rust_let.wasm` | 29.6 ± 0.6 | 28.4 | 31.6 | 2.57 ± 0.14 | +| `echo 30 \| wasmtime ./rust_fib/target/wasm32-wasi/release/rust_let.wasm` | 18.4 ± 0.5 | 17.3 | 20.0 | 1.59 ± 0.09 |