Ah, the remaining calls were to =. Added 'inlining' of the = and comp_helper loop into repeated calls to comp_helper_helper, eliminating the param array overhead. Now fib only allocates 10 times (instead of 4 million) and runs in .107s, finally beating Python handily and becoming about 2x as slow as Chez. Feels like a decent spot for now, and that was almost all of the low-hanging fruit. The only thing left now is inlining of user functions to get fib_let performing as well - it looks glacial now at .4s because of the 2 remaining closure calls that the let expands to.

This commit is contained in:
Nathan Braswell
2022-04-21 01:09:10 -04:00
parent 0cb52eb0b4
commit 18250e716f
2 changed files with 46 additions and 14 deletions

View File

@@ -4566,12 +4566,12 @@
;; Test for the function being a constant to inline ;; Test for the function being a constant to inline
;; Namely, vcond (also veval!) ;; Namely, vcond (also veval!)
(single_num_type_check (lambda (code) (concat (local.set '$type_check_tmp code) (single_num_type_check (lambda (code) (concat (local.set '$prim_tmp_a code)
(_if '$not_num (_if '$not_num
(i64.ne (i64.const 0) (i64.and (i64.const 1) (local.get '$type_check_tmp))) (i64.ne (i64.const 0) (i64.and (i64.const 1) (local.get '$prim_tmp_a)))
(then (unreachable)) (then (unreachable))
) )
(local.get '$type_check_tmp)))) (local.get '$prim_tmp_a))))
(gen_numeric_impl (lambda (operation) (gen_numeric_impl (lambda (operation)
(dlet (((param_codes err ctx) (compile_params false ctx params))) (dlet (((param_codes err ctx) (compile_params false ctx params)))
(mif err (array nil nil (str err " from function params in call to comb " (str_strip c)) ctx) (mif err (array nil nil (str err " from function params in call to comb " (str_strip c)) ctx)
@@ -4580,6 +4580,31 @@
(single_num_type_check (idx param_codes 0)) (single_num_type_check (idx param_codes 0))
(slice param_codes 1 -1)) nil ctx))) (slice param_codes 1 -1)) nil ctx)))
)) ))
;; gen_cmp_impl: build the inlined code for a comparison primitive call.
;; lt_case / eq_case / gt_case are each emitted as an i64.const argument to
;; $comp_helper_helper (the '= dispatch passes false_val true_val false_val).
;; Returns (array nil <code> nil ctx) on success, or an array carrying the
;; error string when compile_params reports an error.
(gen_cmp_impl (lambda (lt_case eq_case gt_case)
(dlet (((param_codes err ctx) (compile_params false ctx params)))
(mif err (array nil nil (str err " from function params in call to comb " (str_strip c)) ctx)
(array nil
(concat
; Emit the code for every parameter, then a true_val constant as the
; initial value of the running result.
(apply concat param_codes)
(i64.const true_val)
; One comparison step is emitted per element of (range 1 num_params).
; NOTE(review): presumably that is one step per adjacent pair of
; parameters - confirm the bounds of range.
(flat_map (lambda (i) (concat
; Store three values into the temporaries, in the order a, b, c.
; NOTE(review): presumably these pop the running result and the two
; most recently pushed operands off the wasm stack - confirm.
(local.set '$prim_tmp_a)
(local.set '$prim_tmp_b)
(local.set '$prim_tmp_c)
(call '$comp_helper_helper (local.get '$prim_tmp_c)
(local.get '$prim_tmp_b)
(i64.const lt_case)
(i64.const eq_case)
(i64.const gt_case))
; i64.and is given only one explicit operand here.
; NOTE(review): the other is presumably the call result left on the
; stack, which would fold this comparison into $prim_tmp_a - confirm.
(local.set '$prim_tmp_a (i64.and (local.get '$prim_tmp_a)))
; Re-emit $prim_tmp_c and the running result for the next step.
(local.get '$prim_tmp_c)
(local.get '$prim_tmp_a)
))
(range 1 num_params))
; Two drops, then $prim_tmp_a is emitted as the final value.
; NOTE(review): presumably the drops discard the two values re-emitted by
; the last step; with no steps emitted there may be fewer than two values
; to drop - confirm how small parameter counts are handled.
(_drop) (_drop) (local.get '$prim_tmp_a)
)
nil ctx)))
))
) (cond ) (cond
((and (prim_comb? func_value) (= (.prim_comb_sym func_value) 'veval)) (dlet ( ((and (prim_comb? func_value) (= (.prim_comb_sym func_value) 'veval)) (dlet (
@@ -4619,6 +4644,7 @@
((and (prim_comb? func_value) (= (.prim_comb_sym func_value) '+)) (gen_numeric_impl i64.add)) ((and (prim_comb? func_value) (= (.prim_comb_sym func_value) '+)) (gen_numeric_impl i64.add))
((and (prim_comb? func_value) (= (.prim_comb_sym func_value) '-)) (gen_numeric_impl i64.sub)) ((and (prim_comb? func_value) (= (.prim_comb_sym func_value) '-)) (gen_numeric_impl i64.sub))
((and (prim_comb? func_value) (= (.prim_comb_sym func_value) '=)) (gen_cmp_impl false_val true_val false_val))
(true (dlet ( (true (dlet (
@@ -4665,6 +4691,9 @@
;params ;params
(mif (= #b0 (band (>> func_val 35) #b1)) (mif (= #b0 (band (>> func_val 35) #b1))
(concat (concat
;(dlet ( (_ (true_print "WIRED " (>> func_val 35) " " (true_str_strip c))) ) nil)
;(call '$print (i64.const (<< (>> func_val 35) 1)))
;(call '$print (i64.const newline_msg_val))
(dlet ((wrap_level (>> (band func_val #x10) 4))) (dlet ((wrap_level (>> (band func_val #x10) 4)))
(cond ((= 0 wrap_level) wrap_0_inner_code) (cond ((= 0 wrap_level) wrap_0_inner_code)
((= 1 wrap_level) wrap_1_inner_code) ((= 1 wrap_level) wrap_1_inner_code)
@@ -4676,6 +4705,9 @@
(array)) (array))
) )
(concat (concat
;(call '$print (i64.const (<< (>> func_val 35) 1)))
;(call '$print (i64.const newline_msg_val))
(dlet ( (_ (mif (= 29 (>> func_val 35)) (true_print "TIRED " (>> func_val 35) " " (true_str_strip c)))) ) nil)
(dlet ((wrap_level (>> (band func_val #x10) 4))) (dlet ((wrap_level (>> (band func_val #x10) 4)))
(cond ((= 0 wrap_level) wrap_0_param_code) (cond ((= 0 wrap_level) wrap_0_param_code)
((= 1 wrap_level) wrap_1_param_code) ((= 1 wrap_level) wrap_1_param_code)
@@ -4945,7 +4977,7 @@
(call '$drop (local.get '$d_env))) (call '$drop (local.get '$d_env)))
(local.get '$outer_s_env)) (local.get '$outer_s_env))
)) ))
(our_func (apply func (concat (array '$userfunc) parameter_symbols (array '(param $outer_s_env i64) '(result i64) '(local $param_ptr i32) '(local $s_env i64) '(local $tmp_ptr i32) '(local $tmp i64) '(local $type_check_tmp i64) (our_func (apply func (concat (array '$userfunc) parameter_symbols (array '(param $outer_s_env i64) '(result i64) '(local $param_ptr i32) '(local $s_env i64) '(local $tmp_ptr i32) '(local $tmp i64) '(local $prim_tmp_a i64) '(local $prim_tmp_b i64) '(local $prim_tmp_c i64)
(local.set '$s_env (i64.const nil_val)) (local.set '$s_env (i64.const nil_val))

View File

@@ -1,12 +1,12 @@
| Command | Mean [ms] | Min [ms] | Max [ms] | Relative | | Command | Mean [ms] | Min [ms] | Max [ms] | Relative |
|:---|---:|---:|---:|---:| |:---|---:|---:|---:|---:|
| `echo 30 \| wasmtime ./fib_compiled.wasm` | 281.3 ± 4.3 | 274.3 | 289.5 | 24.35 ± 1.25 | | `echo 30 \| wasmtime ./fib_compiled.wasm` | 107.5 ± 2.2 | 105.6 | 116.4 | 9.33 ± 0.51 |
| `echo 30 \| wasmtime ./fib_compiled_let.wasm` | 716.4 ± 52.4 | 692.3 | 862.6 | 62.00 ± 5.46 | | `echo 30 \| wasmtime ./fib_compiled_let.wasm` | 469.3 ± 3.1 | 464.3 | 474.2 | 40.74 ± 2.08 |
| `echo 30 \| wasmtime ./builtin_fib.wasm` | 11.6 ± 0.6 | 10.3 | 13.2 | 1.00 | | `echo 30 \| wasmtime ./builtin_fib.wasm` | 11.5 ± 0.6 | 10.4 | 14.3 | 1.00 |
| `echo 30 \| wasmtime ./fib_compiled_manual.wasm` | 468.8 ± 4.1 | 462.5 | 477.2 | 40.57 ± 2.03 | | `echo 30 \| wasmtime ./fib_compiled_manual.wasm` | 292.5 ± 5.7 | 287.5 | 308.1 | 25.39 ± 1.38 |
| `scheme --script ./fib.scm 30` | 53.4 ± 0.8 | 52.2 | 57.4 | 4.62 ± 0.24 | | `scheme --script ./fib.scm 30` | 54.6 ± 1.4 | 52.8 | 60.1 | 4.74 ± 0.27 |
| `scheme --script ./fib_let.scm 30` | 53.6 ± 0.9 | 52.4 | 56.7 | 4.64 ± 0.24 | | `scheme --script ./fib_let.scm 30` | 53.7 ± 0.9 | 52.2 | 55.9 | 4.67 ± 0.25 |
| `python3 ./fib.py 30` | 284.7 ± 5.6 | 276.9 | 292.7 | 24.64 ± 1.30 | | `python3 ./fib.py 30` | 291.7 ± 3.7 | 286.0 | 296.9 | 25.32 ± 1.32 |
| `python3 ./fib_let.py 30` | 299.8 ± 5.3 | 291.5 | 304.7 | 25.94 ± 1.36 | | `python3 ./fib_let.py 30` | 303.7 ± 4.2 | 293.7 | 308.0 | 26.36 ± 1.38 |
| `echo 30 \| wasmtime ./rust_fib/target/wasm32-wasi/debug/rust_let.wasm` | 29.8 ± 0.7 | 28.7 | 32.5 | 2.58 ± 0.14 | | `echo 30 \| wasmtime ./rust_fib/target/wasm32-wasi/debug/rust_let.wasm` | 29.6 ± 0.6 | 28.4 | 31.6 | 2.57 ± 0.14 |
| `echo 30 \| wasmtime ./rust_fib/target/wasm32-wasi/release/rust_let.wasm` | 18.5 ± 0.6 | 17.4 | 20.4 | 1.60 ± 0.09 | | `echo 30 \| wasmtime ./rust_fib/target/wasm32-wasi/release/rust_let.wasm` | 18.4 ± 0.5 | 17.3 | 20.0 | 1.59 ± 0.09 |