From 069c9775e16cf7ff38c715c5d219f0d022764f06 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Mon, 29 Jan 2024 02:03:59 -0500 Subject: [PATCH] Ported interpreter (without trace execution) to use new packed, JIT-friendly Form, with all needed additions --- slj/src/grammar.lalrpop | 17 +- slj/src/lib.rs | 1069 ++++++++++++++++++++++++++------------- slj/src/main.rs | 406 +-------------- 3 files changed, 742 insertions(+), 750 deletions(-) diff --git a/slj/src/grammar.lalrpop b/slj/src/grammar.lalrpop index 5b383b8..a693e0d 100644 --- a/slj/src/grammar.lalrpop +++ b/slj/src/grammar.lalrpop @@ -1,24 +1,23 @@ use std::str::FromStr; -use std::rc::Rc; use sl::Form; grammar; -pub Term: Rc
= { +pub Term: Form = { "true" => Form::new_bool(true), "false" => Form::new_bool(false), - NUM => Form::new_int(i32::from_str(<>).unwrap()), - SYM => Rc::new(Form::Symbol(<>.to_owned())), + NUM => Form::new_int(isize::from_str(<>).unwrap()), + SYM => Form::new_symbol(<>), "(" ")" => <>.unwrap_or(Form::new_nil()), - "'" => Rc::new(Form::Pair(Rc::new(Form::Symbol("quote".to_owned())), Rc::new(Form::Pair(<>, Form::new_nil())))), + "'" => Form::new_pair(Form::new_symbol("quote"), Form::new_pair(<>, Form::new_nil())), "!" => { h.append(t).unwrap() }, }; -ListInside: Rc = { - => Rc::new(Form::Pair(<>, Form::new_nil())), - => Rc::new(Form::Pair(h, t)), - "." => Rc::new(Form::Pair(a, d)), +ListInside: Form = { + =>Form::new_pair(<>, Form::new_nil()), + => Form::new_pair(h, t), + "." => Form::new_pair(a, d), } match { "true", diff --git a/slj/src/lib.rs b/slj/src/lib.rs index 0d4d4d5..cff7c03 100644 --- a/slj/src/lib.rs +++ b/slj/src/lib.rs @@ -1,24 +1,211 @@ -use std::rc::Rc; use std::collections::{BTreeSet,BTreeMap}; use std::fmt; use anyhow::{anyhow,bail,Result}; +use std::sync::Mutex; +use std::marker::PhantomData; +use std::ops::Deref; +use std::ptr::{self, NonNull}; +use std::mem::{self, ManuallyDrop}; +use std::alloc::{self, Layout}; +use std::cell::Cell; + +use once_cell::sync::Lazy; + +#[repr(C)] +pub struct Cvec { + ptr: NonNull, + cap: usize, + len: usize, +} +unsafe impl Send for Cvec {} +unsafe impl Sync for Cvec {} +impl Cvec { + pub fn new() -> Self { + assert!(mem::size_of::() != 0, "no ZST"); + Cvec { + ptr: NonNull::dangling(), + len: 0, + cap: 0, + } + } + fn grow(&mut self) { + let (new_cap, new_layout) = if self.cap == 0 { + (1, Layout::array::(1).unwrap()) + } else { + let new_cap = 2 * self.cap; + let new_layout = Layout::array::(new_cap).unwrap(); + (new_cap, new_layout) + }; + assert!(new_layout.size() <= isize::MAX as usize, "allocation too large"); + let new_ptr = if self.cap == 0 { + unsafe { alloc::alloc(new_layout) } + } else { + let old_layout = Layout::array::(self.cap).unwrap(); + let old_ptr = self.ptr.as_ptr() as *mut u8; + unsafe { alloc::realloc(old_ptr, old_layout, new_layout.size()) } + }; + self.ptr = match NonNull::new(new_ptr as *mut T) { + Some(p) => p, + None => alloc::handle_alloc_error(new_layout), + }; + self.cap = new_cap; + } + pub fn push(&mut self, elem: T) { + if self.len == self.cap { self.grow(); } + unsafe { + ptr::write(self.ptr.as_ptr().add(self.len), elem); + } + self.len += 1; + } + pub fn pop(&mut self) -> Option { + if self.len == 0 { + None + } else { + self.len -= 1; + unsafe { + Some(ptr::read(self.ptr.as_ptr().add(self.len))) + } + } + } +} +impl Drop for Cvec { + fn drop(&mut self) { + if self.cap != 0 { + while let Some(_) = self.pop() {} + let layout = Layout::array::(self.cap).unwrap(); + unsafe { + alloc::dealloc(self.ptr.as_ptr() as *mut u8, layout); + } + } + } +} +impl Deref for Cvec { + type Target = [T]; + fn deref(&self) -> &[T] { + unsafe { + std::slice::from_raw_parts(self.ptr.as_ptr(), self.len) + } + } +} +impl Clone for Cvec { + fn clone(&self) -> Cvec { + let layout = Layout::array::(self.cap).unwrap(); + let ptr = match NonNull::new(unsafe { alloc::alloc(layout) } as *mut T) { + Some(p) => p, + None => alloc::handle_alloc_error(layout), + }; + for i in 0..self.len { + unsafe { ptr::write(ptr.as_ptr().add(i), self[i].clone()); } + } + Self { ptr, cap: self.cap, len: self.len } + } +} +impl PartialEq for Cvec { + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} +impl Eq for Cvec {} +// insert, remove, into_iter, and drain all missing +impl fmt::Display for Cvec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[")?; + for x in self.iter() { + write!(f, " {}", x)?; + } + write!(f, " ]")?; + Ok(()) + } +} +impl fmt::Debug for Cvec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[")?; + for x in self.iter() { + write!(f, " {:?}", x)?; + } + write!(f, " ]")?; + Ok(()) + } +} -// This first Simple Lisp really is -// -// No fexprs, no mutation, no continuations, no macros, no strings. -// Int/Bool/Nil/Pair/Symbol/Closure/Prim. -// -// Figuring out GC between a JIT and Rust will be tricky. -// Can start with a like tracing-JIT-into-bytecode -// let's make our own Box, Rc, maybe Arc, Vec too? -// rustonomicon +#[repr(C)] +pub struct Crc { + ptr: NonNull>, + phantom: PhantomData> +} +#[repr(C)] +pub struct CrcInner { + rc: Cell, + data: T, +} +impl CrcInner { + pub unsafe fn increment(&self) { + let old = self.rc.get(); + self.rc.set(old + 1); + if old > isize::MAX as usize { + std::process::abort(); + } + } +} +impl Crc { + pub fn new(data: T) -> Crc { + let boxed = Box::new(CrcInner { rc: Cell::new(1), data }); + Crc { + ptr: NonNull::new(Box::into_raw(boxed)).unwrap(), + phantom: PhantomData, + } + } + pub fn into_ptr(self) -> *mut CrcInner { + ManuallyDrop::new(self).ptr.as_ptr() as *mut CrcInner + } + pub fn from_ptr(ptr: *mut CrcInner) -> Self { + Crc { + ptr: NonNull::new(ptr).unwrap(), + phantom: PhantomData, + } + } +} +unsafe impl Send for Crc {} +unsafe impl Sync for Crc {} +impl Deref for Crc { + type Target = T; + fn deref(&self) -> &T { + let inner = unsafe { self.ptr.as_ref() }; + &inner.data + } +} +impl fmt::Debug for Crc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self.deref()) + } +} +impl Clone for Crc { + fn clone(&self) -> Crc { + unsafe { self.ptr.as_ref().increment(); } + Self { + ptr: self.ptr, + phantom: PhantomData, + } + } +} +impl Drop for Crc { + fn drop(&mut self) { + let inner = unsafe { self.ptr.as_mut() }; + let old = inner.rc.get(); + inner.rc.set(old - 1); + if old != 1 { + return; + } + unsafe { drop(Box::from_raw(self.ptr.as_ptr())); } + } +} #[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy)] #[repr(transparent)] pub struct ID { - id: i64 + pub id: i64 } impl fmt::Display for ID { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -26,18 +213,26 @@ impl fmt::Display for ID { } } -#[derive(Debug)] -pub enum Form { - Nil, - Int(i32), - Bool(bool), - Symbol(String), - Pair(Rc, Rc), - Closure(Vec, Rc, Rc, ID), - Prim(Prim), +#[repr(C)] +pub struct Closure { + params: Cvec, + e: Form, + body: Form, + id: ID, } +#[repr(C)] +pub struct Form { + data: *const Form, + phantom: PhantomData +} +#[repr(C)] +struct FormPair { + car: Form, + cdr: Form, +} #[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[repr(usize)] pub enum Prim { Add, Sub, @@ -57,135 +252,197 @@ impl Prim { } } } -fn eval_prim(f: Prim, b: Rc, a: Option>) -> Result> { - Ok(match f { - Prim::Car => b.car()?, - Prim::Cdr => b.cdr()?, - _ => { - let a = a.unwrap(); - match f { - Prim::Add => Form::new_int(a.int()? + b.int()?), - Prim::Sub => Form::new_int(a.int()? - b.int()?), - Prim::Mul => Form::new_int(a.int()? * b.int()?), - Prim::Div => Form::new_int(a.int()? / b.int()?), - Prim::Mod => Form::new_int(a.int()? % b.int()?), - Prim::Cons => Form::new_pair(a, b), - Prim::Eq => Form::new_bool(a.my_eq(&b)), - _ => unreachable!(), - } - } - }) -} +/* + * this better be a 64 bit platform + * huh, if we only support i32s, then we have a lot more room for tags + * 8 byte alignment gets us 3 bits, or uh 8 options + * we'll choose 000 for ints to make math easy + * + * 000 - Int + * 001 - Nil + * 010 - Bool(false) // this is needlessly wasteful of the bits but hay - should take one of them over as a String probs + * 011 - Bool(true) + * 100 - Symbol - will want to convert into an Crc around a StringRawParts struct + * 101 - Pair - an Crc around a Pair struct + * 110 - Closure- eek: Closure(Cvec, Crc, Crc, ID), + * xxxx 111 - Prim (xxxx for which one) + * + * I don't actually think we need our own repr(C) Cvec implementation, at least not for now - we can + * make do with a CvecRawParts struct (without implementations) + * Hay I did it anyway + * + * in both cases, StringRawParts and CvecRawParts, we can rebuild slices from the raw parts for + * read-only access, which is all we need (until Drop, at which point we should re-constitute them + * from their raw parts, which is stable) + * + * For symbols, it would actually make sense to create the String, then leak it so it lasts for the + * program, then deduplicate to it and pass the static const slice around + * Could even fit entirely in the Form if the max length of a symbol is 2^16 + */ +const TAG_OFFSET: usize = 3; + +const SYM_LEN_OFFSET: usize = 3; +const SYM_LEN_MASK: usize = 0xFF; // could be bigger +const SYM_PTR_OFFSET: usize = 11; + +const TAG_MASK: usize = 0b111; +const TAG_INT: usize = 0b000; +const TAG_NIL: usize = 0b001; +const TAG_BOOL_FALSE: usize = 0b010; +const TAG_BOOL_TRUE: usize = 0b011; +const TAG_SYMBOL: usize = 0b100; +const TAG_PAIR: usize = 0b101; +const TAG_CLOSURE: usize = 0b110; +const TAG_PRIM: usize = 0b111; + + +static SYMBOLS: Lazy>> = Lazy::new(Mutex::default); impl Form { - fn my_eq(&self, o: &Rc) -> bool { - match self { - Form::Nil => o.is_nil(), - Form::Int(i) => if let Ok(oi) = o.int() { *i == oi } else { false }, - Form::Bool(b) => if let Ok(ob) = o.bool() { *b == ob } else { false }, - Form::Symbol(s) => if let Ok(os) = o.sym() { s == os } else { false }, - Form::Pair(a,b) => if let Ok((oa,ob)) = o.pair() { a.my_eq(&oa) && b.my_eq(&ob) } else { false }, - Form::Closure(_, _, _, _) => false, - Form::Prim(p) => match &**o { Form::Prim(op) => p == op, _ => false }, + pub fn new_int(x: isize) -> Self { + Self { data: (x << TAG_OFFSET) as *const Form, phantom: PhantomData } + } + pub fn new_nil() -> Self { + Self { data: TAG_NIL as *const Form, phantom: PhantomData } + } + pub fn new_bool(b: bool) -> Self { + Self { data: (if b { TAG_BOOL_TRUE } else { TAG_BOOL_FALSE }) as *const Form, phantom: PhantomData } + } + pub fn new_pair(car: Form, cdr: Form) -> Self { + let p = Crc::new(FormPair { car, cdr }).into_ptr() as usize; + assert!(p & TAG_MASK == 0); + Self { data: (p | TAG_PAIR) as *const Form, phantom: PhantomData } + } + fn new_closure(params: Cvec, e: Form, body: Form, ctx: &mut Ctx) -> Self { + let p = Crc::new(Closure { params, e, body, id: ctx.alloc_id() }).into_ptr() as usize; + assert!(p & TAG_MASK == 0); + Self { data: (p | TAG_CLOSURE) as *const Form, phantom: PhantomData } + } + pub fn new_prim(p: Prim) -> Self { + Self { data: (((p as usize) << TAG_OFFSET) | TAG_PRIM) as *const Form, phantom: PhantomData } + } + pub fn new_symbol(s: &str) -> Form { + assert!(s.len() < SYM_LEN_MASK); + let mut symbols = SYMBOLS.lock().unwrap(); + let ds = if let Some(ds) = symbols.get(s) { + ds + } else { + // here we leak the memory of a new owned copy of s, + // and then transmute it into an &'static str that we keep in our global + // map for deduplication. Spicy stuff. + let mut value = ManuallyDrop::new(s.to_owned()); + value.shrink_to_fit(); + let slice = unsafe { std::mem::transmute(value.as_str()) }; + symbols.insert(s.to_owned(), slice); + slice + }; + //println!("Deduped {s} to {ds}"); + Self { data: (((ds.as_ptr() as usize) << SYM_PTR_OFFSET) | (ds.len() << SYM_LEN_OFFSET) | TAG_SYMBOL) as *const Form, phantom: PhantomData } + } + + pub fn int(&self) -> Result { + if self.data as usize & TAG_MASK == TAG_INT { + Ok(self.data as isize >> 3) + } else { + Err(anyhow!("car on not a pair")) } } - pub fn new_pair(car: Rc, cdr: Rc) -> Rc { - Rc::new(Form::Pair(car, cdr)) + pub fn car(&self) -> Result<&Form> { + if self.data as usize & TAG_MASK == TAG_PAIR { + Ok(unsafe { &(*((self.data as usize & !TAG_MASK) as *mut CrcInner)).data.car }) + } else { + Err(anyhow!("car on not a pair")) + } } - pub fn new_nil() -> Rc { - Rc::new(Form::Nil) + pub fn cdr(&self) -> Result<&Form> { + if self.data as usize & TAG_MASK == TAG_PAIR { + Ok(unsafe { &(*((self.data as usize & !TAG_MASK) as *mut CrcInner)).data.cdr }) + } else { + Err(anyhow!("cdr on not a pair")) + } } - pub fn new_int(i: i32) -> Rc { - Rc::new(Form::Int(i)) + pub fn closure(&self) -> Result<&Closure> { + if self.data as usize & TAG_MASK == TAG_CLOSURE { + Ok(unsafe { &(*((self.data as usize & !TAG_MASK) as *mut CrcInner)).data }) + } else { + Err(anyhow!("closure on on not a closure")) + } } - pub fn new_bool(b: bool) -> Rc { - Rc::new(Form::Bool(b)) + pub fn prim(&self) -> Result { + if self.data as usize & TAG_MASK == TAG_PRIM { + Ok(unsafe { *(&((self.data as usize) >> TAG_OFFSET) as *const usize as *const Prim) }) + } else { + Err(anyhow!("prim on on not a prim")) + } } - fn new_closure(params: Vec, env: Rc, body: Rc, ctx: &mut Ctx) -> Rc { - Rc::new(Form::Closure(params, env, body, ctx.alloc_id())) + pub fn sym(&self) -> Result<&str> { + if self.data as usize & TAG_MASK == TAG_SYMBOL { + let len = ((self.data as usize) >> SYM_LEN_OFFSET) & SYM_LEN_MASK; + let ptr = ((self.data as usize) >> SYM_PTR_OFFSET) as *const u8; + Ok(std::str::from_utf8(unsafe { std::slice::from_raw_parts(ptr, len) }).unwrap()) + } else { + Err(anyhow!("sym on on not a str")) + } } fn truthy(&self) -> bool { - match self { - Form::Bool(b) => *b, - Form::Nil => false, - _ => true, + match self.data as usize & TAG_MASK { + TAG_NIL => false, + TAG_BOOL_FALSE => false, + TAG_BOOL_TRUE => true, + _ => true, } } fn bool(&self) -> Result { - match self { - Form::Bool(b) => Ok(*b), - _ => Err(anyhow!("bool on not a bool")), + match self.data as usize & TAG_MASK { + TAG_BOOL_FALSE => Ok(false), + TAG_BOOL_TRUE => Ok(true), + _ => Err(anyhow!("bool on not a bool")), } } - fn int(&self) -> Result { - match self { - Form::Int(i) => Ok(*i), - _ => Err(anyhow!("int on not a int")), - } - } - fn prim(&self) -> Result { - match self { - Form::Prim(p) => Ok(*p), - _ => Err(anyhow!("prim on not a prim")), - } - } - fn sym(&self) -> Result<&str> { - match self { - Form::Symbol(s) => Ok(s), - _ => Err(anyhow!("sym on not a sym")), - } - } - fn pair(&self) -> Result<(Rc,Rc)> { - match self { - Form::Pair(car, cdr) => Ok((Rc::clone(car),Rc::clone(cdr))), - _ => Err(anyhow!("pair on not a pair {self}")), - } - } - fn car(&self) -> Result> { - match self { - Form::Pair(car, _cdr) => Ok(Rc::clone(car)), - _ => Err(anyhow!("car on not a pair")), - } - } - fn cdr(&self) -> Result> { - match self { - Form::Pair(_car, cdr) => Ok(Rc::clone(cdr)), - _ => Err(anyhow!("cdr on not a pair")), + fn pair(&self) -> Result<(&Form,&Form)> { + if self.data as usize & TAG_MASK == TAG_PAIR { + let crc_ptr = (self.data as usize & !TAG_MASK) as *mut CrcInner; + Ok(unsafe { (&(*crc_ptr).data.car,&(*crc_ptr).data.cdr) }) + } else { + Err(anyhow!("pair on not a pair")) } } fn is_nil(&self) -> bool { - match self { - Form::Nil => true, - _ => false, + match self.data as usize & TAG_MASK { + TAG_NIL => true, + _ => false, } } - pub fn append(&self, x: Rc) -> Result> { - match self { - Form::Pair(car, cdr) => cdr.append(x).map(|x| Rc::new(Form::Pair(Rc::clone(car), x))), - Form::Nil => Ok(Rc::new(Form::Pair(x, Rc::new(Form::Nil)))), - _ => Err(anyhow!("append to not a pair")), + pub fn define(&self, s: &str, v: Form) -> Form { + Form::new_pair(Form::new_pair(Form::new_symbol(s), v), self.clone()) + } + pub fn append(&self, x: Form) -> Result { + match self.data as usize & TAG_MASK { + TAG_PAIR => self.cdr().unwrap().append(x).map(|x| Form::new_pair(self.car().unwrap().clone(), x)), + TAG_NIL => Ok(Form::new_pair(x, Form::new_nil())), + _ => Err(anyhow!("append to not a pair")), } } - pub fn root_env() -> Rc { + pub fn root_env() -> Form { let mut e = Form::new_nil(); for (s, v) in [ - ("+", Rc::new(Form::Prim(Prim::Add))), - ("-", Rc::new(Form::Prim(Prim::Sub))), - ("*", Rc::new(Form::Prim(Prim::Mul))), - ("/", Rc::new(Form::Prim(Prim::Div))), - ("%", Rc::new(Form::Prim(Prim::Mod))), - ("cons", Rc::new(Form::Prim(Prim::Cons))), - ("cdr", Rc::new(Form::Prim(Prim::Cdr))), - ("car", Rc::new(Form::Prim(Prim::Car))), - ("=", Rc::new(Form::Prim(Prim::Eq))), + ("+", Form::new_prim(Prim::Add)), + ("-", Form::new_prim(Prim::Sub)), + ("*", Form::new_prim(Prim::Mul)), + ("/", Form::new_prim(Prim::Div)), + ("%", Form::new_prim(Prim::Mod)), + ("cons", Form::new_prim(Prim::Cons)), + ("cdr", Form::new_prim(Prim::Cdr)), + ("car", Form::new_prim(Prim::Car)), + ("=", Form::new_prim(Prim::Eq)), ("nil", Form::new_nil()), ] { - e = e.define(s.to_string(), v); + e = e.define(s, v); } e } - pub fn lookup(self: &Rc, s: &str) -> Result> { - let mut e = Rc::clone(self); + pub fn lookup(&self, s: &str) -> Result<&Form> { + let mut e = self; loop { let (kv, ne) = e.pair()?; let (sp, v) = kv.pair()?; @@ -195,10 +452,156 @@ impl Form { e = ne; } } - pub fn define(self: &Rc, s: String, v: Rc) -> Rc { - Form::new_pair(Form::new_pair(Rc::new(Form::Symbol(s)), v), Rc::clone(self)) +} +impl Drop for Form { + fn drop(&mut self) { + match self.data as usize & TAG_MASK { + TAG_INT | TAG_NIL | TAG_BOOL_FALSE | TAG_BOOL_TRUE | TAG_PRIM | TAG_SYMBOL => { /*println!("dropping simple {self}"); */ }, // doing nothing for symbol is fine + // since it's deduplicated + TAG_PAIR => { + let _ = Crc::::from_ptr( (self.data as usize & !TAG_MASK) as *mut CrcInner ); + }, + TAG_CLOSURE => { + let _ = Crc::::from_ptr( (self.data as usize & !TAG_MASK) as *mut CrcInner ); + }, + _ => unreachable!(), + } } } +impl Clone for Form { + fn clone(&self) -> Self { + match self.data as usize & TAG_MASK { + TAG_INT | TAG_NIL | TAG_BOOL_FALSE | TAG_BOOL_TRUE | TAG_PRIM | TAG_SYMBOL => { Self { data: self.data, phantom: PhantomData } }, + TAG_PAIR => { + unsafe { (*((self.data as usize & !TAG_MASK) as *mut CrcInner)).increment(); } + Self { data: self.data, phantom: PhantomData } + }, + TAG_CLOSURE => { + unsafe { (*((self.data as usize & !TAG_MASK) as *mut CrcInner)).increment(); } + Self { data: self.data, phantom: PhantomData } + }, + _ => unreachable!(), + } + } +} +impl PartialEq for Form { + fn eq(&self, other: &Self) -> bool { + match self.data as usize & TAG_MASK { + TAG_INT | TAG_NIL | TAG_BOOL_FALSE | TAG_BOOL_TRUE | TAG_PRIM | TAG_SYMBOL => { self.data == other.data }, + TAG_PAIR => { + if other.data as usize & TAG_MASK != TAG_PAIR { + return false; + } + self.car().unwrap() == other.car().unwrap() && self.cdr().unwrap() == other.cdr().unwrap() + }, + TAG_CLOSURE => { + if other.data as usize & TAG_MASK != TAG_CLOSURE { + return false; + } + let Closure { params, e, body, id, } = self.closure().unwrap(); + let Closure { params: oparams, e: oe, body: obody, id: oid, } = self.closure().unwrap(); + params == oparams && e == oe && body == obody && id == oid + }, + _ => unreachable!(), + } + } +} +impl Eq for Form {} +impl fmt::Display for Form { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.data as usize & TAG_MASK { + TAG_INT => { + write!(f, "{}", self.data as isize >> 3)?; + }, + TAG_NIL => { + write!(f, "nil")?; + }, + TAG_BOOL_FALSE => { + write!(f, "false")?; + }, + TAG_BOOL_TRUE => { + write!(f, "true")?; + }, + TAG_PAIR => { + write!(f, "({}", self.car().unwrap())?; + let mut traverse = self.cdr().unwrap(); + loop { + match traverse.data as usize & TAG_MASK { + TAG_PAIR => { + write!(f, " {}", traverse.car().unwrap())?; + traverse = traverse.cdr().unwrap(); + }, + TAG_NIL => { + write!(f, ")")?; + return Ok(()); + }, + _ => { + write!(f, ". {traverse})")?; + return Ok(()); + } + } + } + }, + TAG_PRIM => { + write!(f, "{:?}", self.prim().unwrap())?; + }, + TAG_SYMBOL => { + write!(f, "'{}", self.sym().unwrap())?; + }, + TAG_CLOSURE => { + let Closure { params, e, body, id, } = self.closure().unwrap(); + write!(f, "<{params} {e} {body} {id}>")?; + }, + _ => unreachable!(), + } + Ok(()) + } +} +impl fmt::Debug for Form { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Form({self})") + } +} +impl From for Form { fn from(item: String) -> Self { Form::new_symbol(&item) } } +impl From<&str> for Form { fn from(item: &str) -> Self { Form::new_symbol(item) } } +impl From for Form { fn from(item: isize) -> Self { Form::new_int(item) } } +impl From for Form { fn from(item: bool) -> Self { Form::new_bool(item) } } +impl, B: Into> From<(A, B)> for Form { + fn from(item: (A, B)) -> Self { + Form::new_pair(item.0.into(), item.1.into()) + } +} + + +// This first Simple Lisp really is +// +// No fexprs, no mutation, no continuations, no macros, no strings. +// Int/Bool/Nil/Pair/Symbol/Closure/Prim. +// +// Figuring out GC between a JIT and Rust will be tricky. +// Can start with a like tracing-JIT-into-bytecode +// let's make our own Box, Crc, maybe Arc, Vec too? +// rustonomicon + +fn eval_prim(f: Prim, b: Form, a: Option) -> Result { + Ok(match f { + Prim::Car => b.car()?.clone(), + Prim::Cdr => b.cdr()?.clone(), + _ => { + let a = a.unwrap(); + match f { + Prim::Add => Form::new_int(a.int()? + b.int()?), + Prim::Sub => Form::new_int(a.int()? - b.int()?), + Prim::Mul => Form::new_int(a.int()? * b.int()?), + Prim::Div => Form::new_int(a.int()? / b.int()?), + Prim::Mod => Form::new_int(a.int()? % b.int()?), + Prim::Cons => Form::new_pair(a, b), + Prim::Eq => Form::new_bool(a == b), + _ => unreachable!(), + } + } + }) +} // JIT Decisions // JIT Closure vs JIT Closure-Template @@ -215,26 +618,39 @@ impl Form { // currently we basically just have lazy EBB bytecode construction // which I like! +#[derive(Debug)] +enum Op { + Guard { const_value: Form, side_val: Option, side_cont: Crc, side_id: ID, tbk: TraceBookkeeping }, + Debug, + Define { sym: String }, + Const (Form), + Drop, + Lookup { sym: String }, + Call { len: usize, statik: Option, nc: Crc, nc_id: ID }, + InlinePrim(Prim), + Tail(usize,Option), + Return, +} impl fmt::Display for Op { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Op::Guard { const_value, side_val:_, side_cont:_, side_id, tbk:_ } => write!(f, "Guard{side_id}({const_value})"), - Op::Debug => write!(f, "Debug"), - Op::Define { sym } => write!(f, "Define({sym})"), - Op::Const ( con ) => write!(f, "Const_{con}"), - Op::Drop => write!(f, "Drop"), - Op::Lookup { sym } => write!(f, "Lookup({sym})"), - Op::Call { len, nc:_, nc_id, statik } => write!(f, "Call{nc_id}({len},{statik:?})"), - Op::InlinePrim(prim) => write!(f, "{prim:?}"), - Op::Tail(len,oid) => write!(f, "Tail({len},{oid:?})"), - Op::Return => write!(f, "Return"), + Op::Debug => write!(f, "Debug"), + Op::Define { sym } => write!(f, "Define({sym})"), + Op::Const ( con ) => write!(f, "Const_{con}"), + Op::Drop => write!(f, "Drop"), + Op::Lookup { sym } => write!(f, "Lookup({sym})"), + Op::Call { len, nc:_, nc_id, statik } => write!(f, "Call{nc_id}({len},{statik:?})"), + Op::InlinePrim(prim) => write!(f, "{prim:?}"), + Op::Tail(len,oid) => write!(f, "Tail({len},{oid:?})"), + Op::Return => write!(f, "Return"), } } } impl Op { - fn cnst(&self) -> Result> { + fn cnst(&self) -> Result { match self { - Op::Const(c) => Ok(Rc::clone(c)), + Op::Const(c) => Ok(c.clone()), _ => Err(anyhow!("const on not a const")), } } @@ -245,18 +661,23 @@ struct TraceBookkeeping { stack_const: Vec, defined_names: BTreeSet, } -#[derive(Debug)] -enum Op { - Guard { const_value: Rc, side_val: Option>, side_cont: Rc, side_id: ID, tbk: TraceBookkeeping }, - Debug, - Define { sym: String }, - Const ( Rc ), - Drop, - Lookup { sym: String }, - Call { len: usize, statik: Option, nc: Rc, nc_id: ID }, - InlinePrim(Prim), - Tail(usize,Option), - Return, + +#[derive(Clone,Debug)] +enum Cont { + MetaRet, + Ret { id: ID, }, + Eval { c: Crc }, + Prim { s: &'static str, to_go: Form, c: Crc }, + Call { n: usize, to_go: Form, c: Crc }, + Frame { syms: Cvec, id: ID, c: Crc }, +} +impl Cont { + fn is_ret(&self) -> bool { + match self { + Cont::Ret { id: _ } => true, + _ => false, + } + } } #[derive(Debug)] @@ -290,7 +711,6 @@ impl fmt::Display for Trace { Ok(()) } } - #[derive(Debug)] struct Ctx { id_counter: i64, @@ -332,19 +752,20 @@ impl Ctx { // - tracing, Static,nontail-self- emit call (static) // - tracing, Dynamic, tail - emit tail // - tracing, Dynamic, non-tail - emit call - fn trace_call(&mut self, call_len: usize, tmp_stack: &Vec>, nc: &Rc) -> Option { + fn trace_call(&mut self, call_len: usize, tmp_stack: &Cvec, nc: &Crc) -> Option { // Needs to take and use parameters for mid-trace // needs to guard on function called if non-constant println!("trace_call call_len={call_len},trace={:?}, tmp_stack {tmp_stack:?}", self.tracing); if let Some(trace) = &mut self.tracing { - let statik = if trace.tbk.stack_const[trace.tbk.stack_const.len()-call_len] { // const - TODO: for now, we don't inline but we will want to later (based on what // metrics? can we run them simultaniously, heirarchially? with our new approach on // prims maybe (heck we may need to go farther, and remove the InlinePrim!) - match &*tmp_stack[tmp_stack.len()-call_len] { - Form::Prim(p) => { + let func = &tmp_stack[tmp_stack.len()-call_len]; + match func.data as usize & TAG_MASK { + TAG_PRIM => { + let p = func.prim().unwrap(); if (&trace.tbk.stack_const[trace.tbk.stack_const.len()-call_len..]).iter().all(|x| *x) { trace.tbk.stack_const.truncate(trace.tbk.stack_const.len()-call_len); let b = trace.ops[trace.ops.len()-1].cnst().unwrap(); @@ -355,17 +776,18 @@ impl Ctx { trace.ops.push(Op::Drop); } - trace.ops.push(Op::Const(eval_prim(*f, b, a).unwrap())); + trace.ops.push(Op::Const(eval_prim(f, b, a).unwrap())); trace.tbk.stack_const.push(true); } else { trace.tbk.stack_const.truncate(trace.tbk.stack_const.len()-call_len); - trace.ops.push(Op::InlinePrim(*p)); + trace.ops.push(Op::InlinePrim(p)); trace.tbk.stack_const.push(false); } return None; }, - Form::Closure(_ps, _e, _b, id) => { + TAG_CLOSURE => { + let Closure { id, .. } = func.closure().unwrap(); if nc.is_ret() { if *id == trace.tbk.func_id { // we removed the loop opcode because this trace needs to know the @@ -397,7 +819,7 @@ impl Ctx { } else { trace.tbk.stack_const.truncate(trace.tbk.stack_const.len()-call_len); self.id_counter += 1; let nc_id = ID { id: self.id_counter }; // HACK - I can't use the method cuz trace is borrowed - trace.ops.push(Op::Call { len: call_len, statik, nc: Rc::clone(nc), nc_id }); + trace.ops.push(Op::Call { len: call_len, statik, nc: Crc::clone(nc), nc_id }); println!("Ending trace at call!"); println!("\t{}", trace); self.trace_resume_data.insert(nc_id, trace.tbk.clone()); @@ -407,7 +829,7 @@ impl Ctx { } None } - fn trace_frame(&mut self, syms: &Vec, id: ID) { + fn trace_frame(&mut self, syms: &Cvec, id: ID) { let inline = self.tracing.is_some(); let entry = self.cont_count.entry(id).or_insert(0); println!("tracing call start for {id}, has been called {} times so far", *entry); @@ -421,6 +843,24 @@ impl Ctx { } self.trace_drop(inline); } + fn trace_define(&mut self, sym: &str, pop: bool) { + if let Some(trace) = &mut self.tracing { + trace.ops.push(Op::Define { sym: sym.to_owned() }); + trace.tbk.defined_names.insert(sym.to_owned()); + if pop { + trace.tbk.stack_const.pop().unwrap(); + } + + } + } + fn trace_drop(&mut self, pop: bool) { + if let Some(trace) = &mut self.tracing { + trace.ops.push(Op::Drop); + if pop { + trace.tbk.stack_const.pop().unwrap(); + } + } + } fn trace_call_end(&mut self, id: ID, follow_on_trace_data: Option) { println!("tracing call end for {id} followon {follow_on_trace_data:?}"); if let Some(trace) = &mut self.tracing { @@ -448,12 +888,12 @@ impl Ctx { } } } - fn trace_guard + std::fmt::Debug>(&mut self, value: T, other: impl Fn()->(Option>,Rc)) { + fn trace_guard + std::fmt::Debug>(&mut self, value: T, other: impl Fn()->(Option,Crc)) { println!("Tracing guard {value:?}"); if let Some(trace) = &mut self.tracing { let (side_val, side_cont) = other(); self.id_counter += 1; let side_id = ID { id: self.id_counter }; // HACK - I can't use the method cuz trace is borrowed - trace.ops.push(Op::Guard { const_value: Rc::new(value.into()), side_val, side_cont, side_id, tbk: trace.tbk.clone() }); + trace.ops.push(Op::Guard { const_value: value.into(), side_val, side_cont, side_id, tbk: trace.tbk.clone() }); } } fn trace_debug(&mut self) { @@ -461,17 +901,7 @@ impl Ctx { trace.ops.push(Op::Debug); } } - fn trace_define(&mut self, sym: &str, pop: bool) { - if let Some(trace) = &mut self.tracing { - trace.ops.push(Op::Define { sym: sym.to_owned() }); - trace.tbk.defined_names.insert(sym.to_owned()); - if pop { - trace.tbk.stack_const.pop().unwrap(); - } - - } - } - fn trace_lookup(&mut self, s: &str, f: &Rc) { + fn trace_lookup(&mut self, s: &str, f: &Form) { if let Some(trace) = &mut self.tracing { // constant depends on which env, and I think this is the only spot that cares for // closure jit vs lambda jit @@ -479,40 +909,32 @@ impl Ctx { trace.ops.push(Op::Lookup { sym: s.to_owned() }); trace.tbk.stack_const.push(false); } else { - trace.ops.push(Op::Const(Rc::clone(f))); + trace.ops.push(Op::Const(f.clone())); trace.tbk.stack_const.push(true); } } } - fn trace_drop(&mut self, pop: bool) { + fn trace_constant(&mut self, c: &Form) { if let Some(trace) = &mut self.tracing { - trace.ops.push(Op::Drop); - if pop { - trace.tbk.stack_const.pop().unwrap(); - } - } - } - - fn trace_constant(&mut self, c: &Rc) { - if let Some(trace) = &mut self.tracing { - trace.ops.push(Op::Const(Rc::clone(c))); + trace.ops.push(Op::Const(c.clone())); trace.tbk.stack_const.push(true); } } - fn trace_lambda(&mut self, _params: &[String], _e: &Rc, _body: &Rc) { + fn trace_lambda(&mut self, _params: &[String], _e: &Form, _body: &Form) { if let Some(_trace) = &mut self.tracing { // TODO // kinda both also unimplemented!("trace lambda"); } } - // returns f, e, c for interp fn execute_trace_if_exists(&mut self, id: ID, - e: &Rc, - tmp_stack: &mut Vec>, - ret_stack: &mut Vec<(Rc, Rc, Option)>) -> Result, Rc, Cont)>> { + e: &Form, + tmp_stack: &mut Cvec, + ret_stack: &mut Cvec<(Form, Crc, Option)>) -> Result> { + Ok(None) + /* if self.trace_running() { println!("Not playing back trace because recording trace"); return Ok(None); // can't trace while running a trace for now (we don't inline now anyway), @@ -521,7 +943,7 @@ impl Ctx { } if let Some(mut trace) = self.traces.get(&id) { println!("Starting trace playback"); - let mut e = Rc::clone(e); + let mut e = Crc::clone(e); loop { println!("Running trace {trace}, \n\ttmp_stack:{tmp_stack:?}"); for b in trace.ops.iter() { @@ -541,7 +963,7 @@ impl Ctx { assert!(self.tracing.is_none()); let mut ntrace = Trace::follow_on(*side_id,tbk.clone()); if let Some(side_val) = side_val { - *tmp_stack.last_mut().unwrap() = Rc::clone(side_val); + *tmp_stack.last_mut().unwrap() = Crc::clone(side_val); *ntrace.tbk.stack_const.last_mut().unwrap() = false; // this might be able to be // more precise, actually } @@ -560,7 +982,7 @@ impl Ctx { } Op::Const ( con ) => { println!("Const(op) {con}"); - tmp_stack.push(Rc::clone(con)); + tmp_stack.push(Crc::clone(con)); } Op::Drop => { println!("Drop(op) {}", tmp_stack.last().unwrap()); @@ -581,25 +1003,25 @@ impl Ctx { println!("Call(op)"); if let Some(static_call_id) = statik { if let Some(new_trace) = self.traces.get(static_call_id) { - ret_stack.push((Rc::clone(&e), (*nc).clone(), Some(*nc_id))); + ret_stack.push((Crc::clone(&e), (*nc).clone(), Some(*nc_id))); println!("\tchaining to call trace b/c Call with statik"); trace = new_trace; break; // break out of this trace and let infinate loop spin } } - match &*Rc::clone(&tmp_stack[tmp_stack.len()-*len]) { + match &*Crc::clone(&tmp_stack[tmp_stack.len()-*len]) { Form::Closure(ps, ie, b, call_id) => { if ps.len() != *len-1 { bail!("arguments length doesn't match"); } - ret_stack.push((Rc::clone(&e), (*nc).clone(), Some(*nc_id))); + ret_stack.push((Crc::clone(&e), (*nc).clone(), Some(*nc_id))); if let Some(new_trace) = self.traces.get(call_id) { println!("\tchaining to call trace b/c Call with dyamic but traced"); - e = Rc::clone(ie); + e = Crc::clone(ie); trace = new_trace; break; // break out of this trace and let infinate loop spin } else { - return Ok(Some((Rc::clone(&b), Rc::clone(ie), Cont::Frame { syms: ps.clone(), id: *call_id, c: Rc::new(Cont::Eval { c: Rc::new(Cont::Ret { id: *call_id }) }) }))); + return Ok(Some((Crc::clone(&b), Crc::clone(ie), Cont::Frame { syms: ps.clone(), id: *call_id, c: Crc::new(Cont::Eval { c: Crc::new(Cont::Ret { id: *call_id }) }) }))); } }, Form::Prim(p) => { @@ -649,34 +1071,18 @@ impl Ctx { } else { Ok(None) } - } -} -#[derive(Clone,Debug)] -enum Cont { - MetaRet, - Ret { id: ID, }, - Eval { c: Rc }, - Prim { s: &'static str, to_go: Rc, c: Rc }, - Call { n: usize, to_go: Rc, c: Rc }, - Frame { syms: Vec, id: ID, c: Rc }, -} -impl Cont { - fn is_ret(&self) -> bool { - match self { - Cont::Ret { id: _ } => true, - _ => false, - } + */ } } -pub fn eval(f: Rc) -> Result> { +pub fn eval(f: Form) -> Result { let mut ctx = Ctx::new(); let mut f = f; let mut e = Form::root_env(); - let mut c = Cont::Eval { c: Rc::new(Cont::MetaRet) }; + let mut c = Cont::Eval { c: Crc::new(Cont::MetaRet) }; - let mut ret_stack: Vec<(Rc, Rc, Option)> = vec![]; - let mut tmp_stack: Vec> = vec![]; + let mut ret_stack: Cvec<(Form, Crc, Option)> = Cvec::new(); + let mut tmp_stack: Cvec = Cvec::new(); loop { match c { @@ -691,13 +1097,13 @@ pub fn eval(f: Rc) -> Result> { let thn = to_go.car()?; let els = to_go.cdr()?.car()?; if f.truthy() { - ctx.trace_guard(true, || (Some(Rc::clone(&els)), Rc::new(Cont::Eval { c: Rc::clone(&nc) }))); + ctx.trace_guard(true, || (Some(els.clone()), Crc::new(Cont::Eval { c: Crc::clone(&nc) }))); ctx.trace_drop(true); - f = thn; + f = thn.clone(); } else { - ctx.trace_guard(false, ||(Some(Rc::clone(&thn)), Rc::new(Cont::Eval { c: Rc::clone(&nc) }))); + ctx.trace_guard(false, ||(Some(thn.clone()), Crc::new(Cont::Eval { c: Crc::clone(&nc) }))); ctx.trace_drop(true); - f = els; + f = els.clone(); } c = Cont::Eval { c: nc }; }, @@ -706,10 +1112,10 @@ pub fn eval(f: Rc) -> Result> { if !f.truthy() { ctx.trace_guard(false, || (None, nc.clone())); ctx.trace_drop(true); - f = other; + f = other.clone(); c = Cont::Eval { c: nc }; } else { - ctx.trace_guard(true, || (Some(Rc::clone(&other)), Rc::new(Cont::Eval { c: Rc::clone(&nc) }))); + ctx.trace_guard(true, || (Some(other.clone()), Crc::new(Cont::Eval { c: Crc::clone(&nc) }))); c = (*nc).clone(); } }, @@ -718,10 +1124,10 @@ pub fn eval(f: Rc) -> Result> { if f.truthy() { ctx.trace_guard(true, || (None, nc.clone())); ctx.trace_drop(true); - f = other; + f = other.clone(); c = Cont::Eval { c: nc }; } else { - ctx.trace_guard(false, || (Some(Rc::clone(&other)), Rc::new(Cont::Eval { c: Rc::clone(&nc) }))); + ctx.trace_guard(false, || (Some(other.clone()), Crc::new(Cont::Eval { c: Crc::clone(&nc) }))); c = (*nc).clone(); } }, @@ -730,8 +1136,8 @@ pub fn eval(f: Rc) -> Result> { c = (*nc).clone(); } else { ctx.trace_drop(true); - f = to_go.car()?; - c = Cont::Eval { c: Rc::new(Cont::Prim { s: "begin", to_go: to_go.cdr()?, c: nc }) }; + f = to_go.car()?.clone(); + c = Cont::Eval { c: Crc::new(Cont::Prim { s: "begin", to_go: to_go.cdr()?.clone(), c: nc }) }; } }, "debug" => { @@ -740,9 +1146,9 @@ pub fn eval(f: Rc) -> Result> { c = (*nc).clone(); }, "define" => { - let sym = to_go.sym()?.to_string(); + let sym = to_go.sym()?; ctx.trace_define(&sym, true); - e = e.define(sym, Rc::clone(&f)); + e = e.define(sym, f.clone()); c = (*nc).clone(); }, _ => { @@ -772,12 +1178,22 @@ pub fn eval(f: Rc) -> Result> { tmp_stack.push(f); if to_go.is_nil() { let resume_data = ctx.trace_call(n, &mut tmp_stack, &nc); - match &*Rc::clone(&tmp_stack[tmp_stack.len()-n]) { - Form::Closure(ps, ie, b, id) => { + let func = tmp_stack[tmp_stack.len()-n].clone(); + match func.data as usize & TAG_MASK { + TAG_PRIM => { + let p = func.prim().unwrap(); + let b = tmp_stack.pop().unwrap(); + let a = if n == 2 { None } else { assert!(n == 3); Some(tmp_stack.pop().unwrap()) }; + f = eval_prim(p, b, a)?; + tmp_stack.pop().unwrap(); // for the prim itself + c = (*nc).clone(); + }, + TAG_CLOSURE => { + let Closure { params: ps, e: ie, body: b, id, } = func.closure().unwrap(); if ps.len() != n-1 { bail!("arguments length doesn't match"); } - ret_stack.push((Rc::clone(&e), nc, resume_data)); + ret_stack.push((e.clone(), nc, resume_data)); if let Some((fp, ep, cp)) = ctx.execute_trace_if_exists(*id, ie, &mut tmp_stack, &mut ret_stack)? { f = fp; e = ep; @@ -785,96 +1201,104 @@ pub fn eval(f: Rc) -> Result> { println!("After executing trace, f={f}, tmp_stack is {tmp_stack:?}"); } else { println!("replacing {e} with {ie}"); - e = Rc::clone(ie); - c = Cont::Frame { syms: ps.clone(), id: *id, c: Rc::new(Cont::Eval { c: Rc::new(Cont::Ret { id: *id }) }) }; - f = Rc::clone(&b); + e = ie.clone(); + c = Cont::Frame { syms: ps.clone(), id: *id, c: Crc::new(Cont::Eval { c: Crc::new(Cont::Ret { id: *id }) }) }; + f = b.clone(); } }, - Form::Prim(p) => { - let b = tmp_stack.pop().unwrap(); - let a = if n == 2 { None } else { assert!(n == 3); Some(tmp_stack.pop().unwrap()) }; - f = eval_prim(*p, b, a)?; - tmp_stack.pop().unwrap(); // for the prim itself - c = (*nc).clone(); - }, ncomb => { println!("Current stack is {tmp_stack:?}"); bail!("tried to call a non-comb {ncomb}") }, } } else { - f = to_go.car()?; - c = Cont::Eval { c: Rc::new(Cont::Call { n: n+1, to_go: to_go.cdr()?, c: nc }) }; + f = to_go.car()?.clone(); + c = Cont::Eval { c: Crc::new(Cont::Call { n: n+1, to_go: to_go.cdr()?.clone(), c: nc }) }; } } Cont::Frame { syms, id, c: nc } => { ctx.trace_frame(&syms, id); for s in syms.into_iter().rev() { - e = e.define(s, tmp_stack.pop().unwrap()); + e = e.define(&s, tmp_stack.pop().unwrap()); } tmp_stack.pop().unwrap(); // for the func value c = (*nc).clone(); } Cont::Eval { c: nc } => { let tmp = f; - match &*tmp { - Form::Symbol(s) => { - f = e.lookup(s)?; + match tmp.data as usize & TAG_MASK { + TAG_SYMBOL => { + let s = tmp.sym().unwrap(); + f = e.lookup(s)?.clone(); ctx.trace_lookup(s, &f); c = (*nc).clone(); }, - Form::Pair(car, cdr) => { - match &**car { - Form::Symbol(s) if s == "if" => { - f = cdr.car()?; - c = Cont::Eval { c: Rc::new(Cont::Prim { s: "if", to_go: cdr.cdr()?, c: nc }) }; - } - // and/or has to short-circut, so special form - // just like Scheme (bad ;) ) - Form::Symbol(s) if s == "or" => { - f = cdr.car()?; - c = Cont::Eval { c: Rc::new(Cont::Prim { s: "or", to_go: cdr.cdr()?, c: nc }) }; - } - Form::Symbol(s) if s == "and" => { - f = cdr.car()?; - c = Cont::Eval { c: Rc::new(Cont::Prim { s: "and", to_go: cdr.cdr()?, c: nc }) }; - } - Form::Symbol(s) if s == "begin" => { - f = cdr.car()?; - c = Cont::Eval { c: Rc::new(Cont::Prim { s: "begin", to_go: cdr.cdr()?, c: nc }) }; - } - Form::Symbol(s) if s == "debug" => { - f = cdr.car()?; - c = Cont::Eval { c: Rc::new(Cont::Prim { s: "debug", to_go: cdr.cdr()?, c: nc }) }; - } - Form::Symbol(s) if s == "define" => { - // note the swap, evaluating the second not the first (define a value..) - f = cdr.cdr()?.car()?; - c = Cont::Eval { c: Rc::new(Cont::Prim { s: "define", to_go: cdr.car()?, c: nc }) }; - } - Form::Symbol(s) if s == "quote" => { - f = cdr.car()?; - ctx.trace_constant(&f); - c = (*nc).clone(); - } - // (lambda (a b) body) - Form::Symbol(s) if s == "lambda" => { - let mut params_vec = vec![]; - let mut params = cdr.car()?; - while let Ok((ncar, ncdr)) = params.pair() { - params_vec.push(ncar.sym()?.to_string()); - params = ncdr; + TAG_PAIR => { + let (car, cdr) = tmp.pair().unwrap(); + match car.data as usize & TAG_MASK { + TAG_SYMBOL => { + let s = car.sym().unwrap(); + match s { + "if" => { + f = cdr.car()?.clone(); + c = Cont::Eval { c: Crc::new(Cont::Prim { s: "if", to_go: cdr.cdr()?.clone(), c: nc }) }; + continue; + } + // and/or has to short-circut, so special form + // just like Scheme (bad ;) ) + "or" => { + f = cdr.car()?.clone(); + c = Cont::Eval { c: Crc::new(Cont::Prim { s: "or", to_go: cdr.cdr()?.clone(), c: nc }) }; + continue; + } + "and" => { + f = cdr.car()?.clone(); + c = Cont::Eval { c: Crc::new(Cont::Prim { s: "and", to_go: cdr.cdr()?.clone(), c: nc }) }; + continue; + } + "begin" => { + f = cdr.car()?.clone(); + c = Cont::Eval { c: Crc::new(Cont::Prim { s: "begin", to_go: cdr.cdr()?.clone(), c: nc }) }; + continue; + } + "debug" => { + f = cdr.car()?.clone(); + c = Cont::Eval { c: Crc::new(Cont::Prim { s: "debug", to_go: cdr.cdr()?.clone(), c: nc }) }; + continue; + } + "define" => { + // note the swap, evaluating the second not the first (define a value..) + f = cdr.cdr()?.car()?.clone(); + c = Cont::Eval { c: Crc::new(Cont::Prim { s: "define", to_go: cdr.car()?.clone(), c: nc }) }; + continue; + } + "quote" => { + f = cdr.car()?.clone(); + ctx.trace_constant(&f); + c = (*nc).clone(); + continue; + } + // (lambda (a b) body) + "lambda" => { + let mut params_vec = Cvec::new(); + let mut params = cdr.car()?; + while let Ok((ncar, ncdr)) = params.pair() { + params_vec.push(ncar.sym()?.to_string()); + params = ncdr; + } + let body = cdr.cdr()?.car()?; + ctx.trace_lambda(¶ms_vec, &e, &body); + f = Form::new_closure(params_vec, e.clone(), body.clone(), &mut ctx); + c = (*nc).clone(); + continue; + } + _ => { /* fallthrough */ } } - let body = cdr.cdr()?.car()?; - ctx.trace_lambda(¶ms_vec, &e, &body); - f = Form::new_closure(params_vec, Rc::clone(&e), body, &mut ctx); - c = (*nc).clone(); - } - _ => { - f = Rc::clone(car); - c = Cont::Eval { c: Rc::new(Cont::Call { n: 1, to_go: Rc::clone(cdr), c: nc }) }; } + _ => { /* fallthrough */ } } + f = car.clone(); + c = Cont::Eval { c: Crc::new(Cont::Call { n: 1, to_go: cdr.clone(), c: nc }) }; }, _ => { // value, no eval @@ -887,60 +1311,3 @@ pub fn eval(f: Rc) -> Result> { } } } - -impl From for Form { fn from(item: String) -> Self { Form::Symbol(item) } } -impl From<&str> for Form { fn from(item: &str) -> Self { Form::Symbol(item.to_owned()) } } -impl From for Form { fn from(item: i32) -> Self { Form::Int(item) } } -impl From for Form { fn from(item: bool) -> Self { Form::Bool(item) } } -impl, B: Into> From<(A, B)> for Form { - fn from(item: (A, B)) -> Self { - Form::Pair(Rc::new(item.0.into()), Rc::new(item.1.into())) - } -} - -impl fmt::Display for Form { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Form::Nil => write!(f, "nil"), - Form::Int(i) => write!(f, "{i}"), - Form::Bool(b) => write!(f, "{b}"), - Form::Symbol(s) => write!(f, "'{s}"), - Form::Pair(car, cdr) => { - write!(f, "({}", car)?; - let mut traverse: Rc = Rc::clone(cdr); - loop { - match &*traverse { - Form::Pair(ref carp, ref cdrp) => { - write!(f, " {}", carp)?; - traverse = Rc::clone(cdrp); - }, - Form::Nil => { - write!(f, ")")?; - return Ok(()); - }, - x => { - write!(f, ". {x})")?; - return Ok(()); - }, - } - } - }, - Form::Closure(params, _inner_env, _code, id) => { - write!(f, "", id, params) - } - Form::Prim(p) => { - match p { - Prim::Add => write!(f, "+"), - Prim::Sub => write!(f, "-"), - Prim::Mul => write!(f, "*"), - Prim::Div => write!(f, "/"), - Prim::Mod => write!(f, "%"), - Prim::Cons => write!(f, "cons"), - Prim::Car => write!(f, "car"), - Prim::Cdr => write!(f, "cdr"), - Prim::Eq => write!(f, "="), - } - } - } - } -} diff --git a/slj/src/main.rs b/slj/src/main.rs index b6f072a..26a8d13 100644 --- a/slj/src/main.rs +++ b/slj/src/main.rs @@ -1,386 +1,11 @@ #[macro_use] extern crate lalrpop_util; lalrpop_mod!(pub grammar); -use std::fmt; -use std::collections::BTreeMap; -use std::sync::Mutex; -use std::marker::PhantomData; -use std::ops::Deref; -use std::ptr::{self, NonNull}; -use std::mem::{self, ManuallyDrop}; -use std::alloc::{self, Layout}; -use std::cell::Cell; -//use std::rc::Rc; - -use once_cell::sync::Lazy; use anyhow::Result; -use sl::eval; +use sl::{eval,Form,Crc,Cvec,Prim,ID}; -#[repr(C)] -pub struct Vec { - ptr: NonNull, - cap: usize, - len: usize, -} -unsafe impl Send for Vec {} -unsafe impl Sync for Vec {} -impl Vec { - pub fn new() -> Self { - assert!(mem::size_of::() != 0, "no ZST"); - Vec { - ptr: NonNull::dangling(), - len: 0, - cap: 0, - } - } - fn grow(&mut self) { - let (new_cap, new_layout) = if self.cap == 0 { - (1, Layout::array::(1).unwrap()) - } else { - let new_cap = 2 * self.cap; - let new_layout = Layout::array::(new_cap).unwrap(); - (new_cap, new_layout) - }; - assert!(new_layout.size() <= isize::MAX as usize, "allocation too large"); - let new_ptr = if self.cap == 0 { - unsafe { alloc::alloc(new_layout) } - } else { - let old_layout = Layout::array::(self.cap).unwrap(); - let old_ptr = self.ptr.as_ptr() as *mut u8; - unsafe { alloc::realloc(old_ptr, old_layout, new_layout.size()) } - }; - self.ptr = match NonNull::new(new_ptr as *mut T) { - Some(p) => p, - None => alloc::handle_alloc_error(new_layout), - }; - self.cap = new_cap; - } - pub fn push(&mut self, elem: T) { - if self.len == self.cap { self.grow(); } - unsafe { - ptr::write(self.ptr.as_ptr().add(self.len), elem); - } - self.len += 1; - } - pub fn pop(&mut self) -> Option { - if self.len == 0 { - None - } else { - self.len -= 1; - unsafe { - Some(ptr::read(self.ptr.as_ptr().add(self.len))) - } - } - } -} -impl Drop for Vec { - fn drop(&mut self) { - if self.cap != 0 { - while let Some(_) = self.pop() {} - let layout = Layout::array::(self.cap).unwrap(); - unsafe { - alloc::dealloc(self.ptr.as_ptr() as *mut u8, layout); - } - } - } -} -impl Deref for Vec { - type Target = [T]; - fn deref(&self) -> &[T] { - unsafe { - std::slice::from_raw_parts(self.ptr.as_ptr(), self.len) - } - } -} -// insert, remove, into_iter, and drain all missing -impl fmt::Display for Vec { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "[")?; - for x in self.iter() { - write!(f, " {}", x)?; - } - write!(f, " ]")?; - Ok(()) - } -} - - -#[repr(C)] -pub struct Rc { - ptr: NonNull>, - phantom: PhantomData> -} -#[repr(C)] -pub struct RcInner { - rc: Cell, - data: T, -} -impl Rc { - pub fn new(data: T) -> Rc { - let boxed = Box::new(RcInner { rc: Cell::new(1), data }); - Rc { - ptr: NonNull::new(Box::into_raw(boxed)).unwrap(), - phantom: PhantomData, - } - } - pub fn into_ptr(self) -> *mut RcInner { - ManuallyDrop::new(self).ptr.as_ptr() as *mut RcInner - } - pub fn from_ptr(ptr: *mut RcInner) -> Self { - Rc { - ptr: NonNull::new(ptr).unwrap(), - phantom: PhantomData, - } - } -} -unsafe impl Send for Rc {} -unsafe impl Sync for Rc {} -impl Deref for Rc { - type Target = T; - fn deref(&self) -> &T { - let inner = unsafe { self.ptr.as_ref() }; - &inner.data - } -} -impl Clone for Rc { - fn clone(&self) -> Rc { - let inner = unsafe { self.ptr.as_ref() }; - let old = inner.rc.get(); - inner.rc.set(old + 1); - if old > isize::MAX as usize { - std::process::abort(); - } - Self { - ptr: self.ptr, - phantom: PhantomData, - } - } -} -impl Drop for Rc { - fn drop(&mut self) { - let inner = unsafe { self.ptr.as_mut() }; - let old = inner.rc.get(); - inner.rc.set(old - 1); - if old != 1 { - return; - } - unsafe { drop(Box::from_raw(self.ptr.as_ptr())); } - } -} - -#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy)] -#[repr(transparent)] -pub struct ID { - id: i64 -} -impl fmt::Display for ID { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.id) - } -} - -#[repr(C)] -struct Closure { - params: Vec, - e: Form, - body: Form, - id: ID, -} - -#[repr(C)] -struct Form { - data: *const Form, - phantom: PhantomData -} -#[repr(C)] -struct FormPair { - car: Form, - cdr: Form, -} -#[derive(Debug, Eq, PartialEq, Clone, Copy)] -#[repr(usize)] -pub enum Prim { - Add, - Sub, - Mul, - Div, - Mod, - Eq, - Cons, - Car, - Cdr, -} -impl Prim { - fn two_params(self) -> bool { - match self { - Prim::Car | Prim::Cdr => false, - _ => true, - } - } -} -/* - * this better be a 64 bit platform - * huh, if we only support i32s, then we have a lot more room for tags - * 8 byte alignment gets us 3 bits, or uh 8 options - * we'll choose 000 for ints to make math easy - * - * 000 - Int - * 001 - Nil - * 010 - Bool(false) // this is needlessly wasteful of the bits but hay - should take one of them over as a String probs - * 011 - Bool(true) - * 100 - Symbol - will want to convert into an Rc around a StringRawParts struct - * 101 - Pair - an Rc around a Pair struct - * 110 - Closure- eek: Closure(Vec, Rc, Rc, ID), - * xxxx 111 - Prim (xxxx for which one) - * - * I don't actually think we need our own repr(C) Vec implementation, at least not for now - we can - * make do with a VecRawParts struct (without implementations) - * Hay I did it anyway - * - * in both cases, StringRawParts and VecRawParts, we can rebuild slices from the raw parts for - * read-only access, which is all we need (until Drop, at which point we should re-constitute them - * from their raw parts, which is stable) - * - * For symbols, it would actually make sense to create the String, then leak it so it lasts for the - * program, then deduplicate to it and pass the static const slice around - * Could even fit entirely in the Form if the max length of a symbol is 2^16 - */ -const TAG_OFFSET: usize = 3; - -const SYM_LEN_OFFSET: usize = 3; -const SYM_LEN_MASK: usize = 0xFF; // could be bigger -const SYM_PTR_OFFSET: usize = 11; - -const TAG_MASK: usize = 0b111; -const TAG_INT: usize = 0b000; -const TAG_NIL: usize = 0b001; -const TAG_BOOL_FALSE: usize = 0b010; -const TAG_BOOL_TRUE: usize = 0b011; -const TAG_SYMBOL: usize = 0b100; -const TAG_PAIR: usize = 0b101; -const TAG_CLOSURE: usize = 0b110; -const TAG_PRIM: usize = 0b111; - - -static SYMBOLS: Lazy>> = Lazy::new(Mutex::default); - -impl Form { - fn new_int(x: isize) -> Self { - Self { data: (x << TAG_OFFSET) as *const Form, phantom: PhantomData } - } - fn new_nil() -> Self { - Self { data: TAG_NIL as *const Form, phantom: PhantomData } - } - fn new_bool(b: bool) -> Self { - Self { data: (if b { TAG_BOOL_TRUE } else { TAG_BOOL_FALSE }) as *const Form, phantom: PhantomData } - } - fn new_pair(car: Form, cdr: Form) -> Self { - let p = Rc::new(FormPair { car, cdr }).into_ptr() as usize; - assert!(p & TAG_MASK == 0); - Self { data: (p | TAG_PAIR) as *const Form, phantom: PhantomData } - } - fn new_closure(params: Vec, e: Form, body: Form, id: ID) -> Self { - let p = Rc::new(Closure { params, e, body, id }).into_ptr() as usize; - assert!(p & TAG_MASK == 0); - Self { data: (p | TAG_CLOSURE) as *const Form, phantom: PhantomData } - } - fn new_prim(p: Prim) -> Self { - Self { data: (((p as usize) << TAG_OFFSET) | TAG_PRIM) as *const Form, phantom: PhantomData } - } - fn new_symbol(s: &str) -> Form { - assert!(s.len() < SYM_LEN_MASK); - let mut symbols = SYMBOLS.lock().unwrap(); - let ds = if let Some(ds) = symbols.get(s) { - ds - } else { - // here we leak the memory of a new owned copy of s, - // and then transmute it into an &'static str that we keep in our global - // map for deduplication. Spicy stuff. - let mut value = ManuallyDrop::new(s.to_owned()); - value.shrink_to_fit(); - let slice = unsafe { std::mem::transmute(value.as_str()) }; - symbols.insert(s.to_owned(), slice); - slice - }; - Self { data: (((ds.as_ptr() as usize) << SYM_PTR_OFFSET) | (ds.len() << SYM_LEN_OFFSET) | TAG_SYMBOL) as *const Form, phantom: PhantomData } - } - - fn car(&self) -> &Form { - assert!(self.data as usize & TAG_MASK == TAG_PAIR); - unsafe { &(*((self.data as usize & !TAG_MASK) as *mut RcInner)).data.car } - } - fn cdr(&self) -> &Form { - assert!(self.data as usize & TAG_MASK == TAG_PAIR); - unsafe { &(*((self.data as usize & !TAG_MASK) as *mut RcInner)).data.cdr } - } - fn closure(&self) -> &Closure { - assert!(self.data as usize & TAG_MASK == TAG_CLOSURE); - unsafe { &(*((self.data as usize & !TAG_MASK) as *mut RcInner)).data } - } - fn prim(&self) -> Prim { - assert!(self.data as usize & TAG_MASK == TAG_PRIM); - unsafe { *(&((self.data as usize) >> TAG_OFFSET) as *const usize as *const Prim) } - } - fn str(&self) -> &str { - assert!(self.data as usize & TAG_MASK == TAG_SYMBOL); - let len = ((self.data as usize) >> SYM_LEN_OFFSET) & SYM_LEN_OFFSET; - let ptr = ((self.data as usize) >> SYM_PTR_OFFSET) as *const u8; - std::str::from_utf8(unsafe { std::slice::from_raw_parts(ptr, len) }).unwrap() - } -} -impl Drop for Form { - fn drop(&mut self) { - match self.data as usize & TAG_MASK { - TAG_INT | TAG_NIL | TAG_BOOL_FALSE | TAG_BOOL_TRUE | TAG_PRIM | TAG_SYMBOL => { println!("dropping simple {self}"); }, // doing nothing for symbol is fine - // since it's deduplicated - TAG_PAIR => { - let _ = Rc::::from_ptr( (self.data as usize & !TAG_MASK) as *mut RcInner ); - }, - TAG_CLOSURE => { - let _ = Rc::::from_ptr( (self.data as usize & !TAG_MASK) as *mut RcInner ); - }, - _ => unreachable!(), - } - } -} -impl fmt::Display for Form { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.data as usize & TAG_MASK { - TAG_INT => { - write!(f, "{}", self.data as isize >> 3)?; - }, - TAG_NIL => { - write!(f, "nil")?; - }, - TAG_BOOL_FALSE => { - write!(f, "false")?; - }, - TAG_BOOL_TRUE => { - write!(f, "true")?; - }, - TAG_PAIR => { - write!(f, "({} . {}", self.car(), self.cdr())?; - }, - TAG_PRIM => { - write!(f, "{:?}", self.prim())?; - }, - TAG_SYMBOL => { - write!(f, "{}", self.str())?; - }, - TAG_CLOSURE => { - let Closure { params, e, body, id, } = self.closure(); - write!(f, "<{params} {e} {body} {id}>")?; - }, - _ => unreachable!(), - } - Ok(()) - } -} - - - -fn alias(a: Rc, b: Rc) { +fn alias(a: Crc, b: Crc) { println!("a: {}, b: {}", *a, *b); } @@ -388,11 +13,14 @@ fn main() -> Result<()> { // our Form shennigins will only work on 64 bit platforms assert!(std::mem::size_of::() == 8); - let x = Rc::new(1); - alias(Rc::clone(&x), x); - let rc_u64_size = std::mem::size_of::>(); + let x = Crc::new(1); + alias(Crc::clone(&x), x); + let rc_u64_size = std::mem::size_of::>(); assert!(rc_u64_size == 8); - println!("for our Rc, we have size {}", rc_u64_size); + println!("for our Crc, we have size {}", rc_u64_size); + + let begn = Form::new_symbol("begin"); + println!("this should be begin {begn}"); let i = Form::new_int(23); let n = Form::new_nil(); @@ -407,14 +35,13 @@ fn main() -> Result<()> { let s = Form::new_symbol("woopwpp"); - let mut params = Vec::new(); + let mut params = Cvec::new(); params.push("a".to_owned()); params.push("b".to_owned()); - let c = Form::new_closure(params, Form::new_nil(), Form::new_nil(), ID { id: 9 }); - println!("{i} {n} {bf} {bt} {p} {pra} {pre} {s} {c}"); + println!("{i} {n} {bf} {bt} {p} {pra} {pre} {s}"); - let mut my_vec: Vec = Vec::new(); + let mut my_vec: Cvec = Cvec::new(); my_vec.push(i); my_vec.push(n); my_vec.push(bf); @@ -423,7 +50,7 @@ fn main() -> Result<()> { my_vec.push(pra); my_vec.push(pre); my_vec.push(s); - my_vec.push(c); + my_vec.push(begn); println!(" from vec {}", my_vec[3]); @@ -434,7 +61,6 @@ fn main() -> Result<()> { - /* let input = " (begin (debug 1) @@ -490,11 +116,11 @@ fn main() -> Result<()> { (or false false ) ) "; - let parsed_input = Rc::new(grammar::TermParser::new().parse(input)?); + let parsed_input = grammar::TermParser::new().parse(input)?; //println!("Hello, world: {parsed_input:?}"); println!("Hello, world: {parsed_input}"); - let evaled = eval(Rc::clone(&parsed_input))?; + println!("Yep that was all?"); + let evaled = eval(parsed_input.clone())?; println!("evaled: {evaled}"); - */ Ok(()) }