From 0696ad55941ffd2a0f206a9b198fd054bb36b426 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Sun, 28 Jan 2024 21:51:01 -0500 Subject: [PATCH] Finished-ish writing the new JIT-friendly, packed Form, this time with extra repr(C) Vec implementation --- slj/src/lib.rs | 1 + slj/src/main.rs | 308 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 273 insertions(+), 36 deletions(-) diff --git a/slj/src/lib.rs b/slj/src/lib.rs index 9bea60e..0d4d4d5 100644 --- a/slj/src/lib.rs +++ b/slj/src/lib.rs @@ -16,6 +16,7 @@ use anyhow::{anyhow,bail,Result}; // rustonomicon #[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy)] +#[repr(transparent)] pub struct ID { id: i64 } diff --git a/slj/src/main.rs b/slj/src/main.rs index 1c27f6d..b6f072a 100644 --- a/slj/src/main.rs +++ b/slj/src/main.rs @@ -1,18 +1,110 @@ #[macro_use] extern crate lalrpop_util; lalrpop_mod!(pub grammar); -use anyhow::Result; +use std::fmt; +use std::collections::BTreeMap; +use std::sync::Mutex; +use std::marker::PhantomData; +use std::ops::Deref; +use std::ptr::{self, NonNull}; +use std::mem::{self, ManuallyDrop}; +use std::alloc::{self, Layout}; use std::cell::Cell; //use std::rc::Rc; +use once_cell::sync::Lazy; +use anyhow::Result; + use sl::eval; -use std::marker::PhantomData; -use std::ops::Deref; -use std::ptr::NonNull; -use std::mem::ManuallyDrop; +#[repr(C)] +pub struct Vec { + ptr: NonNull, + cap: usize, + len: usize, +} +unsafe impl Send for Vec {} +unsafe impl Sync for Vec {} +impl Vec { + pub fn new() -> Self { + assert!(mem::size_of::() != 0, "no ZST"); + Vec { + ptr: NonNull::dangling(), + len: 0, + cap: 0, + } + } + fn grow(&mut self) { + let (new_cap, new_layout) = if self.cap == 0 { + (1, Layout::array::(1).unwrap()) + } else { + let new_cap = 2 * self.cap; + let new_layout = Layout::array::(new_cap).unwrap(); + (new_cap, new_layout) + }; + assert!(new_layout.size() <= isize::MAX as usize, "allocation too large"); + let new_ptr = if self.cap == 0 { + unsafe { alloc::alloc(new_layout) } + } else { + let old_layout = Layout::array::(self.cap).unwrap(); + let old_ptr = self.ptr.as_ptr() as *mut u8; + unsafe { alloc::realloc(old_ptr, old_layout, new_layout.size()) } + }; + self.ptr = match NonNull::new(new_ptr as *mut T) { + Some(p) => p, + None => alloc::handle_alloc_error(new_layout), + }; + self.cap = new_cap; + } + pub fn push(&mut self, elem: T) { + if self.len == self.cap { self.grow(); } + unsafe { + ptr::write(self.ptr.as_ptr().add(self.len), elem); + } + self.len += 1; + } + pub fn pop(&mut self) -> Option { + if self.len == 0 { + None + } else { + self.len -= 1; + unsafe { + Some(ptr::read(self.ptr.as_ptr().add(self.len))) + } + } + } +} +impl Drop for Vec { + fn drop(&mut self) { + if self.cap != 0 { + while let Some(_) = self.pop() {} + let layout = Layout::array::(self.cap).unwrap(); + unsafe { + alloc::dealloc(self.ptr.as_ptr() as *mut u8, layout); + } + } + } +} +impl Deref for Vec { + type Target = [T]; + fn deref(&self) -> &[T] { + unsafe { + std::slice::from_raw_parts(self.ptr.as_ptr(), self.len) + } + } +} +// insert, remove, into_iter, and drain all missing +impl fmt::Display for Vec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[")?; + for x in self.iter() { + write!(f, " {}", x)?; + } + write!(f, " ]")?; + Ok(()) + } +} -use std::fmt; #[repr(C)] pub struct Rc { @@ -77,18 +169,25 @@ impl Drop for Rc { } } - -/* -pub enum Form { - Nil, - Int(i32), - Bool(bool), - Symbol(String), - Pair(Rc
, Rc), - Closure(Vec, Rc, Rc, ID), - Prim(Prim), +#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy)] +#[repr(transparent)] +pub struct ID { + id: i64 } -*/ +impl fmt::Display for ID { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.id) + } +} + +#[repr(C)] +struct Closure { + params: Vec, + e: Form, + body: Form, + id: ID, +} + #[repr(C)] struct Form { data: *const Form, @@ -99,6 +198,27 @@ struct FormPair { car: Form, cdr: Form, } +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[repr(usize)] +pub enum Prim { + Add, + Sub, + Mul, + Div, + Mod, + Eq, + Cons, + Car, + Cdr, +} +impl Prim { + fn two_params(self) -> bool { + match self { + Prim::Car | Prim::Cdr => false, + _ => true, + } + } +} /* * this better be a 64 bit platform * huh, if we only support i32s, then we have a lot more room for tags @@ -107,7 +227,7 @@ struct FormPair { * * 000 - Int * 001 - Nil - * 010 - Bool(false) // this is needlessly wasteful of the bits but hay + * 010 - Bool(false) // this is needlessly wasteful of the bits but hay - should take one of them over as a String probs * 011 - Bool(true) * 100 - Symbol - will want to convert into an Rc around a StringRawParts struct * 101 - Pair - an Rc around a Pair struct @@ -116,34 +236,109 @@ struct FormPair { * * I don't actually think we need our own repr(C) Vec implementation, at least not for now - we can * make do with a VecRawParts struct (without implementations) + * Hay I did it anyway * * in both cases, StringRawParts and VecRawParts, we can rebuild slices from the raw parts for * read-only access, which is all we need (until Drop, at which point we should re-constitute them * from their raw parts, which is stable) + * + * For symbols, it would actually make sense to create the String, then leak it so it lasts for the + * program, then deduplicate to it and pass the static const slice around + * Could even fit entirely in the Form if the max length of a symbol is 2^16 */ +const TAG_OFFSET: usize = 3; + +const SYM_LEN_OFFSET: usize = 3; +const SYM_LEN_MASK: usize = 0xFF; // could be bigger +const SYM_PTR_OFFSET: usize = 11; + +const TAG_MASK: usize = 0b111; +const TAG_INT: usize = 0b000; +const TAG_NIL: usize = 0b001; +const TAG_BOOL_FALSE: usize = 0b010; +const TAG_BOOL_TRUE: usize = 0b011; +const TAG_SYMBOL: usize = 0b100; +const TAG_PAIR: usize = 0b101; +const TAG_CLOSURE: usize = 0b110; +const TAG_PRIM: usize = 0b111; + + +static SYMBOLS: Lazy>> = Lazy::new(Mutex::default); + impl Form { fn new_int(x: isize) -> Self { - Self { data: (x << 3) as *const Form, phantom: PhantomData } + Self { data: (x << TAG_OFFSET) as *const Form, phantom: PhantomData } } fn new_nil() -> Self { - Self { data: 0b001 as *const Form, phantom: PhantomData } + Self { data: TAG_NIL as *const Form, phantom: PhantomData } } fn new_bool(b: bool) -> Self { - Self { data: (if b { 0b011 } else { 0b010 }) as *const Form, phantom: PhantomData } + Self { data: (if b { TAG_BOOL_TRUE } else { TAG_BOOL_FALSE }) as *const Form, phantom: PhantomData } } fn new_pair(car: Form, cdr: Form) -> Self { let p = Rc::new(FormPair { car, cdr }).into_ptr() as usize; - assert!(p & 0b111 == 0); - Self { data: (p | 0b101) as *const Form, phantom: PhantomData } + assert!(p & TAG_MASK == 0); + Self { data: (p | TAG_PAIR) as *const Form, phantom: PhantomData } + } + fn new_closure(params: Vec, e: Form, body: Form, id: ID) -> Self { + let p = Rc::new(Closure { params, e, body, id }).into_ptr() as usize; + assert!(p & TAG_MASK == 0); + Self { data: (p | TAG_CLOSURE) as *const Form, phantom: PhantomData } + } + fn new_prim(p: Prim) -> Self { + Self { data: (((p as usize) << TAG_OFFSET) | TAG_PRIM) as *const Form, phantom: PhantomData } + } + fn new_symbol(s: &str) -> Form { + assert!(s.len() < SYM_LEN_MASK); + let mut symbols = SYMBOLS.lock().unwrap(); + let ds = if let Some(ds) = symbols.get(s) { + ds + } else { + // here we leak the memory of a new owned copy of s, + // and then transmute it into an &'static str that we keep in our global + // map for deduplication. Spicy stuff. + let mut value = ManuallyDrop::new(s.to_owned()); + value.shrink_to_fit(); + let slice = unsafe { std::mem::transmute(value.as_str()) }; + symbols.insert(s.to_owned(), slice); + slice + }; + Self { data: (((ds.as_ptr() as usize) << SYM_PTR_OFFSET) | (ds.len() << SYM_LEN_OFFSET) | TAG_SYMBOL) as *const Form, phantom: PhantomData } + } + + fn car(&self) -> &Form { + assert!(self.data as usize & TAG_MASK == TAG_PAIR); + unsafe { &(*((self.data as usize & !TAG_MASK) as *mut RcInner)).data.car } + } + fn cdr(&self) -> &Form { + assert!(self.data as usize & TAG_MASK == TAG_PAIR); + unsafe { &(*((self.data as usize & !TAG_MASK) as *mut RcInner)).data.cdr } + } + fn closure(&self) -> &Closure { + assert!(self.data as usize & TAG_MASK == TAG_CLOSURE); + unsafe { &(*((self.data as usize & !TAG_MASK) as *mut RcInner)).data } + } + fn prim(&self) -> Prim { + assert!(self.data as usize & TAG_MASK == TAG_PRIM); + unsafe { *(&((self.data as usize) >> TAG_OFFSET) as *const usize as *const Prim) } + } + fn str(&self) -> &str { + assert!(self.data as usize & TAG_MASK == TAG_SYMBOL); + let len = ((self.data as usize) >> SYM_LEN_OFFSET) & SYM_LEN_OFFSET; + let ptr = ((self.data as usize) >> SYM_PTR_OFFSET) as *const u8; + std::str::from_utf8(unsafe { std::slice::from_raw_parts(ptr, len) }).unwrap() } } impl Drop for Form { fn drop(&mut self) { - match self.data as usize & 0b111 { - 0b000 | 0b001 | 0b010 | 0b011 => { println!("dropping simple {self}"); }, // int, nil, false, true - 0b101 => { - // pair - let _ = Rc::::from_ptr( (self.data as usize & !0b111) as *mut RcInner ); + match self.data as usize & TAG_MASK { + TAG_INT | TAG_NIL | TAG_BOOL_FALSE | TAG_BOOL_TRUE | TAG_PRIM | TAG_SYMBOL => { println!("dropping simple {self}"); }, // doing nothing for symbol is fine + // since it's deduplicated + TAG_PAIR => { + let _ = Rc::::from_ptr( (self.data as usize & !TAG_MASK) as *mut RcInner ); + }, + TAG_CLOSURE => { + let _ = Rc::::from_ptr( (self.data as usize & !TAG_MASK) as *mut RcInner ); }, _ => unreachable!(), } @@ -151,21 +346,31 @@ impl Drop for Form { } impl fmt::Display for Form { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.data as usize & 0b111 { - 0b000 => { + match self.data as usize & TAG_MASK { + TAG_INT => { write!(f, "{}", self.data as isize >> 3)?; }, - 0b001 => { + TAG_NIL => { write!(f, "nil")?; }, - 0b010 => { + TAG_BOOL_FALSE => { write!(f, "false")?; }, - 0b011 => { + TAG_BOOL_TRUE => { write!(f, "true")?; }, - 0b101 => { - write!(f, "pair")?; + TAG_PAIR => { + write!(f, "({} . {}", self.car(), self.cdr())?; + }, + TAG_PRIM => { + write!(f, "{:?}", self.prim())?; + }, + TAG_SYMBOL => { + write!(f, "{}", self.str())?; + }, + TAG_CLOSURE => { + let Closure { params, e, body, id, } = self.closure(); + write!(f, "<{params} {e} {body} {id}>")?; }, _ => unreachable!(), } @@ -195,7 +400,38 @@ fn main() -> Result<()> { let bt = Form::new_bool(true); let p = Form::new_pair(Form::new_int(50), Form::new_nil()); - println!("{i} {n} {bf} {bt} {p}"); + + let pra = Form::new_prim(Prim::Add); + let pre = Form::new_prim(Prim::Eq); + + let s = Form::new_symbol("woopwpp"); + + + let mut params = Vec::new(); + params.push("a".to_owned()); + params.push("b".to_owned()); + let c = Form::new_closure(params, Form::new_nil(), Form::new_nil(), ID { id: 9 }); + + println!("{i} {n} {bf} {bt} {p} {pra} {pre} {s} {c}"); + + let mut my_vec: Vec = Vec::new(); + my_vec.push(i); + my_vec.push(n); + my_vec.push(bf); + my_vec.push(bt); + my_vec.push(p); + my_vec.push(pra); + my_vec.push(pre); + my_vec.push(s); + my_vec.push(c); + + + println!(" from vec {}", my_vec[3]); + for i in my_vec.iter() { + println!(" from vec {}", i); + } + println!("{my_vec}"); + /*