Finished-ish writing the new JIT-friendly, packed Form, this time with extra repr(C) Vec implementation

This commit is contained in:
2024-01-28 21:51:01 -05:00
parent a16e126aa1
commit 0696ad5594
2 changed files with 273 additions and 36 deletions

View File

@@ -16,6 +16,7 @@ use anyhow::{anyhow,bail,Result};
// rustonomicon // rustonomicon
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy)] #[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy)]
#[repr(transparent)]
pub struct ID { pub struct ID {
id: i64 id: i64
} }

View File

@@ -1,18 +1,110 @@
#[macro_use] extern crate lalrpop_util; #[macro_use] extern crate lalrpop_util;
lalrpop_mod!(pub grammar); lalrpop_mod!(pub grammar);
use anyhow::Result; use std::fmt;
use std::collections::BTreeMap;
use std::sync::Mutex;
use std::marker::PhantomData;
use std::ops::Deref;
use std::ptr::{self, NonNull};
use std::mem::{self, ManuallyDrop};
use std::alloc::{self, Layout};
use std::cell::Cell; use std::cell::Cell;
//use std::rc::Rc; //use std::rc::Rc;
use once_cell::sync::Lazy;
use anyhow::Result;
use sl::eval; use sl::eval;
use std::marker::PhantomData; #[repr(C)]
use std::ops::Deref; pub struct Vec<T> {
use std::ptr::NonNull; ptr: NonNull<T>,
use std::mem::ManuallyDrop; cap: usize,
len: usize,
}
// SAFETY: `Vec<T>` reaches its elements only through the `NonNull<T>` it stores,
// and a raw pointer field suppresses the automatic `Send`/`Sync` impls. The vector
// frees its own buffer on drop, so it behaves like an owned collection of `T`;
// forwarding the bounds from `T` restores the auto traits the raw pointer removed.
unsafe impl<T: Send> Send for Vec<T> {}
unsafe impl<T: Sync> Sync for Vec<T> {}
impl<T> Vec<T> {
    /// Creates an empty vector without touching the allocator.
    ///
    /// # Panics
    /// Panics when `T` is a zero-sized type; this implementation does not support them.
    pub fn new() -> Self {
        assert!(mem::size_of::<T>() != 0, "no ZST");
        Self {
            ptr: NonNull::dangling(),
            cap: 0,
            len: 0,
        }
    }

    /// Allocates room for one element on first use, otherwise doubles the capacity.
    fn grow(&mut self) {
        let first_allocation = self.cap == 0;
        let target_cap = if first_allocation { 1 } else { 2 * self.cap };
        let target_layout = Layout::array::<T>(target_cap).unwrap();
        assert!(target_layout.size() <= isize::MAX as usize, "allocation too large");

        let raw = if first_allocation {
            // SAFETY: `T` is not zero-sized (checked in `new`) and `target_cap` is 1,
            // so the layout has a non-zero size.
            unsafe { alloc::alloc(target_layout) }
        } else {
            let current_layout = Layout::array::<T>(self.cap).unwrap();
            let current = self.ptr.as_ptr() as *mut u8;
            // SAFETY: `current` came from this allocator with `current_layout`, and the
            // new size was validated against `isize::MAX` above.
            unsafe { alloc::realloc(current, current_layout, target_layout.size()) }
        };

        // A null return means the allocator failed; report it the standard way.
        match NonNull::new(raw as *mut T) {
            Some(fresh) => self.ptr = fresh,
            None => alloc::handle_alloc_error(target_layout),
        }
        self.cap = target_cap;
    }

    /// Appends `elem` to the end, growing the buffer when it is full.
    pub fn push(&mut self, elem: T) {
        if self.cap == self.len {
            self.grow();
        }
        // SAFETY: after the check above `len < cap`, so the slot is inside the
        // allocation and currently uninitialised.
        unsafe {
            let slot = self.ptr.as_ptr().add(self.len);
            ptr::write(slot, elem);
        }
        self.len += 1;
    }

    /// Removes and returns the last element, or `None` when the vector is empty.
    pub fn pop(&mut self) -> Option<T> {
        let last = self.len.checked_sub(1)?;
        self.len = last;
        // SAFETY: index `last` held an initialised element, and shrinking `len`
        // first guarantees it is never read again through the vector.
        Some(unsafe { ptr::read(self.ptr.as_ptr().add(last)) })
    }
}
impl<T> Drop for Vec<T> {
    /// Drops every remaining element (last to first) and then frees the buffer.
    fn drop(&mut self) {
        // A zero capacity means nothing was ever allocated, so there is nothing to free.
        if self.cap == 0 {
            return;
        }
        while self.pop().is_some() {}
        let layout = Layout::array::<T>(self.cap).unwrap();
        // SAFETY: `cap != 0`, so `ptr` was returned by the global allocator for an
        // array of `cap` elements of `T`, which is exactly `layout`.
        unsafe { alloc::dealloc(self.ptr.as_ptr() as *mut u8, layout) };
    }
}
impl<T> Deref for Vec<T> {
    type Target = [T];

    /// Exposes the initialised prefix of the buffer as a slice, which also lends
    /// the vector every read-only slice method (`len`, `iter`, indexing, ...).
    fn deref(&self) -> &Self::Target {
        let (start, count) = (self.ptr.as_ptr(), self.len);
        // SAFETY: the first `len` slots are initialised, and `ptr` is non-null and
        // aligned even when nothing has been allocated (it is dangling then).
        unsafe { std::slice::from_raw_parts(start, count) }
    }
}
// insert, remove, into_iter, and drain all missing
impl<T: fmt::Display> fmt::Display for Vec<T> {
    /// Renders the elements space-separated inside brackets, e.g. `[ 1 2 3 ]`
    /// (an empty vector prints as `[ ]`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("[")?;
        self.iter().try_for_each(|item| write!(f, " {}", item))?;
        f.write_str(" ]")
    }
}
use std::fmt;
#[repr(C)] #[repr(C)]
pub struct Rc<T> { pub struct Rc<T> {
@@ -77,18 +169,25 @@ impl<T> Drop for Rc<T> {
} }
} }
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy)]
/* #[repr(transparent)]
pub enum Form { pub struct ID {
Nil, id: i64
Int(i32),
Bool(bool),
Symbol(String),
Pair(Rc<Form>, Rc<Form>),
Closure(Vec<String>, Rc<Form>, Rc<Form>, ID),
Prim(Prim),
} }
*/ impl fmt::Display for ID {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.id)
}
}
/// Heap payload behind a closure-tagged `Form`; `Form::new_closure` wraps one of
/// these in an `Rc` and stores the tagged pointer.
#[repr(C)]
struct Closure {
// Parameter names.
params: Vec<String>,
// NOTE(review): presumably the captured environment - confirm against the evaluator.
e: Form,
// The closure body.
body: Form,
// Identifier attached to the closure; printed last by `Display for Form`.
id: ID,
}
#[repr(C)] #[repr(C)]
struct Form { struct Form {
data: *const Form, data: *const Form,
@@ -99,6 +198,27 @@ struct FormPair {
car: Form, car: Form,
cdr: Form, cdr: Form,
} }
/// Built-in primitive operations.
///
/// `repr(usize)` matters: `Form::new_prim` stores the discriminant (shifted past
/// the tag bits) inside the `Form` word and `Form::prim` reads it back, so the
/// variant order is part of the encoding.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
#[repr(usize)]
pub enum Prim {
Add,
Sub,
Mul,
Div,
Mod,
Eq,
Cons,
Car,
Cdr,
}
impl Prim {
    /// Reports whether the primitive takes two parameters.
    ///
    /// Only `Car` and `Cdr` answer `false`. The match is exhaustive, so adding a
    /// variant forces a decision here.
    fn two_params(self) -> bool {
        match self {
            Prim::Add
            | Prim::Sub
            | Prim::Mul
            | Prim::Div
            | Prim::Mod
            | Prim::Eq
            | Prim::Cons => true,
            Prim::Car | Prim::Cdr => false,
        }
    }
}
/* /*
* this better be a 64 bit platform * this better be a 64 bit platform
* huh, if we only support i32s, then we have a lot more room for tags * huh, if we only support i32s, then we have a lot more room for tags
@@ -107,7 +227,7 @@ struct FormPair {
* *
* 000 - Int * 000 - Int
* 001 - Nil * 001 - Nil
* 010 - Bool(false) // this is needlessly wasteful of the bits but hay * 010 - Bool(false) // this is needlessly wasteful of the bits but hay - should take one of them over as a String probs
* 011 - Bool(true) * 011 - Bool(true)
* 100 - Symbol - will want to convert into an Rc around a StringRawParts struct * 100 - Symbol - will want to convert into an Rc around a StringRawParts struct
* 101 - Pair - an Rc around a Pair struct * 101 - Pair - an Rc around a Pair struct
@@ -116,34 +236,109 @@ struct FormPair {
* *
* I don't actually think we need our own repr(C) Vec implementation, at least not for now - we can * I don't actually think we need our own repr(C) Vec implementation, at least not for now - we can
* make do with a VecRawParts struct (without implementations) * make do with a VecRawParts struct (without implementations)
* Update: I wrote the repr(C) Vec anyway (see `Vec<T>` above).
* *
* in both cases, StringRawParts and VecRawParts, we can rebuild slices from the raw parts for * in both cases, StringRawParts and VecRawParts, we can rebuild slices from the raw parts for
* read-only access, which is all we need (until Drop, at which point we should re-constitute them * read-only access, which is all we need (until Drop, at which point we should re-constitute them
* from their raw parts, which is stable) * from their raw parts, which is stable)
*
* For symbols, it makes sense to create the String once, leak it so it lives for the rest of the
* program, deduplicate later lookups against it, and pass the resulting &'static str around.
* The pointer and length could even fit entirely inside the Form if symbol length is capped (e.g. at 2^16).
*/ */
// Bit layout of the single word stored in `Form::data`.
// Number of low bits reserved for the type tag; int and prim payloads are shifted left by this.
const TAG_OFFSET: usize = 3;
// Bit position where a symbol's byte length starts (directly above the tag).
const SYM_LEN_OFFSET: usize = 3;
// Width mask of the symbol length field; `new_symbol` rejects lengths that reach it.
const SYM_LEN_MASK: usize = 0xFF; // could be bigger
// Bit position where a symbol's string pointer starts (above tag + length).
const SYM_PTR_OFFSET: usize = 11;
// Selects the tag bits of a `Form` word.
const TAG_MASK: usize = 0b111;
// Tag values; they match the table in the design comment above.
const TAG_INT: usize = 0b000;
const TAG_NIL: usize = 0b001;
const TAG_BOOL_FALSE: usize = 0b010;
const TAG_BOOL_TRUE: usize = 0b011;
const TAG_SYMBOL: usize = 0b100;
const TAG_PAIR: usize = 0b101;
const TAG_CLOSURE: usize = 0b110;
const TAG_PRIM: usize = 0b111;
// Global intern table: maps symbol text to the leaked `&'static str` that every
// `Form` for that symbol points at. Guarded by a mutex, created lazily on first use.
static SYMBOLS: Lazy<Mutex<BTreeMap<String,&'static str>>> = Lazy::new(Mutex::default);
impl Form { impl Form {
fn new_int(x: isize) -> Self { fn new_int(x: isize) -> Self {
Self { data: (x << 3) as *const Form, phantom: PhantomData } Self { data: (x << TAG_OFFSET) as *const Form, phantom: PhantomData }
} }
fn new_nil() -> Self { fn new_nil() -> Self {
Self { data: 0b001 as *const Form, phantom: PhantomData } Self { data: TAG_NIL as *const Form, phantom: PhantomData }
} }
fn new_bool(b: bool) -> Self { fn new_bool(b: bool) -> Self {
Self { data: (if b { 0b011 } else { 0b010 }) as *const Form, phantom: PhantomData } Self { data: (if b { TAG_BOOL_TRUE } else { TAG_BOOL_FALSE }) as *const Form, phantom: PhantomData }
} }
fn new_pair(car: Form, cdr: Form) -> Self { fn new_pair(car: Form, cdr: Form) -> Self {
let p = Rc::new(FormPair { car, cdr }).into_ptr() as usize; let p = Rc::new(FormPair { car, cdr }).into_ptr() as usize;
assert!(p & 0b111 == 0); assert!(p & TAG_MASK == 0);
Self { data: (p | 0b101) as *const Form, phantom: PhantomData } Self { data: (p | TAG_PAIR) as *const Form, phantom: PhantomData }
}
/// Builds a closure form: the four parts are moved into a reference-counted
/// `Closure`, and the resulting pointer is stored with the closure tag.
fn new_closure(params: Vec<String>, e: Form, body: Form, id: ID) -> Self {
    let payload = Closure { params, e, body, id };
    let addr = Rc::new(payload).into_ptr() as usize;
    // The tag is OR-ed into the low bits, so they must be free in the raw address.
    assert!(addr & TAG_MASK == 0);
    let tagged = addr | TAG_CLOSURE;
    Self { data: tagged as *const Form, phantom: PhantomData }
}
/// Builds a primitive form: the `Prim` discriminant sits above the tag bits.
fn new_prim(p: Prim) -> Self {
    let discriminant = p as usize;
    let bits = (discriminant << TAG_OFFSET) | TAG_PRIM;
    Self { data: bits as *const Form, phantom: PhantomData }
}
/// Builds a symbol form for `s`, interning the text so equal symbols share storage.
///
/// The first time a given text is seen, an owned copy is leaked on purpose and
/// recorded in `SYMBOLS`; later calls reuse that `&'static str`. The returned
/// form packs the string address, its byte length and the symbol tag into one word.
///
/// # Panics
/// Panics if `s` is too long for the length field, if the `SYMBOLS` mutex is
/// poisoned, or if the string address would lose bits when shifted into place.
fn new_symbol(s: &str) -> Form {
    assert!(s.len() < SYM_LEN_MASK);
    let mut symbols = SYMBOLS.lock().unwrap();
    let interned: &'static str = match symbols.get(s) {
        Some(existing) => *existing,
        None => {
            // Leak a right-sized owned copy so it lives for the whole program;
            // `Box::leak` yields the `&'static str` directly, no `unsafe` needed.
            let leaked: &'static str = Box::leak(s.to_owned().into_boxed_str());
            symbols.insert(s.to_owned(), leaked);
            leaked
        }
    };
    let addr = interned.as_ptr() as usize;
    // The address is shifted left to make room for length and tag; if that dropped
    // high bits, `str()` would later rebuild a different pointer.
    assert!(
        addr <= usize::MAX >> SYM_PTR_OFFSET,
        "symbol address does not fit beside the length and tag bits"
    );
    let bits = (addr << SYM_PTR_OFFSET) | (interned.len() << SYM_LEN_OFFSET) | TAG_SYMBOL;
    Self { data: bits as *const Form, phantom: PhantomData }
}
/// Borrows the first element of a pair.
///
/// # Panics
/// Panics if this form is not tagged as a pair.
fn car(&self) -> &Form {
    let bits = self.data as usize;
    assert!(bits & TAG_MASK == TAG_PAIR);
    let inner = (bits & !TAG_MASK) as *mut RcInner<FormPair>;
    // SAFETY: a pair-tagged word is only built by `new_pair`, which stores the
    // pointer obtained from `Rc::<FormPair>::into_ptr`; this form keeps that
    // reference until it is dropped.
    unsafe { &(*inner).data.car }
}
/// Borrows the second element of a pair.
///
/// # Panics
/// Panics if this form is not tagged as a pair.
fn cdr(&self) -> &Form {
    let bits = self.data as usize;
    assert!(bits & TAG_MASK == TAG_PAIR);
    let inner = (bits & !TAG_MASK) as *mut RcInner<FormPair>;
    // SAFETY: a pair-tagged word is only built by `new_pair`, which stores the
    // pointer obtained from `Rc::<FormPair>::into_ptr`; this form keeps that
    // reference until it is dropped.
    unsafe { &(*inner).data.cdr }
}
/// Borrows the `Closure` payload of a closure form.
///
/// # Panics
/// Panics if this form is not tagged as a closure.
fn closure(&self) -> &Closure {
    let bits = self.data as usize;
    assert!(bits & TAG_MASK == TAG_CLOSURE);
    let inner = (bits & !TAG_MASK) as *mut RcInner<Closure>;
    // SAFETY: a closure-tagged word is only built by `new_closure`, which stores
    // the pointer obtained from `Rc::<Closure>::into_ptr`; this form keeps that
    // reference until it is dropped.
    unsafe { &(*inner).data }
}
/// Decodes the primitive stored in a prim form.
///
/// # Panics
/// Panics if this form is not tagged as a primitive.
fn prim(&self) -> Prim {
    let bits = self.data as usize;
    assert!(bits & TAG_MASK == TAG_PRIM);
    let discriminant: usize = bits >> TAG_OFFSET;
    // SAFETY: `Prim` is `repr(usize)` and a prim-tagged word is only built by
    // `new_prim` from a real `Prim` value, so the bits above the tag are a
    // valid discriminant.
    unsafe { *(&discriminant as *const usize).cast::<Prim>() }
}
fn str(&self) -> &str {
assert!(self.data as usize & TAG_MASK == TAG_SYMBOL);
let len = ((self.data as usize) >> SYM_LEN_OFFSET) & SYM_LEN_OFFSET;
let ptr = ((self.data as usize) >> SYM_PTR_OFFSET) as *const u8;
std::str::from_utf8(unsafe { std::slice::from_raw_parts(ptr, len) }).unwrap()
} }
} }
impl Drop for Form { impl Drop for Form {
fn drop(&mut self) { fn drop(&mut self) {
match self.data as usize & 0b111 { match self.data as usize & TAG_MASK {
0b000 | 0b001 | 0b010 | 0b011 => { println!("dropping simple {self}"); }, // int, nil, false, true TAG_INT | TAG_NIL | TAG_BOOL_FALSE | TAG_BOOL_TRUE | TAG_PRIM | TAG_SYMBOL => { println!("dropping simple {self}"); }, // doing nothing for symbol is fine
0b101 => { // since it's deduplicated
// pair TAG_PAIR => {
let _ = Rc::<FormPair>::from_ptr( (self.data as usize & !0b111) as *mut RcInner<FormPair> ); let _ = Rc::<FormPair>::from_ptr( (self.data as usize & !TAG_MASK) as *mut RcInner<FormPair> );
},
TAG_CLOSURE => {
let _ = Rc::<Closure>::from_ptr( (self.data as usize & !TAG_MASK) as *mut RcInner<Closure> );
}, },
_ => unreachable!(), _ => unreachable!(),
} }
@@ -151,21 +346,31 @@ impl Drop for Form {
} }
impl fmt::Display for Form { impl fmt::Display for Form {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.data as usize & 0b111 { match self.data as usize & TAG_MASK {
0b000 => { TAG_INT => {
write!(f, "{}", self.data as isize >> 3)?; write!(f, "{}", self.data as isize >> 3)?;
}, },
0b001 => { TAG_NIL => {
write!(f, "nil")?; write!(f, "nil")?;
}, },
0b010 => { TAG_BOOL_FALSE => {
write!(f, "false")?; write!(f, "false")?;
}, },
0b011 => { TAG_BOOL_TRUE => {
write!(f, "true")?; write!(f, "true")?;
}, },
0b101 => { TAG_PAIR => {
write!(f, "pair")?; write!(f, "({} . {}", self.car(), self.cdr())?;
},
TAG_PRIM => {
write!(f, "{:?}", self.prim())?;
},
TAG_SYMBOL => {
write!(f, "{}", self.str())?;
},
TAG_CLOSURE => {
let Closure { params, e, body, id, } = self.closure();
write!(f, "<{params} {e} {body} {id}>")?;
}, },
_ => unreachable!(), _ => unreachable!(),
} }
@@ -195,7 +400,38 @@ fn main() -> Result<()> {
let bt = Form::new_bool(true); let bt = Form::new_bool(true);
let p = Form::new_pair(Form::new_int(50), Form::new_nil()); let p = Form::new_pair(Form::new_int(50), Form::new_nil());
println!("{i} {n} {bf} {bt} {p}");
let pra = Form::new_prim(Prim::Add);
let pre = Form::new_prim(Prim::Eq);
let s = Form::new_symbol("woopwpp");
let mut params = Vec::new();
params.push("a".to_owned());
params.push("b".to_owned());
let c = Form::new_closure(params, Form::new_nil(), Form::new_nil(), ID { id: 9 });
println!("{i} {n} {bf} {bt} {p} {pra} {pre} {s} {c}");
let mut my_vec: Vec<Form> = Vec::new();
my_vec.push(i);
my_vec.push(n);
my_vec.push(bf);
my_vec.push(bt);
my_vec.push(p);
my_vec.push(pra);
my_vec.push(pre);
my_vec.push(s);
my_vec.push(c);
println!(" from vec {}", my_vec[3]);
for i in my_vec.iter() {
println!(" from vec {}", i);
}
println!("{my_vec}");
/* /*