import io:*
import mem:*
import map:*
import hash_map:*
import stack:*
import string:*
import util:*
import tree:*
import symbol:*
import ast_nodes:*
// for error with syntax tree
import pass_common:*
import poset:*

// Size in bytes of a value of type t (first half of type_size_and_alignment).
fun type_size(t: *type): ulong
    return type_size_and_alignment(t).first

// Returns (size, alignment) in bytes for type t.
//  - anything with indirection, and functions, are pointer sized
//  - objects are laid out member by member, each member padded up to its own
//    alignment, with tail padding up to the largest member alignment
//  - unions take the size of their largest member (plus tail padding)
fun type_size_and_alignment(t: *type): pair<ulong,ulong> {
    if (t->indirection)
        return make_pair(#sizeof<*void>, #sizeof<*void>)
    match (t->base) {
        base_type::object() {
            var total_size: ulong = 0
            var max_size: ulong = 0
            var max_align: ulong = 0
            t->type_def->type_def.variables.for_each(fun(i: *ast_node) {
                var individual = type_size_and_alignment(i->declaration_statement.identifier->identifier.type)
                max_size = max(max_size, individual.first)
                max_align = max(max_align, individual.second)
                // increase total size by the individual size + padding to get alignment
                // (typed ulong so it matches the sizes it is combined with)
                var padding: ulong = 0
                if (individual.second != 0)
                    padding = (individual.second - (total_size % individual.second)) % individual.second
                total_size += individual.first + padding
            })
            if (t->type_def->type_def.is_union)
                total_size = max_size
            // pad the end so that consecutive objects in memory are aligned
            if (max_align != 0)
                total_size += (max_align - (total_size % max_align)) % max_align
            return make_pair(total_size, max_align)
        }
        base_type::function()           return make_pair(#sizeof<*void>, #sizeof<*void>)
        base_type::boolean()            return make_pair(#sizeof<bool>, #sizeof<bool>)
        base_type::character()          return make_pair(#sizeof<char>, #sizeof<char>)
        base_type::ucharacter()         return make_pair(#sizeof<uchar>, #sizeof<uchar>)
        base_type::short_int()          return make_pair(#sizeof<short>, #sizeof<short>)
        base_type::ushort_int()         return make_pair(#sizeof<ushort>, #sizeof<ushort>)
        base_type::integer()            return make_pair(#sizeof<int>, #sizeof<int>)
        base_type::uinteger()           return make_pair(#sizeof<uint>, #sizeof<uint>)
        base_type::long_int()           return make_pair(#sizeof<long>, #sizeof<long>)
        base_type::ulong_int()          return make_pair(#sizeof<ulong>, #sizeof<ulong>)
        base_type::floating()           return make_pair(#sizeof<float>, #sizeof<float>)
        base_type::double_precision()   return make_pair(#sizeof<double>, #sizeof<double>)
    }
    error(string("Invalid type for type_size: ") + t->to_string())
}

// Byte offset of member `ident` inside struct_type, using the same per-member
// alignment rule as type_size_and_alignment. Every union member is at offset 0.
// If ident is not a member, the result is the unpadded end of the last member.
fun offset_into_struct(struct_type: *type, ident: *ast_node): ulong {
    var offset: ulong = 0
    if (struct_type->type_def->type_def.is_union)
        return offset
    for (var i = 0; i < struct_type->type_def->type_def.variables.size; i++;) {
        var size_and_align = type_size_and_alignment(struct_type->type_def->type_def.variables[i]->declaration_statement.identifier->identifier.type)
        var align = size_and_align.second
        // round up to this member's alignment before placing it
        if (align != 0)
            offset += (align - (offset % align)) % align
        if (struct_type->type_def->type_def.variables[i]->declaration_statement.identifier == ident)
            break
        else
            offset += size_and_align.first
    }
    return offset
}

// Width of a machine register / stack slot (pointer sized).
var register_size = #sizeof<*void>

// Memory access width for ldr / str.
adt operand_size {
    b8,
    b16,
    b32,
    b64
}

// Maps a byte count (1, 2, 4 or 8) to the matching operand_size; anything else is an error.
fun size_to_operand_size(size: ulong): operand_size {
    if (size == 1) return operand_size::b8()
    if (size == 2) return operand_size::b16()
    if (size == 4) return operand_size::b32()
    if (size == 8) return operand_size::b64()
    error("invalid operand size")
}

// One bytecode instruction. Payload objects are defined below.
adt byte_inst {
    nop,
    imm: imm,
    add: add,
    ldr: ldr,
    str: str,
    jmp: jmp,
    jz: jz,
    call: call,
    ret
}

// reg = val
obj imm {
    var reg: int
    var val: long
}
// to_reg = a + b  (a and b are register numbers)
obj add {
    var to_reg: int
    var a: int
    var b: int
}
// to_reg = memory[from_reg + offset], `size` wide
obj ldr {
    var to_reg: int
    var from_reg: int
    var offset: long
    var size: operand_size
}
// memory[to_reg + offset] = from_reg, `size` wide
obj str {
    var to_reg: int
    var offset: long
    var from_reg: int
    var size: operand_size
}
// pc += offset
obj jmp {
    var offset: long
}
// if (reg == 0) pc += offset
obj jz {
    var reg: int
    var offset: long
}
// push return pc, pc = reg
obj call {
    var reg: int
}

// Bit width of an operand_size as text ("8", "16", "32", "64").
fun to_string(s: operand_size): string {
    match (s) {
        operand_size::b8()  return string("8")
        operand_size::b16() return string("16")
        operand_size::b32() return string("32")
        operand_size::b64() return string("64")
    }
    return string("missed operand size")
}

// Human readable disassembly of a single instruction.
fun to_string(b: byte_inst): string {
    match (b) {
        byte_inst::nop()   return string("nop")
        byte_inst::imm(i)  return string("r") + i.reg + " = imm " + i.val
        byte_inst::add(a)  return string("r") + a.to_reg + " = r" + a.a + " + r" + a.b
        byte_inst::ldr(l)  return string("r") + l.to_reg + " = ldr" + to_string(l.size) + " r" + l.from_reg + " (" + l.offset + ")"
        // wrapped in string() so it is built the same way as every other case
        byte_inst::str(s)  return string("str") + to_string(s.size) + " (r" + s.to_reg + "(" + s.offset + ") <= r" + s.from_reg + ")"
        byte_inst::jmp(j)  return string("jmp(pc += ") + j.offset + ")"
        byte_inst::jz(j)   return string("jmp(r") + j.reg + " == 0, pc += " + j.offset + ")"
        byte_inst::call(c) return string("call pc = r") + c.reg
        byte_inst::ret()   return string("ret")
    }
    return string("Missed byte_inst case in to_string")
}

// Disassembles every function, one listing per function, separated by newlines.
fun bytecode_to_string(functions: ref vector<bytecode_function>, instructions: ref vector<byte_inst>): string {
    return string("\n").join(functions.map(fun(bb: ref bytecode_function): string return bb.to_string(instructions);))
}

// Convenience constructor: a function record called `name` whose code begins at instruction `start`.
fun bytecode_function(name: ref string, start: int): bytecode_function {
    var to_ret.construct(name, start): bytecode_function
    return to_ret
}

// Book-keeping for one generated function: its half-open instruction range
// [instruction_start, instruction_end) in the shared instruction vector, and
// the frame offset assigned to each of its variables.
obj bytecode_function (Object) {
    var name: string
    var instruction_start: int
    var instruction_end: int
    var var_to_frame_offset: map<*ast_node, int>
    var frame_size: int

    fun construct(): *bytecode_function {
        instruction_start = 0
        instruction_end = 0
        name.construct()
        var_to_frame_offset.construct()
        frame_size = 0
        return this
    }
    fun construct(name_in: ref string, instruction_start_in: int): *bytecode_function {
        instruction_start = instruction_start_in
        instruction_end = 0
        name.copy_construct(&name_in)
        var_to_frame_offset.construct()
        frame_size = 0
        return this
    }
    fun copy_construct(old: *bytecode_function) {
        instruction_start = old->instruction_start
        instruction_end = old->instruction_end
        name.copy_construct(&old->name)
        var_to_frame_offset.copy_construct(&old->var_to_frame_offset)
        frame_size = old->frame_size
    }
    fun operator=(other: ref bytecode_function) {
        destruct()
        copy_construct(&other)
    }
    fun destruct() {
        name.destruct()
        var_to_frame_offset.destruct()
    }
    // Listing of this function: frame layout followed by its instructions,
    // each prefixed with its absolute index in `instructions`.
    fun to_string(instructions: ref vector<byte_inst>): string {
        var res = name + "(frame size " + frame_size + "):\n"
        res += "\t frame layout\n"
        res += "\t\tsaved RBP : RBP = 0\n"
        var_to_frame_offset.for_each(fun(n: *ast_node, o: int) {
            res += "\t\t" + n->identifier.name + ": RBP - " + o + "\n"
        })
        res += "\n\t bytecode\n"
        for (var i = instruction_start; i < instruction_end; i++;)
            res += string("\t\t") + i + string(": ") + to_string(instructions[i]) + "\n"
        return res
    }
}

// Walks the AST and emits byte_inst instructions into one flat vector, then
// can interpret them with evaluate().
//
// Register conventions used by the emitted code and by evaluate():
//   r0 = stack pointer (RSP), r1 = frame pointer (RBP), r2 = return value,
//   r3 and up = temporaries handed out by get_reg().
// Addresses held in registers are offsets relative to the top of the
// interpreter stack, so they are zero or negative.
obj bytecode_generator (Object) {
    var reg_counter: int
    var reg_max: int
    var id_counter: int
    var ast_name_map: hash_map<*ast_node, string>
    var functions: vector<bytecode_function>
    var node_function_idx: map<*ast_node, int>
    var instructions: vector<byte_inst>
    // (index of an imm instruction, function node) pairs; the imm value is
    // patched to the function's start address once all functions are generated
    var fixup_function_addresses: vector<pair<int,*ast_node>>

    fun construct(): *bytecode_generator {
        id_counter = 0
        ast_name_map.construct()
        functions.construct()
        node_function_idx.construct()
        instructions.construct()
        fixup_function_addresses.construct()
        reg_counter = 3
        reg_max = 3
        return this
    }
    fun copy_construct(old: *bytecode_generator) {
        reg_counter = old->reg_counter
        reg_max = old->reg_max
        id_counter = old->id_counter
        ast_name_map.copy_construct(&old->ast_name_map)
        functions.copy_construct(&old->functions)
        node_function_idx.copy_construct(&old->node_function_idx)
        instructions.copy_construct(&old->instructions)
        fixup_function_addresses.copy_construct(&old->fixup_function_addresses)
    }
    fun operator=(other: ref bytecode_generator) {
        destruct()
        copy_construct(&other)
    }
    fun destruct() {
        ast_name_map.destruct()
        functions.destruct()
        node_function_idx.destruct()
        instructions.destruct()
        fixup_function_addresses.destruct()
    }
    // Next unique id, as text, used to make generated names unique.
    fun get_id(): string return to_string(id_counter++);
    // Allocates a fresh temporary register number.
    fun get_reg(): int return reg_counter++;
    // Releases all temporaries (back to r3), remembering the high water mark
    // in reg_max so evaluate() knows how many registers to allocate.
    fun reset_reg() {
        if (reg_counter > reg_max) {
            reg_max = reg_counter
        }
        reg_counter = 3
    }
    /*fun generate_bytecode(name_ast_map: map<string, pair<*tree<symbol>,*ast_node>>): pair<vector<bytecode_function>, vector<byte_inst>> {*/
    // Generates code for every translation unit in name_ast_map, then patches
    // function addresses and fills in each function's instruction_end.
    fun generate_bytecode(name_ast_map: map<string, pair<*tree<symbol>,*ast_node>>) {
        // iterate through asts
        name_ast_map.for_each(fun(name: string, tree_pair: pair<*tree<symbol>,*ast_node>) {
            // iterate through children for each ast
            // do lambdas seperatly, so we can reconstitute the enclosing object if it has one
            tree_pair.second->translation_unit.lambdas.for_each(fun(child: *ast_node) {
                generate_function_definition(child)
            })
            tree_pair.second->translation_unit.children.for_each(fun(child: *ast_node) {
                match (*child) {
                    ast_node::declaration_statement(backing) generate_declaration_statement(child)
                    ast_node::compiler_intrinsic(backing) generate_compiler_intrinsic(child)
                    ast_node::function(backing) generate_function_definition(child)
                    ast_node::template(backing) {
                        backing.instantiated.for_each(fun(node: *ast_node) {
                            match (*node) {
                                ast_node::function(backing) generate_function_definition(node)
                                ast_node::type_def(backing) {
                                    backing.methods.for_each(fun(method: *ast_node) {
                                        if (is_template(method))
                                            method->template.instantiated.for_each(fun(m: *ast_node) generate_function_definition(m);)
                                        else
                                            generate_function_definition(method)
                                    })
                                }
                            }
                        })
                    }
                    ast_node::type_def(backing) {
                        backing.methods.for_each(fun(method: *ast_node) {
                            if (is_template(method))
                                method->template.instantiated.for_each(fun(m: *ast_node) generate_function_definition(m);)
                            else
                                generate_function_definition(method)
                        })
                    }
                }
            })
        })
        // every function now has a start address, so function-value immediates can be patched
        fixup_function_addresses.for_each(fun(p: pair<int,*ast_node>) {
            instructions[p.first].imm.val = functions[node_function_idx[p.second]].instruction_start
        })
        // a function ends where the next one begins; the last one ends at the
        // end of the instruction stream. Guarded so an input with no functions
        // does not call last() on an empty vector.
        if (functions.size > 0) {
            for (var i = 0; i < functions.size - 1; i++;)
                functions[i].instruction_end = functions[i+1].instruction_start
            functions.last().instruction_end = instructions.size
        }
        /*return make_pair(functions, instructions)*/
    }
    // Emits prologue + body for one function and registers it in `functions`.
    // NOTE(review): no implicit epilogue/ret is emitted after the body, so a
    // body that does not end in a return statement runs on into whatever
    // instruction follows - confirm whether that is intended.
    fun generate_function_definition(node: *ast_node): int {
        reset_reg()
        node_function_idx[node] = functions.size
        functions.add(bytecode_function(get_name(node), instructions.size))
        // NOTE(review): parameters are given frame offsets starting at 0, which
        // is also where the saved RBP lives - parameter passing looks unfinished.
        node->function.parameters.for_each(fun(p: *ast_node) {
            functions.last().var_to_frame_offset[p] = functions.last().frame_size
            functions.last().frame_size += type_size(p->identifier.type)
        })
        emit_add(0, 0, emit_imm(-register_size)) // these two lines push rbp onto the stack, which grows towards negative
        emit_str(0, 0, 1, operand_size::b64())   // rsp[0] <= rbp
        emit_add(1, 0, emit_imm(0))              // note that we start the frame size at register_size for this reason
        // emit_imm runs before emit_add, so push_frame_idx is the index of the imm
        var push_frame_idx = instructions.size
        emit_add(0, 0, emit_imm(0)) // this has to be fixed afterwards to be the -frame_size
        generate(node->function.body_statement)
        instructions[push_frame_idx].imm.val = -functions.last().frame_size
        return -1
    }
    // Reserves a frame slot for the declared variable and, if there is an
    // initialiser, stores its value into that slot.
    fun generate_declaration_statement(node: *ast_node): int {
        var identifier = node->declaration_statement.identifier
        var ident_type = identifier->identifier.type
        functions.last().frame_size += type_size(ident_type)
        functions.last().var_to_frame_offset[identifier] = functions.last().frame_size
        if (node->declaration_statement.expression) {
            // var_to_frame_offset holds a positive distance below RBP, so the
            // store must use the negated offset - the same address that
            // generate_identifier reads from (RBP - offset). Storing at
            // RBP + offset would overwrite the saved RBP / return address.
            emit_str(1, -functions.last().var_to_frame_offset[identifier], generate(node->declaration_statement.expression), size_to_operand_size(type_size(get_ast_type(identifier))))
        }
        return -1
    }
    // Evaluates the right hand side, then the address of the left hand side,
    // and stores the value through that address.
    fun generate_assignment_statement(node: *ast_node): int {
        /*var to = generate(node->assignment_statement.to, true)*/
        var from = generate(node->assignment_statement.from)
        var to = generate(node->assignment_statement.to, true)
        emit_str(to, 0, from, size_to_operand_size(type_size(get_ast_type(node->assignment_statement.to))))
        return -1
    }
    // Emits condition, a jz over the then-part, and (if present) a jmp over
    // the else-part. Jump offsets are relative to the jumping instruction and
    // are patched once the target index is known.
    fun generate_if_statement(node: *ast_node): int {
        var cond_reg = generate(node->if_statement.condition)
        var jz_index = instructions.size
        emit_jz(cond_reg,0)
        generate(node->if_statement.then_part)
        if (node->if_statement.else_part) {
            var jmp_index = instructions.size
            emit_jmp(0)
            instructions[jz_index].jz.offset = instructions.size - jz_index
            generate(node->if_statement.else_part)
            instructions[jmp_index].jmp.offset = instructions.size - jmp_index
        } else {
            instructions[jz_index].jz.offset = instructions.size - jz_index
        }
        return -1
    }
    // NOTE(review): emits the condition and body once each, with no branches -
    // looping is not implemented here yet.
    fun generate_while_loop(node: *ast_node): int {
        generate(node->while_loop.condition)
        generate(node->while_loop.statement)
        return -1
    }
    // NOTE(review): emits init, condition, update and body once each, in that
    // order, with no branches - looping is not implemented here yet.
    fun generate_for_loop(node: *ast_node): int {
        if (node->for_loop.init)
            generate(node->for_loop.init)
        if (node->for_loop.condition)
            generate(node->for_loop.condition)
        if (node->for_loop.update)
            generate(node->for_loop.update)
        generate(node->for_loop.body)
        return -1
    }
    // As an lvalue: returns a register holding the variable's address (RBP - offset).
    // As an rvalue: returns a register holding the value loaded from that address.
    fun generate_identifier(node: *ast_node, lvalue: bool): int {
        if (lvalue) {
            return emit_add(1, emit_imm(-functions.last().var_to_frame_offset[node]))
        } else {
            return emit_ldr(1, -functions.last().var_to_frame_offset[node], size_to_operand_size(type_size(get_ast_type(node))))
        }
    }
    // Places the return value (if any) in r2, tears down the frame, and returns.
    fun generate_return_statement(node: *ast_node): int {
        if (node->return_statement.return_value) {
            /*emit_str(1, register_size, generate(node->return_statement.return_value))*/
            emit_add(2, emit_imm(0), generate(node->return_statement.return_value))
        }
        // The epilogue is needed whether or not a value is returned: ret reads
        // the return address from RSP, so RSP has to be moved back above the
        // saved RBP slot first (previously a value-less return skipped this and
        // ret would have read a local slot as its return address).
        emit_add(0, 1, emit_imm(register_size)) // rsp = rbp + register_size
        emit_ldr(1, 1, 0, operand_size::b64())  // rbp = saved rbp
        emit_ret()
        return -1
    }
    // break / continue are placeholders for now: both emit a nop.
    fun generate_branching_statement(node: *ast_node): int {
        match(node->branching_statement.b_type) {
            branching_type::break_stmt()    instructions.add(byte_inst::nop())
            branching_type::continue_stmt() instructions.add(byte_inst::nop())
        }
        return -1
    }
    // Casts emit no code of their own; the operand's register is passed through.
    fun generate_cast(node: *ast_node): int {
        return generate(node->cast.value)
    }
    // Literal: booleans become 1 for "true" and 0 otherwise, everything else is parsed as a number.
    fun generate_value(node: *ast_node): int {
        if (node->value.value_type->is_bool())
            return emit_imm((node->value.string_value == "true") cast int)
        else
            return emit_imm(string_to_num<int>(node->value.string_value))
    }
    fun generate_code_block(node: *ast_node): int {
        node->code_block.children.for_each(fun(child: *ast_node) {
            // registers aren't used between statements (only stack reg)
            reset_reg()
            generate(child)
        })
        return -1
    }
    // this generates the function as a value, not the actual function
    // (the -2 placeholder is replaced with the real address by generate_bytecode)
    fun generate_function(node: *ast_node): int {
        fixup_function_addresses.add(make_pair(instructions.size,node))
        return emit_imm(-2)
    }
    // Evaluates each argument, then calls through a register holding the callee address.
    // NOTE(review): the argument registers are discarded - nothing is pushed for the callee.
    fun generate_function_call(node: *ast_node, lvalue: bool): int {
        node->function_call.parameters.for_each(fun(child: *ast_node) generate(child);)
        return emit_call(generate_function(node->function_call.func))
    }
    // Intrinsics are not implemented yet; a nop is emitted in their place.
    fun generate_compiler_intrinsic(node: *ast_node): int {
        instructions.add(byte_inst::nop())
        return -1
    }
    // Generates node as an rvalue.
    fun generate(node: *ast_node): int
        return generate(node, false)
    // Dispatches on node kind. Returns the register holding the result, or -1
    // for statements that produce no value.
    fun generate(node: *ast_node, lvalue: bool): int {
        match (*node) {
            ast_node::declaration_statement(backing) return generate_declaration_statement(node)
            ast_node::assignment_statement(backing) return generate_assignment_statement(node)
            ast_node::if_statement(backing) return generate_if_statement(node)
            ast_node::while_loop(backing) return generate_while_loop(node)
            ast_node::for_loop(backing) return generate_for_loop(node)
            ast_node::function(backing) return generate_function(node)
            ast_node::function_call(backing) return generate_function_call(node, lvalue)
            ast_node::compiler_intrinsic(backing) return generate_compiler_intrinsic(node)
            ast_node::code_block(backing) return generate_code_block(node)
            ast_node::return_statement(backing) return generate_return_statement(node)
            ast_node::branching_statement(backing) return generate_branching_statement(node)
            ast_node::cast(backing) return generate_cast(node)
            ast_node::value(backing) return generate_value(node)
            ast_node::identifier(backing) return generate_identifier(node, lvalue)
        }
        error("Bad node")
    }
    // Unique, cached name for node: the AST name plus a fresh id, except that
    // a function called "main" is always named exactly "main".
    fun get_name(node: *ast_node): string {
        var maybe_it = ast_name_map.get_ptr_or_null(node);
        if (maybe_it)
            return *maybe_it
        var result = get_ast_name(node) + get_id()
        if (is_function(node) && node->function.name == "main")
            result = "main"
        ast_name_map.set(node, result)
        return result
    }
    // The emit_* helpers append one instruction and return the register that
    // holds its result (or -1 when there is none).
    fun emit_imm(value: ulong): int { return emit_imm((value) cast int); }
    fun emit_imm(value: int): int {
        var i: imm
        i.reg = get_reg()
        i.val = value
        instructions.add(byte_inst::imm(i))
        return i.reg
    }
    fun emit_add(a: int, b: int): int { return emit_add(get_reg(), a, b) }
    fun emit_add(dest: int, a: int, b: int): int {
        var i: add
        i.to_reg = dest
        i.a = a
        i.b = b
        instructions.add(byte_inst::add(i))
        return i.to_reg
    }
    fun emit_ldr(reg: int, offset: int, size: operand_size): int { return emit_ldr(get_reg(), reg, offset, size); }
    fun emit_ldr(dest: int, reg: int, offset: int, size: operand_size): int {
        var l: ldr
        l.to_reg = dest
        l.from_reg = reg
        l.offset = offset
        l.size = size
        instructions.add(byte_inst::ldr(l))
        return l.to_reg
    }
    fun emit_str(to_reg: int, offset: int, from_reg: int, size: operand_size): int {
        var s: str
        s.to_reg = to_reg
        s.offset = offset
        s.from_reg = from_reg
        s.size = size
        instructions.add(byte_inst::str(s))
        return -1
    }
    fun emit_jmp(offset: int): int {
        var j: jmp
        j.offset = offset
        instructions.add(byte_inst::jmp(j))
        return -1
    }
    fun emit_jz(reg: int, offset: int): int {
        var j: jz
        j.reg = reg
        j.offset = offset
        instructions.add(byte_inst::jz(j))
        return -1
    }
    fun emit_ret(): int {
        instructions.add(byte_inst::ret())
        return -1
    }
    // Returns 2 because the callee leaves its result in r2.
    fun emit_call(reg: int): int {
        var c: call
        c.reg = reg
        instructions.add(byte_inst::call(c))
        return 2
    }

    // Stack ABI
    // it's system v x64, but all params passed on stack
    //
    // Interprets the generated code starting at "main" and returns main's
    // return value (r2) when a ret pops a return address of 0. Prints a trace
    // of every instruction executed.
    // NOTE(review): the interpreter stack allocated here is never freed.
    fun evaluate(): int {
        println("evaling main")
        println(bytecode_to_string(functions, instructions))
        var main_entry = functions.find_first_satisfying(fun(block: bytecode_function): bool return block.name == "main";)
        // reg_max is only refreshed inside reset_reg, so registers handed out by
        // the last generated statement are not in it yet; fold them in before
        // sizing the register file so those registers are in bounds.
        reset_reg()
        var registers.construct(reg_max): vector<long>
        registers.size = reg_max
        registers[0] = -register_size // with the stack being zeroed out, this makes it a return address of 0
        registers[1] = 0xdeadbeefcafebabe
        var stack_size = 8 * 1024 * 1024
        // `stack` points one past the end of the allocation; the stack grows downwards from it
        var stack = new<uchar>(stack_size) + stack_size
        for (var i = 0; i < stack_size; i++;)
            stack[-i + -1] = 0
        for (var i = main_entry.instruction_start; i < instructions.size; i++;) {
            println(string("evaling: ") + i + ": " + to_string(instructions[i]))
            match(instructions[i]) {
                byte_inst::nop() {}
                byte_inst::imm(im) registers[im.reg] = im.val
                byte_inst::add(a) registers[a.to_reg] = registers[a.a] + registers[a.b]
                byte_inst::ldr(l) match (l.size) {
                    operand_size::b8()  registers[l.to_reg] = *(stack + registers[l.from_reg] + l.offset) cast *char
                    operand_size::b16() registers[l.to_reg] = *(stack + registers[l.from_reg] + l.offset) cast *short
                    operand_size::b32() registers[l.to_reg] = *(stack + registers[l.from_reg] + l.offset) cast *int
                    operand_size::b64() registers[l.to_reg] = *(stack + registers[l.from_reg] + l.offset) cast *long
                }
                byte_inst::str(s) match (s.size) {
                    operand_size::b8()  *(stack + registers[s.to_reg] + s.offset) cast *uchar = registers[s.from_reg]
                    operand_size::b16() *(stack + registers[s.to_reg] + s.offset) cast *ushort = registers[s.from_reg]
                    operand_size::b32() *(stack + registers[s.to_reg] + s.offset) cast *uint = registers[s.from_reg]
                    operand_size::b64() *(stack + registers[s.to_reg] + s.offset) cast *ulong = registers[s.from_reg]
                }
                byte_inst::jmp(j) i += j.offset - 1 // to counteract pc inc
                byte_inst::jz(j) if (registers[j.reg] == 0)
                                    i += j.offset - 1 // to counteract pc inc
                byte_inst::call(c) {
                    /*registers[0] -= register_size*/
                    // push the index of the instruction after the call, then jump
                    registers[0] = registers[0] - register_size
                    *(stack + registers[0]) cast *long = i + 1
                    i = registers[c.reg] - 1
                }
                byte_inst::ret() {
                    // pop the return address
                    var pc = *(stack + registers[0]) cast *long
                    /*registers[0] += register_size*/
                    registers[0] = registers[0] + register_size
                    print("returning! return value is\n\t")
                    var value = registers[2]
                    println(value)
                    println("first part of memory is")
                    // dump the top 64 bytes of the stack, 8 per row; loop names are
                    // distinct from the outer pc index `i` that is assigned below
                    for (var row = 0; row < 8*8; row+=8;) {
                        print(string("-") + row + string(": "))
                        for (var col = 0; col < 8; col++;) {
                            if (col == 4)
                                print("  ")
                            print(*(stack - (row+col)*#sizeof<uchar> - 1) cast *uchar)
                            print(" ")
                        }
                        println()
                    }
                    println("Done")
                    if (pc == 0) {
                        // a return address of 0 marks the return from main
                        return value
                    } else {
                        i = pc - 1
                        println(string("returning to ") + pc)
                    }
                }
            }
        }
        return -1
    }
}