import io:*
import os:*
import mem:*
import map:*
import hash_map:*
import stack:*
import str:*
import util:*
import tree:*
import symbol:*
import ast_nodes:*
// for error with syntax tree
import pass_common:*

// Size of a value of type t: the `first` component of type_size_and_alignment(t).
fun type_size(t: *type): ulong
    return type_size_and_alignment(t).first

// Returns a pair of (size, alignment) for type t.
//  - any type with non-zero indirection is pointer sized and pointer aligned
//  - objects are laid out member by member, padding each member up to its own
//    alignment and padding the end up to the largest member alignment
//  - unions take the size of their largest member (then the same end padding)
//  - function values are pointer sized
// Calls error() if t->base matches none of the handled cases.
// NOTE(review): the return type reads `pair` and the scalar cases read a bare
// `#sizeof` - the generic arguments look like they were lost in extraction; confirm
// against the canonical file.
fun type_size_and_alignment(t: *type): pair {
    if (t->indirection)
        return make_pair(#sizeof<*void>, #sizeof<*void>)
    match (t->base) {
        base_type::object() {
            var total_size: ulong = 0
            var max_size: ulong = 0
            var max_align: ulong = 0
            t->type_def->type_def.variables.for_each(fun(i: *ast_node) {
                var individual = type_size_and_alignment(i->declaration_statement.identifier->identifier.type)
                max_size = max(max_size, individual.first)
                max_align = max(max_align, individual.second)
                // increase total size by the individual size + padding to get alignment
                var padding = 0
                if (individual.second != 0)
                    padding = (individual.second - (total_size % individual.second)) % individual.second
                total_size += individual.first + padding
            })
            // the running total computed above is discarded for unions
            if (t->type_def->type_def.is_union)
                total_size = max_size
            // pad the end so that consecutive objects in memory are aligned
            if (max_align != 0)
                total_size += (max_align - (total_size % max_align)) % max_align
            return make_pair(total_size, max_align)
        }
        base_type::function()           return make_pair(#sizeof<*void>, #sizeof<*void>)
        base_type::boolean()            return make_pair(#sizeof, #sizeof)
        base_type::character()          return make_pair(#sizeof, #sizeof)
        base_type::ucharacter()         return make_pair(#sizeof, #sizeof)
        base_type::short_int()          return make_pair(#sizeof, #sizeof)
        base_type::ushort_int()         return make_pair(#sizeof, #sizeof)
        base_type::integer()            return make_pair(#sizeof, #sizeof)
        base_type::uinteger()           return make_pair(#sizeof, #sizeof)
        base_type::long_int()           return make_pair(#sizeof, #sizeof)
        base_type::ulong_int()          return make_pair(#sizeof, #sizeof)
        base_type::floating()           return make_pair(#sizeof, #sizeof)
        base_type::double_precision()   return make_pair(#sizeof, #sizeof)
    }
    error(str("Invalid type for type_size: ") + t->to_string())
}

// Offset of member `ident` from the start of an object of type struct_type.
// Unions always yield 0. Otherwise the members are walked in declaration order,
// aligning the running offset for each one, and the walk stops at the member whose
// identifier node is `ident`.
// If `ident` is never matched, the loop simply finishes and the offset just past the
// last member is returned - no error is raised.
fun offset_into_struct(struct_type: *type, ident: *ast_node): ulong {
    var offset: ulong = 0
    if (struct_type->type_def->type_def.is_union)
        return offset
    for (var i = 0; i < struct_type->type_def->type_def.variables.size; i++;) {
        var size_and_align = type_size_and_alignment(struct_type->type_def->type_def.variables[i]->declaration_statement.identifier->identifier.type)
        var align = size_and_align.second
        // round the running offset up to this member's alignment before testing for a match
        if (align != 0)
            offset += (align - (offset % align)) % align
        if (struct_type->type_def->type_def.variables[i]->declaration_statement.identifier == ident)
            break
        else
            offset += size_and_align.first
    }
    return offset
}

// size of a pointer; used throughout as the width of one stack slot / saved register
var register_size = #sizeof<*void>

// Negative sentinel "addresses", one per supported extern function. The fixup pass in
// generate_bytecode writes these into the immediate that loads a call target.
var malloc_addr = -1
var free_addr = -2
var memmove_addr = -3
var printf_addr = -4
var fprintf_addr = -5
var fflush_addr = -6
var fgets_addr = -7
var fopen_addr = -8
var fclose_addr = -9
var ftell_addr = -10
var fseek_addr = -11
var fread_addr = -12
var fwrite_addr = -13
var system_addr = -14
var exit_addr = -15
var popen_addr = -16
var pclose_addr = -17
var snprintf_addr = -18

// Width of a load/store operand (the names give the width in bits).
adt operand_size {
    b8,
    b16,
    b32,
    b64
}

// Maps a size of 1, 2, 4 or 8 to the matching operand_size; any other size is an error.
fun size_to_operand_size(size: ulong): operand_size {
    if (size == 1) return operand_size::b8()
    if (size == 2) return operand_size::b16()
    if (size == 4) return operand_size::b32()
    if (size == 8) return operand_size::b64()
    error(str("invalid operand size ") + size)
}

// The bytecode instruction set. Each case carries one of the payload objs declared
// below; to_string(b: byte_inst) shows how every instruction is rendered.
adt byte_inst {
    nop,
    imm: imm,
    add: reg2,
    addi: reg1i,
    smul: reg2,
    umul: reg2,
    sdiv: reg2,
    udiv: reg2,
    mod: reg2,
    shl: reg2,
    shr: reg2,
    sar: reg2,
    and: reg2,
    or: reg2,
    xor: reg2,
    not: reg1,
    gz: reg1,
    lz: reg1,
    ez: reg1, // also logical not
    ldr: reg1is,
    str: reg1is,
    jmp: long,
    jz: test,
    call: int,
    ret
}
// to_reg = val
obj imm {
    var to_reg: int
    var val: long
}
// to_reg = op(a)
obj reg1 {
    var to_reg: int
    var a: int
}
// to_reg = a op immediate bi
obj reg1i {
    var to_reg: int
    var a: int
    var bi:long
}
// memory access at base_reg + offset of the given width; `reg` is the register
// loaded into (ldr) or stored from (str)
obj reg1is {
    var reg: int
    var base_reg: int
    var offset: long
    var size: operand_size
}
// to_reg = a op b
obj reg2 {
    var to_reg: int
    var a: int
    var b: int
}
// conditional jump: when `reg` is zero, `offset` is added to pc
obj test {
    var reg: int
    var offset: long
}

// Renders an operand width as its bit count.
fun to_string(s: operand_size): str {
    match (s) {
        operand_size::b8()  return str("8")
        operand_size::b16() return str("16")
        operand_size::b32() return str("32")
        operand_size::b64() return str("64")
    }
    return str("missed operand size")
}

// Renders one instruction as human readable text (used by the bytecode dump).
fun to_string(b: byte_inst): str {
    match (b) {
        byte_inst::nop()    return str("nop")
        byte_inst::imm(i)   return str("r") + i.to_reg + " = imm " + i.val
        byte_inst::add(a)   return str("r") + a.to_reg + " = r" + a.a + " + r" + a.b
        byte_inst::addi(a)  return str("r") + a.to_reg + " = r" + a.a + " + " + a.bi
        byte_inst::smul(a)  return str("r") + a.to_reg + " = r" + a.a + " * r" + a.b
        byte_inst::umul(a)  return str("r") + a.to_reg + " = r" + a.a + " u* r"+ a.b
        byte_inst::sdiv(a)  return str("r") + a.to_reg + " = r" + a.a + " / r" + a.b
        byte_inst::udiv(a)  return str("r") + a.to_reg + " = r" + a.a + " u/ r"+ a.b
        byte_inst::mod(a)   return str("r") + a.to_reg + " = r" + a.a + " % r" + a.b
        byte_inst::and(a)   return str("r") + a.to_reg + " = r" + a.a + " & r" + a.b
        byte_inst::shl(a)   return str("r") + a.to_reg + " = r" + a.a + " u<< r" + a.b
        byte_inst::shr(a)   return str("r") + a.to_reg + " = r" + a.a + " u>> r" + a.b
        byte_inst::sar(a)   return str("r") + a.to_reg + " = r" + a.a + " s>> r" + a.b
        byte_inst::or(a)    return str("r") + a.to_reg + " = r" + a.a + " | r" + a.b
        byte_inst::xor(a)   return str("r") + a.to_reg + " = r" + a.a + " ^ r" + a.b
        byte_inst::not(a)   return str("r") + a.to_reg + " = ~r" + a.a
        byte_inst::gz(a)    return str("r") + a.to_reg + " = r" + a.a + " > 0"
        byte_inst::lz(a)    return str("r") + a.to_reg + " = r" + a.a + " < 0"
        byte_inst::ez(a)    return str("r") + a.to_reg + " = r" + a.a + " == 0"
        byte_inst::ldr(l)   return str("r") + l.reg + " = ldr" + to_string(l.size) + " r" + l.base_reg + " (" + l.offset + ")"
        byte_inst::str(s)   return "str" + to_string(s.size) + " (r" + s.base_reg + "(" + s.offset + ") <= r" + s.reg + ")"
        byte_inst::jmp(j)   return str("jmp(pc += ") + j + ")"
        byte_inst::jz(j)    return str("jmp(r") + j.reg + " == 0, pc += " + j.offset + ")"
// (continuation: final arms and fallthrough of to_string(b: byte_inst))
        byte_inst::call(c)  return str("call pc = r") + c
        byte_inst::ret()    return str("ret")
    }
    return str("Missed byte_inst case in to_string")
}

// Renders every function in `functions` (each through bytecode_function::to_string,
// which indexes into `instructions`) and joins the results with newlines.
fun bytecode_to_string(functions: ref vec, instructions: ref vec): str {
    return str("\n").join(functions.map(fun(bb: ref bytecode_function): str return bb.to_string(instructions);))
}

// Builds a bytecode_function called `name` whose first instruction is at index `start`.
fun bytecode_function(name: ref str, start: int): bytecode_function {
    var to_ret.construct(name, start): bytecode_function
    return to_ret
}

// Per-function record kept by the generator: the half-open range
// [instruction_start, instruction_end) of its instructions, the frame offset assigned
// to each variable, and the number of bytes reserved for its locals.
obj bytecode_function (Object) {
    var name: str
    var instruction_start: int
    var instruction_end: int
    var var_to_frame_offset: map<*ast_node, int>
    var frame_size: int

    // Empty record: no name, no instructions, empty frame.
    fun construct(): *bytecode_function {
        instruction_start = 0
        instruction_end = 0
        name.construct()
        var_to_frame_offset.construct()
        frame_size = 0
        return this
    }
    // Record named `name_in` starting at `instruction_start_in`; the end index stays 0
    // until generate_bytecode fills it in.
    fun construct(name_in: ref str, instruction_start_in: int): *bytecode_function {
        instruction_start = instruction_start_in
        instruction_end = 0
        name.copy_construct(&name_in)
        var_to_frame_offset.construct()
        frame_size = 0
        return this
    }
    fun copy_construct(old: *bytecode_function) {
        instruction_start = old->instruction_start
        instruction_end = old->instruction_end
        name.copy_construct(&old->name)
        var_to_frame_offset.copy_construct(&old->var_to_frame_offset)
        frame_size = old->frame_size
    }
    fun operator=(other: ref bytecode_function) {
        destruct()
        copy_construct(&other)
    }
    fun destruct() {
        name.destruct()
        var_to_frame_offset.destruct()
    }
    // Human readable dump: a header with the frame size, the frame layout (one row
    // per variable as an offset from RBP) and then each instruction of this function
    // prefixed with its index.
    fun to_string(instructions: ref vec): str {
        var res = name + "(frame size " + frame_size + "):\n"
        res += "\t frame layout\n"
        // the prologue stores the caller's RBP at the address the new RBP points to, so
        // it is listed in the same "RBP + offset" form as the variables below
        // (this row previously read "RPB = 0")
        res += "\t\tsaved RBP : RBP + 0\n"
        var_to_frame_offset.for_each(fun(n: *ast_node, o: int) {
            res += "\t\t" + n->identifier.name + ": RBP + " + o + "\n"
        })
        res += "\n\t bytecode\n"
        for (var i = instruction_start; i < instruction_end; i++;)
            res += str("\t\t") + i + str(": ") + to_string(instructions[i]) + "\n"
        return res
    }
}

// Walks the AST and emits byte_inst instructions for every function it finds.
obj bytecode_generator (Object) {
    // next free virtual register and the highest value the counter has been seen at
    var reg_counter: int
    var reg_max: int
    // counter used to make generated names unique
    var id_counter: int
    var ast_name_map: hash_map<*ast_node, str>
// (continuation of obj bytecode_generator: remaining fields, lifecycle and register helpers)
    // every generated function, and the index of each function node in that list
    var functions: vec
    var node_function_idx: map<*ast_node, int>
    // the single flat instruction stream shared by all functions
    var instructions: vec
    // immediates / jumps that still need their real target written in
    var fixup_function_addresses: vec>
    var fixup_break_addresses: stack>
    var fixup_continue_addresses: stack>

    // Fresh generator: all containers empty, ids from 0, registers from 3.
    fun construct(): *bytecode_generator {
        reg_counter = 3
        reg_max = 3
        id_counter = 0
        ast_name_map.construct()
        functions.construct()
        node_function_idx.construct()
        instructions.construct()
        fixup_function_addresses.construct()
        fixup_break_addresses.construct()
        fixup_continue_addresses.construct()
        return this
    }

    // Member-wise copy of `old`.
    fun copy_construct(old: *bytecode_generator) {
        ast_name_map.copy_construct(&old->ast_name_map)
        functions.copy_construct(&old->functions)
        node_function_idx.copy_construct(&old->node_function_idx)
        instructions.copy_construct(&old->instructions)
        fixup_function_addresses.copy_construct(&old->fixup_function_addresses)
        fixup_break_addresses.copy_construct(&old->fixup_break_addresses)
        fixup_continue_addresses.copy_construct(&old->fixup_continue_addresses)
        id_counter = old->id_counter
        reg_counter = old->reg_counter
        reg_max = old->reg_max
    }

    // Assignment is destroy-then-copy.
    fun operator=(other: ref bytecode_generator) {
        destruct()
        copy_construct(&other)
    }

    // Releases every owned container.
    fun destruct() {
        fixup_continue_addresses.destruct()
        fixup_break_addresses.destruct()
        fixup_function_addresses.destruct()
        instructions.destruct()
        node_function_idx.destruct()
        functions.destruct()
        ast_name_map.destruct()
    }

    // Returns the current id as a string and advances the counter.
    fun get_id(): str {
        var current_id = id_counter
        id_counter += 1
        return to_string(current_id)
    }
    // Hands out the next register number.
    fun get_reg(): int {
        var allocated = reg_counter
        reg_counter += 1
        return allocated
    }
    // The register number get_reg() would hand out next, without taking it.
    fun peek_reg(): int {
        return reg_counter
    }
    // Rewinds allocation to the first general register.
    fun reset_reg() {
        reset_reg(3)
    }
    // Rewinds allocation to `to`, first recording the high-water mark in reg_max.
    fun reset_reg(to: int) {
        if (reg_max < reg_counter)
            reg_max = reg_counter
        reg_counter = to
    }

    /*fun generate_bytecode(name_ast_map: map,*ast_node>>): pair, vec> {*/
    fun generate_bytecode(name_ast_map: map,*ast_node>>) {
        // iterate through asts
        name_ast_map.for_each(fun(name: str, tree_pair: pair<*tree,*ast_node>) {
            // iterate through children for each ast
            tree_pair.second->translation_unit.lambdas.for_each(fun(lambda_def: *ast_node) {
                generate_function_definition(lambda_def)
            })
// (continuation of generate_bytecode, inside the per-translation-unit lambda)
            // Top level children: emit code for declarations, intrinsics and functions,
            // plus every instantiation of a template and every method of a type
            // (including instantiations of templated methods).
            tree_pair.second->translation_unit.children.for_each(fun(child: *ast_node) {
                match (*child) {
                    ast_node::declaration_statement(backing) generate_declaration_statement(child)
                    ast_node::compiler_intrinsic(backing) generate_compiler_intrinsic(child)
                    ast_node::function(backing) generate_function_definition(child)
                    ast_node::template(backing) {
                        backing.instantiated.for_each(fun(node: *ast_node) {
                            match (*node) {
                                ast_node::function(backing) generate_function_definition(node)
                                ast_node::type_def(backing) {
                                    backing.methods.for_each(fun(method: *ast_node) {
                                        if (is_template(method))
                                            method->template.instantiated.for_each(fun(m: *ast_node) generate_function_definition(m);)
                                        else
                                            generate_function_definition(method)
                                    })
                                }
                            }
                        })
                    }
                    ast_node::type_def(backing) {
                        backing.methods.for_each(fun(method: *ast_node) {
                            if (is_template(method))
                                method->template.instantiated.for_each(fun(m: *ast_node) generate_function_definition(m);)
                            else
                                generate_function_definition(method)
                        })
                    }
                }
            })
        })
        // Every function-as-value immediate was emitted with a placeholder; now that all
        // functions exist, write the real target into each one. Extern functions get
        // their negative sentinel, everything else the index of its first instruction.
        fixup_function_addresses.for_each(fun(p: pair) {
            if (p.second->function.is_extern) {
                if (p.second->function.name == "malloc")
                    instructions[p.first].imm.val = malloc_addr
                else if (p.second->function.name == "free")
                    instructions[p.first].imm.val = free_addr
                else if (p.second->function.name == "memmove")
                    instructions[p.first].imm.val = memmove_addr
                else if (p.second->function.name == "printf")
                    instructions[p.first].imm.val = printf_addr
                else if (p.second->function.name == "fprintf")
                    instructions[p.first].imm.val = fprintf_addr
                else if (p.second->function.name == "fflush")
                    instructions[p.first].imm.val = fflush_addr
                else if (p.second->function.name == "fgets")
                    instructions[p.first].imm.val = fgets_addr
                else if (p.second->function.name == "fopen")
                    instructions[p.first].imm.val = fopen_addr
                else if (p.second->function.name == "fclose")
                    instructions[p.first].imm.val = fclose_addr
                else if (p.second->function.name == "ftell")
                    instructions[p.first].imm.val = ftell_addr
                else if (p.second->function.name == "fseek")
                    instructions[p.first].imm.val = fseek_addr
                else if (p.second->function.name == "fread")
                    instructions[p.first].imm.val = fread_addr
                else if (p.second->function.name == "fwrite")
                    instructions[p.first].imm.val = fwrite_addr
                else if (p.second->function.name == "system")
                    instructions[p.first].imm.val = system_addr
                else if (p.second->function.name == "exit")
                    instructions[p.first].imm.val = exit_addr
                else if (p.second->function.name == "popen")
                    instructions[p.first].imm.val = popen_addr
                else if (p.second->function.name == "pclose")
                    instructions[p.first].imm.val = pclose_addr
                else if (p.second->function.name == "snprintf")
                    instructions[p.first].imm.val = snprintf_addr
                else
                    error("bad extern function used: " + p.second->function.name)
            } else {
                instructions[p.first].imm.val = functions[node_function_idx[p.second]].instruction_start
            }
        })
        // Functions are laid out back to back, so each one ends where the next begins
        // and the last one ends at the end of the instruction stream.
        for (var i = 0; i < functions.size - 1; i++;)
            functions[i].instruction_end = functions[i+1].instruction_start
        // Nothing to close off when no function was generated at all (for example a
        // program made only of extern declarations); last() on the empty list would
        // index out of bounds.
        if (functions.size > 0)
            functions.last().instruction_end = instructions.size
        /*return make_pair(functions, instructions)*/
    }

    // Emits one function: registers it in `functions`, assigns frame offsets to its
    // parameters, emits the prologue, the body and a trailing return.
    // Extern functions have no body to emit and are skipped (with a log line).
    fun generate_function_definition(node: *ast_node) {
        if (node->function.is_extern) {
            println("Skipping extern function " + node->function.name)
            return
        }
        reset_reg()
        node_function_idx[node] = functions.size
        functions.add(bytecode_function(get_name(node), instructions.size))
        var parameter_offset = (register_size*2) cast int // have to pass saved RBP and return address
        var return_type = get_ast_type(node)->return_type
        // if we're returning an object, our caller passes the address
        // we should save our return value as the first parameter
        if (return_type->is_object() && return_type->indirection == 0) {
            var ptr_type = return_type->clone_with_increased_indirection()
            functions.last().var_to_frame_offset[_ident("bytecode_struct_return_temp_address", ptr_type, null())] = parameter_offset
            parameter_offset += type_size(ptr_type)
        }
        // parameters sit above the saved RBP / return address, in declaration order
        node->function.parameters.for_each(fun(p: *ast_node) {
            functions.last().var_to_frame_offset[p] = parameter_offset
// (continuation of generate_function_definition, inside the per-parameter lambda)
            parameter_offset += type_size(p->identifier.type)
        })
        // Prologue. Register 0 is used as the stack pointer and register 1 as the frame
        // pointer (see the "rsp"/"rbp" notes below).
        emit_addi(0, 0, -register_size) // these two lines push rbp onto the stack, which grows towards negative
        emit_str(0, 0, 1, operand_size::b64()) // rsp[0] <= rbp
        emit_addi(1, 0, 0) // note that we start the frame size at register_size for this reason
        // remember where the frame allocation instruction is: the frame size is only
        // known once the whole body has been generated
        var push_frame_idx = instructions.size
        emit_addi(0, 0, 0) // this has to be fixed afterwards to be the -frame_size
        generate(node->function.body_statement)
        // generate a return just in case the function itself doesn't have one
        generate_return_statement(_return(null()))
        instructions[push_frame_idx].addi.bi = -functions.last().frame_size
    }

    // Reserves frame space for a newly declared variable in the function currently
    // being generated and, when the declaration has an initialiser, stores its value
    // there (member-wise copy for objects held by value, a single store otherwise).
    // Always returns -1: a statement produces no value register.
    fun generate_declaration_statement(node: *ast_node): int {
        var identifier = node->declaration_statement.identifier
        var ident_type = identifier->identifier.type
        // locals live below the frame pointer, so grow the frame first and use the new
        // (negated) size as this variable's offset
        functions.last().frame_size += type_size(ident_type)
        functions.last().var_to_frame_offset[identifier] = -functions.last().frame_size
        if (node->declaration_statement.expression) {
            // STRUCT HERE
            if (ident_type->is_object() && ident_type->indirection == 0)
                emit_struct_copy(1, functions.last().var_to_frame_offset[identifier], generate(node->declaration_statement.expression), 0, get_ast_type(identifier))
            else
                emit_str(1, functions.last().var_to_frame_offset[identifier], generate(node->declaration_statement.expression), size_to_operand_size(type_size(get_ast_type(identifier))))
            reset_reg()
        }
        return -1
    }

    // Evaluates the right hand side, then the address of the left hand side (generated
    // with lvalue = true), and stores the value: member-wise copy for objects held by
    // value, a single sized store otherwise. Always returns -1.
    fun generate_assignment_statement(node: *ast_node): int {
        var from = generate(node->assignment_statement.from)
        var to = generate(node->assignment_statement.to, true)
        var var_type = get_ast_type(node->assignment_statement.to)
        // STRUCT HERE
        if (var_type->is_object() && var_type->indirection == 0)
            emit_struct_copy(to, 0, from, 0, var_type)
        else
            emit_str(to, 0, from, size_to_operand_size(type_size(var_type)))
        reset_reg()
        return -1
    }

    // Emits the condition followed by a jz whose offset is patched once the position
    // of the else part / end of the statement is known. Always returns -1.
    fun generate_if_statement(node: *ast_node): int {
        var cond_reg = generate(node->if_statement.condition)
        var jz_index = instructions.size
        emit_jz(cond_reg,0)
// (continuation of generate_if_statement, just after the placeholder jz was emitted)
        reset_reg()
        generate(node->if_statement.then_part)
        reset_reg()
        if (node->if_statement.else_part) {
            // the then part must hop over the else part; its jump is patched below
            var jmp_index = instructions.size
            emit_jmp(0)
            // a false condition lands on the first instruction of the else part
            instructions[jz_index].jz.offset = instructions.size - jz_index
            generate(node->if_statement.else_part)
            instructions[jmp_index].jmp = instructions.size - jmp_index
            reset_reg()
        } else {
            instructions[jz_index].jz.offset = instructions.size - jz_index
        }
        return -1
    }

    // Emits:   top: <condition>; jz end; <body>; jmp top; end:
    // All jump offsets are relative to the index of the jump instruction itself.
    // `break` jumps are patched to `end`; `continue` jumps are patched to `top` so that
    // the condition is evaluated again. Always returns -1.
    fun generate_while_loop(node: *ast_node): int {
        var top_index = instructions.size
        var cond_reg = generate(node->while_loop.condition)
        var jz_index = instructions.size
        emit_jz(cond_reg,0)
        reset_reg()
        // collect the break/continue jumps emitted by the body of *this* loop
        fixup_break_addresses.push(vec())
        fixup_continue_addresses.push(vec())
        generate(node->while_loop.statement)
        reset_reg()
        emit_jmp(top_index - instructions.size)
        instructions[jz_index].jz.offset = instructions.size - jz_index
        // continue re-tests the condition: jump back to the top of the loop.
        // (These were previously patched to the end of the loop, which made
        // `continue` behave exactly like `break`.)
        fixup_continue_addresses.pop().for_each(fun(i: int) {
            instructions[i].jmp = top_index - i
        })
        // break leaves the loop: jump to the first instruction after it
        fixup_break_addresses.pop().for_each(fun(i: int) {
            instructions[i].jmp = instructions.size - i
        })
        return -1
    }

    // Emits:   <init>; top: <condition>; jz end; <body>; cont: <update>; jmp top; end:
    // A missing condition is replaced by the constant 1. `continue` jumps are patched
    // to `cont` (so the update still runs), `break` jumps to `end`. Always returns -1.
    fun generate_for_loop(node: *ast_node): int {
        if (node->for_loop.init)
            generate(node->for_loop.init)
        reset_reg()
        var top_index = instructions.size
        var cond_reg = 0
        if (node->for_loop.condition)
            cond_reg = generate(node->for_loop.condition)
        else
            cond_reg = emit_imm(1)
        var jz_index = instructions.size
        emit_jz(cond_reg,0)
        reset_reg()
        fixup_break_addresses.push(vec())
        fixup_continue_addresses.push(vec())
        generate(node->for_loop.body)
        reset_reg()
        // patched before the update is generated, so instructions.size is `cont`
        fixup_continue_addresses.pop().for_each(fun(i: int) {
            instructions[i].jmp = instructions.size - i
        })
        if (node->for_loop.update) {
            generate(node->for_loop.update)
            reset_reg()
        }
        emit_jmp(top_index - instructions.size)
        instructions[jz_index].jz.offset = instructions.size - jz_index
        fixup_break_addresses.pop().for_each(fun(i: int) {
            instructions[i].jmp = instructions.size - i
        })
        return -1
    }

    // Emits a placeholder jump for break / continue and records its index with the
    // innermost enclosing loop, which patches the offset. Always returns -1.
    fun generate_branching_statement(node: *ast_node): int {
        match(node->branching_statement.b_type) {
// (continuation of generate_branching_statement: the arms of its match)
            // each arm records the index of the jump it is about to emit so that the
            // enclosing loop can patch the offset later
            branching_type::break_stmt() {
                fixup_break_addresses.top().add(instructions.size)
                emit_jmp(0)
            }
            branching_type::continue_stmt() {
                fixup_continue_addresses.top().add(instructions.size)
                emit_jmp(0)
            }
        }
        return -1
    }

    // Produces a register for an identifier.
    // With lvalue set, or for an object held by value, the register holds the
    // variable's address; otherwise it holds the value loaded from that address.
    // Extern identifiers are limited to stderr and stdin, whose host addresses are
    // embedded as immediates; everything else is addressed relative to register 1
    // using the offset recorded in the current function's frame layout.
    fun generate_identifier(node: *ast_node, lvalue: bool): int {
        // STRUCT HERE
        var ident_type = get_ast_type(node)
        if (node->identifier.is_extern) {
            var addr_reg = -1
            if (node->identifier.name == "stderr")
                addr_reg = emit_imm( (&stderr) cast long)
            else if (node->identifier.name == "stdin")
                addr_reg = emit_imm( (&stdin) cast long)
            else
                error("does not support external identifier " + node->identifier.name + " in bytecode")
            if (lvalue || (ident_type->is_object() && ident_type->indirection == 0))
                return addr_reg
            else
                return emit_ldr(addr_reg, 0, size_to_operand_size(type_size(ident_type)))
        } else {
            if (lvalue || (ident_type->is_object() && ident_type->indirection == 0))
                return emit_addi(1, functions.last().var_to_frame_offset[node])
            else
                return emit_ldr(1, functions.last().var_to_frame_offset[node], size_to_operand_size(type_size(ident_type)))
        }
    }

    // Emits the return value hand-off (if any) followed by the epilogue and `ret`.
    // Objects held by value are copied to the address the caller passed in the first
    // parameter slot; any other value is moved into register 2. Always returns -1.
    fun generate_return_statement(node: *ast_node): int {
        // STRUCT HERE
        if (node->return_statement.return_value) {
            var return_value_reg = generate(node->return_statement.return_value)
            var return_type = get_ast_type(node->return_statement.return_value)
            if (return_type->is_object() && return_type->indirection == 0) {
                // fix this up to be nicer
                // this is the hardcoded offset of the "first parameter" which
                // is the address into which we should save our resulting struct
                emit_struct_copy(emit_ldr(1, (register_size*2) cast int, size_to_operand_size(register_size)), 0, return_value_reg, 0, return_type)
            } else {
                emit_addi(2, return_value_reg, 0)
            }
        }
        // epilogue: r0 = r1 + register_size, then r1 is reloaded from the slot it points
        // at (where the prologue stored the previous r1)
        emit_addi(0, 1, register_size)
        emit_ldr(1, 1, 0, operand_size::b64())
        reset_reg()
        emit_ret()
        return -1
    }

    // Casts emit no instructions here: the value being cast is generated as is.
    fun generate_cast(node: *ast_node): int {
        return generate(node->cast.value)
    }

    // Loads a literal into a fresh register as an immediate:
    //  - booleans: the result of comparing the literal's text with "true"
    //  - character pointers: the address returned by toCharArray() on the literal's text
    //  - characters: the first character of the text
    //  - float / double: the bit pattern of a double (floats are first narrowed to float
    //    and widened back)
    //  - anything else: string_to_num of the text
    fun generate_value(node: *ast_node): int {
        if (node->value.value_type->is_bool())
            return emit_imm((node->value.string_value == "true") cast long)
        else if (node->value.value_type->base == base_type::character() && node->value.value_type->indirection == 1)
            return emit_imm((node->value.string_value.toCharArray()) cast long)
        else if (node->value.value_type->base == base_type::character() && node->value.value_type->indirection == 0)
            return emit_imm((node->value.string_value[0]) cast long)
        else if (node->value.value_type->base == base_type::floating() && node->value.value_type->indirection == 0) {
            var double_temp: double = ((string_to_double(node->value.string_value)) cast float)
            // reinterpret the double's storage as a long
            return emit_imm(*(&double_temp) cast *long)
        } else if (node->value.value_type->base == base_type::double_precision() && node->value.value_type->indirection == 0) {
            var double_temp: double = string_to_double(node->value.string_value)
            return emit_imm(*(&double_temp) cast *long)
        } else
            return emit_imm(string_to_num(node->value.string_value))
    }

    // Generates each child statement in order. Always returns -1.
    fun generate_code_block(node: *ast_node): int {
        node->code_block.children.for_each(fun(child: *ast_node) {
            // registers aren't used between statements (only stack reg)
            reset_reg()
            generate(child)
        })
        return -1
    }

    // this generates the function as a value, not the actual function
    // The immediate is emitted with the placeholder -2 and its index is queued in
    // fixup_function_addresses so generate_bytecode can write the real target.
    fun generate_function(node: *ast_node): int {
        if (!is_function(node))
            error("trying to generate a function but isn't a function")
        fixup_function_addresses.add(make_pair(instructions.size,node))
        return emit_imm(-2)
    }

    // Generates a call. Callees that are functions, are not extern and have no body
    // are treated as built-in operators and expanded inline by name; every other
    // callee goes through the stack based calling sequence in the else branch.
    // `lvalue` asks for an address instead of a loaded value where that distinction
    // exists (member access, indexing, dereference).
    fun generate_function_call(node: *ast_node, lvalue: bool): int {
        var func = node->function_call.func
        if (is_function(func) && !func->function.is_extern && func->function.body_statement == null()) {
            var name = func->function.name
            var parameter_nodes = node->function_call.parameters
            // generate with lvalue=true to make return a pointer
            if (name == "&" && parameter_nodes.size == 1)
                return generate(parameter_nodes[0], true)
            // increment / decrement: load, add +-1, store back
            // NOTE(review): the step is always 1, whatever the operand type; pointer
            // addition further down scales by the pointee size - confirm this is intended
            if (name == "++" || name == "++p" || name == "--" || name == "--p") {
                var op_size =
size_to_operand_size(type_size(get_ast_type(parameter_nodes[0])))
                var addr_reg = generate(parameter_nodes[0], true)
                var value_reg = emit_ldr(addr_reg, 0, op_size)
                var mod_reg = -1
                if (name[0] == '+')
                    mod_reg = emit_addi(value_reg, 1)
                else
                    mod_reg = emit_addi(value_reg, -1)
                emit_str(addr_reg, 0, mod_reg, op_size)
                // if preincrement, return modified value, else unmodified
                if (name.length() == 2)
                    return mod_reg
                else
                    return value_reg
            }
            // STRUCT HERE
            // member access: address of the object plus the member's offset
            if (name == "." || name == "->") {
                var base = generate(parameter_nodes[0])
                if (!is_identifier(parameter_nodes[1]))
                    error("trying to access not an identifier")
                var val_type = get_ast_type(parameter_nodes[1])
                var offset = offset_into_struct(get_ast_type(parameter_nodes[0]), parameter_nodes[1])
                var member_ptr = emit_addi(base, offset)
                if (lvalue || (val_type->is_object() && val_type->indirection == 0))
                    return member_ptr
                return emit_ldr(member_ptr, 0, size_to_operand_size(type_size(get_ast_type(parameter_nodes[1]))))
            }
            // every remaining operator works on already evaluated operands
            var params = parameter_nodes.map(fun(n: *ast_node): int return generate(n);)
            var lhs_type = get_ast_type(parameter_nodes[0])
            if (name == "+") {
                if (params.size == 1) {
                    // unary plus multiplies the operand by (x > 0) | ((x < 0) * -1)
                    // NOTE(review): if gz/lz yield 1/0 this is the absolute value - confirm intended
                    return emit_smul(params[0], emit_or(emit_gz(params[0]), emit_smul(emit_lz(params[0]), emit_imm(-1))))
                } else {
                    if (lhs_type->indirection == 0) {
                        return emit_add(params[0], params[1])
                    } else {
                        // pointer arithmetic: scale the right operand by the pointee size
                        return emit_add(params[0], emit_smul(params[1], emit_imm(type_size(lhs_type->clone_with_decreased_indirection()))))
                    }
                }
            } else if (name == "-") {
                // negation and subtraction are built from not + 1 (two's complement)
                if (params.size == 1) {
                    return emit_addi(emit_not(params[0]), 1)
                } else {
                    if (lhs_type->indirection == 0) {
                        return emit_add(params[0], emit_addi(emit_not(params[1]), 1))
                    } else {
                        return emit_add(params[0], emit_addi(emit_not(emit_smul(params[1], emit_imm(type_size(lhs_type->clone_with_decreased_indirection())))), 1))
                    }
                }
            } else if (name == "!") {
                return emit_ez(params[0])
            } else if (name == "[]" || (name == "*" && params.size == 1)) {
                // indexing and dereference share the pointee type / size computation
                var derefed_type = lhs_type->clone_with_decreased_indirection()
                var derefed_size = type_size(derefed_type)
                if (name == "[]") {
                    var offset_reg = params[1]
                    if (derefed_size != 1)
                        offset_reg = emit_smul(offset_reg, emit_imm(derefed_size))
                    var addr_reg = emit_add(params[0], offset_reg)
                    if (lvalue || (derefed_type->is_object() && derefed_type->indirection == 0))
                        return addr_reg
                    else
                        return emit_ldr(addr_reg, 0, size_to_operand_size(derefed_size))
                }
                if (name == "*") {
                    if (lvalue || (derefed_type->is_object() && derefed_type->indirection == 0))
                        return params[0]
                    else
                        return emit_ldr(params[0], 0, size_to_operand_size(derefed_size))
                }
            } else if (name == "==" || name == "<=" || name == ">=" || name == "!=" || name == "<" || name == ">") {
                // comparisons test the sign / zero-ness of (lhs - rhs)
                var diff = emit_add(params[0], emit_addi(emit_not(params[1]), 1))
                if (name == "==") return emit_ez(diff)
                if (name == "<=") return emit_or(emit_ez(diff), emit_lz(diff))
                if (name == ">=") return emit_or(emit_ez(diff), emit_gz(diff))
                if (name == "!=") return emit_ez(emit_ez(diff))
                if (name == "<") return emit_lz(diff)
                if (name == ">") return emit_gz(diff)
            } else if (name == "|" || (name == "&" && params.size == 2) || name == "^" || name == "~") {
                if (name == "|") return emit_or(params[0], params[1])
                if (name == "&") return emit_and(params[0], params[1])
                if (name == "^") return emit_xor(params[0], params[1])
                if (name == "~") return emit_not(params[0])
            } else if (name == ">>" || name == "<<") {
                if (name == "<<")
                    return emit_shl(params[0], params[1])
                // right shift is arithmetic for signed left operands, logical otherwise
                if (get_ast_type(parameter_nodes[0])->is_signed_type())
                    return emit_sar(params[0], params[1])
                else
                    return emit_shr(params[0], params[1])
            } else if (name == "/" || name == "%" || (name == "*" && params.size == 2)) {
                // signedness of the left operand picks the mul / div flavour; mod has one form
                if (get_ast_type(parameter_nodes[0])->is_signed_type()) {
                    if (name == "/") return emit_sdiv(params[0], params[1])
                    if (name == "*") return emit_smul(params[0], params[1])
                } else {
                    if (name == "/") return emit_udiv(params[0], params[1])
                    if (name == "*") return emit_umul(params[0], params[1])
                }
                if (name == "%") return emit_mod(params[0], params[1])
            }
            error("unknown operator " + name)
        } else {
            // if this function returns a struct, we have to allocate space for it on the top of the stack
            // before we save registers, as it has to persist beyond the call (for whatever happens to it next)
            // We stick it in the function as if this is the declaration of a temporary variable, basically
            var return_type = get_ast_type(func)->return_type
            var struct_return_temp_ident = null()
            if (return_type->is_object() && return_type->indirection == 0) {
                struct_return_temp_ident = _ident("bytecode_struct_return_temp", return_type, null())
                functions.last().frame_size += type_size(return_type)
                functions.last().var_to_frame_offset[struct_return_temp_ident] = -functions.last().frame_size
            }
            // save regs
            // registers 3 .. save_til-1 are the ones handed out so far in this expression
            var save_til = peek_reg()
            var save_size = (save_til - 3) * register_size
            if (save_size != 0) {
                emit_addi(0, 0, -save_size)
                for (var i = 3; i < save_til; i++;) {
                    emit_str(0, ((i-3)*register_size) cast int, i, operand_size::b64())
                }
            }
            // STRUCT HERE
            // reverse order
            var total_param_size = 0
            node->function_call.parameters.reverse().for_each(fun(child: *ast_node) {
                // push param onto stack
                var param_type = get_ast_type(child)
                var param_size = type_size(param_type)
                var param_reg = generate(child)
                emit_addi(0, 0, -param_size)
                total_param_size += param_size
                if (param_type->is_object() && param_type->indirection == 0)
                    emit_struct_copy(0, 0, param_reg, 0, param_type)
                else
                    emit_str(0, 0, param_reg, size_to_operand_size(param_size))
                // scratch registers used for this parameter can be reused for the next
                reset_reg(save_til)
            })
            // pass the address to save the struct into as a parameter
            if (return_type->is_object() && return_type->indirection == 0) {
                emit_addi(0, 0, -(register_size) cast int)
                total_param_size += (register_size) cast int
                emit_str(0, 0, emit_addi(1, functions.last().var_to_frame_offset[struct_return_temp_ident]), size_to_operand_size(register_size))
                reset_reg(save_til)
            }
            var return_reg = emit_call(generate(node->function_call.func))
            if (return_type->is_object() && return_type->indirection == 0) {
                // if returned struct, then the struct was saved where we asked for it to be
                // return pointer with that address
                return_reg = emit_addi(1, functions.last().var_to_frame_offset[struct_return_temp_ident])
            } else {
                // returning through r2 every time doesn't give unique regs for functions used together in an expression
                // so get a new one for this time
                return_reg = emit_addi(return_reg, 0)
            }
            // pop the parameters
            emit_addi(0, 0, total_param_size)
            // restore regs
            for (var i = 3; i < save_til; i++;) {
                emit_ldr(i, 0, ((i-3)*register_size) cast int, operand_size::b64())
            }
            if (save_size != 0)
                emit_addi(0, 0, save_size)
            return return_reg
        }
    }

    // Only `sizeof` is supported: it must have no value parameters and exactly one
    // type parameter, and yields that type's size as an immediate.
    fun generate_compiler_intrinsic(node: *ast_node): int {
        if (node->compiler_intrinsic.intrinsic == "sizeof") {
            if (node->compiler_intrinsic.parameters.size || node->compiler_intrinsic.type_parameters.size != 1)
                error("wrong parameters to sizeof compiler intrinsic")
            return emit_imm(type_size(node->compiler_intrinsic.type_parameters[0]))
        }
        error("bad compiler intrinsic " + node->compiler_intrinsic.intrinsic)
    }

    // Generates `node` as an rvalue.
    fun generate(node: *ast_node): int
        return generate(node, false)

    // Dispatches on the node kind. The result is the register holding the node's value
    // (or address), or -1 for statements. `lvalue` is forwarded only to the node kinds
    // that can produce an address: function calls and identifiers.
    fun generate(node: *ast_node, lvalue: bool): int {
        match (*node) {
            ast_node::declaration_statement(backing)    return generate_declaration_statement(node)
            ast_node::assignment_statement(backing)     return generate_assignment_statement(node)
            ast_node::if_statement(backing)             return generate_if_statement(node)
            ast_node::while_loop(backing)               return generate_while_loop(node)
            ast_node::for_loop(backing)                 return generate_for_loop(node)
            ast_node::function(backing)                 return generate_function(node)
            ast_node::function_call(backing)            return generate_function_call(node, lvalue)
            ast_node::compiler_intrinsic(backing)       return generate_compiler_intrinsic(node)
            ast_node::code_block(backing)               return generate_code_block(node)
            ast_node::return_statement(backing)         return generate_return_statement(node)
            ast_node::branching_statement(backing)      return generate_branching_statement(node)
            ast_node::cast(backing)                     return generate_cast(node)
            ast_node::value(backing)                    return
generate_value(node)
            ast_node::identifier(backing)               return generate_identifier(node, lvalue)
        }
        error("Bad node")
    }

    // Stable, unique name for an AST node. Names are memoised per node; a new name is
    // the node's AST name followed by a fresh id, except that the function called
    // "main" is always named exactly "main".
    fun get_name(node: *ast_node): str {
        var cached = ast_name_map.get_ptr_or_null(node);
        if (cached)
            return *cached
        var fresh_name = get_ast_name(node) + get_id()
        if (is_function(node) && node->function.name == "main")
            fresh_name = "main"
        ast_name_map.set(node, fresh_name)
        return fresh_name
    }

    // Copies an object of type t from [from + from_offset] to [to + to_offset], one
    // member at a time: nested objects held by value recurse, everything else is a
    // load into a fresh register followed by a store of the same width.
    fun emit_struct_copy(to: int, to_offset: int, from: int, from_offset: int, t: *type) {
        for (var idx = 0; idx < t->type_def->type_def.variables.size; idx++;) {
            var field = t->type_def->type_def.variables[idx]->declaration_statement.identifier
            var field_type = get_ast_type(field)
            var field_offset = offset_into_struct(t, field)
            var dst_offset = (to_offset + field_offset) cast int
            var src_offset = (from_offset + field_offset) cast int
            if (field_type->is_object() && field_type->indirection == 0) {
                emit_struct_copy(to, dst_offset, from, src_offset, field_type)
            } else {
                var width = size_to_operand_size(type_size(field_type))
                emit_str(to, dst_offset, emit_ldr(from, src_offset, width), width)
            }
        }
    }

    // ---- instruction emitters ----
    // Each emitter appends one instruction and returns the register that receives the
    // result (or -1 when there is none). Overloads without a `dest` parameter take a
    // fresh register from get_reg().

    // load immediate
    fun emit_imm(value: ulong): int {
        return emit_imm((value) cast long)
    }
    fun emit_imm(value: int): int {
        return emit_imm((value) cast long)
    }
    fun emit_imm(value: long): int {
        var inst: imm
        inst.to_reg = get_reg()
        inst.val = value
        instructions.add(byte_inst::imm(inst))
        return inst.to_reg
    }

    // register + register
    fun emit_add(a: int, b: int): int {
        return emit_add(get_reg(), a, b)
    }
    fun emit_add(dest: int, a: int, b: int): int {
        var inst: reg2
        inst.to_reg = dest
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::add(inst))
        return inst.to_reg
    }

    // register + immediate; the int / ulong overloads only widen to long
    fun emit_addi(a: int, bi: long): int {
        return emit_addi(get_reg(), a, bi)
    }
    fun emit_addi(a: int, bi: int): int {
        return emit_addi(get_reg(), a, (bi) cast long)
    }
    fun emit_addi(dest: int, a: int, bi: int): int {
        return emit_addi(dest, a, (bi) cast long)
    }
    fun emit_addi(a: int, bi: ulong): int {
        return emit_addi(get_reg(), a, (bi) cast long)
    }
    fun emit_addi(dest: int, a: int, bi: ulong): int {
        return emit_addi(dest, a, (bi) cast long)
    }
    fun emit_addi(dest: int, a: int, bi: long): int {
        var inst: reg1i
        inst.to_reg = dest
        inst.a = a
        inst.bi = bi
        instructions.add(byte_inst::addi(inst))
        return inst.to_reg
    }

    // multiply / divide / modulo, always into a fresh register
    fun emit_umul(a: int, b: int): int {
        var inst: reg2
        inst.to_reg = get_reg()
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::umul(inst))
        return inst.to_reg
    }
    fun emit_smul(a: int, b: int): int {
        var inst: reg2
        inst.to_reg = get_reg()
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::smul(inst))
        return inst.to_reg
    }
    fun emit_udiv(a: int, b: int): int {
        var inst: reg2
        inst.to_reg = get_reg()
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::udiv(inst))
        return inst.to_reg
    }
    fun emit_sdiv(a: int, b: int): int {
        var inst: reg2
        inst.to_reg = get_reg()
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::sdiv(inst))
        return inst.to_reg
    }
    fun emit_mod(a: int, b: int): int {
        var inst: reg2
        inst.to_reg = get_reg()
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::mod(inst))
        return inst.to_reg
    }

    // shifts, always into a fresh register
    fun emit_shr(a: int, b: int): int {
        var inst: reg2
        inst.to_reg = get_reg()
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::shr(inst))
        return inst.to_reg
    }
    fun emit_sar(a: int, b: int): int {
        var inst: reg2
        inst.to_reg = get_reg()
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::sar(inst))
        return inst.to_reg
    }
    fun emit_shl(a: int, b: int): int {
        var inst: reg2
        inst.to_reg = get_reg()
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::shl(inst))
        return inst.to_reg
    }

    // bitwise operations
    fun emit_and(a: int, b: int): int {
        return emit_and(get_reg(), a, b)
    }
    fun emit_and(dest: int, a: int, b: int): int {
        var inst: reg2
        inst.to_reg = dest
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::and(inst))
        return inst.to_reg
    }
    fun emit_or(a: int, b: int): int {
        return emit_or(get_reg(), a, b)
    }
    fun emit_or(dest: int, a: int, b: int): int {
        var inst: reg2
        inst.to_reg = dest
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::or(inst))
        return inst.to_reg
    }
    fun emit_xor(a: int, b: int): int {
        return emit_xor(get_reg(), a, b)
    }
    fun emit_xor(dest: int, a: int, b: int): int {
        var inst: reg2
        inst.to_reg = dest
        inst.a = a
        inst.b = b
        instructions.add(byte_inst::xor(inst))
        return inst.to_reg
    }
    fun emit_not(a: int): int {
        return emit_not(get_reg(), a)
    }
    fun emit_not(dest: int, a: int): int {
        var inst: reg1
        inst.to_reg = dest
        inst.a = a
        instructions.add(byte_inst::not(inst))
        return inst.to_reg
    }

    // tests against zero, always into a fresh register
    fun emit_gz(a: int): int {
        var inst: reg1
        inst.to_reg = get_reg()
        inst.a = a
        instructions.add(byte_inst::gz(inst))
        return inst.to_reg
    }
    fun emit_lz(a: int): int {
        var inst: reg1
        inst.to_reg = get_reg()
        inst.a = a
        instructions.add(byte_inst::lz(inst))
        return inst.to_reg
    }
    fun emit_ez(a: int): int {
        var inst: reg1
        inst.to_reg = get_reg()
        inst.a = a
        instructions.add(byte_inst::ez(inst))
        return inst.to_reg
    }

    // load from [reg + offset]
    fun emit_ldr(reg: int, offset: int, size: operand_size): int {
        return emit_ldr(get_reg(), reg, offset, size)
    }
    fun emit_ldr(dest: int, reg: int, offset: int, size: operand_size): int {
        var load: reg1is
        load.reg = dest
        load.base_reg = reg
        load.offset = offset
        load.size = size
        instructions.add(byte_inst::ldr(load))
        return load.reg
    }

    // store from_reg to [to_reg + offset]
    fun emit_str(to_reg: int, offset: int, from_reg: int, size: operand_size): int {
        var store: reg1is
        store.reg = from_reg
        store.offset = offset
        store.base_reg = to_reg
        store.size = size
        instructions.add(byte_inst::str(store))
        return -1
    }

    // unconditional relative jump
    fun emit_jmp(offset: int): int {
        return emit_jmp((offset) cast long)
    }
    fun emit_jmp(offset: long): int {
        instructions.add(byte_inst::jmp(offset))
        return -1
    }

    // relative jump taken when `reg` is zero
    fun emit_jz(reg: int, offset: int): int {
        var branch: test
        branch.reg = reg
        branch.offset = offset
        instructions.add(byte_inst::jz(branch))
        return -1
    }

    fun emit_ret(): int {
        instructions.add(byte_inst::ret())
        return -1
    }

    // call through the address held in `reg`; the result is always reported in register 2
    fun emit_call(reg: int): int {
        instructions.add(byte_inst::call(reg))
        return 2
    }

    // Stack ABI
    // it's system v x64ish, but all params passed on stack
    fun evaluate(): int {
        println("evaling main")
        println(bytecode_to_string(functions, instructions))
        var main_entry = functions.find_first_satisfying(fun(block: bytecode_function): bool return block.name == "main";)
        var registers.construct(reg_max): vec
        registers.size = reg_max
        registers[1] = 0xdeadbeefcafebabe
        var stack_size = 8 * 1024 * 1024
        var got_malloc = null()
        var stack = new(stack_size) + stack_size
        for (var i = 0; i <
stack_size; i++;) stack[-i + -1] = 0 registers[0] = (stack-register_size) cast long // with the stack being zeroed out, this makes it a return address of 0 for (var i = main_entry.instruction_start; i < instructions.size; i++;) { /*println(str("evaling: ") + i + ": " + to_string(instructions[i]))*/ match(instructions[i]) { byte_inst::nop() {} byte_inst::imm(i) registers[i.to_reg] = i.val byte_inst::add(a) registers[a.to_reg] = registers[a.a] + registers[a.b] byte_inst::addi(a) registers[a.to_reg] = registers[a.a] + a.bi byte_inst::umul(a) registers[a.to_reg] = (registers[a.a]) cast ulong * (registers[a.b]) cast ulong byte_inst::smul(a) registers[a.to_reg] = registers[a.a] * registers[a.b] byte_inst::udiv(a) registers[a.to_reg] = (registers[a.a]) cast ulong / (registers[a.b]) cast ulong byte_inst::sdiv(a) registers[a.to_reg] = registers[a.a] / registers[a.b] byte_inst::mod(a) registers[a.to_reg] = (registers[a.a]) cast ulong % (registers[a.b]) cast ulong byte_inst::shr(a) registers[a.to_reg] = (registers[a.a]) cast ulong >>(registers[a.b]) cast ulong byte_inst::sar(a) registers[a.to_reg] = registers[a.a] >> registers[a.b] byte_inst::shl(a) registers[a.to_reg] = (registers[a.a]) cast ulong <<(registers[a.b]) cast ulong byte_inst::and(a) registers[a.to_reg] = registers[a.a] & registers[a.b] byte_inst::or(a) registers[a.to_reg] = registers[a.a] | registers[a.b] byte_inst::xor(a) registers[a.to_reg] = registers[a.a] ^ registers[a.b] byte_inst::not(a) registers[a.to_reg] = ~registers[a.a] byte_inst::gz(a) registers[a.to_reg] = registers[a.a] > 0 byte_inst::lz(a) registers[a.to_reg] = registers[a.a] < 0 byte_inst::ez(a) registers[a.to_reg] = registers[a.a] == 0 byte_inst::ldr(l) match (l.size) { operand_size::b8() registers[l.reg] = *(registers[l.base_reg] + l.offset) cast *char operand_size::b16() registers[l.reg] = *(registers[l.base_reg] + l.offset) cast *short operand_size::b32() registers[l.reg] = *(registers[l.base_reg] + l.offset) cast *int operand_size::b64() 
registers[l.reg] = *(registers[l.base_reg] + l.offset) cast *long } byte_inst::str(s) match (s.size) { operand_size::b8() *(registers[s.base_reg] + s.offset) cast *uchar = registers[s.reg] operand_size::b16() *(registers[s.base_reg] + s.offset) cast *ushort = registers[s.reg] operand_size::b32() *(registers[s.base_reg] + s.offset) cast *uint = registers[s.reg] operand_size::b64() *(registers[s.base_reg] + s.offset) cast *ulong = registers[s.reg] } byte_inst::jmp(offset) i += offset - 1 // to counteract pc inc byte_inst::jz(j) if (registers[j.reg] == 0) i += j.offset - 1 // to counteract pc inc byte_inst::call(c) { var func_start = registers[c] // extern call /*ext fun malloc(size: ulong): *void*/ /*ext fun free(size: *void)*/ /*ext fun memmove(dest: *void, src: *void, size: ulong): *void*/ /*ext fun printf(fmt_str: *char, ...): int*/ /*ext fun fprintf(file: *void, format: *char, ...): int*/ /*ext fun fflush(file: int): int*/ /*ext fun fgets(buff: *char, size: int, file: *void): *char*/ /*ext var stderr: *void*/ /*ext var stdin: *void*/ /*ext fun fopen(path: *char, mode: *char): *void*/ /*ext fun fclose(file: *void): int*/ /*ext fun ftell(file: *void): long*/ /*ext fun fseek(file: *void, offset: long, whence: int): int*/ /*ext fun fread(ptr: *void, size: ulong, nmemb: ulong, file: *void): ulong*/ /*ext fun fwrite(ptr: *void, size: ulong, nmemb: ulong, file: *void): ulong*/ /*ext fun system(call_string: *char): int*/ /*ext fun exit(code: int):void*/ /*ext fun popen(command: *char, mode: *char): *void*/ /*ext fun pclose(file: *void): int*/ /*ext fun snprintf(to_str: *char, num: ulong, format: *char, ...): int*/ if (func_start < 0) { if (func_start == malloc_addr) registers[2] = (malloc(*(registers[0]) cast *ulong)) cast long else if (func_start == free_addr) free(*(registers[0]) cast **void) else if (func_start == memmove_addr) registers[2] = (memmove(*(registers[0]) cast **void, *(registers[0] + #sizeof<*void>) cast **void, *(registers[0] + 2*#sizeof<*void>) cast 
*ulong)) cast long else if (func_start == printf_addr) registers[2] = (printf( *(registers[0]) cast **char, *(registers[0] + #sizeof<*char>) cast **char)) cast long else if (func_start == fprintf_addr) registers[2] = (fprintf(*(registers[0]) cast **void, *(registers[0] + #sizeof<*void>) cast **char, *(registers[0] + #sizeof<*void> + #sizeof<*char>) cast **char)) cast long else if (func_start == fflush_addr) registers[2] = (fflush( *(registers[0]) cast *int)) cast long else if (func_start == fgets_addr) registers[2] = (fgets( *(registers[0]) cast **char, *(registers[0] + #sizeof<*char>) cast *int, *(registers[0] + #sizeof<*char> + #sizeof) cast **void)) cast long else if (func_start == fopen_addr) registers[2] = (fopen( *(registers[0]) cast **char, *(registers[0] + #sizeof<*char>) cast **char)) cast long else if (func_start == fclose_addr) registers[2] = (fclose( *(registers[0]) cast **void)) cast long else if (func_start == ftell_addr) registers[2] = (ftell( *(registers[0]) cast **void)) cast long else if (func_start == fseek_addr) registers[2] = (fseek( *(registers[0]) cast **void, *(registers[0] + #sizeof<*void>) cast *long, *(registers[0] + #sizeof<*void> + #sizeof) cast *int)) cast long else if (func_start == fread_addr) registers[2] = (fread( *(registers[0]) cast **void, *(registers[0] + #sizeof<*void>) cast *ulong, *(registers[0] + #sizeof<*void> + #sizeof) cast *ulong, *(registers[0] + #sizeof<*void>+2*#sizeof) cast **void)) cast long else if (func_start == fwrite_addr) registers[2] = (fwrite( *(registers[0]) cast **void, *(registers[0] + #sizeof<*void>) cast *ulong, *(registers[0] + #sizeof<*void> + #sizeof) cast *ulong, *(registers[0] + #sizeof<*void>+2*#sizeof) cast **void)) cast long else if (func_start == system_addr) registers[2] = (system( *(registers[0]) cast **char)) cast long else if (func_start == exit_addr) exit(*(registers[0]) cast *int) else if (func_start == popen_addr) registers[2] = (popen( *(registers[0]) cast **char, *(registers[0] + 
#sizeof<*char>) cast **char)) cast long else if (func_start == pclose_addr) registers[2] = (pclose( *(registers[0]) cast **void)) cast long else if (func_start == snprintf_addr) registers[2] =(snprintf(*(registers[0]) cast **char, *(registers[0] + #sizeof<*char>) cast *ulong, *(registers[0] + #sizeof<*char> + #sizeof) cast **char, *(registers[0] + #sizeof<*char> + #sizeof + #sizeof<*char>) cast *double)) cast long else error(str("bad extern call number") + func_start) } else { /*registers[0] -= register_size*/ registers[0] = registers[0] - register_size *(registers[0]) cast *long = i + 1 i = func_start - 1 /*print("call!")*/ /*println("call: " + functions.find_first_satisfying(fun(f: bytecode_function): bool return f.instruction_start == func_start;).name)*/ /*println("first part of memory is (after push)")*/ /*for (var i = 0; i < 8*8; i+=8;) {*/ /*print(str("-") + i + str(": "))*/ /*for (var j = 0; j < 8; j++;) {*/ /*if (j == 4)*/ /*print(" ")*/ /*print(*(stack - (i+j)*#sizeof - 1) cast *uchar)*/ /*print(" ")*/ /*}*/ /*println()*/ /*}*/ /*println("Done")*/ } } byte_inst::ret() { var pc = *(registers[0]) cast *long /*registers[0] += register_size*/ registers[0] = registers[0] + register_size /*print("returning!")*/ /*println("first part of memory is")*/ /*for (var i = 0; i < 8*8; i+=8;) {*/ /*print(str("-") + i + str(": "))*/ /*for (var j = 0; j < 8; j++;) {*/ /*if (j == 4)*/ /*print(" ")*/ /*print(*(stack - (i+j)*#sizeof - 1) cast *uchar)*/ /*print(" ")*/ /*}*/ /*println()*/ /*}*/ /*println("Done")*/ if (pc == 0) { /*println(str("got malloc is ") + *(got_malloc) cast *int)*/ var value = registers[2] println(str("returning from main, value is ") + value) return value } else { i = pc - 1 /*println(str("returning to ") + pc)*/ } } } } return -1 } }