Files
kraken/stdlib/bytecode_generator.krak

497 lines
19 KiB
Plaintext
Raw Normal View History

2018-02-02 00:26:31 -05:00
import io:*
import mem:*
import map:*
import hash_map:*
import stack:*
import string:*
import util:*
import tree:*
import symbol:*
import ast_nodes:*
// for error with syntax tree
import pass_common:*
import poset:*
// Size of a value of type t: the first half of the pair computed by
// type_size_and_alignment (the alignment half is dropped).
fun type_size(t: *type): ulong {
    var size_and_align = type_size_and_alignment(t)
    return size_and_align.first
}
// Computes the (size, alignment) pair for type t.
//  - any type with indirection is sized and aligned like *void
//  - objects are laid out member by member, padding each member up to its
//    own alignment and padding the end up to the largest member alignment;
//    unions take the size of their largest member instead
//  - functions are sized like *void, other base types use #sizeof for both
//    halves of the pair
// Falls through to error() for any base type not listed in the match.
fun type_size_and_alignment(t: *type): pair<ulong,ulong> {
    if (t->indirection)
        return make_pair(#sizeof<*void>, #sizeof<*void>)
    match (t->base) {
        base_type::object() {
            var running_size: ulong = 0
            var largest_member: ulong = 0
            var largest_align: ulong = 0
            t->type_def->type_def.variables.for_each(fun(member_decl: *ast_node) {
                var member = type_size_and_alignment(member_decl->declaration_statement.identifier->identifier.type)
                largest_member = max(largest_member, member.first)
                largest_align = max(largest_align, member.second)
                // gap needed in front of this member so it starts on a multiple of its alignment
                var padding = 0
                if (member.second != 0)
                    padding = (member.second - (running_size % member.second)) % member.second
                running_size += member.first + padding
            })
            // a union is only as big as its largest member
            if (t->type_def->type_def.is_union)
                running_size = largest_member
            // round the end up so consecutive objects in memory stay aligned
            if (largest_align != 0)
                running_size += (largest_align - (running_size % largest_align)) % largest_align
            return make_pair(running_size, largest_align)
        }
        base_type::function() return make_pair(#sizeof<*void>, #sizeof<*void>)
        base_type::boolean() return make_pair(#sizeof<bool>, #sizeof<bool>)
        base_type::character() return make_pair(#sizeof<char>, #sizeof<char>)
        base_type::ucharacter() return make_pair(#sizeof<uchar>, #sizeof<uchar>)
        base_type::short_int() return make_pair(#sizeof<short>, #sizeof<short>)
        base_type::ushort_int() return make_pair(#sizeof<ushort>, #sizeof<ushort>)
        base_type::integer() return make_pair(#sizeof<int>, #sizeof<int>)
        base_type::uinteger() return make_pair(#sizeof<uint>, #sizeof<uint>)
        base_type::long_int() return make_pair(#sizeof<long>, #sizeof<long>)
        base_type::ulong_int() return make_pair(#sizeof<ulong>, #sizeof<ulong>)
        base_type::floating() return make_pair(#sizeof<float>, #sizeof<float>)
        base_type::double_precision() return make_pair(#sizeof<double>, #sizeof<double>)
    }
    error(string("Invalid type for type_size: ") + t->to_string())
}
// Offset of the member whose identifier node is ident inside struct_type.
// Members are visited in declaration order; before each one the running
// offset is rounded up to that member's alignment. Every member of a union
// is at offset 0. If ident is never matched the loop simply runs to the end
// and the accumulated offset is returned.
fun offset_into_struct(struct_type: *type, ident: *ast_node): ulong {
    var offset: ulong = 0
    if (struct_type->type_def->type_def.is_union)
        return offset
    for (var i = 0; i < struct_type->type_def->type_def.variables.size; i++;) {
        var member_ident = struct_type->type_def->type_def.variables[i]->declaration_statement.identifier
        var size_and_align = type_size_and_alignment(member_ident->identifier.type)
        // align the start of this member
        if (size_and_align.second != 0)
            offset += (size_and_align.second - (offset % size_and_align.second)) % size_and_align.second
        // found it: offset now points at the start of the requested member
        if (member_ident == ident)
            break
        // otherwise step over this member and keep looking
        offset += size_and_align.first
    }
    return offset
}
2018-02-02 00:26:31 -05:00
// One instruction of the register bytecode.
// nop, call and ret carry no operands; every other variant carries the
// operand object of the same name.
adt byte_inst {
    nop,
    imm: imm,
    add: add,
    ldr: ldr,
    str: str,
    jmp: jmp,
    jnz: jnz,
    call,
    ret
}
// Operands of byte_inst::imm: load the constant val into register reg.
obj imm {
    // destination register
    var reg: int
    // constant to load
    var val: int
}
// Operands of byte_inst::add: to_reg = register a + register b.
obj add {
    // destination register
    var to_reg: int
    // register holding the left operand
    var a: int
    // register holding the right operand
    var b: int
}
// Operands of byte_inst::ldr: to_reg = memory[register from_reg + offset].
obj ldr {
    // destination register
    var to_reg: int
    // register holding the base address
    var from_reg: int
    // constant added to the base address
    var offset: int
}
// Operands of byte_inst::str: memory[register to_reg + offset] = register from_reg.
obj str {
    // register holding the base address being written to
    var to_reg: int
    // constant added to the base address
    var offset: int
    // register holding the value to store
    var from_reg: int
}
// Operands of byte_inst::jmp: unconditional relative jump.
obj jmp {
    // amount added to the program counter
    var offset: int
}
// Operands of byte_inst::jnz: relative jump taken when register reg is non-zero.
obj jnz {
    // register that is tested against zero
    var reg: int
    // amount added to the program counter when the jump is taken
    var offset: int
}
2018-02-02 00:26:31 -05:00
2018-02-03 18:53:13 -05:00
// Renders a single instruction as human readable text for listings and traces.
// Registers are printed as rN. The trailing return is only reached if a
// byte_inst variant is not handled by the match.
fun to_string(b: byte_inst): string {
    match (b) {
        byte_inst::nop() return string("nop")
        byte_inst::imm(i) return string("r") + i.reg + " = imm " + i.val
        byte_inst::add(a) return string("r") + a.to_reg + " = r" + a.a + " + r" + a.b
        byte_inst::ldr(l) return string("r") + l.to_reg + " = ldr r" + l.from_reg + " (" + l.offset + ")"
        byte_inst::str(s) return string("str(r") + s.to_reg + "(" + s.offset + ") <= r" + s.from_reg + ")"
        byte_inst::jmp(j) return string("jmp(pc += ") + j.offset + ")"
        // a conditional jump is shown as a jmp with its condition spelled out
        byte_inst::jnz(j) return string("jmp(r") + j.reg + " != 0, pc += " + j.offset + ")"
        byte_inst::call() return string("call")
        byte_inst::ret() return string("ret")
    }
    return string("Missed byte_inst case in to_string")
}
// Renders every function in bytecode with its own to_string and joins the
// listings with a newline between them.
fun bytecode_to_string(bytecode: ref vector<bytecode_function>): string {
    return string("\n").join(bytecode.map(fun(bb: ref bytecode_function): string return bb.to_string();))
}
// Convenience constructor: returns a bytecode_function built by
// construct(name), i.e. named name with no instructions and an empty frame.
fun bytecode_function(name: ref string): bytecode_function {
    var to_ret.construct(name): bytecode_function
    return to_ret
}
// A single lowered function: its name, its instruction stream and the layout
// of its stack frame.
obj bytecode_function (Object) {
    var name: string
    var instructions: vector<byte_inst>
    // frame offset assigned to each parameter / local identifier node
    var var_to_frame_offset: map<*ast_node, int>
    // running total of the sizes of everything placed in the frame so far
    var frame_size: int

    // Builds an unnamed function with no instructions and an empty frame.
    fun construct(): *bytecode_function {
        instructions.construct()
        name.construct()
        var_to_frame_offset.construct()
        frame_size = 0
        return this
    }
    // Same as construct(), but the name is copied from name_in.
    fun construct(name_in: ref string): *bytecode_function {
        instructions.construct()
        name.copy_construct(&name_in)
        var_to_frame_offset.construct()
        frame_size = 0
        return this
    }
    // Copies every member of old into this (uninitialised) object.
    fun copy_construct(old: *bytecode_function) {
        instructions.copy_construct(&old->instructions)
        name.copy_construct(&old->name)
        var_to_frame_offset.copy_construct(&old->var_to_frame_offset)
        frame_size = old->frame_size
    }
    // Replaces the contents of this object with a copy of other.
    fun operator=(other: ref bytecode_function) {
        // assigning an object to itself must not destruct the members that
        // are about to be copied from
        if (this != &other) {
            destruct()
            copy_construct(&other)
        }
    }
    // Releases the members that own storage.
    fun destruct() {
        instructions.destruct()
        name.destruct()
        var_to_frame_offset.destruct()
    }
    // Produces a listing: a header with the name and frame size, one line per
    // frame variable ("name: r0 + offset"), then one numbered line per
    // instruction.
    fun to_string(): string {
        var res = name + "(frame size " + frame_size + "):\n"
        res += "\t frame layout\n"
        var_to_frame_offset.for_each(fun(n: *ast_node, o: int) {
            // each variable goes on its own line
            res += "\t\t" + n->identifier.name + ": r0 + " + o + "\n"
        })
        res += "\t bytecode\n"
        // index of the instruction being printed
        var pc = 0
        instructions.for_each(fun(b: byte_inst) {
            res += string("\t\t") + pc++ + string(": ") + to_string(b) + "\n"
        })
        return res
    }
}
// Lowers the ASTs of a program into bytecode_functions and contains a small
// interpreter (evaluate) that runs the function named "main".
//
// Conventions used by the generated code, as visible in this object:
//  - register 0 is used as the base register for every frame access
//  - generate* methods return the register holding the value they produced,
//    or -1 when they produce no value
//  - jump offsets are relative to the jump instruction itself (pc += offset)
obj bytecode_generator (Object) {
    // next free register for the statement being generated (starts at 1)
    var reg_counter: int
    // largest reg_counter observed by reset_reg
    var reg_max: int
    // source of the numeric suffixes handed out by get_id
    var id_counter: int
    // names already chosen by get_name, so a node always maps to one name
    var ast_name_map: hash_map<*ast_node, string>
    // output: one entry per function definition generated so far
    var functions: vector<bytecode_function>

    fun construct(): *bytecode_generator {
        id_counter = 0
        ast_name_map.construct()
        functions.construct()
        reg_counter = 1
        reg_max = 1
        return this
    }
    fun copy_construct(old: *bytecode_generator) {
        reg_counter = old->reg_counter
        reg_max = old->reg_max
        id_counter = old->id_counter
        ast_name_map.copy_construct(&old->ast_name_map)
        functions.copy_construct(&old->functions)
    }
    fun operator=(other: ref bytecode_generator) {
        // assigning an object to itself must not destruct the members that
        // are about to be copied from
        if (this != &other) {
            destruct()
            copy_construct(&other)
        }
    }
    fun destruct() {
        ast_name_map.destruct()
        functions.destruct()
    }
    // Returns a fresh numeric suffix as a string.
    fun get_id(): string return to_string(id_counter++);
    // Hands out the next unused register number.
    fun get_reg(): int return reg_counter++;
    // Starts register numbering over at 1, remembering the high-water mark.
    fun reset_reg() {
        if (reg_counter > reg_max) {
            reg_max = reg_counter
        }
        reg_counter = 1
    }
    // Generates bytecode for every translation unit in name_ast_map and
    // returns the accumulated functions.
    // NOTE(review): top-level declaration statements and compiler intrinsics
    // are emitted through functions.last(), i.e. into whichever function was
    // generated most recently - confirm that is intended.
    fun generate_bytecode(name_ast_map: map<string, pair<*tree<symbol>,*ast_node>>): vector<bytecode_function> {
        // iterate through asts
        name_ast_map.for_each(fun(name: string, tree_pair: pair<*tree<symbol>,*ast_node>) {
            // iterate through children for each ast
            // do lambdas separately, so we can reconstitute the enclosing object if it has one
            tree_pair.second->translation_unit.lambdas.for_each(fun(child: *ast_node) {
                generate_function_definition(child)
            })
            tree_pair.second->translation_unit.children.for_each(fun(child: *ast_node) {
                match (*child) {
                    ast_node::declaration_statement(backing) generate_declaration_statement(child)
                    ast_node::compiler_intrinsic(backing) generate_compiler_intrinsic(child)
                    ast_node::function(backing) generate_function_definition(child)
                    ast_node::template(backing) {
                        // only the instantiations of a template produce code
                        backing.instantiated.for_each(fun(node: *ast_node) {
                            match (*node) {
                                ast_node::function(backing) generate_function_definition(node)
                                ast_node::type_def(backing) {
                                    backing.methods.for_each(fun(method: *ast_node) {
                                        if (is_template(method))
                                            method->template.instantiated.for_each(fun(m: *ast_node) generate_function_definition(m);)
                                        else
                                            generate_function_definition(method)
                                    })
                                }
                            }
                        })
                    }
                    ast_node::type_def(backing) {
                        backing.methods.for_each(fun(method: *ast_node) {
                            if (is_template(method))
                                method->template.instantiated.for_each(fun(m: *ast_node) generate_function_definition(m);)
                            else
                                generate_function_definition(method)
                        })
                    }
                }
            })
        })
        return functions
    }
    // Opens a new bytecode_function for node, gives every parameter a frame
    // slot in declaration order, then generates the body into it.
    fun generate_function_definition(node: *ast_node): int {
        functions.add(bytecode_function(get_name(node)))
        node->function.parameters.for_each(fun(p: *ast_node) {
            functions.last().var_to_frame_offset[p] = functions.last().frame_size
            functions.last().frame_size += type_size(p->identifier.type)
        })
        generate(node->function.body_statement)
        return -1
    }
    // Reserves a frame slot for the declared identifier and, if there is an
    // initialiser, stores its value into that slot.
    fun generate_declaration_statement(node: *ast_node): int {
        var identifier = node->declaration_statement.identifier
        var ident_type = identifier->identifier.type
        functions.last().var_to_frame_offset[identifier] = functions.last().frame_size
        functions.last().frame_size += type_size(ident_type)
        if (node->declaration_statement.expression) {
            emit_str(0, functions.last().var_to_frame_offset[identifier], generate(node->declaration_statement.expression))
        }
        return -1
    }
    // Evaluates the target as an lvalue (an address), then the source, and
    // stores the source through the address.
    fun generate_assignment_statement(node: *ast_node): int {
        var to = generate(node->assignment_statement.to, true)
        var from = generate(node->assignment_statement.from)
        emit_str(to, 0, from)
        return -1
    }
    // Emits
    //      <condition>
    //      jnz cond, +2        ; condition true: hop over the jmp
    //      jmp <else or end>   ; condition false: skip the then part
    //      <then part>
    //      [jmp <end>]         ; only when there is an else part
    //      [<else part>]
    // The only conditional jump available is taken on non-zero, so the
    // "skip the then part" jump has to be an unconditional jmp that a true
    // condition steps over. Jump offsets are patched once the length of the
    // code they skip is known.
    fun generate_if_statement(node: *ast_node): int {
        var cond_reg = generate(node->if_statement.condition)
        emit_jnz(cond_reg, 2)
        var skip_then_index = functions.last().instructions.size
        emit_jmp(0)
        generate(node->if_statement.then_part)
        if (node->if_statement.else_part) {
            var skip_else_index = functions.last().instructions.size
            emit_jmp(0)
            // a false condition lands on the first instruction of the else part
            functions.last().instructions[skip_then_index].jmp.offset = functions.last().instructions.size - skip_then_index
            generate(node->if_statement.else_part)
            // the then part jumps over the else part
            functions.last().instructions[skip_else_index].jmp.offset = functions.last().instructions.size - skip_else_index
        } else {
            // a false condition lands just past the then part
            functions.last().instructions[skip_then_index].jmp.offset = functions.last().instructions.size - skip_then_index
        }
        return -1
    }
    // NOTE(review): only generates the condition and the body once each, no
    // jumps are emitted, so this does not loop yet.
    fun generate_while_loop(node: *ast_node): int {
        generate(node->while_loop.condition)
        generate(node->while_loop.statement)
        return -1
    }
    // NOTE(review): generates init, condition, update and body once each in
    // that order, no jumps are emitted, so this does not loop yet.
    fun generate_for_loop(node: *ast_node): int {
        if (node->for_loop.init)
            generate(node->for_loop.init)
        if (node->for_loop.condition)
            generate(node->for_loop.condition)
        if (node->for_loop.update)
            generate(node->for_loop.update)
        generate(node->for_loop.body)
        return -1
    }
    // As an lvalue: returns a register holding r0 + the variable's frame
    // offset. Otherwise: returns a register loaded from that location.
    fun generate_identifier(node: *ast_node, lvalue: bool): int {
        if (lvalue) {
            return emit_add(0, emit_imm(functions.last().var_to_frame_offset[node]))
        } else {
            return emit_ldr(0, functions.last().var_to_frame_offset[node])
        }
    }
    // Stores the return value (if any) at a negative offset from r0 derived
    // from the size of its type, then emits ret.
    fun generate_return_statement(node: *ast_node): int {
        if (node->return_statement.return_value) {
            emit_str(0, -(type_size(get_ast_type(node->return_statement.return_value))) cast int / 4, generate(node->return_statement.return_value))
            emit_ret()
        } else {
            emit_ret()
        }
        return -1
    }
    // NOTE(review): break and continue currently emit a nop placeholder.
    fun generate_branching_statement(node: *ast_node): int {
        match(node->branching_statement.b_type) {
            branching_type::break_stmt() functions.last().instructions.add(byte_inst::nop())
            branching_type::continue_stmt() functions.last().instructions.add(byte_inst::nop())
        }
        return -1
    }
    // Casts emit no code of their own; the value being cast is generated as is.
    fun generate_cast(node: *ast_node): int {
        return generate(node->cast.value)
    }
    // Loads a literal: booleans become 1 for "true" and 0 otherwise, anything
    // else is parsed as an int.
    fun generate_value(node: *ast_node): int {
        if (node->value.value_type->is_bool())
            return emit_imm((node->value.string_value == "true") cast int)
        else
            return emit_imm(string_to_num<int>(node->value.string_value))
    }
    fun generate_code_block(node: *ast_node): int {
        node->code_block.children.for_each(fun(child: *ast_node) {
            // registers aren't used between statements (only stack reg)
            reset_reg()
            generate(child)
        })
        return -1
    }
    // this generates the function as a value, not the actual function
    // NOTE(review): the value is currently the placeholder constant -2.
    fun generate_function(node: *ast_node): int {
        return emit_imm(-2)
    }
    // Generates every argument expression, then a call instruction.
    // NOTE(review): the argument registers are discarded and emit_call
    // returns -1, so no result register is produced.
    fun generate_function_call(node: *ast_node, lvalue: bool): int {
        node->function_call.parameters.for_each(fun(child: *ast_node) generate(child);)
        return emit_call()
    }
    // NOTE(review): compiler intrinsics currently emit a nop placeholder.
    fun generate_compiler_intrinsic(node: *ast_node): int {
        functions.last().instructions.add(byte_inst::nop())
        return -1
    }
    // Generates node as an rvalue.
    fun generate(node: *ast_node): int return generate(node, false)
    // Dispatches on the kind of node. lvalue is only forwarded to the node
    // kinds that take it (function calls and identifiers). Any node kind not
    // listed ends in error("Bad node").
    fun generate(node: *ast_node, lvalue: bool): int {
        match (*node) {
            ast_node::declaration_statement(backing) return generate_declaration_statement(node)
            ast_node::assignment_statement(backing) return generate_assignment_statement(node)
            ast_node::if_statement(backing) return generate_if_statement(node)
            ast_node::while_loop(backing) return generate_while_loop(node)
            ast_node::for_loop(backing) return generate_for_loop(node)
            ast_node::function(backing) return generate_function(node)
            ast_node::function_call(backing) return generate_function_call(node, lvalue)
            ast_node::compiler_intrinsic(backing) return generate_compiler_intrinsic(node)
            ast_node::code_block(backing) return generate_code_block(node)
            ast_node::return_statement(backing) return generate_return_statement(node)
            ast_node::branching_statement(backing) return generate_branching_statement(node)
            ast_node::cast(backing) return generate_cast(node)
            ast_node::value(backing) return generate_value(node)
            ast_node::identifier(backing) return generate_identifier(node, lvalue)
        }
        error("Bad node")
    }
    // Returns the name used for node in the generated code. The first request
    // builds it from the AST name plus a fresh id (or exactly "main" for a
    // function called main) and caches it; later requests return the cached
    // name.
    fun get_name(node: *ast_node): string {
        var maybe_it = ast_name_map.get_ptr_or_null(node);
        if (maybe_it)
            return *maybe_it
        var result = get_ast_name(node) + get_id()
        if (is_function(node) && node->function.name == "main")
            result = "main"
        ast_name_map.set(node, result)
        return result
    }
    // The emit_* helpers append one instruction to the function currently
    // being generated (functions.last()). Those that define a register
    // allocate it and return its number; the others return -1.
    fun emit_imm(value: int): int {
        var i: imm
        i.reg = get_reg()
        i.val = value
        functions.last().instructions.add(byte_inst::imm(i))
        return i.reg
    }
    fun emit_add(a: int, b: int): int {
        var i: add
        i.to_reg = get_reg()
        i.a = a
        i.b = b
        functions.last().instructions.add(byte_inst::add(i))
        return i.to_reg
    }
    fun emit_ldr(reg: int, offset: int): int {
        var l: ldr
        l.to_reg = get_reg()
        l.from_reg = reg
        l.offset = offset
        functions.last().instructions.add(byte_inst::ldr(l))
        return l.to_reg
    }
    fun emit_str(to_reg: int, offset: int, from_reg: int): int {
        var s: str
        s.to_reg = to_reg
        s.offset = offset
        s.from_reg = from_reg
        functions.last().instructions.add(byte_inst::str(s))
        return -1
    }
    fun emit_jmp(offset: int): int {
        var j: jmp
        j.offset = offset
        functions.last().instructions.add(byte_inst::jmp(j))
        return -1
    }
    fun emit_jnz(reg: int, offset: int): int {
        var j: jnz
        j.reg = reg
        j.offset = offset
        functions.last().instructions.add(byte_inst::jnz(j))
        return -1
    }
    fun emit_ret(): int {
        functions.last().instructions.add(byte_inst::ret())
        return -1
    }
    fun emit_call(): int {
        functions.last().instructions.add(byte_inst::call())
        return -1
    }
    // Interprets the function named "main", tracing every instruction, and
    // returns stack_mem[0] when a ret is executed. Returns -1 if the
    // instructions run out without a ret. call is not supported and ends in
    // error("call").
    fun evaluate(): int {
        println("evaling main")
        var main_entry = functions.find_first_satisfying(fun(block: bytecode_function): bool return block.name == "main";)
        // reg_max is only refreshed by reset_reg, so the registers handed out
        // since the last reset are only reflected in reg_counter; the register
        // file has to be big enough for whichever of the two is larger
        var reg_count = max(reg_counter, reg_max)
        var registers.construct(reg_count): vector<int>
        registers.size = reg_count
        registers.for_each(fun(r: ref int) { r = 0; })
        registers[0] = 1 // start RS at 1, as main returns an int of size 1
        var stack_mem.construct(10): vector<int>
        stack_mem.size = 10
        stack_mem.for_each(fun(cell: ref int) { cell = 0; })
        // i is the program counter
        for (var i = 0; i < main_entry.instructions.size; i++;) {
            println(string("evaling: ") + i + ": " + to_string(main_entry.instructions[i]))
            match(main_entry.instructions[i]) {
                byte_inst::nop() {}
                byte_inst::imm(im) registers[im.reg] = im.val
                byte_inst::add(a) registers[a.to_reg] = registers[a.a] + registers[a.b]
                byte_inst::ldr(l) registers[l.to_reg] = stack_mem[registers[l.from_reg] + l.offset]
                byte_inst::str(s) stack_mem[registers[s.to_reg] + s.offset] = registers[s.from_reg]
                byte_inst::jmp(j) i += j.offset - 1 // to counteract pc inc
                byte_inst::jnz(j) if (registers[j.reg] != 0)
                    i += j.offset - 1 // to counteract pc inc
                byte_inst::call() error("call")
                /*byte_inst::ret() return stack_mem[registers[0]]*/
                byte_inst::ret() {
                    print("returning! return value is\n\t")
                    /*println(stack_mem[registers[0]])*/
                    println(stack_mem[0])
                    println("total memory is")
                    stack_mem.for_each(fun(cell: int) {
                        println(string("\t") + cell)
                    })
                    /*return stack_mem[registers[0]]*/
                    return stack_mem[0]
                }
            }
        }
        return -1
    }
}