From 7850f1129766607aaf45e23c3f054f4a498ce97f Mon Sep 17 00:00:00 2001
From: Nathan Braswell
Date: Fri, 2 Feb 2018 00:26:31 -0500
Subject: [PATCH] very basic groundwork for bytecode

---
 kraken.krak                      |  37 ++++--
 stdlib/bytecode_generator.krak   | 189 +++++++++++++++++++++++++++++++
 stdlib/function_value_lower.krak |  11 ++
 stdlib/os.krak                   |   8 ++
 4 files changed, 235 insertions(+), 10 deletions(-)
 create mode 100644 stdlib/bytecode_generator.krak

diff --git a/kraken.krak b/kraken.krak
index 809871d..8378b82 100644
--- a/kraken.krak
+++ b/kraken.krak
@@ -8,6 +8,7 @@ import symbol:*
 import tree:*
 import serialize:*
 import interpreter:*
+import bytecode_generator:*
 import os:*
 import ast_transformation:*
 import importer:*
@@ -25,6 +26,7 @@ import vector:*
 import set:*
 
 fun main(argc: int, argv: **char):int {
+    var curr_time = get_time() // timestamp threaded through every split() call below
     // delay construction until we either load it or copy construct it
     var gram: grammer
     var base_dir = string("/").join(string(argv[0]).split('/').slice(0,-2))
@@ -103,6 +105,7 @@ fun main(argc: int, argv: **char):int {
         println("calculated, writing out")
         write_file_binary(compiled_name, serialize(compiled_version) + serialize(file_contents) + serialize(gram))
         println("done writing")
+        curr_time = split(curr_time, "Grammer regen")
     }
 
     var lex = lexer(gram.terminals)
@@ -140,57 +143,71 @@ fun main(argc: int, argv: **char):int {
     var executable_name = string(".").join(kraken_file_name.split('.').slice(0,-2))
     if (positional_args.size > 1)
         executable_name = positional_args[1]
+    curr_time = split(curr_time, "Finish setup")
     var name_ast_map = import(kraken_file_name, parsers, ast_pass, vector(string(), base_dir + "/stdlib/"))
+    curr_time = split(curr_time, "Import")
     // Passes
     /*printlnerr("Counting Nodes")*/
     /*node_counter(&name_ast_map, &ast_pass.ast_to_syntax)*/
-    printlnerr("Lowering ADTs")
+    /*printlnerr("Lowering ADTs")*/
     adt_lower(&name_ast_map, &ast_pass.ast_to_syntax)
+    curr_time = split(curr_time, "Lowering ADTs")
     /*printlnerr("Counting Nodes")*/
     /*node_counter(&name_ast_map, &ast_pass.ast_to_syntax)*/
-    printlnerr("Lowering Objects")
+    /*printlnerr("Lowering Objects")*/
     obj_lower(&name_ast_map, &ast_pass.ast_to_syntax)
+    curr_time = split(curr_time, "Lowering Objects")
     /*printlnerr("Counting Nodes")*/
     /*node_counter(&name_ast_map, &ast_pass.ast_to_syntax)*/
-    printlnerr("Lowering Defer")
+    /*printlnerr("Lowering Defer")*/
     defer_lower(&name_ast_map, &ast_pass.ast_to_syntax)
+    curr_time = split(curr_time, "Lowering Defer")
     /*printlnerr("Counting Nodes")*/
     /*node_counter(&name_ast_map, &ast_pass.ast_to_syntax)*/
     // Should come after lowering of ADTs and before lowering of Refs
-    printlnerr("Lowering Function Values (Lambdas, etc)")
+    /*printlnerr("Lowering Function Values (Lambdas, etc)")*/
     function_value_lower(&name_ast_map, &ast_pass.ast_to_syntax)
+    curr_time = split(curr_time, "Lowering Function Values (Lambdas, etc)")
     /*printlnerr("Counting Nodes")*/
     /*node_counter(&name_ast_map, &ast_pass.ast_to_syntax)*/
-    printlnerr("Lowering Ref")
+    /*printlnerr("Lowering Ref")*/
     ref_lower(&name_ast_map, &ast_pass.ast_to_syntax)
+    curr_time = split(curr_time, "Lowering Ref")
     /*printlnerr("Counting Nodes")*/
     /*node_counter(&name_ast_map, &ast_pass.ast_to_syntax)*/
     // Lowers #ctce and the current #ctce_pass
-    printlnerr("Lowering CTCE")
+    /*printlnerr("Lowering CTCE")*/
     ctce_lower(&name_ast_map, &ast_pass.ast_to_syntax)
+    curr_time = split(curr_time, "Lowering CTCE")
     /*printlnerr("Counting Nodes")*/
     /*node_counter(&name_ast_map, &ast_pass.ast_to_syntax)*/
     // Makes sure that & always takes reference to a variable
-    printlnerr("Lowering & to always have variable")
+    /*printlnerr("Lowering & to always have variable")*/
     address_of_ensure_variable_lower(&name_ast_map, &ast_pass.ast_to_syntax)
+    curr_time = split(curr_time, "Lowering & to always have variable")
     if (interpret_instead) {
-        printlnerr("Interpreting!")
-        call_main(name_ast_map)
+        /*printlnerr("Interpreting!")*/
+        /*call_main(name_ast_map)*/
+        printlnerr("Generating bytecode!") // this branch now dumps bytecode instead of calling call_main
+        var bytecode = generate_bytecode(name_ast_map, ast_pass.ast_to_syntax)
+        printlnerr(bytecode_to_string(bytecode))
     } else {
         if (line_ctrl) {
             printlnerr("running C-specific passes")
             printlnerr("running #line")
             c_line_control(&name_ast_map, &ast_pass.ast_to_syntax)
         }
-        printlnerr("Generating C")
+        /*printlnerr("Generating C")*/
         var c_generator.construct(): c_generator
         var c_output_pair = c_generator.generate_c(name_ast_map, ast_pass.ast_to_syntax)
         var kraken_c_output_name = kraken_file_name + ".c"
         write_file(kraken_c_output_name, c_output_pair.first)
+        curr_time = split(curr_time, "Generating C")
         if (compile_c) {
             var compile_string = "cc -g " + opt_str + " -Wno-int-to-pointer-cast -Wno-pointer-to-int-cast -Wno-incompatible-pointer-types -std=c99 " + c_output_pair.second + " " + kraken_c_output_name + " -o " + executable_name
             printlnerr(compile_string)
             system(compile_string)
+            curr_time = split(curr_time, "Compiling C")
         }
     }
 
diff --git a/stdlib/bytecode_generator.krak b/stdlib/bytecode_generator.krak
new file mode 100644
index 0000000..989f661
--- /dev/null
+++ b/stdlib/bytecode_generator.krak
@@ -0,0 +1,189 @@
+import io:*
+import mem:*
+import map:*
+import hash_map:*
+import stack:*
+import string:*
+import util:*
+import tree:*
+import symbol:*
+import ast_nodes:*
+// for error with syntax tree
+import pass_common:*
+import poset:*
+
+
+adt byte_inst { // bytecode instruction set; nop is the only instruction defined so far
+    nop
+}
+
+fun bytecode_to_string(bytecode: ref vector<byte_inst>): string { // renders one instruction mnemonic per line
+    var res = string()
+    bytecode.for_each(fun(b: byte_inst) {
+        match (b) {
+            byte_inst::nop() res += "nop"
+        }
+        res += "\n"
+    })
+    return res
+}
+
+fun generate_bytecode(name_ast_map: ref map<string, pair<*tree<symbol>,*ast_node>>, ast_to_syntax_in: ref map<*ast_node, *tree<symbol>> ): vector<byte_inst> { // wrapper: constructs a bytecode_generator and runs it
+    var generator.construct(): bytecode_generator
+    return generator.generate_bytecode(name_ast_map, ast_to_syntax_in)
+}
+
+obj bytecode_generator (Object) {
+    var id_counter: int
+    var ast_name_map: hash_map<*ast_node, string>
+    var bytecode: vector<byte_inst>
+    fun construct(): *bytecode_generator {
+        id_counter = 0
+        ast_name_map.construct()
+        bytecode.construct()
+
+        return this
+    }
+    fun copy_construct(old: *bytecode_generator) {
+        id_counter = old->id_counter
+        ast_name_map.copy_construct(&old->ast_name_map)
+        bytecode.copy_construct(&old->bytecode)
+    }
+    fun operator=(other: ref bytecode_generator) {
+        destruct()
+        copy_construct(&other)
+    }
+    fun destruct() {
+        ast_name_map.destruct()
+        bytecode.destruct()
+    }
+    fun get_id(): string return to_string(id_counter++); // hands out the counter value as text, then bumps it
+    fun generate_bytecode(name_ast_map: map<string, pair<*tree<symbol>,*ast_node>>, ast_to_syntax_in: map<*ast_node, *tree<symbol>> ): vector<byte_inst> {
+
+        // iterate through asts
+        name_ast_map.for_each(fun(name: string, tree_pair: pair<*tree<symbol>,*ast_node>) {
+            // iterate through children for each ast
+            // do lambdas separately, so we can reconstitute the enclosing object if it has one
+            tree_pair.second->translation_unit.lambdas.for_each(fun(child: *ast_node) {
+                generate_function_definition(child)
+            })
+            tree_pair.second->translation_unit.children.for_each(fun(child: *ast_node) {
+                match (*child) {
+                    ast_node::declaration_statement(backing) generate_declaration_statement(child)
+                    ast_node::compiler_intrinsic(backing) generate_compiler_intrinsic(child)
+                    ast_node::function(backing) generate_function_definition(child)
+                    ast_node::template(backing) {
+                        backing.instantiated.for_each(fun(node: *ast_node) {
+                            match (*node) {
+                                ast_node::function(backing) generate_function_definition(node)
+                                ast_node::type_def(backing) {
+                                    backing.methods.for_each(fun(method: *ast_node) {
+                                        if (is_template(method))
+                                            method->template.instantiated.for_each(fun(m: *ast_node) generate_function_definition(m);)
+                                        else
+                                            generate_function_definition(method)
+                                    })
+                                }
+                            }
+                        })
+                    }
+                    ast_node::type_def(backing) {
+                        backing.methods.for_each(fun(method: *ast_node) {
+                            if (is_template(method))
+                                method->template.instantiated.for_each(fun(m: *ast_node) generate_function_definition(m);)
+                            else
+                                generate_function_definition(method)
+                        })
+                    }
+                }
+            })
+        })
+        return bytecode
+    }
+    fun generate_function_definition(node: *ast_node) { // placeholder: appends a single nop
+        bytecode.add(byte_inst::nop())
+    }
+    fun generate_declaration_statement(node: *ast_node) {
+        var identifier = node->declaration_statement.identifier
+        var ident_type = identifier->identifier.type
+        if (identifier->identifier.is_extern)
+            bytecode.add(byte_inst::nop())
+        if (node->declaration_statement.expression) {
+            bytecode.add(byte_inst::nop())
+        }
+    }
+    fun generate_assignment_statement(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+    fun generate_if_statement(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+    fun generate_while_loop(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+    fun generate_for_loop(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+    fun generate_identifier(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+    fun generate_return_statement(node: *ast_node) {
+        if (node->return_statement.return_value)
+            bytecode.add(byte_inst::nop())
+        else
+            bytecode.add(byte_inst::nop())
+    }
+    fun generate_branching_statement(node: *ast_node) {
+        match(node->branching_statement.b_type) {
+            branching_type::break_stmt() bytecode.add(byte_inst::nop())
+            branching_type::continue_stmt() bytecode.add(byte_inst::nop())
+        }
+    }
+    fun generate_cast(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+    fun generate_value(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+    fun generate_code_block(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+    // this generates the function as a value, not the actual function
+    fun generate_function(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+    fun generate_function_call(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+
+    fun generate_compiler_intrinsic(node: *ast_node) {
+        bytecode.add(byte_inst::nop())
+    }
+
+    fun generate(node: *ast_node) { // dispatches on the node kind to the matching generate_* method
+        match (*node) {
+            ast_node::declaration_statement(backing) generate_declaration_statement(node)
+            ast_node::assignment_statement(backing) generate_assignment_statement(node)
+            ast_node::if_statement(backing) generate_if_statement(node)
+            ast_node::while_loop(backing) generate_while_loop(node)
+            ast_node::for_loop(backing) generate_for_loop(node)
+            ast_node::function(backing) generate_function(node)
+            ast_node::function_call(backing) generate_function_call(node)
+            ast_node::compiler_intrinsic(backing) generate_compiler_intrinsic(node)
+            ast_node::code_block(backing) generate_code_block(node)
+            ast_node::return_statement(backing) generate_return_statement(node)
+            ast_node::branching_statement(backing) generate_branching_statement(node)
+            ast_node::cast(backing) generate_cast(node)
+            ast_node::value(backing) generate_value(node)
+            ast_node::identifier(backing) generate_identifier(node)
+        }
+    }
+    fun get_name(node: *ast_node): string { // returns the cached name for node, creating "name<id>" on first request
+        var maybe_it = ast_name_map.get_ptr_or_null(node);
+        if (maybe_it)
+            return *maybe_it
+        var result = "name" + get_id()
+        ast_name_map.set(node, result)
+        return result
+    }
+}
diff --git a/stdlib/function_value_lower.krak b/stdlib/function_value_lower.krak
index 418b9a8..afc35a5 100644
--- a/stdlib/function_value_lower.krak
+++ b/stdlib/function_value_lower.krak
@@ -10,6 +10,7 @@ import io:*
 import ast_nodes:*
 import ast_transformation:*
 import hash_set:*
+import os:*
 
 import pass_common:*
 
@@ -88,6 +89,7 @@ fun in_scope_chain(node: *ast_node, high_scope: *ast_node): bool {
 }
 
 fun function_value_lower(name_ast_map: *map<string, pair<*tree<symbol>,*ast_node>>, ast_to_syntax: *map<*ast_node, *tree<symbol>>) {
+    var curr_time = get_time() // start of the per-phase timing printed by the split() calls in this function
     var visited = hash_set<*ast_node>()
     var lambdas = set<*ast_node>()
     name_ast_map->for_each(fun(name: string, syntax_ast_pair: pair<*tree<symbol>,*ast_node>) {
@@ -135,6 +137,7 @@ fun function_value_lower(name_ast_map: *map<string, pair<*tree<symbol>,*ast_node
         }
         run_on_tree(helper_before, empty_pass_second_half(), syntax_ast_pair.second, &visited)
     })
+    curr_time = split(curr_time, "\tclosed_over_uses + function_value_call_points")
 
     var void_ptr = type_ptr(base_type::void_return(), 1)
     var lambda_type_to_struct_type_and_call_func = map<type, pair<*type,*ast_node>>(); //freaking vexing parse moved
@@ -144,6 +147,7 @@ fun function_value_lower(name_ast_map: *map<string, pair<*tree<symbol>,*ast_node
         return set<*type>()
     })
     var all_type_values = all_types.map(fun(t: *type): type return *t;)
+    curr_time = split(curr_time, "\tall types/all type values")
     all_type_values.for_each(fun(t: type) {
         if (t.is_function() && t.indirection == 0 && !t.is_raw && !lambda_type_to_struct_type_and_call_func.contains_key(t)) {
             var cleaned = t.clone()
@@ -197,6 +201,7 @@ fun function_value_lower(name_ast_map: *map<string, pair<*tree<symbol>,*ast_node
             name_ast_map->values.first().second->translation_unit.children.add(lambda_call_function)
         }
     })
+    curr_time = split(curr_time, "\tall type values forEach")
 
     var lambda_creation_funcs = map<*ast_node, *ast_node>()
     // create the closure type for each lambda
@@ -258,12 +263,14 @@ fun function_value_lower(name_ast_map: *map<string, pair<*tree<symbol>,*ast_node
         lambda_creation_funcs[l]->function.body_statement = body
         name_ast_map->values.first().second->translation_unit.children.add(lambda_creation_funcs[l])
     })
+    curr_time = split(curr_time, "\tlambdas forEach")
     function_value_call_points.for_each(fun(p: function_parent_block) {
         // parent is the function call
         var function_struct = p.function
         p.parent->function_call.func = lambda_type_to_struct_type_and_call_func[*get_ast_type(p.function)].second
         p.parent->function_call.parameters.add(0, function_struct)
     })
+    curr_time = split(curr_time, "\tfunction_value_call_points.forEach")
     function_value_creation_points.for_each(fun(p: function_parent_block) {
         var lambda_creation_params = vector<*ast_node>()
         // add the declaration of the closure struct to the enclosing code block
@@ -284,11 +291,13 @@ fun function_value_lower(name_ast_map: *map<string, pair<*tree<symbol>,*ast_node
         var func_call = ast_function_call_ptr(lambda_creation_funcs[p.function], lambda_creation_params)
         replace_with_in(p.function, func_call, p.parent)
     })
+    curr_time = split(curr_time, "\tfunction_value_creation_points.forEach")
     lambdas.for_each(fun(l: *ast_node) l->function.type = l->function.type->clone();)
     all_types.for_each(fun(t: *type) {
         if (lambda_type_to_struct_type_and_call_func.contains_key(*t))
             *t = *lambda_type_to_struct_type_and_call_func[*t].first
     })
+    curr_time = split(curr_time, "\tlambdas.for_each")
     closed_over_uses.for_each(fun(p: pair<*ast_node, pair<*ast_node, *ast_node>>) {
         var variable = p.first
         var parent = p.second.first
@@ -296,9 +305,11 @@ fun function_value_lower(name_ast_map: *map<string, pair<*tree<symbol>,*ast_node
         var closure_param = lambda->function.parameters[0]
         replace_with_in(variable, make_operator_call("*", vector(access_expression(closure_param, variable->identifier.name))), parent)
     })
+    curr_time = split(curr_time, "\tclosed_over_uses")
     // now we can make them raw
     lambdas.for_each(fun(l: *ast_node) {
         l->function.type->is_raw = true;
     })
+    curr_time = split(curr_time, "\tlambdas is raw")
 }
 
diff --git a/stdlib/os.krak b/stdlib/os.krak
index fc36f55..0f8033f 100644
--- a/stdlib/os.krak
+++ b/stdlib/os.krak
@@ -22,3 +22,11 @@ fun from_system_command(command: string, line_size: int): string {
     pclose(p)
     return to_ret
 }
+fun get_time(): long { return string_to_num<long>(from_system_command(string("date +%s"), 50)); } // parses the output of the external `date +%s` command (whole seconds)
+fun split(time: long, split_label: *char): long { // prints "<label>: <now - time>" and returns now, so calls can be chained
+    var new_time = get_time()
+    print(string(split_label) + ": ")
+    println(new_time - time)
+    return new_time
+}
+