import io:*
import grammer:*
import lexer:*
import parser:*
import str:*
import serialize:*
import os:*
import set:*
import vec:*
import vec_literals:*
import poset:*
import util:*
import ast:*
import type2:*
import tree:*
import symbol:*
import binding:*

// Compiler driver.
//
// Steps, in order:
//   1. load the grammar (from the serialized cache next to the grammar file
//      when its version and contents match, otherwise regenerate and rewrite
//      the cache),
//   2. parse the command line (-O<level>, --no-c-compile, positional args),
//   3. build the table of primitive operator intrinsics,
//   4. register the passes "name_possibility_resolve", "name_type_resolve"
//      and "emit_C" and run them through pass_poset starting from a
//      synthesized C `main`,
//   5. write <input>.c and, unless --no-c-compile was given, invoke `cc`.
//
// argc/argv: the process command line. positional_args[0] is the input file,
// positional_args[1] (optional) the output executable name.
// Returns 0 on completion; usage errors call exit(1), --version calls exit(0).
fun main(argc: int, argv: **char): int {
    // delay construction until we either load it or copy construct it
    var gram: grammer
    // NOTE(review): presumably the directory containing the compiler binary
    // (argv[0] with its last path component dropped) - confirm slice(0,-2) semantics
    var base_dir = str("/").join(str(argv[0]).split('/').slice(0,-2))
    var import_paths = vec(str(), base_dir + "/stdlib/")
    var file_name = base_dir + "/krakenGrammer.kgm"
    var compiled_name = file_name + str(".comp_new")
    var compiled_version = 1
    var file_contents = read_file(file_name)
    var loaded_and_valid = false

    if (argc <= 1) {
        println("No input file!\n Call with one argument (the input file), or two arguments (input file and output name)")
        exit(1)
    } else if (str(argv[1]) == "-v" || str(argv[1]) == "--version") {
        println("0.0 pre")
        exit(0)
    }

    // Split the command line into option flags and positional arguments.
    var opt_str = str("-O2")
    var compile_c = true
    var positional_args = vec()
    for (var i = 1; i < argc; i++;) {
        var arg_str = str(argv[i])
        if (arg_str.length() > 2 && arg_str.slice(0,2) == "-O") {
            opt_str = arg_str
        } else if (arg_str == "--no-c-compile") {
            compile_c = false
        } else {
            positional_args.add(arg_str)
        }
    }
    // argc > 1 does not guarantee an input file: every argument may have been
    // an option (e.g. only "-O3"). Fail with the usage message instead of
    // indexing an empty vector below.
    if (positional_args.size == 0) {
        println("No input file!\n Call with one argument (the input file), or two arguments (input file and output name)")
        exit(1)
    }

    // Try the serialized grammar cache: it is only used when both the stored
    // version number and the stored grammar text match the current ones.
    if (file_exists(compiled_name)) {
        var pos = 0
        var binary = read_file_binary(compiled_name)
        var saved_version = 0
        unpack(saved_version, pos) = unserialize(binary, pos)
        if (saved_version == compiled_version) {
            var cached_contents = str()
            unpack(cached_contents, pos) = unserialize(binary, pos)
            if (cached_contents == file_contents) {
                loaded_and_valid = true
                pos = gram.unserialize(binary, pos)
            } else println("contents different")
        } else println("version number different")
    } else {
        println("cached file does not exist")
    }
    if (!loaded_and_valid) {
        println("Not loaded_and_valid, re-generating and writing out")
        gram.copy_construct(&load_grammer(file_contents))
        println("grammer loaded, calculate_first_set")
        gram.calculate_first_set()
        println("grammer loaded, calculate_state_automaton")
        gram.calculate_state_automaton()
        println("calculated, writing out")
        write_file_binary(compiled_name, serialize(compiled_version) + serialize(file_contents) + serialize(gram))
        println("done writing")
    }

    var lex = lexer(gram.terminals)
    var parse.construct(&gram, &lex): parser

    var kraken_file_name = positional_args[0]
    // Default executable name is derived from the input file name; an explicit
    // second positional argument overrides it.
    var executable_name = str(".").join(kraken_file_name.split('.').slice(0,-2))
    if (positional_args.size > 1)
        executable_name = positional_args[1]

    var pass_poset = poset, str>>()
    var name_ast_map = map>()
    var passes = map): void>()
    var multiple_binding_options = map<*tree, vec<*tree>>()
    var primitive_ops.construct(): map>>

    // Numeric types ordered from narrowest to widest; for arithmetic the result
    // type of a mixed operation is whichever operand sits later in this list.
    var number_tower = vec(binding_p(type::_char()),
                           binding_p(type::_uchar()),
                           binding_p(type::_short()),
                           binding_p(type::_ushort()),
                           binding_p(type::_int()),
                           binding_p(type::_uint()),
                           binding_p(type::_long()),
                           binding_p(type::_ulong()),
                           binding_p(type::_float()),
                           binding_p(type::_double()))

    // Comparison intrinsics: every pair of numeric types, result is bool.
    var comparators = vec(str("=="), str("<="), str(">="), str("!="), str("<"), str(">"))
    for (var i = 0; i < comparators.size; i++;) {
        primitive_ops["op" + comparators[i]] = vec<*tree>()
        for (var j = 0; j < number_tower.size; j++;)
            for (var k = 0; k < number_tower.size; k++;)
                primitive_ops["op" + comparators[i]].add(_compiler_intrinsic(comparators[i], binding_p(type::_fun(make_triple(make_pair(vec(
                                number_tower[j],
                                number_tower[k]
                            ),
                            binding_p(type::_bool())
                        ), false, false))), vec<*binding>()))
    }

    // Arithmetic / bitwise intrinsics: every pair of numeric types.
    var math = vec(str("+"), str("-"), str("*"), str("/"), str("&"), str("|"), str("^"))
    for (var i = 0; i < math.size; i++;) {
        primitive_ops["op" + math[i]] = vec<*tree>()
        for (var j = 0; j < number_tower.size; j++;) {
            for (var k = 0; k < number_tower.size; k++;) {
                var return_type = null>()
                if (j > k) {
                    return_type = number_tower[j]
                } else {
                    return_type = number_tower[k]
                }
                primitive_ops["op" + math[i]].add(_compiler_intrinsic(math[i], binding_p(type::_fun(make_triple(make_pair(vec(
                                number_tower[j],
                                number_tower[k]
                            ),
                            return_type
                        ), false, false))), vec<*binding>()))
            }
        }
    }
    // cute hack for getting plain =
    // (the empty string is appended so that math[i] + "=" also yields "=";
    // it is removed again right after the loop)
    math.add(str(""))
    for (var i = 0; i < math.size; i++;) {
        primitive_ops["op" + math[i] + "="] = vec<*tree>()
        for (var j = 0; j < number_tower.size; j++;) {
            // only k <= j: the right-hand side may not be wider than the target
            for (var k = 0; k <= j; k++;) {
                primitive_ops["op" + math[i] + "="].add(_compiler_intrinsic(math[i] + "=", binding_p(type::_fun(make_triple(make_pair(vec(
                                number_tower[j],
                                number_tower[k]
                            ),
                            binding_p(type::_void())
                        ), false, false))), vec<*binding>()))
            }
        }
    }
    math.remove(math.size-1)

    // resolves all binding possibilities for one top level item
    passes[str("name_possibility_resolve")] = fun(item: *tree) {
        println("Running name possibility resolver?")
        // Collects every node named `name` that is visible from `scope`,
        // walking up through parents and following imports (parsing imported
        // files on first use). At the outermost scope the primitive operator
        // table is consulted as well.
        var scope_lookup: fun(*tree, str, bool): vec<*tree> = fun(scope: *tree, name: str, is_type: bool): vec<*tree> {
            var to_ret = vec<*tree>()
            for (var i = 0; i < scope->children.size; i++;) {
                match(scope->children[i]->data) {
                    ast::_import(b) if b.second.contains(name) || b.second.contains(str("*")) {
                        if !ast_bound(b.first) {
                            // Import / parse file if not already
                            var file_path = ast_binding_str(b.first)
                            if (!name_ast_map.contains_key(file_path)) {
                                printerr(file_path + ", ")
                                var parse_tree = parse.parse_input(read_file(file_path), file_path)
                                trim(parse_tree)
                                name_ast_map[file_path] = syntax_to_ast(file_path, parse_tree, import_paths)
                                printlnerr("syntax_to_ast " + file_path + ":")
                                print_tree(name_ast_map[file_path], 1)
                            }
                            set_ast_binding(b.first, name_ast_map[file_path])
                        }
                        to_ret += scope_lookup(get_ast_binding(b.first), name, is_type)
                    }
                    ast::_type_def(b) if (is_type && b == name) to_ret += scope->children[i]
                    ast::_adt_def(b) if (is_type && b == name) to_ret += scope->children[i]
                    ast::_function(b) if (!is_type && b.first == name) to_ret += scope->children[i]
                    ast::_template(b) if (!is_type && b.first == name) to_ret += scope->children[i]
                    ast::_identifier(b) if (!is_type && b.first == name) to_ret += scope->children[i]
                    ast::_declaration() if (!is_type && scope->children[i]->children[0]->data._identifier.first == name) to_ret += scope->children[i]->children[0]
                }
            }
            if (scope->parent != null>())
                return to_ret + scope_lookup(scope->parent, name, is_type)
            else if (primitive_ops.contains_key(name))
                to_ret += primitive_ops[name]
            return to_ret
        }
        // Binds `binding` immediately when exactly one candidate exists;
        // several candidates are parked in multiple_binding_options for the
        // type-driven selection done by name_type_resolve.
        var try_binding = fun(binding: *tree, start_scope: *tree, type_binding: bool) {
            if !ast_bound(binding) {
                var options = scope_lookup(start_scope, ast_binding_str(binding), type_binding)
                if (options.size == 0)
                    error("Could not find any options for scope lookup of " + ast_binding_str(binding))
                else if (options.size == 1)
                    set_ast_binding(binding, options[0])
                else
                    multiple_binding_options[binding] = options
            }
        }
        // Resolves object-type names mentioned inside a type, recursing
        // through function parameter and return types.
        var handle_type: fun(*binding, *tree): void = fun(t: *binding, n: *tree) {
            match(*t->bound_to) {
                type::_obj(b) try_binding(b, n, true)
                type::_fun(b) {
                    b.first.first.for_each(fun(it: *binding) {
                        handle_type(it, n)
                    })
                    handle_type(b.first.second, n)
                }
            }
        }
        var traverse_for_bindings: fun(*tree): void = fun(t: *tree) {
            match (t->data) {
                // TODO: Handle type binding lookup
                ast::_identifier(b) handle_type(b.second, t)
                /*_binding: triple, *tree>,*/
                ast::_function(b) handle_type(b.second, t)
                ast::_compiler_intrinsic(b) {
                    /*handle_type(b.second, t)*/
                    b.third.for_each(fun(tb: *binding) {
                        handle_type(tb, t)
                    })
                }
                ast::_cast(b) handle_type(b, t)
                /*_value: pair*/
                ast::_binding(b) try_binding(t, t, false)
            }
            t->children.for_each(traverse_for_bindings)
        }
        traverse_for_bindings(item)
    }

    // Type of each binding node, memoized so that every use of a binding
    // shares (and can be unified through) the same type object.
    var binding_types = map<*tree, *binding>()
    // Returns the type binding of an AST node. Unbound bindings get a fresh
    // unknown type; a call through an unknown callee turns that callee's type
    // into a function type built from the argument types.
    var get_type: fun(*tree): *binding = fun(a: *tree): *binding {
        match(a->data) {
            ast::_identifier(b) return b.second
            ast::_binding(b) if (binding_types.contains_key(a)) {
                return binding_types[a]
            } else {
                if (ast_bound(a)) {
                    var t = get_type(get_ast_binding(a))
                    binding_types[a] = t
                    return t
                } else {
                    var new_type = binding_p(type::_unknown())
                    binding_types[a] = new_type
                    return new_type
                }
            }
            ast::_function(b) return b.second
            ast::_template(b) {
                return inst_temp_type(get_type(a->children[0]), b.second.associate(fun(k: str, v: *binding): pair<*binding, *binding> return make_pair(v, binding_p(type::_unknown()));))
            }
            ast::_compiler_intrinsic(b) return b.second
            ast::_call() {
                var t = get_type(a->children[0])
                if (is_fun(t->bound_to))
                    return t->bound_to->_fun.first.second
                if (is_unknown(t->bound_to)) {
                    var return_type = binding_p(type::_unknown())
                    var parameter_types = vec<*binding>()
                    for (var i = 1; i < a->children.size; i++;)
                        parameter_types.add(get_type(a->children[i]))
                    t->set(type::_fun(make_triple(make_pair(parameter_types, return_type), false, false)))
                    return return_type
                }
                error("Trying to get type of call where type of first child is not function, but " + to_string(t->bound_to))
            }
            ast::_cast(b) return b
            ast::_value(b) return b.second
        }
        error("Trying to get type of node without one: " + to_string(a->data))
    }

    // resolves all binding possibilities for one top level item
    // (unifies types across declarations, calls and returns, then uses the
    // inferred types to pick among overload candidates)
    passes[str("name_type_resolve")] = fun(item: *tree) {
        // Requires name_possibility_resolve on the same item; if that has not
        // run yet, register the dependency and let the poset re-run us later.
        if !pass_poset.done(make_pair(item, str("name_possibility_resolve"))) {
            pass_poset.add_open_dep(make_pair(item, str("name_type_resolve")), make_pair(item, str("name_possibility_resolve")))
            return
        }
        println("name_type resolve for:")
        print_tree(item, 1)
        var traverse_for_unify: fun(*tree): void = fun(t: *tree) {
            // children first, so inner expressions are typed before their parents
            t->children.for_each(traverse_for_unify)
            match (t->data) {
                ast::_declaration() if (t->children.size > 1)
                    unify(get_type(t->children[0]), get_type(t->children[1]))
                ast::_call() {
                    // we call get type to make sure if it is unknown it is transformed into a function version
                    get_type(t)
                    var fun_type = get_type(t->children[0])->bound_to
                    if (!is_fun(fun_type))
                        error("trying to call not a function type: " + to_string(fun_type))
                    if (fun_type->_fun.first.first.size != (t->children.size - 1))
                        error("trying to call function with type wrong number of params (" + to_string(fun_type->_fun.first.first.size) + " vs " + to_string(t->children.size - 1) + "): " + to_string(fun_type))
                    for (var i = 1; i < t->children.size; i++;)
                        unify(fun_type->_fun.first.first[i-1], get_type(t->children[i]))
                }
                ast::_return() if (t->children.size > 0)
                    unify(get_type(get_ancestor_satisfying(t, fun(t: *tree): bool return is_function(t);))->bound_to->_fun.first.second, get_type(t->children[0]))
            }
        }
        traverse_for_unify(item)

        // Iterate overload selection: each successful selection unifies more
        // types, which may disambiguate further bindings. When a round makes
        // no progress, traverse_for_error reports the ambiguous bindings.
        var more_to_do = true
        while (more_to_do) {
            more_to_do = false
            var work_done = false
            var traverse_for_select: fun(*tree): void = fun(t: *tree) {
                match (t->data) {
                    ast::_binding(b) if (!ast_bound(t)) {
                        println(to_string(t->data) + " - not bound!")
                        var filtered_options = multiple_binding_options[t].filter(fun(p: *tree): bool return equality(binding_types[t]->bound_to, get_type(p)->bound_to, true);)
                        if (filtered_options.size == 0) {
                            println("Attempting to use our inferenced type " + to_string(binding_types[t]->bound_to) + " to decide what to bind " + to_string(t->data) + " to from options:")
                            multiple_binding_options[t].for_each(fun(p: *tree) { println("\t" + to_string(p->data) + " of type " + to_string(get_type(p)->bound_to)); })
                            error("no options remain after filtering overloads by type for " + to_string(t->data))
                        } else if (filtered_options.size > 1) {
                            more_to_do = true
                        } else {
                            set_ast_binding(t, filtered_options[0])
                            unify(binding_types[t], get_type(filtered_options[0]))
                            work_done = true
                            println("work done! set " + to_string(t->data))
                        }
                    }
                }
                t->children.for_each(traverse_for_select)
            }
            traverse_for_select(item)
            if (!work_done) {
                var traverse_for_error: fun(*tree): void = fun(t: *tree) {
                    match (t->data) {
                        ast::_binding(b) if (!ast_bound(t)) {
                            var filtered_options = multiple_binding_options[t].filter(fun(p: *tree): bool return equality(binding_types[t]->bound_to, get_type(p)->bound_to, true);)
                            if (filtered_options.size > 1) {
                                println("Attempting to use our inferenced type " + to_string(binding_types[t]->bound_to) + " to decide what to bind " + to_string(t->data) + " to from options:")
                                multiple_binding_options[t].for_each(fun(p: *tree) { println("\t" + to_string(p->data) + " of type " + to_string(get_type(p)->bound_to)); })
                                println("too many options remain after filtering overloads by type for " + to_string(t->data) + ", they were:")
                                filtered_options.for_each(fun(p: *tree) { println("\t" + to_string(p->data) + " of type " + to_string(get_type(p)->bound_to)); })
                                error("cannot resolve")
                            }
                        }
                    }
                    t->children.for_each(traverse_for_error)
                }
                traverse_for_error(item)
            }
        }
    }

    // emit C
    // C_str accumulates definitions, C_declaration_str the forward
    // declarations that are placed in front of them in the output file.
    var C_str = str()
    var C_declaration_str = str()
    // Spells a resolved type as C source text; unresolved or not yet
    // supported types are reported through error().
    var to_c_type: fun(*binding): str = fun(tb: *binding): str {
        match(*tb->bound_to) {
            type::_unknown()                error("unknown in to_c_type")
            type::_ptr(p)                   return to_c_type(p) + "*"
            type::_ref()                    error("ref in to_c_type")
            type::_void()                   return str("void")
            type::_obj(b)                   error("obj in to_c_type unimplemented")
            type::_fun(b)                   error("fun in to_c_type unimplemented")
            type::_template_placeholder()   error("template_placeholder in to_c_type")
            type::_bool()                   return str("bool")
            type::_char()                   return str("char")
            type::_uchar()                  return str("unsigned char")
            type::_short()                  return str("short")
            type::_ushort()                 return str("unsigned short")
            type::_int()                    return str("int")
            type::_uint()                   return str("unsigned int")
            type::_long()                   return str("long")
            type::_ulong()                  return str("unsigned long")
            type::_float()                  return str("float")
            type::_double()                 return str("double")
        }
        error("fell through to_c_type")
    }

    // C name already assigned to each AST node.
    var taken_names = map<*tree, str>()
    // Counter appended to a name to make it unique on collision.
    var id = 0
    // Operator / punctuation substrings and the identifier-safe text that
    // replaces them when a Kraken name is turned into a C identifier.
    var replacement_map.construct() : map
    replacement_map[str("+")] = str("plus")
    replacement_map[str("-")] = str("minus")
    replacement_map[str("*")] = str("star")
    replacement_map[str("/")] = str("div")
    replacement_map[str("%")] = str("mod")
    replacement_map[str("^")] = str("carat")
    replacement_map[str("&")] = str("amprsd")
    replacement_map[str("|")] = str("pipe")
    replacement_map[str("~")] = str("tilde")
    replacement_map[str("!")] = str("exlmtnpt")
    replacement_map[str(",")] = str("comma")
    replacement_map[str("=")] = str("eq")
    replacement_map[str("++")] = str("dbplus")
    replacement_map[str("--")] = str("dbminus")
    replacement_map[str("<<")] = str("dbleft")
    replacement_map[str(">>")] = str("dbright")
    replacement_map[str("::")] = str("scopeop")
    replacement_map[str(":")] = str("colon")
    replacement_map[str("==")] = str("dbq")
    replacement_map[str("!=")] = str("notequals")
    replacement_map[str("&&")] = str("doubleamprsnd")
    replacement_map[str("||")] = str("doublepipe")
    replacement_map[str("+=")] = str("plusequals")
    replacement_map[str("-=")] = str("minusequals")
    replacement_map[str("/=")] = str("divequals")
    replacement_map[str("%=")] = str("modequals")
    replacement_map[str("^=")] = str("caratequals")
    replacement_map[str("&=")] = str("amprsdequals")
    replacement_map[str("|=")] = str("pipeequals")
    replacement_map[str("*=")] = str("starequals")
    // "<<" is "dbleft" and ">>" is "dbright" above; keep the compound
    // assignment forms consistent with that direction.
    replacement_map[str("<<=")] = str("doubleleftequals")
    replacement_map[str("<")] = str("lt")
    replacement_map[str(">")] = str("gt")
    replacement_map[str(">>=")] = str("doublerightequals")
    replacement_map[str("(")] = str("openparen")
    replacement_map[str(")")] = str("closeparen")
    replacement_map[str("[")] = str("obk")
    replacement_map[str("]")] = str("cbk")
    replacement_map[str(" ")] = str("_")
    replacement_map[str(".")] = str("dot")
    replacement_map[str("->")] = str("arrow")

    // Length of the longest key, so cify_name can try longest matches first.
    var longest_replacement = 0
    replacement_map.for_each(fun(key: str, value: str) {
        if (key.length() > longest_replacement)
            longest_replacement = key.length()
    })
    // Rewrites `name` into text usable as a C identifier: at each position the
    // longest replacement_map key that matches is substituted, otherwise the
    // character is copied unchanged.
    var cify_name = fun(name: ref str): str {
        var to_ret = str()
        for (var i = 0; i < name.length(); i++;) {
            var replaced = false
            for (var j = longest_replacement; j > 0; j--;) {
                if (i + j <= name.length() && replacement_map.contains_key(name.slice(i,i+j))) {
                    to_ret += replacement_map[name.slice(i,i+j)]
                    replaced = true
                    // skip the rest of the matched key (the loop's i++ covers one char)
                    i += j-1;
                    break
                }
            }
            if (!replaced)
                to_ret += name[i]
        }
        return to_ret
    }
    // Returns the C identifier for an identifier, type_def or function node,
    // assigning and remembering one on first request.
    var get_c_name = fun(x: *tree): str {
        if (taken_names.contains_key(x))
            return taken_names[x]
        var possible = str()
        match(x->data) {
            ast::_identifier(b) { possible = b.first; }
            ast::_type_def(b)   { possible = b; }
            ast::_function(b)   { possible = b.first; }
        }
        if (possible == "")
            error("cannot get_c_name of thing: " + to_string(x->data))
        // taken_names stores mangled names, so the collision test has to be
        // made on the mangled form; testing the raw name missed collisions
        // for any name containing a replaced character (e.g. two "op+").
        var c_name = cify_name(possible)
        if (taken_names.contains_value(c_name)) {
            c_name += id++
        }
        taken_names[x] = c_name
        return taken_names[x]
    }

    // has to be set instead of map<> as we need to use type's "equality"
    // function instead of type's adt's operator==
    var instantiated_map = map<*tree, set, *tree>>>()

    // Appends the C translation of one top level item to C_str (and its
    // forward declaration to C_declaration_str), scheduling emit_C for every
    // top level item it references.
    passes[str("emit_C")] = fun(item: *tree) {
        // Requires name_type_resolve on the same item first.
        if !pass_poset.done(make_pair(item, str("name_type_resolve"))) {
            pass_poset.add_open_dep(make_pair(item, str("emit_C")), make_pair(item, str("name_type_resolve")))
            return
        }
        println("Emitting C for:")
        print_tree(item, 1)
        // t: node to emit; level: indentation depth (number of tabs).
        var emit_C: fun(*tree, int): void = fun(t: *tree, level: int) {
            var idt = str("\t") * level
            match (t->data) {
                ast::_translation_unit(b) {
                    t->children.for_each(fun(c: *tree) {
                        emit_C(c, 0)
                        C_str += ";\n"
                    })
                }
                ast::_import(b) { }
                ast::_identifier(b) { C_str += idt + get_c_name(t); }
                ast::_binding(b) {
                    var bound_to = get_ast_binding(t)
                    if (is_top_level_item(bound_to)) {
                        if (is_template(bound_to)) {
                            if (!instantiated_map.contains_key(bound_to))
                                instantiated_map[bound_to] = set, *tree>>()
                            var binding_type = get_type(t)
                            // grab inst types out of binding, or regen again from unify? Cache from first unify?
                            // regenning from unify
                            var inst_map = bound_to->data._template.second.associate(fun(k: str, v: *binding): pair<*binding, *binding> return make_pair(v, binding_p(type::_unknown()));)
                            unify(binding_type, inst_temp_type(get_type(bound_to->children[0]), inst_map))
                            // shouldn't cache by binding, but by all insted
                            var already_inst = instantiated_map[bound_to].filter(fun(p: pair<*binding, *tree>): bool return equality(binding_type->bound_to, p.first->bound_to, false);)
                            if (already_inst.size() > 1) {
                                error("already inst > 1, should be impossible")
                            } else if (already_inst.size() == 1) {
                                // reuse the existing instantiation for these types
                                pass_poset.add_close_dep(make_pair(item, str("emit_C")), make_pair(already_inst.single().second, str("emit_C")))
                                set_single_ast_binding(t, already_inst.single().second)
                            } else {
                                println("Copying tree to instantiate template!")
                                // Clone the template body, substituting the
                                // instantiation types into every type-carrying node.
                                var inst_copy = bound_to->children[0]->clone(fun(a: ref ast): ast {
                                    match (a) {
                                        ast::_identifier(b) return ast::_identifier(make_pair(b.first, inst_temp_type(b.second, inst_map)))
                                        ast::_binding(b) return ast::_binding(make_triple(b.first,
                                                                                          b.second.map(fun(bd: *binding): *binding return inst_temp_type(bd, inst_map);),
                                                                                          binding>()))
                                        ast::_function(b) return ast::_function(make_triple(b.first, inst_temp_type(b.second, inst_map), b.third))
                                        ast::_compiler_intrinsic(b) return ast::_compiler_intrinsic(make_triple(
                                                                                    b.first,
                                                                                    inst_temp_type(b.second, inst_map),
                                                                                    b.third.map(fun(bd: *binding): *binding return inst_temp_type(bd, inst_map);)))
                                        ast::_cast(b) return ast::_cast(inst_temp_type(b, inst_map))
                                        ast::_value(b) return ast::_value(make_pair(b.first, inst_temp_type(b.second, inst_map)))
                                        /*_template: pair>>,*/
                                    }
                                    return a
                                })
                                // add inst copy as a child of template?
                                bound_to->add_child(inst_copy)
                                println("inst from:")
                                print_tree(bound_to->children[0], 1)
                                println("inst to:")
                                print_tree(inst_copy, 1)
                                // save it in our insted map so we don't instantate more than once per types
                                instantiated_map[bound_to].add(make_pair(binding_type, inst_copy))
                                pass_poset.add_close_dep(make_pair(item, str("emit_C")), make_pair(inst_copy, str("emit_C")))
                                set_single_ast_binding(t, inst_copy)
                            }
                        } else {
                            pass_poset.add_close_dep(make_pair(item, str("emit_C")), make_pair(bound_to, str("emit_C")))
                        }
                    // top level var dec
                    } else if (is_identifier(bound_to) && is_declaration(bound_to->parent) && is_top_level_item(bound_to->parent)) {
                        pass_poset.add_close_dep(make_pair(item, str("emit_C")), make_pair(bound_to->parent, str("emit_C")))
                    }
                    // bound_to might have changed from binding
                    C_str += idt + get_c_name(get_ast_binding(t))
                }
                ast::_type_def(b) { error("type_def gen unimplemented"); }
                ast::_adt_def(b) { error("no adt_def should remain at C emit"); }
                ast::_function(b) {
                    /*var fun_name = b.first*/
                    var fun_name = get_c_name(t)
                    var fun_type = b.second->bound_to
                    var is_ext = b.third
                    var return_type = fun_type->_fun.first.second
                    var parameter_types = fun_type->_fun.first.first
                    var is_variadic = fun_type->_fun.second
                    var is_raw = fun_type->_fun.third
                    // TODO check is_ext for name mangling
                    C_str += to_c_type(return_type) + " " + fun_name + "("
                    C_declaration_str += to_c_type(return_type) + " " + fun_name + "("
                    // The first parameter_types.size children are the
                    // parameter identifiers; the definition gets "type name",
                    // the forward declaration only the type.
                    for (var i = 0; i < parameter_types.size; i++;) {
                        if (i != 0) {
                            C_str += ", "
                            C_declaration_str += ", "
                        }
                        C_str += to_c_type(parameter_types[i]) + " "
                        C_declaration_str += to_c_type(parameter_types[i])
                        emit_C(t->children[i], 0)
                    }
                    if (is_variadic) {
                        if (parameter_types.size != 0) {
                            C_str += ", "
                            C_declaration_str += ", "
                        }
                        C_str += "..."
                        C_declaration_str += "..."
                    }
                    C_str += ") {\n"
                    C_declaration_str += ");\n"
                    // remaining children form the body
                    for (var i = parameter_types.size; i < t->children.size; i++;) {
                        emit_C(t->children[i], level+1)
                        C_str += ";\n"
                    }
                    C_str += "}\n"
                }
                ast::_template(b) { /* template should be ignored */ }
                ast::_declaration() {
                    C_str += idt + to_c_type(t->children[0]->data._identifier.second) + " " + get_c_name(t->children[0])
                    if (t->children.size > 1) {
                        C_str += " = "
                        emit_C(t->children[1], 0)
                    }
                    if (is_top_level_item(t)) {
                        C_str += ";\n"
                        C_declaration_str += idt + to_c_type(t->children[0]->data._identifier.second) + " " + get_c_name(t->children[0]) + ";\n"
                    }
                }
                ast::_block() {
                    C_str += idt + "{\n"
                    t->children.for_each(fun(c: *tree) {
                        emit_C(c, level+1)
                        C_str += ";\n"
                    })
                    C_str += idt + "}"
                }
                ast::_if() {
                    C_str += idt + "if ("
                    emit_C(t->children[0], 0)
                    C_str += ") {\n"
                    emit_C(t->children[1], level + 1)
                    C_str += ";\n" + idt + "}"
                    if t->children.size > 2 {
                        C_str += " else {\n"
                        emit_C(t->children[2], level + 1)
                        C_str += ";\n" + idt + "}"
                    }
                }
                ast::_match() { error("no match should remain at C emit"); }
                ast::_case() { error("no case should remain at C emit"); }
                ast::_while() {
                    C_str += idt + "while ("
                    emit_C(t->children[0], 0)
                    C_str += ") {\n"
                    emit_C(t->children[1], level + 1)
                    C_str += ";\n" + idt + "}"
                }
                ast::_for() {
                    C_str += idt + "for ("
                    emit_C(t->children[0], 0)
                    C_str += ";"
                    emit_C(t->children[1], 0)
                    C_str += ";"
                    emit_C(t->children[2], 0)
                    C_str += ") {\n"
                    emit_C(t->children[3], level+1)
                    C_str += ";\n" + idt + "}"
                }
                ast::_return() {
                    C_str += idt + "return"
                    if (t->children.size == 1) {
                        C_str += " "
                        emit_C(t->children[0], 0)
                    }
                }
                ast::_break() { C_str += idt + "break"; }
                ast::_continue() { C_str += idt + "continue"; }
                ast::_defer() { error("no defer should remain at C emit"); }
                ast::_call() {
                    if (is_compiler_intrinsic(get_ast_binding(t->children[0]))) {
                        // intrinsics are emitted as C operators: prefix for one
                        // argument, infix for two
                        if (t->children.size == 2) {
                            C_str += idt + "(" + get_ast_binding(t->children[0])->data._compiler_intrinsic.first + "("
                            emit_C(t->children[1], 0)
                            C_str += "))"
                        } else if (t->children.size == 3) {
                            C_str += idt + "(("
                            emit_C(t->children[1], 0)
                            C_str += ")" + get_ast_binding(t->children[0])->data._compiler_intrinsic.first + "("
                            emit_C(t->children[2], 0)
                            C_str += "))"
                        } else error("Calling primitive intrinsic with not 1 or 2 arguments")
                    } else {
                        emit_C(t->children[0], level)
                        C_str += "("
                        for (var i = 1; i < t->children.size; i++;) {
                            if (i != 1)
                                C_str += ", "
                            emit_C(t->children[i], 0)
                        }
                        C_str += ")"
                    }
                }
                ast::_compiler_intrinsic(b) { error("compiler_intrinsic gen unimplemented"); }
                ast::_cast(b) { error("cast gen unimplemented"); }
                ast::_value(b) { C_str += idt + b.first; }
            }
        }
        emit_C(item, 0)
    }

    // We construct our real main entry function and add an emit_C pass for it,
    // starting generation of the entire program
    // NOTE(review): argv is typed as plain char here; parse_type currently
    // drops pointer indirection as well, so the two agree - change together.
    var real_main = _function(
        str("main"),
        binding_p(type::_fun(make_triple(make_pair(vec(
                binding_p(type::_int()),
                binding_p(type::_char())
            ),
            binding_p(type::_int())
        ), false, false))),
        true, vec(
            _identifier(str("argc"), binding_p(type::_int())),
            _identifier(str("argv"), binding_p(type::_char())),
            _return(vec(_call(vec(make_ast_binding("fmain"), make_ast_binding("argc"), make_ast_binding("argv")))))
        )
    )
    var top_unit = _translation_unit(str(), vec(
        _import(make_ast_binding(kraken_file_name), set(str("*")), vec(
            _identifier(kraken_file_name, binding_p(type::_void()))
        )),
        real_main
    ))
    pass_poset.add_job(make_pair(real_main, str("emit_C")))
    pass_poset.run(fun(file_pass: pair<*tree, str>) {
        printlnerr("doing pass new style " + file_pass.second + " on " + to_string(file_pass.first->data))
        passes[file_pass.second](file_pass.first)
    })

    // to_c_type emits "bool" and the file is compiled with -std=c99, so the
    // generated source needs <stdbool.h>; a bare "#include" is a hard
    // preprocessor error.
    C_str = "#include <stdbool.h>\n" + C_declaration_str + "\n" + C_str

    println()
    println()
    println("Finished with trees:")
    name_ast_map.for_each(fun(key: str, value: *tree) {
        printlnerr(key + ":")
        print_tree(value, 1)
        printlnerr("done")
    })

    var kraken_c_output_name = kraken_file_name + ".c"
    println(C_str)
    write_file(kraken_c_output_name, C_str)
    var c_flags = str("")
    if (compile_c) {
        var compile_string = "cc -g " + opt_str + " -Wno-int-to-pointer-cast -Wno-pointer-to-int-cast -Wno-incompatible-pointer-types -std=c99 " + c_flags + " " + kraken_c_output_name + " -o " + executable_name
        printlnerr(compile_string)
        system(compile_string)
    }

    return 0
}

// Converts a "type" syntax node into a type binding.
//
// syntax: the parse-tree node for the type.
// declared_template_types: template parameter names currently in scope; an
//     identifier found here resolves to the mapped placeholder binding
//     instead of an object type.
// Returns the binding for the parsed type; reports through error() when the
// node matches none of the known forms.
//
// NOTE(review): is_ref and indr are computed but never used below, so
// reference-ness and pointer depth are currently dropped from the result.
// Function types likewise always come back as "() -> void", non-variadic.
fun parse_type(syntax: *tree, declared_template_types: ref map>): *binding {
    var is_ref = get_node("\"ref\"", syntax) != null>()
    var indr = 0
    syntax = get_node("pre_reffed", syntax)
    // descend through nested pre_reffed nodes, counting each level
    var next = get_node("pre_reffed", syntax)
    while(next != null>()) {
        indr++
        syntax = next
        next = get_node("pre_reffed", syntax)
    }
    var ident = get_node("scoped_identifier", syntax)
    var func = get_node("function_type", syntax)
    var first_child_name = syntax->children[0]->data.name
    if (ident != null>()) {
        var ident_str = concat(ident)
        var template_inst = get_node("template_inst", syntax)
        if (template_inst != null>()) {
            return binding_p(type::_obj(make_ast_binding(ident_str + "")))
        } else {
            if (declared_template_types.contains_key(ident_str))
                return declared_template_types[ident_str]
            else
                return binding_p(type::_obj(make_ast_binding(ident_str)))
        }
    } else if (func != null>()) {
        var param_types = vec<*binding>()
        var return_type = binding_p(type::_void())
        var variadic = false
        var raw = false
        return binding_p(type::_fun(make_triple(make_pair(param_types, return_type), variadic, raw)))
    } else if (first_child_name == "\"void\"") {
        return binding_p(type::_void())
    } else if (first_child_name == "\"bool\"") {
        return binding_p(type::_bool())
    } else if (first_child_name == "\"char\"") {
        return binding_p(type::_char())
    } else if (first_child_name == "\"uchar\"") {
        return binding_p(type::_uchar())
    } else if (first_child_name == "\"short\"") {
        return binding_p(type::_short())
    } else if (first_child_name == "\"ushort\"") {
        return binding_p(type::_ushort())
    } else if (first_child_name == "\"int\"") {
        return binding_p(type::_int())
    } else if (first_child_name == "\"uint\"") {
        return binding_p(type::_uint())
    } else if (first_child_name == "\"long\"") {
        return binding_p(type::_long())
    } else if (first_child_name == "\"ulong\"") {
        return binding_p(type::_ulong())
    } else if (first_child_name == "\"float\"") {
        return binding_p(type::_float())
    } else if (first_child_name == "\"double\"") {
        return binding_p(type::_double())
    }
    error(syntax, "could not parse type " + first_child_name)
}

fun syntax_to_ast(file_name: str, syntax: *tree, import_paths: ref vec): *tree {
    var resolve_import_file = fun(file_name: str): str {
        var file_path = str()
        for (var i = 0; i < import_paths.size; i++;) {
            if (file_exists(import_paths[i] + file_name)) {
                if (file_path != "")
                    error("File: " + file_name + ", found in multiple import paths - at least two of [" + str(",").join(import_paths) + "]")
                file_path = import_paths[i] + file_name
            }
        }
        if (file_path == "")
            error("File: " + file_name + ", not found in any import path - none of [" + str(",").join(import_paths) + "]")
        return file_path
    }
    var syntax_to_ast_helper: fun(*tree, ref map>): *tree = fun(syntax: *tree, declared_template_types: ref map>): *tree {
        if (syntax->data.name == "import") {
            return _import(make_ast_binding(resolve_import_file(concat(syntax->children[1]) + ".krak")), from_vector(syntax->children.slice(2,-1).filter(fun(s:*tree):bool {
                return s->data.name == "identifier" || s->data.data == "*"
            }).map(concat)), vec(syntax_to_ast_helper(syntax->children[1], declared_template_types)))
        } else if (syntax->data.name == "function") {
            var template = get_node("template_dec", syntax)
            var new_template_type_map = map>()
            var with_added_declared_template_types = declared_template_types
            if (template != null>()) {
                get_nodes("template_param", template).for_each(fun(p: *tree) {
                    var key = concat(p)
                    var value = binding_p(type::_template_placeholder())
                    new_template_type_map[key] = value
                    with_added_declared_template_types[key] = value
                })
            }
            var parameters = get_nodes("typed_parameter", syntax).map(fun(x: *tree): *tree return syntax_to_ast_helper(x, with_added_declared_template_types);)
            var body = syntax_to_ast_helper(get_node("statement",
syntax), with_added_declared_template_types) var return_type = null>() var return_type_node = get_node("typed_return", syntax) if (return_type_node != null>()) return_type = parse_type(get_node("type", return_type_node), with_added_declared_template_types) else return_type = binding_p(type::_void()) var function_type = binding_p(type::_fun(make_triple(make_pair(parameters.map(fun(i: *tree): *binding return i->data._identifier.second;), return_type), false, false))) var n = _function(concat(get_node("func_identifier", syntax)), function_type, false, parameters + body) if (new_template_type_map.size() > 0) { return _template(n->data._function.first, new_template_type_map, vec(n)) } else { return n } } else if (syntax->data.name == "typed_parameter") return _identifier(concat(get_node("identifier", syntax)), parse_type(get_node("type", syntax), declared_template_types)) else if (syntax->data.name == "type_def") { var n = _type_def(concat(get_node("identifier", syntax)), get_nodes("declaration_statement", syntax).map(fun(x: *tree): *tree return syntax_to_ast_helper(x, declared_template_types);)) var template = get_node("template_dec", syntax) if (template == null>()) { return n } else { /*return _template(n->data._type_def, from_vector(get_nodes("template_param", template).map(concat)), vec(n))*/ error("implement me") } } else if (syntax->data.name == "adt_def") { var n = _adt_def(concat(get_node("identifier", syntax)), get_nodes("adt_option", syntax).map(fun(s: *tree): *tree { var option_type = get_node("type", s) if (option_type != null>()) return _identifier(concat(get_node("identifier", s)), parse_type(option_type, declared_template_types)) else return _identifier(concat(get_node("identifier", s)), binding_p(type::_void())) })) var template = get_node("template_dec", syntax) if (template == null>()) { return n } else { /*return _template(n->data._adt_def, from_vector(get_nodes("template_param", template).map(concat)), vec(n))*/ error("implement me") } } else if 
(syntax->data.name == "statement") return syntax_to_ast_helper(syntax->children[0], declared_template_types) else if (syntax->data.name == "code_block") return _block(syntax->children.map(fun(x: *tree): *tree return syntax_to_ast_helper(x, declared_template_types);)) else if (syntax->data.name == "if_statement") return _if(syntax->children.map(fun(x: *tree): *tree return syntax_to_ast_helper(x, declared_template_types);)) else if (syntax->data.name == "for_loop") return _for(syntax->children.map(fun(x: *tree): *tree return syntax_to_ast_helper(x, declared_template_types);)) else if (syntax->data.name == "while_loop") return _while(syntax->children.map(fun(x: *tree): *tree return syntax_to_ast_helper(x, declared_template_types);)) else if (syntax->data.name == "return_statement") return _return(syntax->children.map(fun(x: *tree): *tree return syntax_to_ast_helper(x, declared_template_types);)) else if (syntax->data.name == "defer_statement") return _defer(syntax->children.map(fun(x: *tree): *tree return syntax_to_ast_helper(x, declared_template_types);)) else if (syntax->data.name == "break_statement") return _break() else if (syntax->data.name == "continue_statement") return _continue() else if (syntax->data.name == "match_statement") { return _match(vec(syntax_to_ast_helper(get_node("boolean_expression", syntax), declared_template_types)) + get_nodes("case_statement", syntax).map(fun(s: *tree): *tree { return _case(s->children.map(fun(x: *tree): *tree return syntax_to_ast_helper(x, declared_template_types);)) })) } else if (syntax->data.name == "declaration_statement") { var t = binding_p(type::_unknown()) var type_syntax = get_node("type", syntax) if type_syntax != null>() t = parse_type(type_syntax, declared_template_types) var children = vec(_identifier(concat(get_node("identifier", syntax)), t)) children += get_nodes("boolean_expression", syntax).map(fun(x: *tree): *tree return syntax_to_ast_helper(x, declared_template_types);) return _declaration(children) } 
else if (syntax->data.name == "assignment_statement") return _call(vec(make_ast_binding("op" + concat(syntax->children[1])), syntax_to_ast_helper(syntax->children[0], declared_template_types), syntax_to_ast_helper(syntax->children[2], declared_template_types))) else if (syntax->data.name == "function_call") return _call(vec(syntax_to_ast_helper(syntax->children[0], declared_template_types)) + get_nodes("parameter", syntax).map(fun(s: *tree): *tree { return syntax_to_ast_helper(s->children[0], declared_template_types) })) else if (syntax->data.name == "boolean_expression" || syntax->data.name == "and_boolean_expression" || syntax->data.name == "bitwise_or" || syntax->data.name == "bitwise_xor" || syntax->data.name == "bitwise_and" || syntax->data.name == "bool_exp" || syntax->data.name == "expression" || syntax->data.name == "shiftand" || syntax->data.name == "term" || syntax->data.name == "factor" || syntax->data.name == "unarad" || syntax->data.name == "access_operation") { if (syntax->children.size == 1) { return syntax_to_ast_helper(syntax->children[0], declared_template_types) } else if (syntax->children.size == 2) { var template_inst = get_node("template_inst", syntax) if (template_inst != null>()) { if (syntax->children[0]->data.name != "scoped_identifier") error(syntax, "Unexpected template instantiation (not on an identifier)") return make_ast_binding(concat(syntax->children[0]), get_nodes("type", template_inst).map(fun(s: *tree): *binding { return parse_type(s, declared_template_types); })) } else if (syntax->children[0]->data.terminal) { return _call(vec(make_ast_binding(concat(syntax->children[0])), syntax_to_ast_helper(syntax->children[1], declared_template_types))) } else { return _call(vec(make_ast_binding(concat(syntax->children[1])), syntax_to_ast_helper(syntax->children[0], declared_template_types))) } } else { return _call(vec(make_ast_binding("op" + concat(syntax->children[1])), syntax_to_ast_helper(syntax->children[0], declared_template_types), 
syntax_to_ast_helper(syntax->children[2], declared_template_types))) } } else if (syntax->data.name == "number") { var number_string = concat(syntax) if (number_string.contains('.')) return _value(number_string, binding_p(type::_double())) else return _value(number_string, binding_p(type::_int())) } else if (syntax->data.name == "bool") return _value(concat(syntax), binding_p(type::_bool())) else if (syntax->data.name == "scoped_identifier" || syntax->data.name == "identifier") return make_ast_binding(concat(syntax)) else { error(syntax, "Cannot transform") } } var declared_template_types = map>() var result = _translation_unit(file_name, syntax->children.map(fun(x: *tree): *tree return syntax_to_ast_helper(x, declared_template_types);)) return result }

// Prints the subtree rooted at t with printlnerr, one node per line.
// Each line is prefixed with `level` tab characters; children are printed
// one level deeper, and a null child is printed as "null!".
fun print_tree(t: *tree, level: int) {
    printlnerr("\t" * level + to_string(t->data))
    t->children.for_each(fun(child: *tree) {
        if (child)
            print_tree(child, level+1)
        else
            printlnerr("\t" * (level + 1) + "null!")
    })
}

// C-string convenience overload: converts lookup to a str and defers to
// the str overload of get_node.
fun get_node(lookup: *char, parent: *tree): *tree {
    var lookup_str = str(lookup)
    return get_node(lookup_str, parent)
}

// Returns the direct child of parent whose symbol name equals lookup, or
// null when there is none. More than one match is reported through error().
fun get_node(lookup: str, parent: *tree): *tree {
    var matches = get_nodes(lookup, parent)
    if (matches.size > 1)
        error(parent, "get node too many results!")
    if (matches.size == 0)
        return null>()
    return matches[0]
}

// C-string convenience overload: converts lookup to a str and defers to
// the str overload of get_nodes.
fun get_nodes(lookup: *char, parent: *tree): vec<*tree> {
    var lookup_str = str(lookup)
    return get_nodes(lookup_str, parent)
}

// Returns every direct child of parent whose symbol name equals lookup.
// Only the immediate children are examined, not deeper descendants.
fun get_nodes(lookup: str, parent: *tree): vec<*tree> {
    return parent->children.filter(fun(candidate: *tree): bool {
        return candidate->data.name == lookup;
    })
}

// Builds the text of a subtree: the node's own data (skipped when it is the
// "no_value" placeholder) followed by the concat of each child, in order.
fun concat(node: *tree): str {
    var joined.construct(): str
    if (node->data.data != "no_value")
        joined += node->data.data
    for (var i = 0; i < node->children.size; i++;)
        joined += concat(node->children[i])
    return joined
}

// Walks down the chain of first children starting at source and returns the
// first node marked terminal. Returns null if source is null or the chain
// ends at a childless non-terminal.
fun get_first_terminal(source: *tree): *tree {
    var current = source
    while (current) {
        if (current->data.terminal)
            return current
        if (current->children.size == 0)
            return null>()
        current = current->children.first()
    }
    return null>()
}

// C-string convenience overload of error: wraps message in a str and
// forwards to the (source, str) overload.
fun error(source: *tree, message: *char)
error(source, str(message));

// Reports an error associated with a parse-tree node.
// When a terminal can be found under source (via get_first_terminal), the
// message is prefixed with the concatenated text of source and the source
// name / position recorded on that terminal. Otherwise the bare message is
// passed to the single-argument error().
// NOTE(review): the fall-through to error(message) after the located call
// presumably never runs because error(str) terminates - confirm.
fun error(source: *tree, message: str) {
    var first = get_first_terminal(source)
    if (first)
        error("***error |" + concat(source) + "| *** " + first->data.source + ": " + first->data.position + " " + message)
    error(message)
}

// Simplifies a parse tree in place before it is turned into an AST.
// First, every node matching one of the listed symbols is removed together
// with its subtree (remove_node); then every node matching one of the listed
// list-like symbols is replaced by its own children (collapse_node).
// The removals run before the collapses, and remove_node also drops null
// children.
fun trim(parse_tree: *tree) {
    remove_node(symbol("$NULL$", false), parse_tree)
    remove_node(symbol("WS", false), parse_tree)
    // the terminals have " around them, which we have to escape
    remove_node(symbol("\"\\(\"", true), parse_tree)
    remove_node(symbol("\"\\)\"", true), parse_tree)
    remove_node(symbol("\"template\"", true), parse_tree)
    remove_node(symbol("\"return\"", true), parse_tree)
    remove_node(symbol("\"defer\"", true), parse_tree)
    remove_node(symbol("\";\"", true), parse_tree)
    remove_node(symbol("line_end", false), parse_tree)
    remove_node(symbol("\"{\"", true), parse_tree)
    remove_node(symbol("\"}\"", true), parse_tree)
    remove_node(symbol("\"(\"", true), parse_tree)
    remove_node(symbol("\")\"", true), parse_tree)
    remove_node(symbol("\"if\"", true), parse_tree)
    remove_node(symbol("\"else\"", true), parse_tree)
    remove_node(symbol("\"while\"", true), parse_tree)
    remove_node(symbol("\"for\"", true), parse_tree)
    remove_node(symbol("\"__if_comp__\"", true), parse_tree)
    remove_node(symbol("\"comp_simple_passthrough\"", true), parse_tree)
    /*remove_node(symbol("obj_nonterm", false), parse_tree)*/
    remove_node(symbol("adt_nonterm", false), parse_tree)

    collapse_node(symbol("case_statement_list", false), parse_tree)
    collapse_node(symbol("opt_param_assign_list", false), parse_tree)
    collapse_node(symbol("param_assign_list", false), parse_tree)
    collapse_node(symbol("opt_typed_parameter_list", false), parse_tree)
    collapse_node(symbol("opt_parameter_list", false), parse_tree)
    collapse_node(symbol("intrinsic_parameter_list", false), parse_tree)
    collapse_node(symbol("identifier_list", false), parse_tree)
    collapse_node(symbol("adt_option_list", false), parse_tree)
    collapse_node(symbol("statement_list", false), parse_tree)
    collapse_node(symbol("parameter_list", false), parse_tree)
    collapse_node(symbol("typed_parameter_list", false), parse_tree)
    collapse_node(symbol("unorderd_list_part", false), parse_tree)
    collapse_node(symbol("if_comp_pred", false), parse_tree)
    collapse_node(symbol("declaration_block", false), parse_tree)
    collapse_node(symbol("type_list", false), parse_tree)
    collapse_node(symbol("opt_type_list", false), parse_tree)
    collapse_node(symbol("template_param_list", false), parse_tree)
    collapse_node(symbol("trait_list", false), parse_tree)
    collapse_node(symbol("dec_type", false), parse_tree)
}

// Deletes, everywhere below parse_tree, each child that is null or whose
// symbol matches `remove` (compared with equal_wo_data). A deleted child
// takes its whole subtree with it. The root itself is never removed.
// Traversal uses an explicit stack instead of recursion.
fun remove_node(remove: symbol, parse_tree: *tree) {
    var to_process = stack<*tree>()
    to_process.push(parse_tree)
    while(!to_process.empty()) {
        var node = to_process.pop()
        for (var i = 0; i < node->children.size; i++;) {
            if (!node->children[i] || node->children[i]->data.equal_wo_data(remove)) {
                node->children.remove(i)
                // the next child has shifted into slot i, so look at i again
                i--;
            } else {
                to_process.push(node->children[i])
            }
        }
    }
}

// Replaces, everywhere below parse_tree, each child whose symbol matches
// `remove` (compared with equal_wo_data) by that child's own children,
// spliced in at the same position. The root itself is never collapsed.
// Null children are left in place and not descended into, so a tree that
// has not been cleaned by remove_node cannot cause a null dereference here.
fun collapse_node(remove: symbol, parse_tree: *tree) {
    var to_process = stack<*tree>()
    to_process.push(parse_tree)
    while(!to_process.empty()) {
        var node = to_process.pop()
        for (var i = 0; i < node->children.size; i++;) {
            if (node->children[i]) {
                if (node->children[i]->data.equal_wo_data(remove)) {
                    var add_children = node->children[i]->children;
                    // stick child's children between the current children divided
                    // on i, without including i
                    node->children = node->children.slice(0,i) + add_children + node->children.slice(i+1,-1)
                    // re-examine slot i: it now holds the first spliced-in child
                    // (or the following sibling), which may itself need collapsing
                    i--;
                } else {
                    to_process.push(node->children[i])
                }
            }
        }
    }
}