import io:*
import grammer:*
import lexer:*
import parser:*
import str:*
import serialize:*
import os:*
import set:*
import vec:*
import vec_literals:*
import poset:*
import util:*
import ast:*
import tree:*
import symbol:*

// NOTE(review): several template argument lists in this file look like they
// were lost somewhere upstream (e.g. "vec()", "poset>()", "map>()", "null>()",
// bare "*tree"). They are reproduced here exactly as found - confirm against
// the repository copy before building.

// Compiler driver entry point.
//
// Steps, in order:
//   1. Locate krakenGrammer.kgm relative to argv[0] and read it.
//   2. Handle "no arguments" and "-v"/"--version", then split the remaining
//      arguments into an optimisation flag ("-O..."), "--no-c-compile", and
//      positional arguments (input file, optional output name).
//   3. Load the grammer from the ".comp_new" cache when its version number and
//      embedded grammar text both match; otherwise rebuild it and rewrite the
//      cache.
//   4. Run the per-file passes in the order handed out by pass_poset.
//   5. Dump the resulting trees and write a placeholder C file.
//
// Returns 0 on completion; exits with 1 when no input file was given and with
// 0 after printing the version.
fun main(argc: int, argv: **char): int {
    // delay construction until we either load it or copy construct it
    var gram: grammer
    // directory containing the executable: argv[0] with its last path component dropped
    var base_dir = str("/").join(str(argv[0]).split('/').slice(0,-2))
    var file_name = base_dir + "/krakenGrammer.kgm"
    var compiled_name = file_name + str(".comp_new")
    var compiled_version = 1
    var file_contents = read_file(file_name)
    var loaded_and_valid = false

    if (argc <= 1) {
        println("No input file!\n Call with one argument (the input file), or two arguments (input file and output name)")
        exit(1)
    } else if (str(argv[1]) == "-v" || str(argv[1]) == "--version") {
        println("0.0 pre")
        exit(0)
    }
    var opt_str = str("-O2")
    var compile_c = true
    var positional_args = vec()
    for (var i = 1; i < argc; i++;) {
        var arg_str = str(argv[i])
        if (arg_str.length() > 2 && arg_str.slice(0,2) == "-O") {
            opt_str = arg_str
        } else if (arg_str == "--no-c-compile") {
            compile_c = false
        } else {
            positional_args.add(arg_str)
        }
    }
    // argc > 1 does not guarantee an input file: the only arguments may have
    // been flags (e.g. "-O3" or "--no-c-compile"). Bail out here rather than
    // indexing an empty vector below.
    if (positional_args.size == 0) {
        println("No input file!\n Call with one argument (the input file), or two arguments (input file and output name)")
        exit(1)
    }

    // Cache layout (see the write below): version, grammar text, grammer.
    if (file_exists(compiled_name)) {
        var pos = 0
        var binary = read_file_binary(compiled_name)
        var saved_version = 0
        unpack(saved_version, pos) = unserialize(binary, pos)
        if (saved_version == compiled_version) {
            var cached_contents = str()
            unpack(cached_contents, pos) = unserialize(binary, pos)
            // only trust the cache if it was built from the exact same grammar text
            if (cached_contents == file_contents) {
                loaded_and_valid = true
                pos = gram.unserialize(binary, pos)
            } else println("contents different")
        } else println("version number different")
    } else {
        println("cached file does not exist")
    }
    if (!loaded_and_valid) {
        println("Not loaded_and_valid, re-generating and writing out")
        gram.copy_construct(&load_grammer(file_contents))
        println("grammer loaded, calculate_first_set")
        gram.calculate_first_set()
        println("grammer loaded, calculate_state_automaton")
        gram.calculate_state_automaton()
        println("calculated, writing out")
        write_file_binary(compiled_name, serialize(compiled_version) + serialize(file_contents) + serialize(gram))
        println("done writing")
    }

    var lex = lexer(gram.terminals)
    var parse.construct(&gram, &lex): parser
    var kraken_file_name = positional_args[0]
    // default output name: the input name with its last '.'-separated part dropped
    var executable_name = str(".").join(kraken_file_name.split('.').slice(0,-2))
    if (positional_args.size > 1)
        executable_name = positional_args[1]

    // pass_poset orders (file name, pass index) pairs; name_ast_map holds the
    // latest tree produced for each file.
    var pass_poset = poset>()
    var name_ast_map = map>()
    var import_paths = vec(str(), base_dir + "/stdlib/")
    var passes = vec>()
    passes = vec(
        // pass 0: find the file in exactly one import path, parse it, trim the
        // parse tree and convert it to an AST
        fun(file_name: str): *tree {
            var file = str()
            for (var i = 0; i < import_paths.size; i++;) {
                if (file_exists(import_paths[i] + file_name)) {
                    if (file != "")
                        error("File: " + file_name + ", found in multiple import paths - at least two of [" + str(",").join(import_paths) + "]")
                    file = read_file(import_paths[i] + file_name)
                }
            }
            if (file == "")
                error("File: " + file_name + ", not found in any import path - none of [" + str(",").join(import_paths) + "]")
            printerr(file_name + ", ")
            var parse_tree = parse.parse_input(file, file_name)
            trim(parse_tree)
            return syntax_to_ast(file_name, parse_tree)
        },
        // pass 1: for every import node, register the imported file's passes
        // in the poset
        fun(file_name: str): *tree {
            println("Checking for imports in " + file_name)
            name_ast_map[file_name]->children.for_each(fun(n: *tree) {
                match (n->data) {
                    ast::_import(b) {
                        var imported_file_name = n->children[0]->data._identifier.first + ".krak"
                        for (var i = 0; i < passes.size; i++;) {
                            if (i == 0)
                                // relates this file's pass 2 to the imported file's pass 0
                                pass_poset.add_relationship(make_pair(file_name, 2), make_pair(imported_file_name, 0))
                            else
                                // relates each pass of the imported file to its previous pass
                                pass_poset.add_relationship(make_pair(imported_file_name, i), make_pair(imported_file_name, i-1))
                        }
                    }
                }
            })
            return name_ast_map[file_name]
        },
        // pass 2: placeholder, returns the stored tree unchanged
        fun(file_name: str): *tree {
            println("Doing thing 3 to " + file_name)
            return name_ast_map[file_name]
        }
    )
    // seed the poset with every pass of the input file, each related to the previous one
    for (var i = 0; i < passes.size; i++;) {
        if (i == 0)
            pass_poset.add_vertex(make_pair(kraken_file_name, i))
        else
            pass_poset.add_relationship(make_pair(kraken_file_name, i), make_pair(kraken_file_name, i-1))
    }
    // passes may add more (file, pass) entries while running, so loop until drained
    while (pass_poset.size() != 0) {
        var file_pass = pass_poset.pop()
        printlnerr("doing pass " + to_string(file_pass.second) + " on " + file_pass.first)
        name_ast_map[file_pass.first] = passes[file_pass.second](file_pass.first)
    }

    println()
    println()
    println("Finished with trees:")
    name_ast_map.for_each(fun(key: str, value: *tree) {
        printlnerr(key + ":")
        print_tree(value, 1)
        printlnerr("done")
    })

    var kraken_c_output_name = kraken_file_name + ".c"
    var c_code = str("//don't you wish this was real kraken\n")
    var c_flags = str("")
    write_file(kraken_c_output_name, c_code)

    if (compile_c) {
        // the command is assembled but not run: both uses are commented out
        var compile_string = "cc -g " + opt_str + " -Wno-int-to-pointer-cast -Wno-pointer-to-int-cast -Wno-incompatible-pointer-types -std=c99 " + c_flags + " " + kraken_c_output_name + " -o " + executable_name
        /*printlnerr(compile_string)*/
        /*system(compile_string)*/
    }

    return 0
}

// Converts a trimmed parse tree into an AST wrapped in a _translation_unit
// node for file_name. Each top-level child of syntax is converted by the
// recursive helper below; both the result and the input are dumped with
// print_tree before returning.
fun syntax_to_ast(file_name: str, syntax: *tree): *tree {
    // Dispatches on the parse node's symbol name. Names with no branch fall
    // through to the final else and yield a null node.
    var syntax_to_ast_helper: fun(*tree): *tree = fun(syntax: *tree): *tree {
        printlnerr("syntax_to_ast " + syntax->data.name)
        if (syntax->data.name == "import") {
            // children from index 2 on that are identifiers or "*" become the
            // imported names; child 1 is converted as the node's single child
            return _import(from_vector(syntax->children.slice(2,-1).filter(fun(s:*tree):bool {
                return s->data.name == "identifier" || s->data.data == "*"
            }).map(fun(s: *tree): str {
                return concat(s)
            })), vec(syntax_to_ast_helper(syntax->children[1])))
        } else if (syntax->data.name == "function")
            return _function(concat(get_node("func_identifier", syntax)), null(), (get_nodes("typed_parameter", syntax) + get_nodes("statement", syntax)).map(syntax_to_ast_helper))
        else if (syntax->data.name == "typed_parameter")
            return _identifier(concat(get_node("identifier", syntax)), null())
        else if (syntax->data.name == "type_def")
            return _type_def(concat(get_node("identifier", syntax)), get_nodes("declaration_statement", syntax).map(syntax_to_ast_helper))
        else if (syntax->data.name == "adt_def")
            return _type_def(concat(get_node("identifier", syntax)), get_nodes("adt_option", syntax).map(fun(s: *tree): *tree {
                return _identifier(concat(get_node("identifier", s)), null())
            }))
        else if (syntax->data.name == "statement")
            return syntax_to_ast_helper(syntax->children[0])
        else if (syntax->data.name == "code_block")
            return _block(syntax->children.map(syntax_to_ast_helper))
        else if (syntax->data.name == "return_statement")
            return _return(syntax->children.map(syntax_to_ast_helper))
        else if (syntax->data.name == "defer_statement")
            return _defer(syntax->children.map(syntax_to_ast_helper))
        else if (syntax->data.name == "match_statement") {
            return _match(vec(syntax_to_ast_helper(get_node("boolean_expression", syntax))) + get_nodes("case_statement", syntax).map(fun(s: *tree): *tree {
                return _case(s->children.map(syntax_to_ast_helper))
            }))
        } else if (syntax->data.name == "declaration_statement") {
            var children = vec(_identifier(concat(get_node("identifier", syntax)), null()))
            children += get_nodes("boolean_expression", syntax).map(syntax_to_ast_helper)
            return _declaration(children)
        } else if (syntax->data.name == "assignment_statement")
            // child 1 is used as the binding; children 0 and 2 are the operands
            return _assignment(vec(_binding(concat(syntax->children[1]), null>()), syntax_to_ast_helper(syntax->children[0]), syntax_to_ast_helper(syntax->children[2])))
        else if (syntax->data.name == "function_call")
            return _call(syntax->children.map(fun(s: *tree): *tree {
                return syntax_to_ast_helper(s->children[0])
            }))
        else if (syntax->data.name == "boolean_expression" ||
                 syntax->data.name == "and_boolean_expression" ||
                 syntax->data.name == "bitwise_or" ||
                 syntax->data.name == "bitwise_xor" ||
                 syntax->data.name == "bitwise_and" ||
                 syntax->data.name == "bool_exp" ||
                 syntax->data.name == "expression" ||
                 syntax->data.name == "shiftand" ||
                 syntax->data.name == "term" ||
                 syntax->data.name == "factor" ||
                 syntax->data.name == "unarad" ||
                 syntax->data.name == "access_operation") {
            if (syntax->children.size == 1) {
                // single child: pass straight through to the child
                return syntax_to_ast_helper(syntax->children[0])
            } else if (syntax->children.size == 2) {
                // two children: whichever side is the terminal becomes the
                // binding, the other side is the operand
                if (syntax->children[0]->data.terminal) {
                    return _call(vec(_binding(concat(syntax->children[0]), null>()), syntax_to_ast_helper(syntax->children[1])))
                } else {
                    return _call(vec(_binding(concat(syntax->children[1]), null>()), syntax_to_ast_helper(syntax->children[0])))
                }
            } else {
                // otherwise: child 1 is the binding, children 0 and 2 the operands
                return _call(vec(_binding(concat(syntax->children[1]), null>()), syntax_to_ast_helper(syntax->children[0]), syntax_to_ast_helper(syntax->children[2])))
            }
        } else if (syntax->data.name == "number")
            return _value(concat(syntax), null())
        else if (syntax->data.name == "scoped_identifier" || syntax->data.name == "identifier")
            return _binding(concat(syntax), null>())
        else
            return null>()
    }
    var result = _translation_unit(file_name, syntax->children.map(syntax_to_ast_helper))
    printlnerr("made")
    print_tree(result, 1)
    printlnerr("from")
    print_tree(syntax, 1)
    return result
}

// Prints t to stderr, one node per line, indented with `level` tabs and
// recursing with level+1. A null child is printed as "null!" instead of being
// followed.
fun print_tree(t: *tree, level: int) {
    printlnerr("\t" * level + to_string(t->data))
    for (var i = 0; i < t->children.size; i++;)
        if (t->children[i])
            print_tree(t->children[i], level+1)
        else
            printlnerr("\t" * (level + 1) + "null!")
}

// *char convenience overload; forwards to the str version.
fun get_node(lookup: *char, parent: *tree): *tree {
    return get_node(str(lookup), parent)
}

// Returns the direct child of parent whose symbol name is lookup, or a null
// pointer when there is none. Calls error when more than one child matches.
fun get_node(lookup: str, parent: *tree): *tree {
    var results = get_nodes(lookup, parent)
    if (results.size > 1)
        error(parent, "get node too many results!")
    if (results.size)
        return results[0]
    return null>()
}

// *char convenience overload; forwards to the str version.
fun get_nodes(lookup: *char, parent: *tree): vec<*tree> {
    return get_nodes(str(lookup), parent)
}

// Returns every direct child of parent whose symbol name is lookup
// (direct children only, no recursion).
fun get_nodes(lookup: str, parent: *tree): vec<*tree> {
    return parent->children.filter(fun(node: *tree):bool return node->data.name == lookup;)
}

// Concatenates the data string of node and then, recursively, of each child
// in child order. A data string equal to "no_value" is skipped.
fun concat(node: *tree): str {
    var str.construct(): str
    if (node->data.data != "no_value")
        str += node->data.data
    node->children.for_each(fun(child: *tree) str += concat(child);)
    return str
}

// Follows first children from source until a node flagged terminal is found
// and returns it. Returns a null pointer when source is null or when a
// non-terminal node with no children is reached.
fun get_first_terminal(source: *tree): *tree {
    if (!source)
        return null>()
    if (source->data.terminal)
        return source
    if (source->children.size == 0)
        return null>()
    return get_first_terminal(source->children.first())
}

// *char convenience overload; forwards to the str version.
fun error(source: *tree, message: *char) error(source, str(message));

// Reports message with context taken from source: the concatenated text of
// source plus the source and position fields of its first terminal. When no
// terminal is found only message is reported.
// NOTE(review): the one-argument error is defined outside this file; the
// second call is reached after the first only if that function returns -
// confirm.
fun error(source: *tree, message: str) {
    var first = get_first_terminal(source)
    if (first)
        error("***error |" + concat(source) + "| *** " + first->data.source + ": " + first->data.position + " " + message)
    error(message)
}

// Simplifies a raw parse tree in place: first removes the listed symbols
// outright, then collapses the listed list/wrapper non-terminals so their
// children hang directly off the parent. The symbol names are expected to
// match the grammar file - presumably krakenGrammer.kgm; verify.
fun trim(parse_tree: *tree) {
    remove_node(symbol("$NULL$", false), parse_tree)
    remove_node(symbol("WS", false), parse_tree)
    // the terminals have " around them, which we have to escape
    remove_node(symbol("\"\\(\"", true), parse_tree)
    remove_node(symbol("\"\\)\"", true), parse_tree)
    remove_node(symbol("\"template\"", true), parse_tree)
    remove_node(symbol("\"return\"", true), parse_tree)
    remove_node(symbol("\"defer\"", true), parse_tree)
    remove_node(symbol("\";\"", true), parse_tree)
    remove_node(symbol("line_end", false), parse_tree)
    remove_node(symbol("\"{\"", true), parse_tree)
    remove_node(symbol("\"}\"", true), parse_tree)
    remove_node(symbol("\"(\"", true), parse_tree)
    remove_node(symbol("\")\"", true), parse_tree)
    remove_node(symbol("\"if\"", true), parse_tree)
    remove_node(symbol("\"while\"", true), parse_tree)
    remove_node(symbol("\"__if_comp__\"", true), parse_tree)
    remove_node(symbol("\"comp_simple_passthrough\"", true), parse_tree)
    /*remove_node(symbol("obj_nonterm", false), parse_tree)*/
    remove_node(symbol("adt_nonterm", false), parse_tree)

    collapse_node(symbol("case_statement_list", false), parse_tree)
    collapse_node(symbol("opt_param_assign_list", false), parse_tree)
    collapse_node(symbol("param_assign_list", false), parse_tree)
    collapse_node(symbol("opt_typed_parameter_list", false), parse_tree)
    collapse_node(symbol("opt_parameter_list", false), parse_tree)
    collapse_node(symbol("intrinsic_parameter_list", false), parse_tree)
    collapse_node(symbol("identifier_list", false), parse_tree)
    collapse_node(symbol("adt_option_list", false), parse_tree)
    collapse_node(symbol("statement_list", false), parse_tree)
    collapse_node(symbol("parameter_list", false), parse_tree)
    collapse_node(symbol("typed_parameter_list", false), parse_tree)
    collapse_node(symbol("unorderd_list_part", false), parse_tree)
    collapse_node(symbol("if_comp_pred", false), parse_tree)
    collapse_node(symbol("declaration_block", false), parse_tree)
    collapse_node(symbol("type_list", false), parse_tree)
    collapse_node(symbol("opt_type_list", false), parse_tree)
    collapse_node(symbol("template_param_list", false), parse_tree)
    collapse_node(symbol("trait_list", false), parse_tree)
    collapse_node(symbol("dec_type", false), parse_tree)
}

// Deletes, anywhere below parse_tree, every child that is null or whose
// symbol matches remove according to equal_wo_data. The removed child's whole
// subtree goes with it. Traversal uses an explicit stack instead of recursion;
// the root itself is never removed.
fun remove_node(remove: symbol, parse_tree: *tree) {
    var to_process = stack<*tree>()
    to_process.push(parse_tree)
    while(!to_process.empty()) {
        var node = to_process.pop()
        for (var i = 0; i < node->children.size; i++;) {
            if (!node->children[i] || node->children[i]->data.equal_wo_data(remove)) {
                node->children.remove(i)
                // the next child has shifted into slot i, so look at i again
                i--;
            } else {
                to_process.push(node->children[i])
            }
        }
    }
}

// Replaces, anywhere below parse_tree, every child whose symbol matches remove
// (per equal_wo_data) with that child's own children, spliced in at the same
// position. Unlike remove_node there is no null check on children here; trim
// only calls this after its remove_node calls.
fun collapse_node(remove: symbol, parse_tree: *tree) {
    var to_process = stack<*tree>()
    to_process.push(parse_tree)
    while(!to_process.empty()) {
        var node = to_process.pop()
        for (var i = 0; i < node->children.size; i++;) {
            if (node->children[i]->data.equal_wo_data(remove)) {
                var add_children = node->children[i]->children;
                // stick child's children between the current children divided
                // on i, without including i
                node->children = node->children.slice(0,i) + add_children + node->children.slice(i+1,-1)
                // step back so the first spliced-in child is examined too
                // (it may itself match remove)
                i--;
            } else {
                to_process.push(node->children[i])
            }
        }
    }
}