Files
kraken/k.krak

443 lines
19 KiB
Plaintext
Raw Normal View History

2018-06-14 00:08:55 -04:00
import io:*
import grammer:*
import lexer:*
import parser:*
import str:*
import serialize:*
import os:*
import set:*
2018-06-14 00:08:55 -04:00
import vec:*
import vec_literals:*
import poset:*
import util:*
2018-06-18 19:04:24 -04:00
import ast:*
import type2:*
2018-06-18 19:04:24 -04:00
import tree:*
import symbol:*
2018-06-14 00:08:55 -04:00
2018-06-18 19:04:24 -04:00
// Compiler driver entry point.
//
// Steps, in order:
//   1. Handle the no-argument and -v/--version cases.
//   2. Split the remaining arguments into an optimisation flag (-O...),
//      the --no-c-compile switch, and positional arguments.
//   3. Load the grammar from the serialized cache next to the grammar file
//      when its version number and stored grammar text match; otherwise
//      regenerate it and rewrite the cache.
//   4. Run the "import_resolver" / "import_checker" passes, scheduled via a
//      poset, until no passes remain.
//   5. Print the resulting trees and write a placeholder C file.
//
// positional_args[0] is the input file; positional_args[1], when present,
// overrides the executable name.
// Returns 0 on the normal path; the error/version paths call exit().
fun main(argc: int, argv: **char): int {
    // delay construction until we either load it or copy construct it
    var gram: grammer
    var base_dir = str("/").join(str(argv[0]).split('/').slice(0,-2))
    var import_paths = vec(str(), base_dir + "/stdlib/")
    var file_name = base_dir + "/krakenGrammer.kgm"
    var compiled_name = file_name + str(".comp_new")
    var compiled_version = 1
    var file_contents = read_file(file_name)
    var loaded_and_valid = false
    if (argc <= 1) {
        println("No input file!\n Call with one argument (the input file), or two arguments (input file and output name)")
        exit(1)
    } else if (str(argv[1]) == "-v" || str(argv[1]) == "--version") {
        println("0.0 pre")
        exit(0)
    }
    var opt_str = str("-O2")
    var compile_c = true
    var positional_args = vec<str>()
    for (var i = 1; i < argc; i++;) {
        var arg_str = str(argv[i])
        if (arg_str.length() > 2 && arg_str.slice(0,2) == "-O") {
            opt_str = arg_str
        } else if (arg_str == "--no-c-compile") {
            compile_c = false
        } else {
            positional_args.add(arg_str)
        }
    }
    // Every argument may have been a flag (e.g. only "--no-c-compile"), in
    // which case there is no input file and positional_args[0] below would
    // be out of range. Fail early, before the grammar work.
    if (positional_args.size == 0) {
        println("No input file!\n Call with one argument (the input file), or two arguments (input file and output name)")
        exit(1)
    }
    // Cache layout (see the write below): version int, grammar source text,
    // serialized grammar.
    if (file_exists(compiled_name)) {
        var pos = 0
        var binary = read_file_binary(compiled_name)
        var saved_version = 0
        unpack(saved_version, pos) = unserialize<int>(binary, pos)
        if (saved_version == compiled_version) {
            var cached_contents = str()
            unpack(cached_contents, pos) = unserialize<str>(binary, pos)
            if (cached_contents == file_contents) {
                loaded_and_valid = true
                pos = gram.unserialize(binary, pos)
            } else println("contents different")
        } else println("version number different")
    } else {
        println("cached file does not exist")
    }
    if (!loaded_and_valid) {
        println("Not loaded_and_valid, re-generating and writing out")
        gram.copy_construct(&load_grammer(file_contents))
        println("grammer loaded, calculate_first_set")
        gram.calculate_first_set()
        println("grammer loaded, calculate_state_automaton")
        gram.calculate_state_automaton()
        println("calculated, writing out")
        write_file_binary(compiled_name, serialize(compiled_version) + serialize(file_contents) + serialize(gram))
        println("done writing")
    }
    var lex = lexer(gram.terminals)
    var parse.construct(&gram, &lex): parser
    var kraken_file_name = positional_args[0]
    // default executable name: the input file name with its last
    // '.'-separated component dropped
    var executable_name = str(".").join(kraken_file_name.split('.').slice(0,-2))
    if (positional_args.size > 1)
        executable_name = positional_args[1]
    // (binding, pass name) pairs ordered by the dependencies added below
    var pass_poset = poset<pair<*ast, str>>()
    // file path -> AST of that file, filled in by import_resolver
    var name_ast_map = map<str, *tree<ast>>()
    var passes = map<str, fun(*ast): bool>()
    // resolves a single import
    passes[str("import_resolver")] = fun(import_binding: *ast): bool {
        var file_path = binding_str(import_binding)
        println("Running import resolver for " + file_path)
        if (!name_ast_map.contains_key(file_path)) {
            printerr(file_path + ", ")
            var parse_tree = parse.parse_input(read_file(file_path), file_path)
            trim(parse_tree)
            name_ast_map[file_path] = syntax_to_ast(file_path, parse_tree, import_paths)
        }
        set_bindings(import_binding, name_ast_map[file_path])
        return true
    }
    // ensures that all imports reachable from this one are resolved
    passes[str("import_checker")] = fun(import_binding: *ast): bool {
        var all_resolved = true
        var file_path = binding_str(import_binding)
        println("Running import checker for " + file_path)
        name_ast_map[file_path]->children.for_each(fun(n: *tree<ast>) {
            match (n->data) {
                ast::_import(b) {
                    if (!bound(b.first)) {
                        all_resolved = false
                        // this checker must wait for the unbound import's resolver
                        pass_poset.add_relationship(make_pair(import_binding, str("import_checker")), make_pair(b.first, str("import_resolver")))
                        println(to_string(*b.first) + " is not bound!")
                    } else {
                        println(to_string(*b.first) + " is bound!")
                    }
                }
            }
        })
        return all_resolved
    }
    var top_binding = make_binding(kraken_file_name)
    pass_poset.add_relationship(make_pair(top_binding, str("import_checker")), make_pair(top_binding, str("import_resolver")))
    // A pass that returns false stays in the poset and is attempted again
    // after the dependencies it registered have run.
    while (pass_poset.size() != 0) {
        var file_pass = pass_poset.top()
        printlnerr("doing pass " + file_pass.second + " on " + to_string(*file_pass.first))
        var done = passes[file_pass.second](file_pass.first)
        if (done)
            pass_poset.remove(file_pass)
    }
    println()
    println()
    println("Finished with trees:")
    name_ast_map.for_each(fun(key: str, value: *tree<ast>) {
        printlnerr(key + ":")
        print_tree(value, 1)
        printlnerr("done")
    })
    var kraken_c_output_name = kraken_file_name + ".c"
    var c_code = str("//don't you wish this was real kraken\n")
    var c_flags = str("")
    write_file(kraken_c_output_name, c_code)
    if (compile_c) {
        // the command is assembled, but running it is currently commented out
        var compile_string = "cc -g " + opt_str + " -Wno-int-to-pointer-cast -Wno-pointer-to-int-cast -Wno-incompatible-pointer-types -std=c99 " + c_flags + " " + kraken_c_output_name + " -o " + executable_name
        /*printlnerr(compile_string)*/
        /*system(compile_string)*/
    }
    return 0
}
2018-06-18 19:04:24 -04:00
// Global list of every binding created through make_binding; allocated on
// first use.
var bindings: *vec<*ast>
// Allocates a new binding node named `s`, with an empty type vector and a
// null target, records it in the global `bindings` list and returns it.
fun make_binding(s: str): *ast {
    // lazily create the registry the first time a binding is made
    if (bindings == null<vec<*ast>>()) {
        bindings = new<vec<*ast>>()->construct()
    }
    var fresh = new<ast>()->copy_construct(&ast::_binding(make_triple(s, vec<*type>(), null<tree<ast>>())))
    bindings->add(fresh)
    return fresh
}
// Convenience overload: applies set_bindings to the ast stored in a tree node.
fun set_bindings(binding: *tree<ast>, to: *tree<ast>) {
    var inner = &binding->data
    set_bindings(inner, to)
}
// Points `binding` at `to`.
// If the binding currently has a null target, only this binding is updated.
// Otherwise every binding in the global `bindings` list whose target equals
// this binding's current target is redirected to `to`.
// Calls error() when `binding` is not an ast::_binding.
fun set_bindings(binding: *ast, to: *tree<ast>) {
    match(*binding) {
        ast::_binding(b) {
            var previous = binding->_binding.third
            if (previous != null<tree<ast>>()) {
                for (var idx = 0; idx < bindings->size; idx++;) {
                    if (bindings->get(idx)->_binding.third == previous) {
                        bindings->get(idx)->_binding.third = to
                    }
                }
            } else {
                // don't set null, that will set all unbound ones
                binding->_binding.third = to
            }
            return
        }
    }
    error("trying to set bindings on not a binding")
}
// Returns true when the binding's target (third field) is non-null.
// Calls error() when `binding` is not an ast::_binding.
fun bound(binding: *ast): bool {
    match(*binding) {
        ast::_binding(contents) {
            return contents.third != null<tree<ast>>()
        }
    }
    error("Trying to check bound for not a binding")
}
// Returns the name (first field) stored in a binding.
// Calls error() when `binding` is not an ast::_binding.
fun binding_str(binding: *ast): str {
    match(*binding) {
        ast::_binding(contents) {
            return contents.first
        }
    }
    error("Trying to get name for not a binding")
}
// Converts a (trimmed) parse tree into an AST translation unit.
//
//   file_name    - name given to the resulting translation unit
//   syntax       - root of the parse tree; each of its children is converted
//   import_paths - directories searched, by prefixing, for imported files
//
// Returns the translation unit node. Parse nodes the helper does not
// recognise become null children. The result and the source parse tree are
// both dumped with print_tree before returning.
fun syntax_to_ast(file_name: str, syntax: *tree<symbol>, import_paths: ref vec<str>): *tree<ast> {
    // Finds the single import path under which file_name exists; calls
    // error() when it exists under several paths or under none.
    var resolve_import_file = fun(file_name: str): str {
        var file_path = str()
        for (var i = 0; i < import_paths.size; i++;) {
            if (file_exists(import_paths[i] + file_name)) {
                if (file_path != "")
                    error("File: " + file_name + ", found in multiple import paths - at least two of [" + str(",").join(import_paths) + "]")
                file_path = import_paths[i] + file_name
            }
        }
        if (file_path == "")
            error("File: " + file_name + ", not found in any import path - none of [" + str(",").join(import_paths) + "]")
        return file_path
    }
    // Recursive conversion of one parse node, dispatching on the node's name.
    var syntax_to_ast_helper: fun(*tree<symbol>): *tree<ast> = fun(syntax: *tree<symbol>): *tree<ast> {
        // get_node() returns null when a lookup finds nothing; treat a null
        // parse node like an unrecognised one instead of dereferencing it.
        if (!syntax)
            return null<tree<ast>>()
        if (syntax->data.name == "import") {
            return _import(make_binding(resolve_import_file(concat(syntax->children[1]) + ".krak")), from_vector(syntax->children.slice(2,-1).filter(fun(s:*tree<symbol>):bool {
                return s->data.name == "identifier" || s->data.data == "*"
            }).map(concat)), vec(syntax_to_ast_helper(syntax->children[1])))
        } else if (syntax->data.name == "function")
            return _function(concat(get_node("func_identifier", syntax)), null<type>(),
                             (get_nodes("typed_parameter", syntax) +
                              get_nodes("statement", syntax)).map(syntax_to_ast_helper))
        else if (syntax->data.name == "typed_parameter")
            return _identifier(concat(get_node("identifier", syntax)), null<type>())
        else if (syntax->data.name == "type_def") {
            var n = _type_def(concat(get_node("identifier", syntax)),
                              get_nodes("declaration_statement", syntax).map(syntax_to_ast_helper))
            var template = get_node("template_dec", syntax)
            if (template == null<tree<symbol>>()) {
                return n
            } else {
                // a template declaration wraps the definition in a template node
                return _template(n->data._type_def, from_vector(get_nodes("template_param", template).map(concat)), vec(n))
            }
        } else if (syntax->data.name == "adt_def") {
            var n = _adt_def(concat(get_node("identifier", syntax)),
                             get_nodes("adt_option", syntax).map(fun(s: *tree<symbol>): *tree<ast> {
                return _identifier(concat(get_node("identifier", s)), null<type>())
            }))
            var template = get_node("template_dec", syntax)
            if (template == null<tree<symbol>>()) {
                return n
            } else {
                return _template(n->data._adt_def, from_vector(get_nodes("template_param", template).map(concat)), vec(n))
            }
        } else if (syntax->data.name == "statement")
            return syntax_to_ast_helper(syntax->children[0])
        else if (syntax->data.name == "code_block")
            return _block(syntax->children.map(syntax_to_ast_helper))
        else if (syntax->data.name == "return_statement")
            return _return(syntax->children.map(syntax_to_ast_helper))
        else if (syntax->data.name == "defer_statement")
            return _defer(syntax->children.map(syntax_to_ast_helper))
        else if (syntax->data.name == "match_statement") {
            return _match(vec(syntax_to_ast_helper(get_node("boolean_expression", syntax))) +
                          get_nodes("case_statement", syntax).map(fun(s: *tree<symbol>): *tree<ast> {
                return _case(s->children.map(syntax_to_ast_helper))
            }))
        } else if (syntax->data.name == "declaration_statement") {
            var children = vec(_identifier(concat(get_node("identifier", syntax)), null<type>()))
            children += get_nodes("boolean_expression", syntax).map(syntax_to_ast_helper)
            return _declaration(children)
        } else if (syntax->data.name == "assignment_statement")
            return _assignment(vec(_binding(concat(syntax->children[1]), null<tree<ast>>()),
                                   syntax_to_ast_helper(syntax->children[0]),
                                   syntax_to_ast_helper(syntax->children[2])))
        else if (syntax->data.name == "function_call")
            return _call(vec(syntax_to_ast_helper(syntax->children[0])) + get_nodes("parameter", syntax).map(fun(s: *tree<symbol>): *tree<ast> {
                return syntax_to_ast_helper(s->children[0])
            }))
        else if (syntax->data.name == "boolean_expression" ||
                 syntax->data.name == "and_boolean_expression" ||
                 syntax->data.name == "bitwise_or" ||
                 syntax->data.name == "bitwise_xor" ||
                 syntax->data.name == "bitwise_and" ||
                 syntax->data.name == "bool_exp" ||
                 syntax->data.name == "expression" ||
                 syntax->data.name == "shiftand" ||
                 syntax->data.name == "term" ||
                 syntax->data.name == "factor" ||
                 syntax->data.name == "unarad" ||
                 syntax->data.name == "access_operation") {
            if (syntax->children.size == 1) {
                // single child: pass straight through
                return syntax_to_ast_helper(syntax->children[0])
            } else if (syntax->children.size == 2) {
                var template_inst = get_node("template_inst", syntax)
                if (template_inst != null<tree<symbol>>()) {
                    if (syntax->children[0]->data.name != "scoped_identifier")
                        error(syntax, "Unexpected template instantiation (not on an identifier)")
                    return _binding(concat(syntax->children[0]) + "<somin>", vec<*type>(), null<tree<ast>>())
                } else if (syntax->children[0]->data.terminal) {
                    // terminal first: child 0 is the operator, child 1 the operand
                    return _call(vec(_binding(concat(syntax->children[0]), null<tree<ast>>()),
                                     syntax_to_ast_helper(syntax->children[1])))
                } else {
                    // otherwise child 1 is the operator, child 0 the operand
                    return _call(vec(_binding(concat(syntax->children[1]), null<tree<ast>>()),
                                     syntax_to_ast_helper(syntax->children[0])))
                }
            } else {
                // remaining case: children[1] is the operator, [0] and [2] the operands
                return _call(vec(_binding(concat(syntax->children[1]), null<tree<ast>>()),
                                 syntax_to_ast_helper(syntax->children[0]),
                                 syntax_to_ast_helper(syntax->children[2])))
            }
        } else if (syntax->data.name == "number")
            return _value(concat(syntax), null<type>())
        else if (syntax->data.name == "scoped_identifier" || syntax->data.name == "identifier")
            return _binding(concat(syntax), null<tree<ast>>())
        else
            return null<tree<ast>>()
    }
    var result = _translation_unit(file_name, syntax->children.map(syntax_to_ast_helper))
    printlnerr("made")
    print_tree(result, 1)
    printlnerr("from")
    print_tree(syntax, 1)
    return result
}
// Prints a tree to stderr, one node per line, indented with one tab per
// level. Null children are printed as "null!" at the child's depth.
fun print_tree<T>(t: *tree<T>, level: int) {
    printlnerr("\t" * level + to_string(t->data))
    for (var idx = 0; idx < t->children.size; idx++;) {
        var child = t->children[idx]
        if (!child) {
            printlnerr("\t" * (level + 1) + "null!")
        } else {
            print_tree(child, level+1)
        }
    }
}
// *char overload: wraps the name in a str and forwards.
fun get_node(lookup: *char, parent: *tree<symbol>): *tree<symbol> {
    var wanted = str(lookup)
    return get_node(wanted, parent)
}
// Returns the single direct child of `parent` whose symbol name equals
// `lookup`, or null when there is none. Calls error() when more than one
// child matches.
fun get_node(lookup: str, parent: *tree<symbol>): *tree<symbol> {
    var matches = get_nodes(lookup, parent)
    if (matches.size > 1) {
        error(parent, "get node too many results!")
    }
    if (matches.size == 0) {
        return null<tree<symbol>>()
    }
    return matches[0]
}
// *char overload: wraps the name in a str and forwards.
fun get_nodes(lookup: *char, parent: *tree<symbol>): vec<*tree<symbol>> {
    var wanted = str(lookup)
    return get_nodes(wanted, parent)
}
// Returns every direct child of `parent` whose symbol name equals `lookup`.
fun get_nodes(lookup: str, parent: *tree<symbol>): vec<*tree<symbol>> {
    var name_matches = fun(candidate: *tree<symbol>): bool {
        return candidate->data.name == lookup
    }
    return parent->children.filter(name_matches)
}
// Concatenates the data strings of `node` and all of its descendants,
// parent before children and children in order, skipping any node whose
// data is the literal "no_value".
fun concat(node: *tree<symbol>): str {
    var result.construct(): str
    if (node->data.data != "no_value") {
        result += node->data.data
    }
    for (var idx = 0; idx < node->children.size; idx++;) {
        result += concat(node->children[idx])
    }
    return result
}
// Walks down the chain of first children starting at `source` and returns
// the first node flagged as terminal. Returns null when `source` is null or
// when the chain ends at a childless non-terminal.
fun get_first_terminal(source: *tree<symbol>): *tree<symbol> {
    var current = source
    while (current != null<tree<symbol>>()) {
        if (current->data.terminal) {
            return current
        }
        if (current->children.size == 0) {
            return null<tree<symbol>>()
        }
        current = current->children.first()
    }
    return null<tree<symbol>>()
}
// *char overload: wraps the message in a str and forwards.
fun error(source: *tree<symbol>, message: *char) {
    error(source, str(message))
}
// Reports an error for a parse-tree node. When a first terminal can be
// found under `source`, the report is prefixed with the node's concatenated
// text plus that terminal's source and position; the bare message is passed
// to error() afterwards.
fun error(source: *tree<symbol>, message: str) {
    var anchor = get_first_terminal(source)
    if (anchor) {
        var located = "***error |" + concat(source) + "| *** " + anchor->data.source + ": " + anchor->data.position + " " + message
        error(located)
    }
    error(message)
}
// Simplifies a freshly parsed tree in place, in two phases:
//   1. remove_node: deletes every node matching the listed symbols
//      (null markers, whitespace, punctuation and keyword terminals, ...).
//   2. collapse_node: replaces every node matching the listed list/wrapper
//      symbols with that node's own children, flattening nested lists.
// All removals run before any collapse.
// NOTE(review): the second symbol() argument appears to flag terminals (it
// is true exactly for the quoted names) - confirm against symbol's definition.
fun trim(parse_tree: *tree<symbol>) {
// --- phase 1: removals ---
remove_node(symbol("$NULL$", false), parse_tree)
remove_node(symbol("WS", false), parse_tree)
// the terminals have " around them, which we have to escape
remove_node(symbol("\"\\(\"", true), parse_tree)
remove_node(symbol("\"\\)\"", true), parse_tree)
remove_node(symbol("\"template\"", true), parse_tree)
remove_node(symbol("\"return\"", true), parse_tree)
remove_node(symbol("\"defer\"", true), parse_tree)
remove_node(symbol("\";\"", true), parse_tree)
remove_node(symbol("line_end", false), parse_tree)
remove_node(symbol("\"{\"", true), parse_tree)
remove_node(symbol("\"}\"", true), parse_tree)
remove_node(symbol("\"(\"", true), parse_tree)
remove_node(symbol("\")\"", true), parse_tree)
remove_node(symbol("\"if\"", true), parse_tree)
remove_node(symbol("\"while\"", true), parse_tree)
remove_node(symbol("\"__if_comp__\"", true), parse_tree)
remove_node(symbol("\"comp_simple_passthrough\"", true), parse_tree)
/*remove_node(symbol("obj_nonterm", false), parse_tree)*/
remove_node(symbol("adt_nonterm", false), parse_tree)
// --- phase 2: collapses ---
collapse_node(symbol("case_statement_list", false), parse_tree)
collapse_node(symbol("opt_param_assign_list", false), parse_tree)
collapse_node(symbol("param_assign_list", false), parse_tree)
collapse_node(symbol("opt_typed_parameter_list", false), parse_tree)
collapse_node(symbol("opt_parameter_list", false), parse_tree)
collapse_node(symbol("intrinsic_parameter_list", false), parse_tree)
collapse_node(symbol("identifier_list", false), parse_tree)
collapse_node(symbol("adt_option_list", false), parse_tree)
collapse_node(symbol("statement_list", false), parse_tree)
collapse_node(symbol("parameter_list", false), parse_tree)
collapse_node(symbol("typed_parameter_list", false), parse_tree)
collapse_node(symbol("unorderd_list_part", false), parse_tree)
collapse_node(symbol("if_comp_pred", false), parse_tree)
collapse_node(symbol("declaration_block", false), parse_tree)
collapse_node(symbol("type_list", false), parse_tree)
collapse_node(symbol("opt_type_list", false), parse_tree)
collapse_node(symbol("template_param_list", false), parse_tree)
collapse_node(symbol("trait_list", false), parse_tree)
collapse_node(symbol("dec_type", false), parse_tree)
}
// Deletes, everywhere below `parse_tree`, each child that is null or whose
// symbol matches `remove` (compared with equal_wo_data). The removed child's
// own subtree goes with it. The root itself is never removed.
fun remove_node(remove: symbol, parse_tree: *tree<symbol>) {
    var pending = stack<*tree<symbol>>()
    pending.push(parse_tree)
    while (!pending.empty()) {
        var current = pending.pop()
        var idx = 0
        while (idx < current->children.size) {
            var child = current->children[idx]
            if (child != null<tree<symbol>>() && !child->data.equal_wo_data(remove)) {
                // kept: visit its children later and move on to the next slot
                pending.push(child)
                idx++
            } else {
                // dropped: the next child slides into this slot, so stay put
                current->children.remove(idx)
            }
        }
    }
}
// Replaces, everywhere below `parse_tree`, each child whose symbol matches
// `remove` (compared with equal_wo_data) with that child's own children,
// spliced in at the same position. Spliced-in children are re-examined, so
// nested matches are flattened too. The root itself is never collapsed.
// Null children are left in place and skipped rather than dereferenced,
// matching the null tolerance of remove_node.
fun collapse_node(remove: symbol, parse_tree: *tree<symbol>) {
    var to_process = stack<*tree<symbol>>()
    to_process.push(parse_tree)
    while(!to_process.empty()) {
        var node = to_process.pop()
        for (var i = 0; i < node->children.size; i++;) {
            var child = node->children[i]
            if (child != null<tree<symbol>>() && child->data.equal_wo_data(remove)) {
                var add_children = child->children;
                // stick child's children between the current children divided
                // on i, without including i
                node->children = node->children.slice(0,i) +
                                 add_children + node->children.slice(i+1,-1)
                // revisit index i: it now holds the first spliced-in child
                i--;
            } else if (child != null<tree<symbol>>()) {
                to_process.push(child)
            }
        }
    }
}