From 38ec4abc01788d3ddcc7e0f2c0bf6c21ee902d70 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 5 Apr 2016 03:14:56 -0400 Subject: [PATCH] Added file name + line number to symbols and use it for reasonable error handling now, added a version number to the compiled grammer --- kraken.krak | 34 ++++++++++----------- stdlib/ast_transformation.krak | 49 ++++++++++++++++++++++--------- stdlib/lexer.krak | 9 +++++- stdlib/parser.krak | 2 ++ stdlib/symbol.krak | 24 ++++++++++++--- tests/error_test.krak | 13 ++++++++ tests/test_lexer.expected_results | 34 ++++++++++----------- 7 files changed, 111 insertions(+), 54 deletions(-) create mode 100644 tests/error_test.krak diff --git a/kraken.krak b/kraken.krak index 635b967..a709a61 100644 --- a/kraken.krak +++ b/kraken.krak @@ -21,6 +21,7 @@ fun main(argc: int, argv: **char):int { println(file_name) var compiled_name = file_name + string(".comp_new") + var compiled_version = 1 var file_contents = read_file(file_name) var loaded_and_valid = false @@ -29,23 +30,20 @@ fun main(argc: int, argv: **char):int { var pos = 0 var binary = read_file_binary(compiled_name) println("read file!") - var cached_contents = string() - unpack(cached_contents, pos) = unserialize(binary, pos) - if (cached_contents == file_contents) { - println("loaded_and_valid, using cached version!") - loaded_and_valid = true - /*unpack(gram, pos) = unserialize(binary, pos)*/ - // skip unnecessary copies this way - pos = gram.unserialize(binary, pos) - println("finished unserializeing!!") - } else { - /*println("file contents do not match:")*/ - /*println("CACHED:")*/ - /*println(cached_contents)*/ - /*println("REAL:")*/ - /*println(file_contents)*/ - /*println("END")*/ - } + var saved_version = 0 + unpack(saved_version, pos) = unserialize(binary, pos) + if (saved_version == compiled_version) { + var cached_contents = string() + unpack(cached_contents, pos) = unserialize(binary, pos) + if (cached_contents == file_contents) { + println("loaded_and_valid, using cached version!") + loaded_and_valid = true + /*unpack(gram, pos) = unserialize(binary, pos)*/ + // skip unnecessary copies this way + pos = gram.unserialize(binary, pos) + println("finished unserializeing!!") + } else println("contents different") + } else println("version number different") } else { println("cached file does not exist") } @@ -59,7 +57,7 @@ fun main(argc: int, argv: **char):int { println("grammer loaded, calculate_state_automaton") gram.calculate_state_automaton() println("calculated, writing out") - write_file_binary(compiled_name, serialize(file_contents) + serialize(gram)) + write_file_binary(compiled_name, serialize(compiled_version) + serialize(file_contents) + serialize(gram)) println("done writing") } diff --git a/stdlib/ast_transformation.krak b/stdlib/ast_transformation.krak index 3a6da79..dce2b00 100644 --- a/stdlib/ast_transformation.krak +++ b/stdlib/ast_transformation.krak @@ -222,11 +222,16 @@ obj ast_transformation (Object) { var return_type = null() if (typed_return_node) return_type = transform_type(get_node("type", typed_return_node), scope, template_replacements) else return_type = type_ptr(base_type::void_return()) + if (return_type->is_none()) + error(node, "return type none") // transform parameters var parameters = vector<*ast_node>() get_nodes("typed_parameter", node).for_each(fun(child: *tree) { // note the temporary null() which gets replaced below, as the dependency is circular - parameters.add(ast_identifier_ptr(concat_symbol_tree(get_node("identifier", child)), transform_type(get_node("type", child), scope, template_replacements), null())) + var param_type = transform_type(get_node("type", child), scope, template_replacements) + if (param_type->is_none()) + error(child, "parameter type none") + parameters.add(ast_identifier_ptr(concat_symbol_tree(get_node("identifier", child)), param_type, null())) }) // figure out function type and make function_node var function_node = ast_function_ptr(function_name, type_ptr(parameters.map(fun(parameter: *ast_node): *type return parameter->identifier.type;), return_type), parameters) @@ -351,12 +356,12 @@ obj ast_transformation (Object) { key.for_each(fun(t: type) hasTypStr += t.to_string(false) + " ";) /*print(hasTypStr)*/ if (typeStr == hasTypStr) - error("they're equal but really shouldnt be") + error(node, "they're equal but really shouldnt be") /*println()*/ }) /*println("donr")*/ if (real_types.any_true(fun(t: *type): bool return t->is_none() || t ->is_template_type();)) { - error("Instantiating types for templated object are not all real types!") + error(node, "Instantiating types for templated object are not all real types!") } inst_type = first_pass_type_def(results[i]->template.syntax_node, results[i], true) // no change up it's name so we can see that it's instantiated when printed out and keep track of it @@ -372,7 +377,7 @@ obj ast_transformation (Object) { } if (fitting_types.size == 0) { println("no working templated object found") - error("FREAK OUT AUTOMATON") + error(node, "FREAK OUT AUTOMATON") return null() } return fitting_types.max(fun(a: pair<*ast_node, int>, b: pair<*ast_node, int>): bool return a.second < b.second;).first->type_def.self_type->clone_with_indirection(indirection, is_ref) @@ -466,7 +471,7 @@ obj ast_transformation (Object) { return transform_value(node, scope) } print("FAILED TO TRANSFORM: "); print(name + ": "); println(concat_symbol_tree(node)) - error("FAILED TO TRANSFORM") + error(node, "FAILED TO TRANSFORM") return null() } fun transform_all(nodes: vector<*tree>, scope: *ast_node, template_replacements: map): vector<*ast_node> { @@ -593,7 +598,8 @@ obj ast_transformation (Object) { if (!type_syntax_node) identifier->identifier.type = get_ast_type(expression)->clone_without_ref() } - if (!identifier->identifier.type) error("declaration statement with no type or expression from which to inference type") + if (!identifier->identifier.type) error(node, "declaration statement with no type or expression from which to inference type") + if (identifier->identifier.type->is_none()) error(node, "declaration statement with bad type") var declaration = ast_declaration_statement_ptr(identifier, expression) // ok, deal with the possible init position method call if (identifiers.size == 2) { @@ -713,7 +719,7 @@ obj ast_transformation (Object) { var to_ret = ast_case_statement_ptr() var the_adts = scope_lookup(concat_symbol_tree(get_node("scoped_identifier", get_node("scoped_identifier", node))), scope) if (the_adts.size != 1) - error(string("the number of adts found was not 1, it was ") + the_adts.size + " for " + concat_symbol_tree(get_node("scoped_identifier", node))) + error(node, string("the number of adts found was not 1, it was ") + the_adts.size + " for " + concat_symbol_tree(get_node("scoped_identifier", node))) var the_adt = the_adts[0] var the_option_name = concat_symbol_tree(get_node("identifier", get_node("scoped_identifier", node))) var the_option = the_adt->adt_def.options.find_first_satisfying(fun(option: *ast_node): bool return option->identifier.name == the_option_name;) @@ -850,7 +856,7 @@ obj ast_transformation (Object) { search_type::function(type_vec) possible_value = find_or_instantiate_template_function(concat_symbol_tree(node->children[0]), null>(), scope, type_vec, template_replacements, map()); } if (!possible_value) - error(concat_symbol_tree(node) + ": HAS NO POSSIBLE FUNCTION OR FUNCTION TEMPLATE SOLUTIONS") + error(node, concat_symbol_tree(node) + ": HAS NO POSSIBLE FUNCTION OR FUNCTION TEMPLATE SOLUTIONS") return possible_value } else if (node->children.size == 2) { var template_inst = get_node("template_inst", node) @@ -859,11 +865,11 @@ obj ast_transformation (Object) { var result = null() match (searching_for) { // I guess this should never happen? - search_type::none() error("TE()) } if (!result) - error("Could not find templated function " + concat_symbol_tree(identifier) + " even though had a template_inst") + error(node, "Could not find templated function " + concat_symbol_tree(identifier) + " even though had a template_inst") return result } var check_if_post = concat_symbol_tree(node->children[1]) @@ -902,7 +908,7 @@ obj ast_transformation (Object) { /*println("PRE TEMPLATE TRY FOR SECOND PARAM")*/ second_param = find_or_instantiate_template_function(method_name, template_inst, get_ast_type(first_param)->type_def, type_vec, template_replacements, inherited_replacements); if (!second_param) { - error("Could not find method " + method_name + " on the right side of (. or ->) " + concat_symbol_tree(node->children[0]) + + error(node, "Could not find method " + method_name + " on the right side of (. or ->) " + concat_symbol_tree(node->children[0]) + ", whole string: " + concat_symbol_tree(node) + ", left type: " + get_ast_type(first_param)->to_string()) } } @@ -1151,7 +1157,7 @@ fun unify_type(template_type: *tree, param_type: *type, new_map: *mapchildren[0]->data.name) println(template_type->children[0]->data.data) - error("TYPE INFERENCE NOT GOOD ENOUGH") + error(template_type, "TYPE INFERENCE NOT GOOD ENOUGH") } } fun function_satisfies_params(node: *ast_node, param_types: vector<*type>): bool { @@ -1263,7 +1269,7 @@ fun get_node(lookup: *char, parent: *tree): *tree { fun get_node(lookup: string, parent: *tree): *tree { var results = get_nodes(lookup, parent) if (results.size > 1) - error("get node too many results!") + error(parent, "get node too many results!") if (results.size) return results[0] return null>() @@ -1284,9 +1290,24 @@ fun add_to_scope(name: string, to_add: *ast_node, add_to: *ast_node) { else add_to_map->set(name, vector(to_add)) } +fun get_first_terminal(source: *tree): *tree { + if (!source) + return null>() + if (source->data.terminal) + return source + if (source->children.size == 0) + return null>() + return get_first_terminal(source->children.first()) +} fun error(message: *char) error(string(message)); -fun error(message: string) { +fun error(source: *tree, message: *char) error(source, string(message)); +fun error(message: string) error(null>(), message); +fun error(source: *tree, message: string) { println("****ERROR****") + source = get_first_terminal(source) + if (source) { + print(source->data.source + ": " + source->data.position + " ") + } println(message) exit(-1) /*while (true){}*/ diff --git a/stdlib/lexer.krak b/stdlib/lexer.krak index 626ed15..8667aeb 100644 --- a/stdlib/lexer.krak +++ b/stdlib/lexer.krak @@ -85,7 +85,14 @@ obj lexer (Object) { if (max < 0) return symbol::invalid_symbol() position += max_length - return symbol::symbol(regs[max].first, true, input.slice(position-max_length, position)) + var line_number = fun(str: ref string::string, pos: int): int { + var line_no = 1 + for (var i = 0; i < pos; i++;) + if (str[i] == '\n') + line_no++ + return line_no + } + return symbol::symbol(regs[max].first, true, input.slice(position-max_length, position), line_number(input, position)) } } diff --git a/stdlib/parser.krak b/stdlib/parser.krak index 00cf546..3725ebb 100644 --- a/stdlib/parser.krak +++ b/stdlib/parser.krak @@ -75,6 +75,8 @@ obj parser (Object) { for (current_symbol = lex.next(); current_symbol != eof_symbol() && current_symbol != invalid_symbol(); current_symbol = lex.next();) { /*println("current_symbol is ")*/ /*println(current_symbol.to_string())*/ + if (current_symbol != eof_symbol() && current_symbol != invalid_symbol()) + current_symbol.source = name input.addEnd(current_symbol) } input.addEnd(current_symbol) diff --git a/stdlib/symbol.krak b/stdlib/symbol.krak index ae24020..54f66de 100644 --- a/stdlib/symbol.krak +++ b/stdlib/symbol.krak @@ -31,8 +31,11 @@ fun symbol(nameIn: *char, terminalIn: bool, dataIn: *char): symbol { return toRet } -fun symbol(nameIn: string::string, terminalIn: bool, dataIn: string::string): symbol { +fun symbol(nameIn: string::string, terminalIn: bool, dataIn: string::string): symbol return symbol(nameIn, terminalIn, dataIn, 0) + +fun symbol(nameIn: string::string, terminalIn: bool, dataIn: string::string, position: int): symbol { var toRet.construct(nameIn, terminalIn, dataIn): symbol + toRet.position = position return toRet } @@ -41,32 +44,43 @@ obj symbol (Object, Serializable) { var name: string::string var terminal: bool + var source: string::string + var position: int + fun construct(): *symbol { data.construct() name.construct() + terminal = false + source.construct() + position = 0 return this } fun construct(nameIn: string::string, terminalIn: bool, dataIn: string::string): *symbol { name.construct(nameIn) terminal = terminalIn data.construct(dataIn) + source.construct() + position = 0 return this } fun destruct() { data.destruct() name.destruct() + source.destruct() } fun copy_construct(old: *symbol) { data.copy_construct(&old->data) name.copy_construct(&old->name) terminal = old->terminal + source.copy_construct(&old->source) + position = old->position } fun operator=(old: ref symbol) { destruct() copy_construct(&old) } fun serialize(): vector::vector { - return serialize::serialize(data) + serialize::serialize(name) + serialize::serialize(terminal) + return serialize::serialize(data) + serialize::serialize(name) + serialize::serialize(terminal) + serialize::serialize(source) + serialize::serialize(position) } fun unserialize(it: ref vector::vector, pos: int): int { /*construct()*/ @@ -75,13 +89,15 @@ obj symbol (Object, Serializable) { pos = data.unserialize(it, pos) pos = name.unserialize(it, pos) util::unpack(terminal, pos) = serialize::unserialize(it, pos) + pos = source.unserialize(it, pos) + util::unpack(position, pos) = serialize::unserialize(it, pos) return pos } fun equal_wo_data(other: ref symbol): bool { return name == other.name && terminal == other.terminal; } fun operator==(other: ref symbol): bool { - return data == other.data && name == other.name && terminal == other.terminal; + return data == other.data && name == other.name && terminal == other.terminal && source == other.source && position == other.position } fun operator!=(other: ref symbol): bool { return !(*this == other); @@ -92,7 +108,7 @@ obj symbol (Object, Serializable) { terminalString = "true" else terminalString = "false" - return name + ": " + data + " " + terminalString + return name + ": " + data + " " + terminalString + "[" + source + ":" + position + "]" } } diff --git a/tests/error_test.krak b/tests/error_test.krak new file mode 100644 index 0000000..94ab785 --- /dev/null +++ b/tests/error_test.krak @@ -0,0 +1,13 @@ +import mem:* +import vector:* + + +fun main():int { + var a = null>() + /*doesnt_exist(2)*/ + /*var b: doesnt_exist*/ + return -1 +} + + + diff --git a/tests/test_lexer.expected_results b/tests/test_lexer.expected_results index 641330d..3822721 100644 --- a/tests/test_lexer.expected_results +++ b/tests/test_lexer.expected_results @@ -1,22 +1,22 @@ -a+: aaaa true -test: test true +a+: aaaa true[:1] +test: test true[:1] old contributed tests -b: b true -b: b true -$EOF$: $EOF$ false +b: b true[:1] +b: b true[:1] +$EOF$: $EOF$ false[:0] -a*: aaa true -b: b true -a*: aa true -b: b true -b: b true -$EOF$: $EOF$ false +a*: aaa true[:1] +b: b true[:1] +a*: aa true[:1] +b: b true[:1] +b: b true[:1] +$EOF$: $EOF$ false[:0] -a|b: b true -$INVALID$: $INVALID$ false +a|b: b true[:1] +$INVALID$: $INVALID$ false[:0] -xyzzy: xyzzy true -$EOF$: $EOF$ false +xyzzy: xyzzy true[:1] +$EOF$: $EOF$ false[:0] -(i|n|t|e)+: intent true -$EOF$: $EOF$ false +(i|n|t|e)+: intent true[:1] +$EOF$: $EOF$ false[:0]