From 58ab3e311fcd62b8ae375b9b20d3ba49f9294037 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 11 Aug 2015 01:07:16 -0400 Subject: [PATCH] Starting to really parse! Fixed the bad syntax trees (reduction copy_construct wasn't actually copying label because param and dest had same name) and fixed the krakenGrammer and CGenerator to allow escaped quotes in regular strings. add_children needs to be finished so the resulting trees don't go nuts with ambiguity, and some situations still cause vector access errors, which is odd. Also added the crazy simple write_file to io and the test_grammer.krak now outputs a syntax_tree.dot --- krakenGrammer.kgm | 2 +- src/CGenerator.cpp | 13 ++++++----- stdlib/io.krak | 13 +++++++++++ stdlib/parser.krak | 49 +++++++++++++++++++++++++++-------------- stdlib/string.krak | 5 +++++ tests/test_grammer.krak | 10 ++++++--- 6 files changed, 66 insertions(+), 26 deletions(-) diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 65fa857..270f3f1 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -134,7 +134,7 @@ augmented_alpha_alphanumeric = alpha_alphanumeric augmented_alpha_alphanumeric | numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; string = triple_quoted_string | "\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| -|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*\"" ; +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| |(\\\"))*\"" ; comment = cpp_comment | c_comment ; cpp_comment = "//(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )* " ; diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 6e5a026..8db8686 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -796,11 +796,14 @@ CCodeTriple CGenerator::generate(NodeTree* from, NodeTree* enc } case value: { - // ok, we now check for it being a string and escape all returns if it is (so that multiline strings work) - if (data.symbol.getName()[0] == '"') { - std::string innerString = strSlice(data.symbol.getName(), 0, 3) == "\"\"\"" - ? strSlice(data.symbol.getName(), 3, -4) - : strSlice(data.symbol.getName(), 1, -2); + // ok, we now check for it being a multiline string and escape all returns if it is (so that multiline strings work) + //if (data.symbol.getName()[0] == '"') { + if (data.symbol.getName()[0] == '"' && strSlice(data.symbol.getName(), 0, 3) == "\"\"\"") { + //bool multiline_str = strSlice(data.symbol.getName(), 0, 3) == "\"\"\""; + //std::string innerString = multiline_str + //? strSlice(data.symbol.getName(), 3, -4) + //: strSlice(data.symbol.getName(), 1, -2); + std::string innerString = strSlice(data.symbol.getName(), 3, -4); std::string newStr; for (auto character: innerString) if (character == '\n') diff --git a/stdlib/io.krak b/stdlib/io.krak index 30f4c69..967e16b 100644 --- a/stdlib/io.krak +++ b/stdlib/io.krak @@ -98,4 +98,17 @@ fun read_file(path: string::string): string::string { } return toRet } +fun write_file(path: string::string, data: string::string) { + var char_path = path.toCharArray() + defer delete(char_path) + var char_data = data.toCharArray() + defer delete(char_data) + __if_comp__ __C__ { + simple_passthrough(char_path,char_data::) """ + FILE *fp = fopen(char_path, "w"); + fprintf(fp, "%s", char_data); + fclose(fp); + """ + } +} diff --git a/stdlib/parser.krak b/stdlib/parser.krak index c6cdb71..0f817bb 100644 --- a/stdlib/parser.krak +++ b/stdlib/parser.krak @@ -89,8 +89,6 @@ obj parser (Object) { var null_symbol_tree = null>() - /*println("looking up")*/ - /*println(input[0].to_string())*/ gram.parse_table.get(0, input[0]).for_each(fun(act: action) { println("for each action") act.print() @@ -100,7 +98,6 @@ obj parser (Object) { to_reduce.push(reduction(v0, gram.rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree)) }) - for (var i = 0; i < input.size; i++;) { if (gss.frontier_is_empty(i)) { print(i) @@ -145,8 +142,13 @@ obj parser (Object) { }) println() println("got path edges") - if (curr_reduction.length != 0) + println("there are this many:") + println(path_edges.size) + if (curr_reduction.length != 0) { path_edges.addEnd(curr_reduction.label) + println("also adding the one from the reduction") + println(curr_reduction.label->data.to_string()) + } var curr_reached = path.last() print("checking shift for state ") print(curr_reached->data) @@ -155,7 +157,8 @@ obj parser (Object) { // if this is the Goal = a type reduction, then skip the actual reduction part. // the shift lookup will fail, and likely other things, and this is our accept // criteria anyway - if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs) + /*if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)*/ + if (curr_reduction.sym == gram.rules[0].lhs) return; var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule println("got shift to") @@ -187,7 +190,7 @@ obj parser (Object) { if (act.act == reduce && !fully_reduces_to_null(reduce_rule)) to_reduce.push(reduction(curr_reached, reduce_rule.lhs, act.rule_position, - new>()->construct(null_symbol()), + get_nullable_parts(reduce_rule), new_label)) }) } @@ -203,12 +206,12 @@ obj parser (Object) { var action_rule = gram.rules[act.state_or_rule] if (fully_reduces_to_null(action_rule)) { to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0, - new>()->construct(null_symbol()), - null>() )) + get_nullable_parts(action_rule), + null>() )) } else if (curr_reduction.length != 0) { to_reduce.push(reduction(curr_reached, action_rule.lhs, act.rule_position, - new>()->construct(null_symbol()), - new_label )) + get_nullable_parts(action_rule), + new_label )) } } }) @@ -239,7 +242,7 @@ obj parser (Object) { var reduce_rule = gram.rules[action.state_or_rule] if (!fully_reduces_to_null(reduce_rule)) { to_reduce.push(reduction(shift.first, reduce_rule.lhs, action.rule_position, - new>()->construct(null_symbol()), + get_nullable_parts(reduce_rule), new_label )) } }) @@ -262,12 +265,12 @@ obj parser (Object) { if (!fully_reduces_to_null(action_rule)) { println("does not reduce to null") to_reduce.push(reduction(shift.first, action_rule.lhs, action.rule_position, - new>()->construct(null_symbol()), + get_nullable_parts(action_rule), new_label )) } else { println("does reduce to null") to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0, - new>()->construct(null_symbol()), + get_nullable_parts(action_rule), null>() )) } } @@ -297,6 +300,11 @@ obj parser (Object) { fun fully_reduces_to_null(r: ref rule): bool { return r.position == 0 && gram.first_vector(r.rhs).contains(null_symbol()) } + fun get_nullable_parts(r: ref rule): *tree { + if (fully_reduces_to_null(r)) + return new>()->construct(null_symbol()) + return null>() + } } obj gss (Object) { @@ -388,12 +396,12 @@ obj reduction (Object) { var nullable_parts: *tree var label: *tree - fun construct(f: *tree, s: symbol, l: int, n: *tree, label:*tree): *reduction { + fun construct(f: *tree, s: symbol, l: int, n: *tree, labelIn:*tree): *reduction { from = f sym.copy_construct(&s) length = l nullable_parts = n - label = label + label = labelIn return this } fun copy_construct(old: *reduction) { @@ -415,11 +423,18 @@ obj reduction (Object) { fun syntax_tree_to_dot(root: *tree): string { var ret = string("digraph Kaken {\n") var helper: fun(*tree):void = fun(node: *tree) { - ret += node->data.to_string() + ";;;;\n"; + /*ret += node->data.to_string() + ";;;;\n";*/ node->children.for_each(fun(child: *tree) { if (!child) return; // where on earth does the null come from - ret += node->data.to_string() + " -> " + child->data.to_string() + "\n"; + var escaped_child = string("") + child->data.to_string().data.for_each(fun(c: char) { + if (c != '"') + escaped_child += c + else + escaped_child += "\\\"" + }) + ret += string("\"") + node->data.to_string() + "\" -> \"" + escaped_child + "\"\n"; helper(child) }) } diff --git a/stdlib/string.krak b/stdlib/string.krak index dee6745..5102ac8 100644 --- a/stdlib/string.krak +++ b/stdlib/string.krak @@ -6,6 +6,11 @@ fun string(in:*char):string { var out.construct(in):string return out } +fun string(in:char):string { + var out.construct():string + out += in + return out +} obj string (Object) { var data: vector::vector; diff --git a/tests/test_grammer.krak b/tests/test_grammer.krak index 7138b7b..36eabbd 100644 --- a/tests/test_grammer.krak +++ b/tests/test_grammer.krak @@ -9,9 +9,9 @@ import tree:* fun main():int { - /*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/ + var a = load_grammer(read_file(string("../krakenGrammer.kgm"))) /*var a = load_grammer(read_file(string("grammer.kgm")))*/ - var a = load_grammer(read_file(string("grammer2.kgm"))) + /*var a = load_grammer(read_file(string("grammer2.kgm")))*/ /*var a = load_grammer(read_file(string("grammer3.kgm")))*/ println(a.to_string()) var doFirstSet = fun() { @@ -52,10 +52,14 @@ fun main():int { println(a.to_string()) a.calculate_state_automaton() var parse.construct(a): parser - var result = parse.parse_input(string("ad"), string("fun name")) + var result = parse.parse_input(string("fun main():int { return 0; }"), string("fun name")) + /*var result = parse.parse_input(string("ad"), string("fun name"))*/ + /*var result = parse.parse_input(string("hibyed"), string("fun name"))*/ + /*var result = parse.parse_input(string("hmmhmm"), string("fun name"))*/ /*var result = parse.parse_input(string("hid"), string("fun name"))*/ println("the tree") println(syntax_tree_to_dot(result)) + write_file(string("syntax_tree.dot"), syntax_tree_to_dot(result)) /*var parse.construct(): parser*/ return 0 }