Starting to really parse! Fixed the bad syntax trees (reduction's construct wasn't actually storing the label because the parameter and the destination field had the same name) and fixed krakenGrammer and CGenerator to allow escaped quotes in regular strings. add_children still needs to be finished so the resulting trees don't go nuts with ambiguity, and some situations still cause vector access errors, which is odd. Also added the crazy simple write_file to io, and test_grammer.krak now outputs a syntax_tree.dot.

This commit is contained in:
Nathan Braswell
2015-08-11 01:07:16 -04:00
parent 2777ca10f1
commit 58ab3e311f
6 changed files with 66 additions and 26 deletions

View File

@@ -134,7 +134,7 @@ augmented_alpha_alphanumeric = alpha_alphanumeric augmented_alpha_alphanumeric |
numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; numeric = "(0|1|2|3|4|5|6|7|8|9)+" ;
string = triple_quoted_string | "\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| string = triple_quoted_string | "\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*\"" ; |z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| |(\\\"))*\"" ;
comment = cpp_comment | c_comment ; comment = cpp_comment | c_comment ;
cpp_comment = "//(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )* cpp_comment = "//(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )*
" ; " ;

View File

@@ -796,11 +796,14 @@ CCodeTriple CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enc
} }
case value: case value:
{ {
// ok, we now check for it being a string and escape all returns if it is (so that multiline strings work) // ok, we now check for it being a multiline string and escape all returns if it is (so that multiline strings work)
if (data.symbol.getName()[0] == '"') { //if (data.symbol.getName()[0] == '"') {
std::string innerString = strSlice(data.symbol.getName(), 0, 3) == "\"\"\"" if (data.symbol.getName()[0] == '"' && strSlice(data.symbol.getName(), 0, 3) == "\"\"\"") {
? strSlice(data.symbol.getName(), 3, -4) //bool multiline_str = strSlice(data.symbol.getName(), 0, 3) == "\"\"\"";
: strSlice(data.symbol.getName(), 1, -2); //std::string innerString = multiline_str
//? strSlice(data.symbol.getName(), 3, -4)
//: strSlice(data.symbol.getName(), 1, -2);
std::string innerString = strSlice(data.symbol.getName(), 3, -4);
std::string newStr; std::string newStr;
for (auto character: innerString) for (auto character: innerString)
if (character == '\n') if (character == '\n')

View File

@@ -98,4 +98,17 @@ fun read_file(path: string::string): string::string {
} }
return toRet return toRet
} }
// Writes `data` to the file at `path` through a C passthrough.
// The file is opened with fopen mode "w".
// If the file cannot be opened, the error is reported with perror and
// nothing is written (previously a NULL FILE* was handed to fprintf/fclose).
fun write_file(path: string::string, data: string::string) {
    // C-side char buffers for the passthrough; released when the function exits.
    var char_path = path.toCharArray()
    defer delete(char_path)
    var char_data = data.toCharArray()
    defer delete(char_data)
    __if_comp__ __C__ {
        simple_passthrough(char_path,char_data::) """
            FILE *fp = fopen(char_path, "w");
            if (fp) {
                fprintf(fp, "%s", char_data);
                fclose(fp);
            } else {
                perror(char_path);
            }
        """
    }
}

View File

@@ -89,8 +89,6 @@ obj parser (Object) {
var null_symbol_tree = null<tree<symbol>>() var null_symbol_tree = null<tree<symbol>>()
/*println("looking up")*/
/*println(input[0].to_string())*/
gram.parse_table.get(0, input[0]).for_each(fun(act: action) { gram.parse_table.get(0, input[0]).for_each(fun(act: action) {
println("for each action") println("for each action")
act.print() act.print()
@@ -100,7 +98,6 @@ obj parser (Object) {
to_reduce.push(reduction(v0, gram.rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree)) to_reduce.push(reduction(v0, gram.rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree))
}) })
for (var i = 0; i < input.size; i++;) { for (var i = 0; i < input.size; i++;) {
if (gss.frontier_is_empty(i)) { if (gss.frontier_is_empty(i)) {
print(i) print(i)
@@ -145,8 +142,13 @@ obj parser (Object) {
}) })
println() println()
println("got path edges") println("got path edges")
if (curr_reduction.length != 0) println("there are this many:")
println(path_edges.size)
if (curr_reduction.length != 0) {
path_edges.addEnd(curr_reduction.label) path_edges.addEnd(curr_reduction.label)
println("also adding the one from the reduction")
println(curr_reduction.label->data.to_string())
}
var curr_reached = path.last() var curr_reached = path.last()
print("checking shift for state ") print("checking shift for state ")
print(curr_reached->data) print(curr_reached->data)
@@ -155,7 +157,8 @@ obj parser (Object) {
// if this is the Goal = a type reduction, then skip the actual reduction part. // if this is the Goal = a type reduction, then skip the actual reduction part.
// the shift lookup will fail, and likely other things, and this is our accept // the shift lookup will fail, and likely other things, and this is our accept
// criteria anyway // criteria anyway
if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs) /*if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)*/
if (curr_reduction.sym == gram.rules[0].lhs)
return; return;
var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule
println("got shift to") println("got shift to")
@@ -187,7 +190,7 @@ obj parser (Object) {
if (act.act == reduce && !fully_reduces_to_null(reduce_rule)) if (act.act == reduce && !fully_reduces_to_null(reduce_rule))
to_reduce.push(reduction(curr_reached, reduce_rule.lhs, to_reduce.push(reduction(curr_reached, reduce_rule.lhs,
act.rule_position, act.rule_position,
new<tree<symbol>>()->construct(null_symbol()), get_nullable_parts(reduce_rule),
new_label)) new_label))
}) })
} }
@@ -203,12 +206,12 @@ obj parser (Object) {
var action_rule = gram.rules[act.state_or_rule] var action_rule = gram.rules[act.state_or_rule]
if (fully_reduces_to_null(action_rule)) { if (fully_reduces_to_null(action_rule)) {
to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0, to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
new<tree<symbol>>()->construct(null_symbol()), get_nullable_parts(action_rule),
null<tree<symbol>>() )) null<tree<symbol>>() ))
} else if (curr_reduction.length != 0) { } else if (curr_reduction.length != 0) {
to_reduce.push(reduction(curr_reached, action_rule.lhs, act.rule_position, to_reduce.push(reduction(curr_reached, action_rule.lhs, act.rule_position,
new<tree<symbol>>()->construct(null_symbol()), get_nullable_parts(action_rule),
new_label )) new_label ))
} }
} }
}) })
@@ -239,7 +242,7 @@ obj parser (Object) {
var reduce_rule = gram.rules[action.state_or_rule] var reduce_rule = gram.rules[action.state_or_rule]
if (!fully_reduces_to_null(reduce_rule)) { if (!fully_reduces_to_null(reduce_rule)) {
to_reduce.push(reduction(shift.first, reduce_rule.lhs, action.rule_position, to_reduce.push(reduction(shift.first, reduce_rule.lhs, action.rule_position,
new<tree<symbol>>()->construct(null_symbol()), get_nullable_parts(reduce_rule),
new_label )) new_label ))
} }
}) })
@@ -262,12 +265,12 @@ obj parser (Object) {
if (!fully_reduces_to_null(action_rule)) { if (!fully_reduces_to_null(action_rule)) {
println("does not reduce to null") println("does not reduce to null")
to_reduce.push(reduction(shift.first, action_rule.lhs, action.rule_position, to_reduce.push(reduction(shift.first, action_rule.lhs, action.rule_position,
new<tree<symbol>>()->construct(null_symbol()), get_nullable_parts(action_rule),
new_label )) new_label ))
} else { } else {
println("does reduce to null") println("does reduce to null")
to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0, to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
new<tree<symbol>>()->construct(null_symbol()), get_nullable_parts(action_rule),
null<tree<symbol>>() )) null<tree<symbol>>() ))
} }
} }
@@ -297,6 +300,11 @@ obj parser (Object) {
fun fully_reduces_to_null(r: ref rule): bool { fun fully_reduces_to_null(r: ref rule): bool {
return r.position == 0 && gram.first_vector(r.rhs).contains(null_symbol()) return r.position == 0 && gram.first_vector(r.rhs).contains(null_symbol())
} }
// Returns the nullable-parts tree to attach to a reduction built from rule `r`:
// a freshly allocated tree holding null_symbol() when fully_reduces_to_null(r)
// is true, and a null tree pointer otherwise.
fun get_nullable_parts(r: ref rule): *tree<symbol> {
    // Guard clause: a rule that does not fully reduce to null gets no tree.
    if (!fully_reduces_to_null(r)) {
        return null<tree<symbol>>()
    }
    return new<tree<symbol>>()->construct(null_symbol())
}
} }
obj gss (Object) { obj gss (Object) {
@@ -388,12 +396,12 @@ obj reduction (Object) {
var nullable_parts: *tree<symbol> var nullable_parts: *tree<symbol>
var label: *tree<symbol> var label: *tree<symbol>
fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, label:*tree<symbol>): *reduction { fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, labelIn:*tree<symbol>): *reduction {
from = f from = f
sym.copy_construct(&s) sym.copy_construct(&s)
length = l length = l
nullable_parts = n nullable_parts = n
label = label label = labelIn
return this return this
} }
fun copy_construct(old: *reduction) { fun copy_construct(old: *reduction) {
@@ -415,11 +423,18 @@ obj reduction (Object) {
fun syntax_tree_to_dot(root: *tree<symbol>): string { fun syntax_tree_to_dot(root: *tree<symbol>): string {
var ret = string("digraph Kaken {\n") var ret = string("digraph Kaken {\n")
var helper: fun(*tree<symbol>):void = fun(node: *tree<symbol>) { var helper: fun(*tree<symbol>):void = fun(node: *tree<symbol>) {
ret += node->data.to_string() + ";;;;\n"; /*ret += node->data.to_string() + ";;;;\n";*/
node->children.for_each(fun(child: *tree<symbol>) { node->children.for_each(fun(child: *tree<symbol>) {
if (!child) if (!child)
return; // where on earth does the null come from return; // where on earth does the null come from
ret += node->data.to_string() + " -> " + child->data.to_string() + "\n"; var escaped_child = string("")
child->data.to_string().data.for_each(fun(c: char) {
if (c != '"')
escaped_child += c
else
escaped_child += "\\\""
})
ret += string("\"") + node->data.to_string() + "\" -> \"" + escaped_child + "\"\n";
helper(child) helper(child)
}) })
} }

View File

@@ -6,6 +6,11 @@ fun string(in:*char):string {
var out.construct(in):string var out.construct(in):string
return out return out
} }
// Builds a string from a single character: default-constructs a string,
// appends `in` to it, and returns the result.
fun string(in:char):string {
    var result.construct():string
    result += in
    return result
}
obj string (Object) { obj string (Object) {
var data: vector::vector<char>; var data: vector::vector<char>;

View File

@@ -9,9 +9,9 @@ import tree:*
fun main():int { fun main():int {
/*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/ var a = load_grammer(read_file(string("../krakenGrammer.kgm")))
/*var a = load_grammer(read_file(string("grammer.kgm")))*/ /*var a = load_grammer(read_file(string("grammer.kgm")))*/
var a = load_grammer(read_file(string("grammer2.kgm"))) /*var a = load_grammer(read_file(string("grammer2.kgm")))*/
/*var a = load_grammer(read_file(string("grammer3.kgm")))*/ /*var a = load_grammer(read_file(string("grammer3.kgm")))*/
println(a.to_string()) println(a.to_string())
var doFirstSet = fun() { var doFirstSet = fun() {
@@ -52,10 +52,14 @@ fun main():int {
println(a.to_string()) println(a.to_string())
a.calculate_state_automaton() a.calculate_state_automaton()
var parse.construct(a): parser var parse.construct(a): parser
var result = parse.parse_input(string("ad"), string("fun name")) var result = parse.parse_input(string("fun main():int { return 0; }"), string("fun name"))
/*var result = parse.parse_input(string("ad"), string("fun name"))*/
/*var result = parse.parse_input(string("hibyed"), string("fun name"))*/
/*var result = parse.parse_input(string("hmmhmm"), string("fun name"))*/
/*var result = parse.parse_input(string("hid"), string("fun name"))*/ /*var result = parse.parse_input(string("hid"), string("fun name"))*/
println("the tree") println("the tree")
println(syntax_tree_to_dot(result)) println(syntax_tree_to_dot(result))
write_file(string("syntax_tree.dot"), syntax_tree_to_dot(result))
/*var parse.construct(): parser*/ /*var parse.construct(): parser*/
return 0 return 0
} }