Starting to really parse! Fixed the bad syntax trees (reduction copy_construct wasn't actually copying the label because the parameter and the destination had the same name) and fixed the krakenGrammer and CGenerator to allow escaped quotes in regular strings. add_children still needs to be finished so the resulting trees don't go nuts with ambiguity, and some situations still cause vector access errors, which is odd. Also added the crazy-simple write_file to io, and test_grammer.krak now outputs a syntax_tree.dot.

This commit is contained in:
Nathan Braswell
2015-08-11 01:07:16 -04:00
parent 2777ca10f1
commit 58ab3e311f
6 changed files with 66 additions and 26 deletions

View File

@@ -134,7 +134,7 @@ augmented_alpha_alphanumeric = alpha_alphanumeric augmented_alpha_alphanumeric |
numeric = "(0|1|2|3|4|5|6|7|8|9)+" ;
string = triple_quoted_string | "\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*\"" ;
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| |(\\\"))*\"" ;
comment = cpp_comment | c_comment ;
cpp_comment = "//(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )*
" ;

View File

@@ -796,11 +796,14 @@ CCodeTriple CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enc
}
case value:
{
// ok, we now check for it being a string and escape all returns if it is (so that multiline strings work)
if (data.symbol.getName()[0] == '"') {
std::string innerString = strSlice(data.symbol.getName(), 0, 3) == "\"\"\""
? strSlice(data.symbol.getName(), 3, -4)
: strSlice(data.symbol.getName(), 1, -2);
// ok, we now check for it being a multiline string and escape all returns if it is (so that multiline strings work)
//if (data.symbol.getName()[0] == '"') {
if (data.symbol.getName()[0] == '"' && strSlice(data.symbol.getName(), 0, 3) == "\"\"\"") {
//bool multiline_str = strSlice(data.symbol.getName(), 0, 3) == "\"\"\"";
//std::string innerString = multiline_str
//? strSlice(data.symbol.getName(), 3, -4)
//: strSlice(data.symbol.getName(), 1, -2);
std::string innerString = strSlice(data.symbol.getName(), 3, -4);
std::string newStr;
for (auto character: innerString)
if (character == '\n')

View File

@@ -98,4 +98,17 @@ fun read_file(path: string::string): string::string {
}
return toRet
}
// Writes `data` to the file at `path`. The file is opened with mode "w",
// so it is created if missing and truncated if it already exists.
//   path: location of the file to write
//   data: text to write; it is passed to fprintf as a "%s" argument
// If the file cannot be opened, the failure is reported with perror and
// nothing is written (previously a failed fopen led to fprintf/fclose on a
// null FILE pointer).
fun write_file(path: string::string, data: string::string) {
    // Both char arrays are released on scope exit via the deferred deletes.
    var char_path = path.toCharArray()
    defer delete(char_path)
    var char_data = data.toCharArray()
    defer delete(char_data)
    __if_comp__ __C__ {
        simple_passthrough(char_path,char_data::) """
            FILE *fp = fopen(char_path, "w");
            if (fp) {
                fprintf(fp, "%s", char_data);
                fclose(fp);
            } else {
                perror(char_path);
            }
        """
    }
}

View File

@@ -89,8 +89,6 @@ obj parser (Object) {
var null_symbol_tree = null<tree<symbol>>()
/*println("looking up")*/
/*println(input[0].to_string())*/
gram.parse_table.get(0, input[0]).for_each(fun(act: action) {
println("for each action")
act.print()
@@ -100,7 +98,6 @@ obj parser (Object) {
to_reduce.push(reduction(v0, gram.rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree))
})
for (var i = 0; i < input.size; i++;) {
if (gss.frontier_is_empty(i)) {
print(i)
@@ -145,8 +142,13 @@ obj parser (Object) {
})
println()
println("got path edges")
if (curr_reduction.length != 0)
println("there are this many:")
println(path_edges.size)
if (curr_reduction.length != 0) {
path_edges.addEnd(curr_reduction.label)
println("also adding the one from the reduction")
println(curr_reduction.label->data.to_string())
}
var curr_reached = path.last()
print("checking shift for state ")
print(curr_reached->data)
@@ -155,7 +157,8 @@ obj parser (Object) {
// if this is the Goal = a type reduction, then skip the actual reduction part.
// the shift lookup will fail, and likely other things, and this is our accept
// criteria anyway
if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)
/*if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)*/
if (curr_reduction.sym == gram.rules[0].lhs)
return;
var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule
println("got shift to")
@@ -187,7 +190,7 @@ obj parser (Object) {
if (act.act == reduce && !fully_reduces_to_null(reduce_rule))
to_reduce.push(reduction(curr_reached, reduce_rule.lhs,
act.rule_position,
new<tree<symbol>>()->construct(null_symbol()),
get_nullable_parts(reduce_rule),
new_label))
})
}
@@ -203,11 +206,11 @@ obj parser (Object) {
var action_rule = gram.rules[act.state_or_rule]
if (fully_reduces_to_null(action_rule)) {
to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
new<tree<symbol>>()->construct(null_symbol()),
get_nullable_parts(action_rule),
null<tree<symbol>>() ))
} else if (curr_reduction.length != 0) {
to_reduce.push(reduction(curr_reached, action_rule.lhs, act.rule_position,
new<tree<symbol>>()->construct(null_symbol()),
get_nullable_parts(action_rule),
new_label ))
}
}
@@ -239,7 +242,7 @@ obj parser (Object) {
var reduce_rule = gram.rules[action.state_or_rule]
if (!fully_reduces_to_null(reduce_rule)) {
to_reduce.push(reduction(shift.first, reduce_rule.lhs, action.rule_position,
new<tree<symbol>>()->construct(null_symbol()),
get_nullable_parts(reduce_rule),
new_label ))
}
})
@@ -262,12 +265,12 @@ obj parser (Object) {
if (!fully_reduces_to_null(action_rule)) {
println("does not reduce to null")
to_reduce.push(reduction(shift.first, action_rule.lhs, action.rule_position,
new<tree<symbol>>()->construct(null_symbol()),
get_nullable_parts(action_rule),
new_label ))
} else {
println("does reduce to null")
to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
new<tree<symbol>>()->construct(null_symbol()),
get_nullable_parts(action_rule),
null<tree<symbol>>() ))
}
}
@@ -297,6 +300,11 @@ obj parser (Object) {
// True when rule `r` is at position 0 and the first set of its right-hand
// side contains the null symbol.
fun fully_reduces_to_null(r: ref rule): bool {
    // Guard first: a rule not at position 0 never qualifies, and the first
    // set is not consulted for it.
    if (r.position != 0)
        return false
    return gram.first_vector(r.rhs).contains(null_symbol())
}
// Returns the nullable-parts tree for rule `r`: a freshly allocated tree
// node holding the null symbol when the rule fully reduces to null,
// otherwise a null tree pointer.
fun get_nullable_parts(r: ref rule): *tree<symbol> {
    if (!fully_reduces_to_null(r))
        return null<tree<symbol>>()
    return new<tree<symbol>>()->construct(null_symbol())
}
}
obj gss (Object) {
@@ -388,12 +396,12 @@ obj reduction (Object) {
var nullable_parts: *tree<symbol>
var label: *tree<symbol>
fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, label:*tree<symbol>): *reduction {
fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, labelIn:*tree<symbol>): *reduction {
from = f
sym.copy_construct(&s)
length = l
nullable_parts = n
label = label
label = labelIn
return this
}
fun copy_construct(old: *reduction) {
@@ -415,11 +423,18 @@ obj reduction (Object) {
fun syntax_tree_to_dot(root: *tree<symbol>): string {
var ret = string("digraph Kaken {\n")
var helper: fun(*tree<symbol>):void = fun(node: *tree<symbol>) {
ret += node->data.to_string() + ";;;;\n";
/*ret += node->data.to_string() + ";;;;\n";*/
node->children.for_each(fun(child: *tree<symbol>) {
if (!child)
return; // where on earth does the null come from
ret += node->data.to_string() + " -> " + child->data.to_string() + "\n";
var escaped_child = string("")
child->data.to_string().data.for_each(fun(c: char) {
if (c != '"')
escaped_child += c
else
escaped_child += "\\\""
})
ret += string("\"") + node->data.to_string() + "\" -> \"" + escaped_child + "\"\n";
helper(child)
})
}

View File

@@ -6,6 +6,11 @@ fun string(in:*char):string {
var out.construct(in):string
return out
}
// Builds a string from a single character: default-constructs a string
// and appends `in` to it with +=, then returns the result.
fun string(in:char):string {
var out.construct():string
out += in
return out
}
obj string (Object) {
var data: vector::vector<char>;

View File

@@ -9,9 +9,9 @@ import tree:*
fun main():int {
/*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/
var a = load_grammer(read_file(string("../krakenGrammer.kgm")))
/*var a = load_grammer(read_file(string("grammer.kgm")))*/
var a = load_grammer(read_file(string("grammer2.kgm")))
/*var a = load_grammer(read_file(string("grammer2.kgm")))*/
/*var a = load_grammer(read_file(string("grammer3.kgm")))*/
println(a.to_string())
var doFirstSet = fun() {
@@ -52,10 +52,14 @@ fun main():int {
println(a.to_string())
a.calculate_state_automaton()
var parse.construct(a): parser
var result = parse.parse_input(string("ad"), string("fun name"))
var result = parse.parse_input(string("fun main():int { return 0; }"), string("fun name"))
/*var result = parse.parse_input(string("ad"), string("fun name"))*/
/*var result = parse.parse_input(string("hibyed"), string("fun name"))*/
/*var result = parse.parse_input(string("hmmhmm"), string("fun name"))*/
/*var result = parse.parse_input(string("hid"), string("fun name"))*/
println("the tree")
println(syntax_tree_to_dot(result))
write_file(string("syntax_tree.dot"), syntax_tree_to_dot(result))
/*var parse.construct(): parser*/
return 0
}