Starting to really parse! Fixed the bad syntax trees (reduction's construct wasn't actually storing the label because the parameter and the member had the same name) and fixed the krakenGrammer and CGenerator to allow escaped quotes in regular strings. add_children still needs to be finished so the resulting trees don't explode with ambiguity, and some situations still cause vector access errors, which is odd. Also added a crazy-simple write_file to io, and test_grammer.krak now writes out a syntax_tree.dot.
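The label bug is the classic parameter-shadowing trap: when a constructor parameter has the same name as the member it should initialize, "label = label" just assigns the parameter to itself and the member is never set. A minimal C++ sketch of the same pattern (hypothetical Reduction/Node names, not the project's actual types):

#include <cassert>

struct Node { int value = 0; };

struct Reduction {
    Node* label = nullptr;

    // Buggy version: the parameter shadows the member, so
    // "label = label" assigns the parameter to itself and the
    // member silently stays null.
    // Reduction* construct(Node* label) { label = label; return this; }

    // Fixed version: renaming the parameter (like the labelIn rename
    // in the diff below) makes the assignment hit the member.
    Reduction* construct(Node* labelIn) {
        label = labelIn;
        return this;
    }
};

int main() {
    Node n;
    Reduction r;
    r.construct(&n);
    assert(r.label == &n); // fails with the shadowed-parameter version
    return 0;
}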
@@ -134,7 +134,7 @@ augmented_alpha_alphanumeric = alpha_alphanumeric augmented_alpha_alphanumeric |
 
 numeric = "(0|1|2|3|4|5|6|7|8|9)+" ;
 string = triple_quoted_string | "\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
-|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*\"" ;
+|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| |(\\\"))*\"" ;
 comment = cpp_comment | c_comment ;
 cpp_comment = "//(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )*
 " ;
@@ -796,11 +796,14 @@ CCodeTriple CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enc
 }
 case value:
 {
-// ok, we now check for it being a string and escape all returns if it is (so that multiline strings work)
-if (data.symbol.getName()[0] == '"') {
-std::string innerString = strSlice(data.symbol.getName(), 0, 3) == "\"\"\""
-? strSlice(data.symbol.getName(), 3, -4)
-: strSlice(data.symbol.getName(), 1, -2);
+// ok, we now check for it being a multiline string and escape all returns if it is (so that multiline strings work)
+//if (data.symbol.getName()[0] == '"') {
+if (data.symbol.getName()[0] == '"' && strSlice(data.symbol.getName(), 0, 3) == "\"\"\"") {
+//bool multiline_str = strSlice(data.symbol.getName(), 0, 3) == "\"\"\"";
+//std::string innerString = multiline_str
+//? strSlice(data.symbol.getName(), 3, -4)
+//: strSlice(data.symbol.getName(), 1, -2);
+std::string innerString = strSlice(data.symbol.getName(), 3, -4);
 std::string newStr;
 for (auto character: innerString)
 if (character == '\n')
@@ -98,4 +98,17 @@ fun read_file(path: string::string): string::string {
 }
 return toRet
 }
+fun write_file(path: string::string, data: string::string) {
+var char_path = path.toCharArray()
+defer delete(char_path)
+var char_data = data.toCharArray()
+defer delete(char_data)
+__if_comp__ __C__ {
+simple_passthrough(char_path,char_data::) """
+FILE *fp = fopen(char_path, "w");
+fprintf(fp, "%s", char_data);
+fclose(fp);
+"""
+}
+}
 
@@ -89,8 +89,6 @@ obj parser (Object) {
 
 var null_symbol_tree = null<tree<symbol>>()
 
-/*println("looking up")*/
-/*println(input[0].to_string())*/
 gram.parse_table.get(0, input[0]).for_each(fun(act: action) {
 println("for each action")
 act.print()
@@ -100,7 +98,6 @@ obj parser (Object) {
 to_reduce.push(reduction(v0, gram.rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree))
 })
 
-
 for (var i = 0; i < input.size; i++;) {
 if (gss.frontier_is_empty(i)) {
 print(i)
@@ -145,8 +142,13 @@ obj parser (Object) {
 })
 println()
 println("got path edges")
-if (curr_reduction.length != 0)
+println("there are this many:")
+println(path_edges.size)
+if (curr_reduction.length != 0) {
 path_edges.addEnd(curr_reduction.label)
+println("also adding the one from the reduction")
+println(curr_reduction.label->data.to_string())
+}
 var curr_reached = path.last()
 print("checking shift for state ")
 print(curr_reached->data)
@@ -155,7 +157,8 @@ obj parser (Object) {
 // if this is the Goal = a type reduction, then skip the actual reduction part.
 // the shift lookup will fail, and likely other things, and this is our accept
 // criteria anyway
-if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)
+/*if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)*/
+if (curr_reduction.sym == gram.rules[0].lhs)
 return;
 var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule
 println("got shift to")
@@ -187,7 +190,7 @@ obj parser (Object) {
 if (act.act == reduce && !fully_reduces_to_null(reduce_rule))
 to_reduce.push(reduction(curr_reached, reduce_rule.lhs,
 act.rule_position,
-new<tree<symbol>>()->construct(null_symbol()),
+get_nullable_parts(reduce_rule),
 new_label))
 })
 }
@@ -203,12 +206,12 @@ obj parser (Object) {
 var action_rule = gram.rules[act.state_or_rule]
 if (fully_reduces_to_null(action_rule)) {
 to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
-new<tree<symbol>>()->construct(null_symbol()),
+get_nullable_parts(action_rule),
 null<tree<symbol>>() ))
 } else if (curr_reduction.length != 0) {
 to_reduce.push(reduction(curr_reached, action_rule.lhs, act.rule_position,
-new<tree<symbol>>()->construct(null_symbol()),
+get_nullable_parts(action_rule),
 new_label ))
 }
 }
 })
@@ -239,7 +242,7 @@ obj parser (Object) {
 var reduce_rule = gram.rules[action.state_or_rule]
 if (!fully_reduces_to_null(reduce_rule)) {
 to_reduce.push(reduction(shift.first, reduce_rule.lhs, action.rule_position,
-new<tree<symbol>>()->construct(null_symbol()),
+get_nullable_parts(reduce_rule),
 new_label ))
 }
 })
@@ -262,12 +265,12 @@ obj parser (Object) {
 if (!fully_reduces_to_null(action_rule)) {
 println("does not reduce to null")
 to_reduce.push(reduction(shift.first, action_rule.lhs, action.rule_position,
-new<tree<symbol>>()->construct(null_symbol()),
+get_nullable_parts(action_rule),
 new_label ))
 } else {
 println("does reduce to null")
 to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
-new<tree<symbol>>()->construct(null_symbol()),
+get_nullable_parts(action_rule),
 null<tree<symbol>>() ))
 }
 }
@@ -297,6 +300,11 @@ obj parser (Object) {
 fun fully_reduces_to_null(r: ref rule): bool {
 return r.position == 0 && gram.first_vector(r.rhs).contains(null_symbol())
 }
+fun get_nullable_parts(r: ref rule): *tree<symbol> {
+if (fully_reduces_to_null(r))
+return new<tree<symbol>>()->construct(null_symbol())
+return null<tree<symbol>>()
+}
 }
 
 obj gss (Object) {
@@ -388,12 +396,12 @@ obj reduction (Object) {
 var nullable_parts: *tree<symbol>
 var label: *tree<symbol>
 
-fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, label:*tree<symbol>): *reduction {
+fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, labelIn:*tree<symbol>): *reduction {
 from = f
 sym.copy_construct(&s)
 length = l
 nullable_parts = n
-label = label
+label = labelIn
 return this
 }
 fun copy_construct(old: *reduction) {
@@ -415,11 +423,18 @@ obj reduction (Object) {
 fun syntax_tree_to_dot(root: *tree<symbol>): string {
 var ret = string("digraph Kaken {\n")
 var helper: fun(*tree<symbol>):void = fun(node: *tree<symbol>) {
-ret += node->data.to_string() + ";;;;\n";
+/*ret += node->data.to_string() + ";;;;\n";*/
 node->children.for_each(fun(child: *tree<symbol>) {
 if (!child)
 return; // where on earth does the null come from
-ret += node->data.to_string() + " -> " + child->data.to_string() + "\n";
+var escaped_child = string("")
+child->data.to_string().data.for_each(fun(c: char) {
+if (c != '"')
+escaped_child += c
+else
+escaped_child += "\\\""
+})
+ret += string("\"") + node->data.to_string() + "\" -> \"" + escaped_child + "\"\n";
 helper(child)
 })
 }
@@ -6,6 +6,11 @@ fun string(in:*char):string {
 var out.construct(in):string
 return out
 }
+fun string(in:char):string {
+var out.construct():string
+out += in
+return out
+}
 
 obj string (Object) {
 var data: vector::vector<char>;
@@ -9,9 +9,9 @@ import tree:*
 
 fun main():int {
 
-/*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/
+var a = load_grammer(read_file(string("../krakenGrammer.kgm")))
 /*var a = load_grammer(read_file(string("grammer.kgm")))*/
-var a = load_grammer(read_file(string("grammer2.kgm")))
+/*var a = load_grammer(read_file(string("grammer2.kgm")))*/
 /*var a = load_grammer(read_file(string("grammer3.kgm")))*/
 println(a.to_string())
 var doFirstSet = fun() {
@@ -52,10 +52,14 @@ fun main():int {
 println(a.to_string())
 a.calculate_state_automaton()
 var parse.construct(a): parser
-var result = parse.parse_input(string("ad"), string("fun name"))
+var result = parse.parse_input(string("fun main():int { return 0; }"), string("fun name"))
+/*var result = parse.parse_input(string("ad"), string("fun name"))*/
+/*var result = parse.parse_input(string("hibyed"), string("fun name"))*/
+/*var result = parse.parse_input(string("hmmhmm"), string("fun name"))*/
 /*var result = parse.parse_input(string("hid"), string("fun name"))*/
 println("the tree")
 println(syntax_tree_to_dot(result))
+write_file(string("syntax_tree.dot"), syntax_tree_to_dot(result))
 /*var parse.construct(): parser*/
 return 0
 }