more work
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -10,4 +10,5 @@ stats
|
||||
*krakout*
|
||||
kraklist.txt
|
||||
.*.un~
|
||||
RNGLR.pdf
|
||||
papers
|
||||
callgrind*
|
||||
|
||||
@@ -105,21 +105,21 @@ obj grammer (Object) {
|
||||
var non_terminals: set::set<symbol::symbol>
|
||||
var terminals: vector::vector<util::pair<symbol::symbol, regex::regex>>
|
||||
var first_set_map: map::map<symbol::symbol, set::set<symbol::symbol>>
|
||||
var state_automata: state
|
||||
var parse_table: table
|
||||
|
||||
fun construct(): *grammer {
|
||||
rules.construct()
|
||||
non_terminals.construct()
|
||||
terminals.construct()
|
||||
first_set_map.construct()
|
||||
state_automata.construct()
|
||||
parse_table.construct()
|
||||
}
|
||||
fun copy_construct(old: *grammer) {
|
||||
rules.copy_construct(&old->rules)
|
||||
non_terminals.copy_construct(&old->non_terminals)
|
||||
terminals.copy_construct(&old->terminals)
|
||||
first_set_map.copy_construct(&old->first_set_map)
|
||||
state_automata.copy_construct(&old->state_automata)
|
||||
parse_table.copy_construct(&old->parse_table)
|
||||
}
|
||||
fun operator=(other: grammer) {
|
||||
destruct()
|
||||
@@ -130,7 +130,7 @@ obj grammer (Object) {
|
||||
non_terminals.destruct()
|
||||
terminals.destruct()
|
||||
first_set_map.destruct()
|
||||
state_automata.destruct()
|
||||
parse_table.destruct()
|
||||
}
|
||||
|
||||
fun calculate_first_set() {
|
||||
@@ -154,7 +154,7 @@ obj grammer (Object) {
|
||||
})
|
||||
}
|
||||
}
|
||||
fun first_vector(rhs: vector::vector<symbol::symbol>): set::set<symbol::symbol> {
|
||||
fun first_vector(rhs: ref vector::vector<symbol::symbol>): set::set<symbol::symbol> {
|
||||
var toRet = set::set<symbol::symbol>()
|
||||
if (rhs.size) {
|
||||
for (var i = 0; i < rhs.size; i++;) {
|
||||
@@ -176,11 +176,8 @@ obj grammer (Object) {
|
||||
}
|
||||
|
||||
fun calculate_state_automaton() {
|
||||
state_automata.items = vector::vector(rules[0].with_lookahead(set::set(symbol::eof_symbol())))
|
||||
io::println("pre first closure")
|
||||
state_automata = closure(state_automata)
|
||||
io::println("post first closure")
|
||||
var states = vector::vector(state_automata) // vector instead of set because we need to iterate by index
|
||||
var first_state = closure(state(vector::vector(rules[0].with_lookahead(set::set(symbol::eof_symbol())))))
|
||||
var states = vector::vector(first_state) // vector instead of set because we need to iterate by index
|
||||
var newItems = stack::stack(0) // 0 is the index of the first and only item in states
|
||||
var count = 0
|
||||
while (newItems.size()) {
|
||||
@@ -194,12 +191,25 @@ obj grammer (Object) {
|
||||
states[I].items.for_each(fun(r: ref rule) {
|
||||
if (!r.at_end())
|
||||
possGoto.add(r.next())
|
||||
// if r is at end or the rest reduces to null, add a reduce for each lookahead symbol
|
||||
if ( r.at_end() || first_vector(r.after_next()).contains(symbol::null_symbol()) ) {
|
||||
var rule_no = rules.find(r.plain())
|
||||
r.lookahead.for_each(fun(sym: ref symbol::symbol) {
|
||||
parse_table.add_reduce(I, sym, rule_no)
|
||||
})
|
||||
}
|
||||
})
|
||||
possGoto.for_each(fun(X: ref symbol::symbol) {
|
||||
var goneState = goto(states[I], X)
|
||||
if (goneState.items.size && !states.contains(goneState)) {
|
||||
newItems.push(states.size)
|
||||
states.add(goneState)
|
||||
if (goneState.items.size) {
|
||||
var already_state = states.find(goneState)
|
||||
if (already_state == -1) {
|
||||
parse_table.add_push(I, X, states.size)
|
||||
newItems.push(states.size)
|
||||
states.add(goneState)
|
||||
} else {
|
||||
parse_table.add_push(I, X, already_state)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -212,6 +222,9 @@ obj grammer (Object) {
|
||||
})
|
||||
io::println(" there were : states")
|
||||
io::println(states.size)
|
||||
io::println(" there were : table")
|
||||
/*io::println(parse_table.to_string())*/
|
||||
parse_table.print_string()
|
||||
}
|
||||
|
||||
fun closure(initial: ref state): state {
|
||||
@@ -297,8 +310,6 @@ obj grammer (Object) {
|
||||
non_terminals.for_each( fun(i : symbol::symbol) { result += string::string("\n\t") + i.to_string(); } )
|
||||
result += "\nterminals:"
|
||||
terminals.for_each( fun(i : util::pair<symbol::symbol, regex::regex>) { result += string::string("\n\t") + i.first.to_string() + ": " + i.second.regexString; } )
|
||||
result += "\nstate:"
|
||||
result += state_automata.to_string()
|
||||
return result
|
||||
}
|
||||
}
|
||||
@@ -355,6 +366,9 @@ obj rule (Object) {
|
||||
// True once the rule's position is no longer a valid index into rhs,
// i.e. position has reached or passed rhs.size.
fun at_end(): bool {
    return !(position < rhs.size)
}
|
||||
// Returns a fresh rule built only from this rule's lhs and rhs.
fun plain(): rule {
    var bare = rule(lhs, rhs)
    return bare
}
|
||||
fun with_lookahead(newLookahead: set::set<symbol::symbol>): rule {
|
||||
var toRet = rule(lhs, rhs)
|
||||
toRet.position = position
|
||||
@@ -418,3 +432,98 @@ obj state (Object) {
|
||||
}
|
||||
}
|
||||
|
||||
// Integer codes stored in action.act to say what kind of parse action it is.
// These are plain ints standing in for an enum (REALLY need those enums).
|
||||
var push = 0
|
||||
var reduce = 1
|
||||
// note that these two are not actually currently used
|
||||
// accept is the reduce of the goal rule and reject is the
|
||||
// absence of actions
|
||||
var accept = 2
|
||||
var reject = 3
|
||||
// Builds an action value from an action-kind code and the state number
// (or rule number) that goes with it.
fun action(act: int, state_or_rule: int): action {
    var made: action
    made.state_or_rule = state_or_rule
    made.act = act
    return made
}
|
||||
// One entry of the parse table: an action kind plus its integer operand.
obj action {
    // one of the integer action-kind codes (really need those enums)
    var act: int
    // target state or rule number, depending on act (sigh)
    var state_or_rule: int

    // Two actions are equal only when both the kind and the operand match.
    fun operator==(other: action): bool {
        if (act != other.act)
            return false
        return state_or_rule == other.state_or_rule
    }
}
|
||||
|
||||
// Parse table: a 2 dimensional table made of a vector and a map that maps
// from stateno & symbol to a vector of parse actions. Several actions may be
// stored for the same (state, symbol) pair; nothing here resolves conflicts.
obj table (Object) {
    var items: vector::vector<map::map<symbol::symbol, vector::vector<action>>>

    // Initializes the table in place. Returns `this`, as the declared
    // *table return type promises.
    fun construct(): *table {
        items.construct()
        return this
    }
    // Initializes this table as a copy of `other`.
    fun copy_construct(other: *table) {
        items.copy_construct(&other->items)
    }
    // Assignment: drop the current contents, then copy from `other`.
    fun operator=(other: table) {
        destruct()
        copy_construct(&other)
    }
    fun destruct() {
        items.destruct()
    }
    // Grows `items` with empty maps until index `include_state` is valid.
    fun expand_to(include_state: int) {
        while (include_state >= items.size)
            items.addEnd(map::map<symbol::symbol, vector::vector<action>>())
    }
    // Shared helper for the add_* methods: appends `new_action` to the
    // action list for (from_state, on_symbol), creating the row and/or the
    // list when they do not exist yet.
    fun add_action(from_state: int, on_symbol: ref symbol::symbol, new_action: action) {
        expand_to(from_state)
        if (items[from_state].contains_key(on_symbol))
            items[from_state][on_symbol].addEnd(new_action)
        else
            items[from_state].set(on_symbol, vector::vector(new_action))
    }
    // Records "on on_symbol in from_state, push to_state".
    fun add_push(from_state: int, on_symbol: ref symbol::symbol, to_state: int) {
        add_action(from_state, on_symbol, action(push, to_state))
    }
    // Records "on on_symbol in from_state, reduce by rule by_rule_no".
    fun add_reduce(from_state: int, on_symbol: ref symbol::symbol, by_rule_no: int) {
        add_action(from_state, on_symbol, action(reduce, by_rule_no))
    }
    // Records an accept action (operand 0) for on_symbol in from_state.
    fun add_accept(from_state: int, on_symbol: ref symbol::symbol) {
        add_action(from_state, on_symbol, action(accept, 0))
    }
    // Returns the actions recorded for (state, sym). A state outside the
    // table or a symbol with no entry yields an empty vector instead of
    // indexing past the table / looking up a missing key.
    fun get(state: int, sym: symbol::symbol): vector::vector<action> {
        if (state < 0 || state >= items.size || !items[state].contains_key(sym)) {
            var no_actions.construct(): vector::vector<action>
            return no_actions
        }
        return items[state][sym]
    }
    // Prints the whole table to stdout, one line per stored action.
    // All output goes through io directly; the returned string is empty and
    // exists only so the declared string::string return type is honoured.
    fun print_string(): string::string {
        /*return string::string("woo a table of size: ") + items.size*/
        io::print("woo a table of size: ")
        io::println(items.size)
        for (var i = 0; i < items.size; i++;) {
            io::print("for state: ")
            io::println(i)
            items[i].for_each(fun(sym: symbol::symbol, actions: vector::vector<action>) {
                actions.for_each(fun(action: action) {
                    io::print("\ton symbol: ")
                    io::print(sym.to_string())
                    io::print(" do action: ")
                    if (action.act == push)
                        io::print("push ")
                    else if (action.act == reduce)
                        io::print("reduce ")
                    else if (action.act == accept)
                        io::print("accept ")
                    else if (action.act == reject)
                        io::print("reject ")
                    io::print(action.state_or_rule)
                    io::println()
                })
            })
        }
        return string::string("")
    }
}
|
||||
|
||||
|
||||
@@ -58,6 +58,10 @@ obj map<T,U> (Object) {
|
||||
keys.remove(idx)
|
||||
values.remove(idx)
|
||||
}
|
||||
// Empties the map by clearing both of its parallel containers.
fun clear() {
    values.clear()
    keys.clear()
}
|
||||
// Index operator: forwards to get(key) and returns its result by reference.
fun operator[](key: T): ref U {
|
||||
return get(key)
|
||||
}
|
||||
|
||||
107
stdlib/parser.krak
Normal file
107
stdlib/parser.krak
Normal file
@@ -0,0 +1,107 @@
|
||||
import grammer
|
||||
import symbol
|
||||
import tree
|
||||
import vector
|
||||
import stack
|
||||
import map
|
||||
import util
|
||||
import string
|
||||
import io
|
||||
import mem
|
||||
|
||||
// Parser state built around a copy of a grammer. Only the empty-input check
// is implemented so far; the remaining members are set up, copied, cleared
// and destroyed but not otherwise used by the code in this object.
obj parser (Object) {
    var input: vector::vector<symbol::symbol>
    var gram: grammer::grammer
    // gss
    var to_reduce: stack::stack<reduction>
    var to_shift: stack::stack< util::pair<*tree::tree<int>, int> >
    var SPPFStepNodes: vector::vector< util::pair<*tree::tree<symbol::symbol>, int> >
    var packed_map: map::map<*tree::tree<symbol::symbol>, bool>

    // Initializes every member; the grammer is copied from grammerIn.
    fun construct(grammerIn: grammer::grammer): *parser {
        input.construct()
        gram.copy_construct(&grammerIn)
        to_reduce.construct()
        to_shift.construct()
        SPPFStepNodes.construct()
        packed_map.construct()
        return this
    }
    // Member-wise copy from another parser.
    fun copy_construct(old: *parser) {
        input.copy_construct(&old->input)
        gram.copy_construct(&old->gram)
        to_reduce.copy_construct(&old->to_reduce)
        to_shift.copy_construct(&old->to_shift)
        SPPFStepNodes.copy_construct(&old->SPPFStepNodes)
        packed_map.copy_construct(&old->packed_map)
    }
    // Assignment: tear down the current state, then copy from old.
    fun operator=(old: ref parser) {
        destruct()
        copy_construct(&old)
    }
    // Member-wise teardown.
    fun destruct() {
        packed_map.destruct()
        SPPFStepNodes.destruct()
        to_shift.destruct()
        to_reduce.destruct()
        gram.destruct()
        input.destruct()
    }

    // Resets the per-parse state, then checks whether empty input is accepted.
    // Both outcomes currently return a newly allocated tree holding the null
    // symbol; they differ only in what is printed.
    fun parse_input(inputStr: string::string, name: string::string): *tree::tree<symbol::symbol> {
        input.clear()
        // gss.clear
        to_reduce.clear()
        to_shift.clear()
        SPPFStepNodes.clear()
        packed_map.clear()

        // if the zero state contains any reductions for state 0 and eof, then
        // it must be reducing to the goal state
        io::println("checking the bidness")
        if (inputStr == "") {
            // the table is only consulted when the input really is empty
            var goal_reduce = grammer::action(grammer::reduce, 0)
            if (gram.parse_table.get(0, symbol::eof_symbol()).contains(goal_reduce)) {
                io::println("Accept on no input for ")
                io::println(name)
                return mem::new<tree::tree<symbol::symbol>>()->construct(symbol::null_symbol())
            }
        }
        io::println("failed for ")
        io::println(name)
        return mem::new<tree::tree<symbol::symbol>>()->construct(symbol::null_symbol())
    }
}
|
||||
|
||||
// Free-function constructor: builds a reduction by value from its five parts
// by forwarding them to reduction.construct.
fun reduction(f: *tree::tree<int>, s: symbol::symbol, l: int, n: *tree::tree<symbol::symbol>, label:*tree::tree<symbol::symbol>): reduction {
    var built.construct(f, s, l, n, label): reduction
    return built
}
|
||||
|
||||
// A pending reduction: where it starts (from), the symbol and length
// involved, and two tree pointers (nullable_parts, label). The pointers are
// stored and copied as-is; this object never allocates or frees them.
obj reduction (Object) {
    var from: *tree::tree<int>
    var sym: symbol::symbol
    var length: int
    var nullable_parts: *tree::tree<symbol::symbol>
    var label: *tree::tree<symbol::symbol>

    // Initializes every member from the arguments and returns `this`.
    fun construct(f: *tree::tree<int>, s: symbol::symbol, l: int, n: *tree::tree<symbol::symbol>, label:*tree::tree<symbol::symbol>): *reduction {
        from = f
        sym.copy_construct(&s)
        length = l
        nullable_parts = n
        // The parameter shares the member's name, so a bare `label = label`
        // assigns the parameter to itself and leaves the member unset.
        // Go through `this` to reach the member.
        this->label = label
        return this
    }
    // Member-wise copy; pointers are copied shallowly.
    fun copy_construct(old: *reduction) {
        from = old->from
        sym.copy_construct(&old->sym)
        length = old->length
        nullable_parts = old->nullable_parts
        label = old->label
    }
    // Assignment: tear down the current value, then copy from other.
    fun operator=(other: reduction):void {
        destruct()
        copy_construct(&other)
    }
    // Only sym needs teardown; the other members are ints and raw pointers.
    fun destruct() {
        sym.destruct()
    }
}
|
||||
@@ -56,12 +56,12 @@ obj set<T> (Object) {
|
||||
// In-place union: hands the whole of `items` to add(items).
fun operator+=(items: set<T>) {
|
||||
add(items)
|
||||
}
|
||||
fun add(item: T) {
|
||||
fun add(item: ref T) {
|
||||
if (!contains(item))
|
||||
data.add(item)
|
||||
}
|
||||
fun add(items: set<T>) {
|
||||
items.for_each( fun(item: T) add(item); )
|
||||
fun add(items: ref set<T>) {
|
||||
items.for_each( fun(item: ref T) add(item); )
|
||||
}
|
||||
fun remove(item: T) {
|
||||
var idx = data.find(item)
|
||||
|
||||
@@ -34,6 +34,9 @@ obj stack<T> (Object) {
|
||||
data.remove(data.size-1)
|
||||
return toRet
|
||||
}
|
||||
// Empties the stack by clearing the underlying `data` container.
fun clear() {
|
||||
data.clear()
|
||||
}
|
||||
// Returns the element at the last index of `data` without removing it.
// No emptiness check is made here.
fun top(): T {
    var last_index = data.size-1
    return data[last_index]
}
|
||||
|
||||
28
stdlib/tree.krak
Normal file
28
stdlib/tree.krak
Normal file
@@ -0,0 +1,28 @@
|
||||
import mem
|
||||
import vector
|
||||
|
||||
// Generic tree node: one payload value plus a vector of pointers to children.
obj tree<T> (Object) {
    var data: T
    var children: vector::vector<*tree<T>>

    // Sets up an empty child list, copies dataIn into the payload and
    // returns `this`.
    fun construct(dataIn: T): *tree<T> {
        children.construct()
        mem::maybe_copy_construct(&data, &dataIn)
        return this
    }
    // Some of these don't really make much sense considering this tree is all about
    // heap allocated pointers. Best to have it for safety, though.
    // Copies the payload and the vector of child pointers from old.
    fun copy_construct(old: *tree<T>) {
        children.copy_construct(&old->children)
        mem::maybe_copy_construct(&data, &old->data)
    }
    // ditto: assignment is destruct followed by copy_construct.
    fun operator=(other: tree<T>):void {
        destruct()
        copy_construct(&other)
    }
    // Tears down the payload and the child vector.
    fun destruct() {
        children.destruct()
        mem::maybe_destruct(&data)
    }
}
|
||||
|
||||
@@ -173,6 +173,12 @@ obj vector<T> (Object) {
|
||||
size--
|
||||
}
|
||||
|
||||
// Runs maybe_destruct on each of the first `size` elements, in index order,
// and then resets size to 0.
fun clear() {
    var idx = 0
    while (idx < size) {
        maybe_destruct(&data[idx])
        idx++
    }
    size = 0
}
|
||||
|
||||
fun for_each(func: fun(ref T):void):void {
|
||||
for (var i = 0; i < size; i++;)
|
||||
func(data[i])
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
import io:*
|
||||
import grammer:*
|
||||
import parser:*
|
||||
import lexer:*
|
||||
import string:*
|
||||
import util:*
|
||||
import symbol:*
|
||||
import tree:*
|
||||
|
||||
fun main():int {
|
||||
|
||||
var a = load_grammer(read_file(string("../krakenGrammer.kgm")))
|
||||
/*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/
|
||||
/*var a = load_grammer(read_file(string("grammer.kgm")))*/
|
||||
//var a = load_grammer(read_file(string("grammer2.kgm")))
|
||||
var a = load_grammer(read_file(string("grammer2.kgm")))
|
||||
println(a.to_string())
|
||||
var doFirstSet = fun() {
|
||||
a.calculate_first_set()
|
||||
@@ -48,6 +50,9 @@ fun main():int {
|
||||
/*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/
|
||||
println(a.to_string())
|
||||
a.calculate_state_automaton()
|
||||
var parse.construct(a): parser
|
||||
var result = parse.parse_input(string(""), string("fun name"))
|
||||
/*var parse.construct(): parser*/
|
||||
return 0
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user