import string
import vector
import set
import map
import symbol
import regex
import io
import util

// NOTE(review): the reviewed copy of this file had its line breaks collapsed and
// its template arguments missing (e.g. "vector::vector>" and "map::map>").
// The arguments below were reconstructed from how each value is used inside this
// file (what gets add()'ed, indexed, or assigned) - confirm against the
// vector/set/map/util declarations before merging.

// Splits grammar text into whitespace-separated words.
//  - a '#' starts a comment that runs through the end of the line; the comment
//    text is echoed with io::print and produces no word
//  - a double-quoted run is kept inside a single word even if it contains
//    spaces; a '"' preceded by an odd number of backslashes does not end it
//  - words are separated by one or more spaces or by a newline
// Returns the words in the order they appear in gram_str.
fun split_into_words(gram_str: string::string): vector::vector<string::string> {
    var out.construct(): vector::vector<string::string>
    var len = gram_str.length()
    var begin = 0
    for (var i = 0; i < len; i++;) {
        // A loop rather than a single if: the character right after a comment
        // line may itself be '#', and the for-increment must not step over it.
        while (i < len && gram_str[i] == '#') {
            while (i < len && gram_str[i] != '\n')
                i++
            // step past the newline, unless the comment ran to the end of input
            if (i < len)
                i++
            io::print("comment: "); io::print(gram_str.slice(begin, i))
            begin = i
        }
        // a trailing comment can consume the rest of the input
        if (i >= len)
            break
        if (gram_str[i] == '"') {
            i++
            while (i < len && gram_str[i] != '"') {
                i++
                // if we hit a " we check to see if an odd number of backslashes precede it
                // (meaning that the " is escaped), and if so, we move on. Otherwise, we found
                // the end of the quoted string
                if (i < len && gram_str[i] == '"') {
                    var escaped = 0
                    // cannot walk past the opening quote, which is not a backslash
                    while (gram_str[i-(1+escaped)] == '\\')
                        escaped++
                    if (escaped % 2)
                        i++
                }
            }
            // i now rests on the closing quote (or == len if it was never closed);
            // the word itself is emitted when its trailing separator is reached
        } else if (gram_str[i] == ' ') {
            // skip the add when nothing precedes the space (indented line),
            // matching the guard the newline case uses
            if (i != begin)
                out.add(gram_str.slice(begin, i))
            // allow multiple spaces between words
            while (i < len && gram_str[i] == ' ')
                i++
            begin = i
            // the for-increment will bring i back to begin
            i--
        } else if (gram_str[i] == '\n') {
            if (i != begin)
                out.add(gram_str.slice(begin, i))
            begin = i + 1
        }
    }
    // Input that does not end in a space/newline still has a pending word.
    // -1 as the end index is the same "through the end" form load_grammer
    // passes to slice.
    if (begin < len)
        out.add(gram_str.slice(begin, -1))
    return out
}

// Builds a grammer from grammar text. The words produced by split_into_words
// are interpreted as:
//   "="  ignored
//   "|"  closes the current alternative and starts another for the same left side
//   ";"  closes the current alternative; the next word is a new left side
//   anything else is the left side (first word of a rule) or a right-side symbol.
// A right-side word starting with '"' is a terminal, either plain ("hia*") or
// decorated ("hia*":hi_with_as); the text between the quotes becomes its regex.
// Each word is also echoed through io::print / io::println.
fun load_grammer(gram_str: string::string): grammer {
    var gram.construct(): grammer
    var leftSide = symbol::symbol("", false)
    var doLeftSide = true
    var rightSide = vector::vector<symbol::symbol>()
    split_into_words(gram_str).for_each(fun(word: string::string) {
        io::print("word: "); io::println(word)
        if (word == "=") {
            // do nothing
        } else if (word == "|") {
            gram.rules.add(rule(leftSide, rightSide))
            rightSide = vector::vector<symbol::symbol>()
        } else if (word == ";") {
            gram.rules.add(rule(leftSide, rightSide))
            rightSide = vector::vector<symbol::symbol>()
            doLeftSide = true
        } else {
            if (doLeftSide) {
                leftSide = symbol::symbol(word, false)
                gram.non_terminals.add(leftSide)
            } else {
                if (word[0] == '"') {
                    // ok, we support both plain terminals "hia*"
                    // and decorated terminals "hia*":hi_with_as
                    // so first check to find the ending " and see if it's
                    // the end of the string
                    var last_quote = word.length()-1
                    while (word[last_quote] != '"')
                        last_quote--
                    if (last_quote != word.length()-1) {
                        // decorated: the symbol is named by the text after the ':'
                        var term = symbol::symbol(word.slice(last_quote+2, -1), true)
                        rightSide.add(term)
                        gram.terminals.add(util::make_pair(term, regex::regex(word.slice(1,last_quote))))
                    } else {
                        // plain: the symbol is named by the whole quoted word
                        var term = symbol::symbol(word, true)
                        rightSide.add(term)
                        gram.terminals.add(util::make_pair(term, regex::regex(word.slice(1,last_quote))))
                    }
                } else {
                    var non_term = symbol::symbol(word, false)
                    rightSide.add(non_term)
                    gram.non_terminals.add(non_term)
                }
            }
            doLeftSide = false
        }
    })
    return gram
}

// A grammar: its rules, the symbols seen while loading it, and the first sets
// filled in by calculate_first_set().
obj grammer (Object) {
    var rules: vector::vector<rule>
    var non_terminals: set::set<symbol::symbol>
    // each terminal symbol paired with the regex built from its quoted text
    var terminals: vector::vector<util::pair<symbol::symbol, regex::regex>>
    // symbol -> first set; only populated by calculate_first_set()
    var first_set_map: map::map<symbol::symbol, set::set<symbol::symbol>>

    fun construct(): *grammer {
        rules.construct()
        non_terminals.construct()
        terminals.construct()
        first_set_map.construct()
        // the declared return type is *grammer, so actually return it
        return this
    }
    fun copy_construct(old: *grammer) {
        rules.copy_construct(&old->rules)
        non_terminals.copy_construct(&old->non_terminals)
        terminals.copy_construct(&old->terminals)
        first_set_map.copy_construct(&old->first_set_map)
    }
    fun operator=(other: grammer) {
        destruct()
        copy_construct(&other)
    }
    fun destruct() {
        rules.destruct()
        non_terminals.destruct()
        terminals.destruct()
        first_set_map.destruct()
    }

    // Fills first_set_map for every terminal and non-terminal by repeatedly
    // merging each rule's right-side first set into its left side's entry
    // until a full pass over the rules adds nothing new.
    fun calculate_first_set() {
        // the first set of a terminal is itself
        terminals.for_each( fun(terminal: util::pair<symbol::symbol, regex::regex>)
            first_set_map[terminal.first] = set::set(terminal.first)
        )
        // start out the non-terminals as empty sets
        non_terminals.for_each( fun(non_terminal: symbol::symbol)
            first_set_map[non_terminal] = set::set<symbol::symbol>()
        )
        // First set of a sequence of symbols, using the current first_set_map.
        // An empty sequence yields just the null symbol.
        var first_helper = fun(rhs: vector::vector<symbol::symbol>): set::set<symbol::symbol> {
            var toRet = set::set<symbol::symbol>()
            if (rhs.size) {
                for (var i = 0; i < rhs.size; i++;) {
                    var lookahead = first_set_map[rhs[i]]
                    if (lookahead.contains(symbol::null_symbol())) {
                        // remove the null if this is not the last in the rule
                        if (i != rhs.size-1)
                            lookahead.remove(symbol::null_symbol())
                        toRet.add(lookahead)
                    } else {
                        // this symbol cannot be null, so later symbols add nothing
                        toRet.add(lookahead)
                        break
                    }
                }
            } else {
                toRet.add(symbol::null_symbol())
            }
            return toRet
        }
        var changed = true
        while (changed) {
            changed = false
            rules.for_each( fun(r: rule) {
                var rule_lookahead = first_helper(r.rhs)
                // once a change has been seen this pass, skip further contains()
                // checks, but still merge every rule's result below
                if (!changed)
                    changed = !first_set_map[r.lhs].contains(rule_lookahead)
                first_set_map[r.lhs].add(rule_lookahead)
            })
        }
    }

    // Multi-line listing of the rules, non-terminals and terminals (each
    // terminal shown with its regexString).
    fun to_string(): string::string {
        var result = string::string("grammer rules:")
        rules.for_each( fun(i : rule) { result += string::string("\n\t") + i.to_string(); } )
        result += "\nnon_terminals:"
        non_terminals.for_each( fun(i : symbol::symbol) { result += string::string("\n\t") + i.to_string(); } )
        result += "\nterminals:"
        terminals.for_each( fun(i : util::pair<symbol::symbol, regex::regex>) { result += string::string("\n\t") + i.first.to_string() + ": " + i.second.regexString; } )
        return result
    }
}

// Convenience constructor: a rule with the given sides; position and lookahead
// keep the values rule.construct() gives them.
fun rule(lhs: symbol::symbol, rhs: vector::vector<symbol::symbol>): rule {
    var toRet.construct(): rule
    toRet.lhs = lhs
    toRet.rhs = rhs
    return toRet
}

// One production (lhs -> rhs) plus a position and a lookahead set.
// NOTE(review): position and lookahead are only initialised and copied in this
// file; presumably position is an index into rhs - confirm against the code
// that consumes rule.
obj rule (Object) {
    var lhs: symbol::symbol
    var rhs: vector::vector<symbol::symbol>
    var position: int
    var lookahead: set::set<symbol::symbol>

    fun construct(): *rule {
        lhs.construct()
        rhs.construct()
        position = 0
        lookahead.construct()
        // the declared return type is *rule, so actually return it
        return this
    }
    fun copy_construct(other: *rule) {
        lhs.copy_construct(&other->lhs)
        rhs.copy_construct(&other->rhs)
        position = other->position
        lookahead.copy_construct(&other->lookahead)
    }
    fun operator=(other: rule) {
        destruct()
        copy_construct(&other)
    }
    fun destruct() {
        // position is a plain int and needs no teardown
        lhs.destruct()
        rhs.destruct()
        lookahead.destruct()
    }

    // "<lhs name> -> sym, sym, " (each right-side symbol is followed by ", ")
    fun to_string(): string::string {
        var result = lhs.name + " -> "
        rhs.for_each( fun(i : symbol::symbol) { result += i.to_string() + ", "; } )
        return result
    }
}

// Two rule lists, kernel and rest.
// NOTE(review): nothing in this file populates or reads them - presumably the
// kernel items and the remaining items of a parser state; confirm with the
// code that uses state.
obj state (Object) {
    var kernel: vector::vector<rule>
    var rest: vector::vector<rule>

    fun construct(): *state {
        kernel.construct()
        rest.construct()
        // the declared return type is *state, so actually return it
        return this
    }
    fun copy_construct(other: *state) {
        kernel.copy_construct(&other->kernel)
        rest.copy_construct(&other->rest)
    }
    fun operator=(other: state) {
        destruct()
        copy_construct(&other)
    }
    fun destruct() {
        kernel.destruct()
        rest.destruct()
    }
}