import string
import vector
import set
import symbol
import regex
import io

// NOTE(review): the container types in this file (vector::vector, set::set)
// carry no element type arguments; they are left exactly as found - confirm
// against the vector/set modules whether type arguments are required.

// Splits the text of a grammar description into words.
//
// Words are separated by runs of spaces or by newlines. A double-quoted
// string (with backslash-escaped quotes) is kept together as part of one
// word, even if it contains spaces or '#'. A '#' outside of a quoted string
// starts a comment that runs to the end of the line; the skipped text is
// echoed through io::print.
//
// gram_str: the grammar text to tokenize.
// returns:  the words, in order of appearance. Empty words are never emitted.
fun split_into_words(gram_str: string::string): vector::vector {
    var out.construct(): vector::vector
    var len = gram_str.length()
    var begin = 0
    for (var i = 0; i < len; i++;) {
        // A loop rather than a single test, so that several comment lines in
        // a row are all skipped instead of the second one being tokenized.
        while (i < len && gram_str[i] == '#') {
            // stop at the end of input if the comment has no closing newline
            while (i < len && gram_str[i] != '\n') i++
            // step over the newline itself, when there is one
            if (i < len) i++
            io::print("comment: "); io::print(gram_str.slice(begin, i))
            begin = i
        }
        if (i < len && gram_str[i] == '"') {
            i++
            while (i < len && gram_str[i] != '"') {
                i++
                // if we hit a " we check to see if an odd number of backslashes precede it
                // (meaning that the " is escaped), and if so, we move on. Otherwise, we found
                // the end of the quoted string
                if (i < len && gram_str[i] == '"') {
                    var escaped = 0
                    // the opening quote is not a backslash, so this walk
                    // backwards always stops inside the string
                    while (gram_str[i-(1+escaped)] == '\\') escaped++
                    if (escaped % 2) i++
                }
            }
        }
        if (i < len && gram_str[i] == ' ') {
            // leading spaces (e.g. an indented line) must not produce an
            // empty word - same guard as the newline case below
            if (i != begin) out.add(gram_str.slice(begin, i))
            // allow multiple spaces between words
            while (i < len && gram_str[i] == ' ') i++
            begin = i
            // undo one step so the for loop's i++ lands on the new word start
            i--
        }
        if (i < len && gram_str[i] == '\n') {
            if (i != begin) out.add(gram_str.slice(begin, i))
            begin = i + 1
        }
    }
    // flush the last word when the input does not end in a space or newline
    if (begin < len) out.add(gram_str.slice(begin, len))
    return out
}

// Builds a grammer from its textual description.
//
// The text is tokenized with split_into_words and interpreted word by word:
//   "="   is ignored,
//   "|"   finishes the current alternative and starts another one for the
//         same left-hand side,
//   ";"   finishes the current alternative and makes the next word a new
//         left-hand side,
//   a word starting with '"' is added to the right-hand side with its
//         surrounding quotes removed, and a regex built from the same text
//         is appended to gram.regexs,
//   any other word is added to the right-hand side as a plain symbol.
// A rule is only recorded when "|" or ";" is seen, so every rule must be
// terminated with ";".
//
// gram_str: the grammar text.
// returns:  the populated grammer.
fun load_grammer(gram_str: string::string): grammer {
    var gram.construct(): grammer
    var leftSide = symbol::symbol("", false)
    var doLeftSide = true
    var rightSide = vector::vector()
    /*split_into_words(io::read_file(path)).for_each(fun(word: string::string) {*/
        /*io::print("word: "); io::println(word);*/
    /*})*/
    /*return gram*/
    split_into_words(gram_str).for_each(fun(word: string::string) {
        if (word == "=") {
            // do nothing
        } else if (word == "|") {
            gram.rules.add(rule(leftSide, rightSide))
            rightSide = vector::vector()
        } else if (word == ";") {
            gram.rules.add(rule(leftSide, rightSide))
            rightSide = vector::vector()
            doLeftSide = true
        } else {
            if (doLeftSide)
                leftSide = symbol::symbol(word, true)
            else if (word[0] == '"') {
                // slice(1,-2) drops the opening and closing quote
                rightSide.add(symbol::symbol(word.slice(1,-2), true))
                /*gram.regexs.add_unique(regex::regex(word.slice(1,-2)))*/
                gram.regexs.add(regex::regex(word.slice(1,-2)))
            } else {
                rightSide.add(symbol::symbol(word, false))
            }
            doLeftSide = false
        }
    })
    return gram
}

// A grammar: the list of production rules plus the regexes collected from
// the quoted words on the right-hand sides.
obj grammer (Object) {
    var rules: vector::vector
    var regexs: vector::vector

    // Initializes both lists as empty and returns this object.
    fun construct(): *grammer {
        rules.construct()
        regexs.construct()
        // the declared return type is *grammer, so actually return it
        return this
    }
    // Initializes this object as a copy of old.
    fun copy_construct(old: *grammer) {
        rules.copy_construct(&old->rules)
        regexs.copy_construct(&old->regexs)
    }
    // Replaces the contents of this object with a copy of other.
    fun operator=(other: grammer) {
        destruct()
        copy_construct(&other)
    }
    // Destructs both member lists.
    fun destruct() {
        rules.destruct()
        regexs.destruct()
    }

    // Renders the rules and then the regex strings, one per line, each
    // indented with a tab.
    fun to_string(): string::string {
        var result = string::string("grammer rules:")
        rules.for_each( fun(i : rule) { result += string::string("\n\t") + i.to_string(); } )
        result += "\nregexs:"
        regexs.for_each( fun(i : regex::regex) { result += string::string("\n\t") + i.regexString; } )
        return result
    }
}

// Convenience constructor: a freshly constructed rule with the given
// left-hand side and right-hand side (position and lookahead keep the
// values set by rule.construct).
fun rule(lhs: symbol::symbol, rhs: vector::vector): rule {
    var toRet.construct(): rule
    toRet.lhs = lhs
    toRet.rhs = rhs
    return toRet
}

// One production: lhs -> rhs, together with a position and a lookahead set.
// NOTE(review): position/lookahead are only initialized and copied in this
// file; presumably they are used by a parser-table builder elsewhere - confirm.
obj rule (Object) {
    var lhs: symbol::symbol
    var rhs: vector::vector
    var position: int
    var lookahead: set::set

    // Initializes an empty rule with position 0 and returns this object.
    fun construct(): *rule {
        lhs.construct()
        rhs.construct()
        position = 0
        lookahead.construct()
        // the declared return type is *rule, so actually return it
        return this
    }
    // Initializes this object as a copy of other.
    fun copy_construct(other: *rule) {
        lhs.copy_construct(&other->lhs)
        rhs.copy_construct(&other->rhs)
        position = other->position
        lookahead.copy_construct(&other->lookahead)
    }
    // Replaces the contents of this object with a copy of other.
    fun operator=(other: rule) {
        destruct()
        copy_construct(&other)
    }
    // Destructs the members that own resources (position is a plain int).
    fun destruct() {
        lhs.destruct()
        rhs.destruct()
        lookahead.destruct()
    }

    // Renders the rule as "<lhs name> -> sym, sym, " (each right-hand-side
    // symbol is followed by ", ", including the last one).
    fun to_string(): string::string {
        var result = lhs.name + " -> "
        rhs.for_each( fun(i : symbol::symbol) { result += i.to_string() + ", "; } )
        return result
    }
}