Files
kraken/stdlib/grammer.krak

157 lines
4.6 KiB
Plaintext

import string
import vector
import set
import symbol
import regex
import io
// Splits the text of a grammar description into its words.
//
// Words are separated by runs of spaces and by newlines. A double-quoted
// literal is kept as a single word, quotes included, even when it contains
// spaces; a quote preceded by an odd number of backslashes counts as escaped
// and does not end the literal. Text from '#' up to and including the end of
// the line is a comment: it is echoed through io::print and not returned.
//
// gram_str: the full grammar text
// returns:  the words, in the order they appear in gram_str
fun split_into_words(gram_str: string::string): vector::vector<string::string> {
    var out.construct(): vector::vector<string::string>
    var str_len = gram_str.length()
    // index of the first character of the word currently being scanned
    var begin = 0
    for (var i = 0; i < str_len; i++;) {
        // A loop rather than a single test, so that a comment line directly
        // followed by another comment line is skipped too instead of being
        // split into words.
        while (i < str_len && gram_str[i] == '#') {
            while (i < str_len && gram_str[i] != '\n') i++
            // step over the newline, unless the comment ran to the end of the input
            if (i < str_len) i++
            io::print("comment: "); io::print(gram_str.slice(begin, i))
            begin = i
        }
        if (i < str_len && gram_str[i] == '"') {
            i++
            // every step is bounds-checked so an unterminated literal stops
            // at the end of the input instead of reading past it
            while (i < str_len && gram_str[i] != '"') {
                i++
                // if we hit a " we check to see if an odd number of backslashes precede it
                // (meaning that the " is escaped), and if so, we move on. Otherwise, we found
                // the end of the quoted string
                if (i < str_len && gram_str[i] == '"') {
                    var escaped = 0
                    // cannot run below index 0: the opening quote is not a backslash
                    while (gram_str[i-(1+escaped)] == '\\') escaped++
                    if (escaped % 2)
                        i++
                }
            }
        }
        if (i < str_len && gram_str[i] == ' ') {
            // spaces with no word in front of them (e.g. indentation) must not
            // produce an empty word
            if (i != begin)
                out.add(gram_str.slice(begin, i))
            // allow multiple spaces between words
            while (i < str_len && gram_str[i] == ' ') i++
            begin = i
            // the loop increment will land back on the first non-space character
            i--
        }
        if (i < str_len && gram_str[i] == '\n') {
            if (i != begin)
                out.add(gram_str.slice(begin, i))
            begin = i + 1
        }
    }
    // the input may end without a trailing space or newline; keep the last word
    if (begin < str_len)
        out.add(gram_str.slice(begin, str_len))
    return out
}
// Builds a grammer from the text of a grammar description.
//
// The text is tokenized with split_into_words and consumed one word at a time:
//   - the first word of a production becomes its left-hand side
//   - "=" is skipped
//   - "|" stores the alternative collected so far and starts a new one for
//     the same left-hand side
//   - ";" stores the alternative collected so far and ends the production
//   - a word starting with '"' becomes a symbol flagged true whose name is
//     word.slice(1,-2); a regex built from the same text is added to the
//     grammar's regexs (duplicates are not filtered)
//   - any other word becomes a symbol flagged false
//
// NOTE(review): the left-hand side is created with the same flag (true) as
// quoted words, while unquoted right-hand-side words get false -- confirm
// against the symbol module that this is intended.
fun load_grammer(gram_str: string::string): grammer {
    var loaded.construct(): grammer
    var current_lhs = symbol::symbol("", false)
    var expecting_lhs = true
    var pending_rhs = vector::vector<symbol::symbol>()
    split_into_words(gram_str).for_each(fun(word: string::string) {
        if (word == "=") {
            // nothing to record for the separator
        } else if (word == "|") {
            loaded.rules.add(rule(current_lhs, pending_rhs))
            pending_rhs = vector::vector<symbol::symbol>()
        } else if (word == ";") {
            loaded.rules.add(rule(current_lhs, pending_rhs))
            pending_rhs = vector::vector<symbol::symbol>()
            // the next plain word starts a new production
            expecting_lhs = true
        } else {
            if (expecting_lhs) {
                current_lhs = symbol::symbol(word, true)
            } else {
                if (word[0] == '"') {
                    var pattern = word.slice(1,-2)
                    pending_rhs.add(symbol::symbol(pattern, true))
                    loaded.regexs.add(regex::regex(pattern))
                } else {
                    pending_rhs.add(symbol::symbol(word, false))
                }
            }
            expecting_lhs = false
        }
    })
    return loaded
}
// A grammar as filled in by load_grammer: a list of rules plus the regexes
// built from the quoted words of the grammar text.
obj grammer (Object) {
    var rules: vector::vector<rule>
    var regexs: vector::vector<regex::regex>
    // Constructs both member vectors.
    // returns: a pointer to this object, as the declared return type promises
    fun construct(): *grammer {
        rules.construct()
        regexs.construct()
        return this
    }
    // Copy-constructs both member vectors from the ones in old.
    fun copy_construct(old: *grammer) {
        rules.copy_construct(&old->rules)
        regexs.copy_construct(&old->regexs)
    }
    // Assignment: releases the current contents, then copies from other.
    // other is received by value, so it is a separate object from this one.
    fun operator=(other: grammer) {
        destruct()
        copy_construct(&other)
    }
    // Destructs both member vectors.
    fun destruct() {
        rules.destruct()
        regexs.destruct()
    }
    // Renders the grammar as text: a "grammer rules:" header followed by one
    // tab-indented line per rule, then a "regexs:" header followed by one
    // tab-indented line per regex (its regexString).
    fun to_string(): string::string {
        var result = string::string("grammer rules:")
        rules.for_each( fun(i : rule) { result += string::string("\n\t") + i.to_string(); } )
        result += "\nregexs:"
        regexs.for_each( fun(i : regex::regex) { result += string::string("\n\t") + i.regexString; } )
        return result
    }
}
// Convenience factory for a rule: starts from a default-constructed rule and
// assigns the given left-hand side and right-hand side to it. The remaining
// fields keep whatever construct() gave them.
//
// lhs: the symbol the rule produces
// rhs: the symbols the rule expands to, in order
fun rule(lhs: symbol::symbol, rhs: vector::vector<symbol::symbol>): rule {
    var made.construct(): rule
    made.rhs = rhs
    made.lhs = lhs
    return made
}
// One production of a grammar: a left-hand-side symbol and the sequence of
// symbols it expands to, plus a position and a lookahead set.
// NOTE(review): position and lookahead are only initialized and copied in
// this file; presumably they are used by a parser generator -- confirm.
obj rule (Object) {
    var lhs: symbol::symbol
    var rhs: vector::vector<symbol::symbol>
    var position: int
    var lookahead: set::set<symbol::symbol>
    // Constructs every member and sets position to 0.
    // returns: a pointer to this object, as the declared return type promises
    fun construct(): *rule {
        lhs.construct()
        rhs.construct()
        position = 0
        lookahead.construct()
        return this
    }
    // Copy-constructs every member from the matching member of other.
    fun copy_construct(other: *rule) {
        lhs.copy_construct(&other->lhs)
        rhs.copy_construct(&other->rhs)
        position = other->position
        lookahead.copy_construct(&other->lookahead)
    }
    // Assignment: releases the current contents, then copies from other.
    // other is received by value, so it is a separate object from this one.
    fun operator=(other: rule) {
        destruct()
        copy_construct(&other)
    }
    // Destructs the members that were constructed; position is a plain int.
    fun destruct() {
        lhs.destruct()
        rhs.destruct()
        lookahead.destruct()
    }
    // Renders the rule as "<lhs name> -> " followed by each right-hand-side
    // symbol's to_string() and ", " (so a non-empty rule ends with ", ").
    // position and lookahead are not included.
    fun to_string(): string::string {
        var result = lhs.name + " -> "
        rhs.for_each( fun(i : symbol::symbol) { result += i.to_string() + ", "; } )
        return result
    }
}