2015-06-30 02:40:46 -04:00
|
|
|
import string
|
|
|
|
|
import vector
|
|
|
|
|
import set
|
|
|
|
|
import symbol
|
2015-07-03 18:34:46 -04:00
|
|
|
import regex
|
2015-07-04 03:21:36 -04:00
|
|
|
import io
|
2015-07-08 13:43:06 -04:00
|
|
|
import util
|
2015-07-04 03:21:36 -04:00
|
|
|
|
2015-07-06 12:49:29 -04:00
|
|
|
// Splits the text of a grammar description into words.
//
// gram_str: the full text to tokenise.
// Returns:  the words in order of appearance.
//
// Rules, as implemented below:
//   * words are separated by runs of spaces or by a newline
//   * a '#' starts a comment that runs through the end of the line; the
//     comment text is echoed with io::print and produces no word
//   * a double-quoted section is scanned as a unit, so spaces inside it do
//     not split the word; a '"' preceded by an odd number of backslashes is
//     escaped and does not close the quote
//
// Every scan is bounded by the input length, so an unterminated comment or
// quote, or trailing spaces at the very end of the input, can no longer walk
// past the end of the string. Empty words are never emitted, and a final word
// that is not followed by a space or newline is still returned.
fun split_into_words(gram_str: string::string): vector::vector<string::string> {
    // element type spelled out in full so it matches the declared return type
    var out.construct(): vector::vector<string::string>
    var len = gram_str.length()
    var begin = 0
    for (var i = 0; i < len; i++;) {
        // a loop rather than a single test, so that several comment lines in
        // a row are all skipped instead of the second one being read as words
        while (i < len && gram_str[i] == '#') {
            while (i < len && gram_str[i] != '\n') i++
            // step over the newline itself, if the comment had one
            if (i < len) i++
            io::print("comment: "); io::print(gram_str.slice(begin, i))
            begin = i
        }
        if (i < len && gram_str[i] == '"') {
            i++
            while (i < len && gram_str[i] != '"') {
                i++
                // if we hit a " we check whether an odd number of backslashes
                // precede it (meaning that the " is escaped), and if so we
                // move on. Otherwise we found the end of the quoted string.
                if (i < len && gram_str[i] == '"') {
                    var escaped = 0
                    // cannot run off the front: the opening quote is not a backslash
                    while (gram_str[i-(1+escaped)] == '\\') escaped++
                    if (escaped % 2)
                        i++
                }
            }
        }
        if (i < len && gram_str[i] == ' ') {
            // same guard as the newline case: leading spaces yield no empty word
            if (i != begin)
                out.add(gram_str.slice(begin, i))
            // allow multiple spaces between words
            while (i < len && gram_str[i] == ' ') i++
            begin = i
            // the for loop's own increment brings us back to the first non-space
            i--
        }
        if (i < len && gram_str[i] == '\n') {
            if (i != begin)
                out.add(gram_str.slice(begin, i))
            begin = i + 1
        }
    }
    // flush a final word that was not terminated by a space or newline
    if (begin < len)
        out.add(gram_str.slice(begin, len))
    return out
}
|
|
|
|
|
|
2015-07-06 13:48:19 -04:00
|
|
|
// Builds a grammer from the textual grammar description in gram_str.
//
// The text is tokenised with split_into_words and each word is handled as:
//   "="   ignored
//   "|"   closes the current alternative: a rule (left side, collected right
//         side) is added and the right side is reset
//   ";"   same as "|", and the next ordinary word becomes a new left side
//   other the first ordinary word after start or ";" is the rule's left side;
//         every later one is appended to the right side
//
// Right-side words that start with '"' are terminals. Both the plain form
// "hia*" and the decorated form "hia*":hi_with_as are accepted; the text
// between the quotes is used as the symbol name and as the regex source. The
// regex is registered under the decoration name when one is present, and
// under the whole word (quotes included) otherwise.
//
// Each word is also echoed through io::print / io::println.
//
// gram_str: the grammar text.
// Returns:  the populated grammer.
fun load_grammer(gram_str: string::string): grammer {
    var gram.construct(): grammer
    var leftSide = symbol::symbol("", false)
    var doLeftSide = true
    var rightSide = vector::vector<symbol::symbol>()
    split_into_words(gram_str).for_each(fun(word: string::string) {
        io::print("word: "); io::println(word)
        if (word == "=") {
            // do nothing
        } else if (word == "|") {
            gram.rules.add(rule(leftSide, rightSide))
            rightSide = vector::vector<symbol::symbol>()
        } else if (word == ";") {
            gram.rules.add(rule(leftSide, rightSide))
            rightSide = vector::vector<symbol::symbol>()
            doLeftSide = true
        } else {
            if (doLeftSide) {
                // The left side is a bare word, i.e. a nonterminal, so it gets
                // the same flag (false) as bare words on the right side.
                // Previously it was built with true, the flag this function
                // otherwise reserves for quoted terminals.
                leftSide = symbol::symbol(word, false)
            } else if (word[0] == '"') {
                // find the closing quote by scanning back from the end; the
                // scan always stops because word[0] is itself a quote
                var last_quote = word.length()-1
                while(word[last_quote] != '"') last_quote--
                // text between the quotes, shared by the symbol and the regex
                var terminal_text = word.slice(1,last_quote)
                rightSide.add(symbol::symbol(terminal_text, true))
                if (last_quote != word.length()-1) {
                    // decorated terminal: skip the quote and the separator
                    // character after it, the remainder is the name
                    gram.regexs.add(util::make_pair(word.slice(last_quote+2, -1), regex::regex(terminal_text)))
                } else {
                    // plain terminal: the whole word doubles as the name
                    gram.regexs.add(util::make_pair(word, regex::regex(terminal_text)))
                }
            } else {
                rightSide.add(symbol::symbol(word, false))
            }
            doLeftSide = false
        }
    })
    return gram
}
|
2015-07-03 18:34:46 -04:00
|
|
|
|
|
|
|
|
// A loaded grammar: its production rules plus the regexes for its terminals.
obj grammer (Object) {
    // every production, in the order it was added
    var rules: vector::vector<rule>
    // (name, regex) pairs, one per terminal occurrence recorded by the loader
    var regexs: vector::vector<util::pair<string::string, regex::regex>>

    // Initialises both containers to empty.
    // Returns a pointer to this object; the declared *grammer return type
    // previously had no matching return statement.
    fun construct(): *grammer {
        rules.construct()
        regexs.construct()
        return this
    }

    // Initialises this object as a copy of *old.
    fun copy_construct(old: *grammer) {
        rules.copy_construct(&old->rules)
        regexs.copy_construct(&old->regexs)
    }

    // Assignment: release what we hold, then copy from other.
    // other is received by value, so it is unaffected by the destruct() call.
    fun operator=(other: grammer) {
        destruct()
        copy_construct(&other)
    }

    // Releases both containers.
    fun destruct() {
        rules.destruct()
        regexs.destruct()
    }

    // Renders the grammar as text: a "grammer rules:" header followed by one
    // tab-indented line per rule, then a "regexs:" header followed by one
    // tab-indented "name: regex source" line per registered regex.
    fun to_string(): string::string {
        var result = string::string("grammer rules:")
        rules.for_each( fun(i : rule) { result += string::string("\n\t") + i.to_string(); } )
        result += "\nregexs:"
        regexs.for_each( fun(i : util::pair<string::string, regex::regex>) { result += string::string("\n\t") + i.first + ": " + i.second.regexString; } )
        return result
    }
}
|
|
|
|
|
|
|
|
|
|
// Convenience constructor for a rule object.
//
// lhs: the symbol on the left of the production.
// rhs: the symbols on the right of the production.
// Returns a freshly constructed rule whose lhs and rhs fields hold the given
// values; its other fields are left as rule's construct() set them.
fun rule(lhs: symbol::symbol, rhs: vector::vector<symbol::symbol>): rule {
    var made.construct(): rule
    // the two field assignments are independent of each other
    made.rhs = rhs
    made.lhs = lhs
    return made
}
|
2015-06-30 02:40:46 -04:00
|
|
|
|
|
|
|
|
// A single production (lhs -> rhs) together with a position and a lookahead set.
obj rule (Object) {
    // symbol on the left of the production
    var lhs: symbol::symbol
    // symbols on the right of the production, in order
    var rhs: vector::vector<symbol::symbol>
    // integer position, initialised to 0; presumably an index into rhs
    // (not used within this object -- confirm against callers)
    var position: int
    // set of lookahead symbols, initially empty
    var lookahead: set::set<symbol::symbol>

    // Initialises every field: empty lhs/rhs/lookahead and position 0.
    // Returns a pointer to this object; the declared *rule return type
    // previously had no matching return statement.
    fun construct(): *rule {
        lhs.construct()
        rhs.construct()
        position = 0
        lookahead.construct()
        return this
    }

    // Initialises this object as a field-by-field copy of *other.
    fun copy_construct(other: *rule) {
        lhs.copy_construct(&other->lhs)
        rhs.copy_construct(&other->rhs)
        position = other->position
        lookahead.copy_construct(&other->lookahead)
    }

    // Assignment: release what we hold, then copy from other.
    // other is received by value, so it is unaffected by the destruct() call.
    fun operator=(other: rule) {
        destruct()
        copy_construct(&other)
    }

    // Releases the fields that own resources; position is a plain int.
    fun destruct() {
        lhs.destruct()
        rhs.destruct()
        lookahead.destruct()
    }

    // Renders the production as "<lhs name> -> " followed by each right-side
    // symbol's to_string(), every one followed by ", " (including the last).
    fun to_string(): string::string {
        var result = lhs.name + " -> "
        rhs.for_each( fun(i : symbol::symbol) { result += i.to_string() + ", "; } )
        return result
    }
}
|
|
|
|
|
|