2015-06-28 20:25:27 -04:00
|
|
|
import regex
|
|
|
|
|
import symbol
|
|
|
|
|
import string
|
|
|
|
|
import vector
|
|
|
|
|
import util
|
|
|
|
|
|
2015-07-06 13:48:19 -04:00
|
|
|
// Builds a lexer from bare regexes. Each regex is paired with its own
// regexString, which becomes the name of that entry, and the resulting
// vector of pairs is handed to the pair-taking lexer() overload.
fun lexer(regs: vector::vector<regex::regex>): lexer {
    var named = regs.map(fun(pattern: regex::regex): util::pair<string::string, regex::regex> {
        return util::make_pair(pattern.regexString, pattern)
    })
    return lexer(named)
}
|
|
|
|
|
|
|
|
|
|
// Builds a lexer from (name, regex) pairs: constructs an empty lexer,
// registers every pair through add_regex, and returns the result.
fun lexer(regs: vector::vector<util::pair<string::string, regex::regex>>): lexer {
    var built.construct() :lexer
    regs.for_each(fun(entry: util::pair<string::string, regex::regex>) {
        built.add_regex(entry)
    })
    return built
}
|
|
|
|
|
|
2015-06-30 02:40:46 -04:00
|
|
|
// Regex-driven tokenizer. Holds a list of named regexes, the text being
// scanned, and the offset at which the next token starts. Each call to
// next() tries every regex at the current offset and emits a symbol for the
// one chosen by the match-length comparison.
obj lexer (Object) {
    // (name, regex) pairs registered through the add_regex overloads.
    var regs: vector::vector<util::pair<string::string, regex::regex>>
    // Text being tokenized.
    var input: string::string
    // Offset into input at which the next call to next() starts matching.
    var position: int

    // Initializes an empty lexer: no regexes, empty input, position 0.
    // Returns this.
    fun construct(): *lexer {
        regs.construct()
        input.construct()
        position = 0
        return this
    }

    // Releases the owned regex list and input string.
    fun destruct() {
        regs.destruct()
        input.destruct()
    }

    // Initializes this lexer as a copy of old: regex list, input text and
    // current scan position.
    fun copy_construct(old: *lexer) {
        regs.copy_construct(&old->regs)
        input.copy_construct(&old->input)
        position = old->position
    }

    // Assignment: tears down the current state, then copy-constructs from old.
    fun operator=(old: lexer) {
        destruct()
        copy_construct(&old)
    }

    // Registers newOne under an explicit name.
    fun add_regex(name: string::string, newOne: regex::regex) {
        regs.add(util::make_pair(name,newOne))
    }

    // Registers an already-built (name, regex) pair.
    fun add_regex(newOne: util::pair<string::string,regex::regex>) {
        regs.add(newOne)
    }

    // Registers newOne, using its regexString as the name.
    fun add_regex(newOne: regex::regex) {
        regs.add(util::make_pair(newOne.regexString, newOne))
    }

    // Builds both the name string and the regex from the same pattern text
    // and registers them.
    fun add_regex(newOne: *char) {
        regs.add(util::make_pair(string::string(newOne), regex::regex(newOne)))
    }

    // Replaces the text to tokenize and restarts scanning at its beginning.
    // The reset matters when a lexer is reused: a position left over from the
    // previous input would make next() start part-way through the new text,
    // or report $EOF$ immediately if the new text is shorter than that offset.
    fun set_input(in: string::string) {
        position = 0
        input = in
    }

    // Produces the next symbol from input.
    //  - Returns the "$EOF$" symbol once position has reached input.length().
    //  - Otherwise runs long_match of every registered regex on
    //    input.slice(position, -1) (presumably "from position to the end" -
    //    confirm against string::slice) and picks the entry selected by max()
    //    with a comparison on the returned int.
    //  - If that int is negative, returns the "$INVALID$" symbol and leaves
    //    position unchanged.
    //  - Otherwise advances position by that int and returns a symbol carrying
    //    the regex's name and the text that was consumed.
    // NOTE(review): a best result of exactly 0 returns a symbol with empty
    // text and does not advance position - confirm whether long_match can
    // return 0 and whether callers rely on that.
    fun next(): symbol::symbol {
        if (position >= input.length())
            return symbol::symbol("$EOF$", true)
        var max = regs.map(fun(reg_pair: util::pair<string::string,regex::regex>): util::pair<int, string::string> {
                return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); })
            .max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool
                { return first.first < second.first; })
        if (max.first < 0)
            return symbol::symbol("$INVALID$", true)
        position += max.first
        // position has already been advanced, so the consumed text is the
        // max.first characters ending at the new position.
        return symbol::symbol(max.second, true, input.slice(position-max.first, position))
    }
}
|
|
|
|
|
|