53 lines
1.5 KiB
Plaintext
53 lines
1.5 KiB
Plaintext
import regex
|
|
import symbol
|
|
import string
|
|
import vector
|
|
import util
|
|
|
|
// Regex-driven lexer. Holds a list of candidate regexes and an input string,
// and hands out one symbol per call to next(), choosing the regex that
// reports the largest long_match() value at the current position.
obj lexer (Object) {
    // Candidate token patterns; filled through add_regex().
    var regs: vector::vector<regex::regex>
    // Text being tokenized; replaced through set_input().
    var input: string::string
    // Offset into input at which the next call to next() starts matching.
    var position: int

    // Initializes the members to an empty lexer positioned at offset 0.
    // Returns this so construction can be chained.
    fun construct(): lexer* {
        regs.construct()
        input.construct()
        position = 0
        return this
    }

    // Destroys the owned regex list and input string.
    fun destruct() {
        regs.destruct()
        input.destruct()
    }

    // Initializes this lexer as a copy of *old, including its current position.
    fun copy_construct(old: lexer*) {
        regs.copy_construct(&old->regs)
        input.copy_construct(&old->input)
        position = old->position
    }

    // Assignment: tears down the current state, then copy-constructs from old.
    fun operator=(old: lexer) {
        destruct()
        copy_construct(&old)
    }

    // Registers an already-built regex as a candidate token pattern.
    fun add_regex(newOne: regex::regex) {
        regs.add(newOne)
    }

    // Convenience overload: builds a regex from a C string and registers it.
    fun add_regex(newOne: char*) {
        regs.add(regex::regex(newOne))
    }

    // Replaces the text to tokenize and rewinds to its beginning.
    // Without the rewind, a lexer reused on a second input would start
    // matching at the offset left over from the previous one (or report
    // "$EOF$" immediately when the new input is shorter than that offset).
    fun set_input(in: string::string) {
        input = in
        position = 0
    }

    // Returns the next symbol from input and advances position past it.
    //   - "$EOF$" symbol once position has reached the end of input.
    //   - "$INVALID$" symbol when the best long_match() result is negative;
    //     position is left unchanged in that case.
    //   - Otherwise a symbol named after the winning regex's regexString,
    //     carrying the matched slice of input.
    // NOTE(review): behaviour with no registered regexes depends on what
    // vector::max does on an empty vector - confirm before relying on it.
    // NOTE(review): if the best match length is 0, position does not advance,
    // so repeated calls would keep returning the same empty match - confirm
    // whether long_match can return 0.
    fun next(): symbol::symbol {
        if (position >= input.length())
            return symbol::symbol("$EOF$", true)
        // Pair every regex's match length on the remaining input with its
        // pattern string, then keep the pair selected by max() under a
        // "first.first < second.first" ordering (i.e. by match length).
        var longest = regs.map(fun(reg: regex::regex): util::pair<int, string::string> {
                return util::make_pair(reg.long_match(input.slice(position, -1)), reg.regexString); })
            .max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool
                { return first.first < second.first; })
        if (longest.first < 0)
            return symbol::symbol("$INVALID$", true)
        position += longest.first
        // position now sits just past the match; slice back over it for the token text.
        return symbol::symbol(longest.second, true, input.slice(position-longest.first, position))
    }
}
|
|
|