90 lines
2.5 KiB
Plaintext
90 lines
2.5 KiB
Plaintext
import regex
|
|
import symbol
|
|
import str
|
|
import vec
|
|
import util
|
|
|
|
/* Builds a lexer from a plain list of regexes.
 * Every regex is registered, in list order, under a name equal to its
 * own regexString. The finished lexer is returned by value. */
fun lexer(regs: vec::vec<regex::regex>): lexer {
    var built.construct() :lexer
    for (var idx = 0; idx < regs.size; idx++;) {
        /* name the rule after the pattern text it was compiled from */
        built.add_regex(util::make_pair(regs[idx].regexString, regs[idx]))
    }
    return built
}
|
|
|
|
/* Builds a lexer from a list of (symbol, regex) pairs.
 * Each regex is registered, in list order, under the name of the symbol
 * it is paired with. The finished lexer is returned by value. */
fun lexer(regs: vec::vec<util::pair<symbol::symbol, regex::regex>>): lexer {
    var built.construct() :lexer
    for (var idx = 0; idx < regs.size; idx++;) {
        /* only the symbol's name is kept; the regex is stored as given */
        built.add_regex(util::make_pair(regs[idx].first.name, regs[idx].second))
    }
    return built
}
|
|
|
|
/* Regex-driven lexer.
 * Holds an ordered list of named regexes and an input string, and hands
 * out one symbol per call to next(), always choosing the regex with the
 * longest match at the current position. */
obj lexer (Object) {
    /* Registered (name, regex) rules, in registration order. */
    var regs: vec::vec<util::pair<str::str, regex::regex>>
    /* Text currently being tokenized. */
    var input: str::str
    /* Index into input of the next character to be matched. */
    var position: int
    /* Line counter; starts at 1 and is bumped for each '\n' consumed. */
    var line_number: int

    /* Initializes an empty lexer: no rules, empty input, position 0, line 1.
     * Returns this. */
    fun construct(): *lexer {
        regs.construct()
        input.construct()
        position = 0
        line_number = 1
        return this
    }

    /* Releases the rule list and the input string. */
    fun destruct() {
        regs.destruct()
        input.destruct()
    }

    /* Initializes this lexer as a copy of old, including its scan position
     * and line counter. */
    fun copy_construct(old: *lexer) {
        regs.copy_construct(&old->regs)
        input.copy_construct(&old->input)
        position = old->position
        line_number = old->line_number
    }

    /* Assignment: tears down the current state, then copies from old.
     * old is taken by value, so it is a separate object from this. */
    fun operator=(old: lexer) {
        destruct()
        copy_construct(&old)
    }

    /* Registers newOne under the given name. */
    fun add_regex(name: str::str, newOne: regex::regex) {
        regs.add(util::make_pair(name,newOne))
    }

    /* Registers an already-built (name, regex) pair. */
    fun add_regex(newOne: util::pair<str::str,regex::regex>) {
        regs.add(newOne)
    }

    /* Registers newOne under a name equal to its own regexString. */
    fun add_regex(newOne: regex::regex) {
        regs.add(util::make_pair(newOne.regexString, newOne))
    }

    /* Builds a regex from the given characters and registers it under a
     * name built from the same characters. */
    fun add_regex(newOne: *char) {
        regs.add(util::make_pair(str::str(newOne), regex::regex(newOne)))
    }

    /* Replaces the text being tokenized and restarts scanning at
     * position 0, line 1. */
    fun set_input(in: ref str::str) {
        position = 0
        line_number = 1
        input = in
    }

    /* Returns the next symbol from the input.
     *   - symbol::eof_symbol() once position has reached the end of input.
     *   - symbol::invalid_symbol() when no rule matches at least one
     *     character at the current position; position is left unchanged.
     *   - otherwise a symbol named after the winning rule, carrying the
     *     matched text and the current line_number.
     * The longest match wins; on a tie the rule registered first wins,
     * because the comparison below is strict. */
    fun next(): symbol::symbol {
        if (position >= input.length())
            return symbol::eof_symbol()
        var max = -1
        /* Start at 0, not -1: a match of length 0 must never win. It would
         * leave position unchanged, so every later call would return the
         * same empty token and a caller looping until EOF would never
         * finish. This assumes long_match returns the matched length,
         * which is how its result is used below. */
        var max_length = 0
        for (var i = 0; i < regs.size; i++;) {
            var new_length = regs[i].second.long_match(input.getBackingMemory(), position, input.length())
            if (new_length > max_length) {
                max = i
                max_length = new_length
            }
        }
        if (max < 0)
            return symbol::invalid_symbol()
        /* Count the newlines inside the matched text before building the
         * symbol, so a token containing newlines reports the line on which
         * it ends. NOTE(review): possibly the start line was intended;
         * left as is because callers may rely on it. */
        for (var i = position; i < position+max_length; i++;)
            if (input[i] == '\n')
                line_number++
        position += max_length
        /* NOTE(review): the literal true is presumably a terminal flag;
         * confirm against symbol::symbol. */
        return symbol::symbol(regs[max].first, true, input.slice(position-max_length, position), line_number)
    }
}
|
|
|