Files
kraken/stdlib/lexer.krak
2018-05-22 19:43:54 -04:00

90 lines
2.5 KiB
Plaintext

import regex
import symbol
import str
import vec
import util
/* Builds a lexer from a vec of regexes. Each regex is registered as a rule
 * whose name is the regex's own regexString, in the order given. */
fun lexer(regs: vec::vec<regex::regex>): lexer {
    var result.construct() :lexer
    for (var i = 0; i < regs.size; i++;)
        result.add_regex(util::make_pair(regs[i].regexString, regs[i]))
    return result
}
/* Builds a lexer from (symbol, regex) pairs. Each pair is registered as a
 * rule named after the symbol's name and matched by the paired regex, in the
 * order given. */
fun lexer(regs: vec::vec<util::pair<symbol::symbol, regex::regex>>): lexer {
    var result.construct() :lexer
    for (var i = 0; i < regs.size; i++;)
        result.add_regex(util::make_pair(regs[i].first.name, regs[i].second))
    return result
}
/* Tokenizer over an ordered list of named regexes. next() picks the rule
 * reporting the largest long_match result at the current position. */
obj lexer (Object) {
    /* (rule name, regex) pairs, kept in the order they were added */
    var regs: vec::vec<util::pair<str::str, regex::regex>>
    /* text currently being tokenized */
    var input: str::str
    /* index into input at which the next token starts */
    var position: int
    /* starts at 1 and is incremented for every '\n' inside a consumed token */
    var line_number: int

    /* Initializes an empty lexer: no rules, empty input, position 0, line 1. */
    fun construct(): *lexer {
        position = 0
        line_number = 1
        input.construct()
        regs.construct()
        return this
    }

    /* Releases the owned input string and rule vec. */
    fun destruct() {
        input.destruct()
        regs.destruct()
    }

    /* Initializes this lexer as a copy of old: rules, input and cursor state. */
    fun copy_construct(old: *lexer) {
        position = old->position
        line_number = old->line_number
        input.copy_construct(&old->input)
        regs.copy_construct(&old->regs)
    }

    /* Assignment: tear down the current state, then copy everything from old. */
    fun operator=(old: lexer) {
        destruct()
        copy_construct(&old)
    }

    /* Appends an already-built (name, regex) rule. The other add_regex
     * overloads all end up here. */
    fun add_regex(newOne: util::pair<str::str,regex::regex>) {
        regs.add(newOne)
    }

    /* Appends a rule with an explicit name. */
    fun add_regex(name: str::str, newOne: regex::regex) {
        add_regex(util::make_pair(name, newOne))
    }

    /* Appends a rule named after the regex's own regexString. */
    fun add_regex(newOne: regex::regex) {
        add_regex(newOne.regexString, newOne)
    }

    /* Appends a rule built from a raw pattern; the same text is used for both
     * the rule name and the regex. */
    fun add_regex(newOne: *char) {
        add_regex(str::str(newOne), regex::regex(newOne))
    }

    /* Replaces the text being tokenized and rewinds to position 0, line 1. */
    fun set_input(in: ref str::str) {
        input = in
        position = 0
        line_number = 1
    }

    /* Returns the next token.
     *   - symbol::eof_symbol() once position has reached the end of input
     *   - symbol::invalid_symbol() when no rule reports a long_match result
     *     greater than -1; position is left unchanged in that case
     *   - otherwise a symbol built from the winning rule's name, the matched
     *     slice of input, and line_number as it stands after counting the
     *     newlines inside that slice */
    fun next(): symbol::symbol {
        if (position >= input.length())
            return symbol::eof_symbol()
        var best_rule = -1
        var best_length = -1
        /* Strict '>' means the earliest-added rule wins when results tie. */
        for (var r = 0; r < regs.size; r++;) {
            var candidate = regs[r].second.long_match(input.getBackingMemory(), position, input.length())
            if (candidate > best_length) {
                best_length = candidate
                best_rule = r
            }
        }
        if (best_rule < 0)
            return symbol::invalid_symbol()
        /* NOTE(review): a long_match result of 0 would be accepted above and
         * leave position where it is - confirm whether long_match can
         * return 0. */
        var token_start = position
        var token_end = token_start + best_length
        for (var c = token_start; c < token_end; c++;)
            if (input[c] == '\n')
                line_number++
        position = token_end
        return symbol::symbol(regs[best_rule].first, true, input.slice(token_start, token_end), line_number)
    }
}