import regex
import symbol
import string
import vector
import util

// A regex-driven tokenizer: it holds a set of regexes and an input string, and
// each call to next() produces one symbol starting at the current position.
obj lexer {
    // Regexes registered through add_regex; every one is tried on each next() call.
    var regs: vector::vector
    // The text being tokenized.
    var input: string::string
    // Offset into input at which the next token starts.
    var position: int

    // Initializes the members and starts at the beginning of the (empty) input.
    // Returns this so that construction can be chained.
    fun construct(): lexer* {
        regs.construct()
        input.construct()
        position = 0
        return this
    }

    // Releases the owned members.
    fun destruct() {
        regs.destruct()
        input.destruct()
    }

    // Initializes this lexer as a copy of old, including its current position.
    fun copy_construct(old: lexer*) {
        regs.copy_construct(&old->regs)
        input.copy_construct(&old->input)
        position = old->position
    }

    // Assignment: tear down the current state, then copy-construct from old.
    fun operator=(old: lexer) {
        destruct()
        copy_construct(&old)
    }

    // Registers an already-built regex.
    fun add_regex(newOne: regex::regex) {
        regs.add(newOne)
    }

    // Registers a regex built from a pattern string.
    fun add_regex(newOne: char*) {
        regs.add(regex::regex(newOne))
    }

    // Replaces the text to tokenize and restarts lexing from its beginning.
    fun set_input(in: string::string) {
        // Reset the cursor: an offset left over from a previous input would make
        // next() skip the start of the new text, or report $EOF$ immediately.
        position = 0
        input = in
    }

    // Produces the next symbol from the input.
    //   - "$EOF$" once position has reached the end of the input.
    //   - "$INVALID$" when the selected match length is negative; position is
    //     left unchanged in that case.
    //   - Otherwise a symbol named after the selected regex's regexString,
    //     carrying the matched slice of input; position advances past it.
    fun next(): symbol::symbol {
        if (position >= input.length())
            return symbol::symbol("$EOF$", true)
        // Pair each regex's match length on the remaining input with its regexString,
        // then select one pair with max, using a less-than comparison on match length.
        // NOTE(review): behaviour when no regexes are registered depends on what
        // vector's max does on an empty vector -- confirm.
        var best = regs.map(fun(reg: regex::regex): util::pair {
            return util::make_pair(reg.long_match(input.slice(position, -1)), reg.regexString);
        }).max(fun(first: util::pair, second: util::pair): bool {
            return first.first < second.first;
        })
        // NOTE(review): a match length of 0 passes this check and does not advance
        // position, so repeated calls would keep returning an empty token -- confirm
        // whether long_match can return 0.
        if (best.first < 0)
            return symbol::symbol("$INVALID$", true)
        position += best.first
        return symbol::symbol(best.second, true, input.slice(position-best.first, position))
    }
}