import io
import vector
import string
import mem
import set
import util
import conversions
import serialize

// Convenience constructor: build a regex from a C-style string.
fun regex(in: *char):regex {
    return regex(string::string(in))
}

// Convenience constructor: build a regex from a string::string pattern.
fun regex(in: string::string):regex {
    var out.construct(in):regex
    return out
}

// One node of the state graph built by regex::compile.
// `character` is the character consumed when entering this state and
// `next_states` holds the outgoing edges.  Two character values are used as
// markers by this file: 0 for the synthetic start state (see construct())
// and 1 for the accepting state (see regex::construct and is_end()).
obj regexState (Object) {
    var character: char
    var next_states: set::set<*regexState>

    // Initialise a state that is entered on charIn and has no successors yet.
    fun construct(charIn:char): *regexState {
        character = charIn
        next_states.construct()
        return this
    }

    // Initialise a state carrying the character value 0.
    fun construct(): *regexState {
        return construct(conversions::to_char(0))
    }

    // Copies the character and the set of successor pointers; the successor
    // states themselves are shared, not cloned.
    fun copy_construct(old:*regexState): void {
        character = old->character
        next_states.copy_construct(&old->next_states)
    }

    // Releases only the successor set; the states it points to are untouched.
    fun destruct():void {
        next_states.destruct()
    }

    // Returns the successors that are entered by consuming `input`.
    fun match_char(input: char): set::set<*regexState> {
        return next_states.filter(fun(it:*regexState):bool { return it->character == input; })
    }

    // True when one of the successors is the accepting marker (character 1).
    fun is_end():bool {
        return next_states.any_true(fun(state: *regexState):bool { return state->character == 1; })
    }
}

// A compiled regular expression.
// Copies share one state graph through `referenceCounter`; the graph is freed
// by whichever copy brings the counter to zero in destruct().
obj regex (Object, Serializable) {
    var regexString: string::string
    var begin: *regexState
    var referenceCounter: *int

    // Default construction only initialises regexString.
    // NOTE(review): begin and referenceCounter are left unset here, while
    // destruct() dereferences referenceCounter unconditionally, so an
    // instance built this way must be given a graph (e.g. via unserialize)
    // before it is destroyed or assigned to -- confirm against callers.
    fun construct(): *regex {
        regexString.construct()
        return this
    }

    // Compile regexStringIn into a state graph and start its reference count at 1.
    fun construct(regexStringIn: string::string): *regex {
        regexString.copy_construct(&regexStringIn)
        referenceCounter = mem::new<int>()
        *referenceCounter = 1

        var beginningAndEnd = compile(regexStringIn)
        // init our begin, and the end state as the next state of each end
        begin = beginningAndEnd.first
        var end = mem::new<regexState>()->construct(conversions::to_char(1))
        beginningAndEnd.second.for_each(fun(it: *regexState): void { it->next_states.add(end); })
        return this
    }

    // Share the other regex's graph and bump the shared reference count.
    fun copy_construct(old:*regex):void {
        regexString.copy_construct(&old->regexString)
        begin = old->begin
        referenceCounter = old->referenceCounter
        *referenceCounter += 1
        // Earlier deep-copy approaches, kept for reference:
        /*construct(old->regexString)*/
        /*begin = mem::safe_recursive_clone(old->begin, fun(it: *regexState, cloner: fun(*regexState):*regexState, register: fun(*regexState):void): void {*/
            /*var newOne = mem::new<regexState>()->construct(it->character)*/
            /*register(newOne)*/
            /*it->next_states.for_each(fun(next_state: *regexState) {*/
                /*newOne->next_states.add(cloner(next_state))*/
            /*})*/
        /*})*/
    }

    // Drop one reference; the last owner deletes the graph and the counter.
    fun destruct():void {
        regexString.destruct()
        *referenceCounter -= 1
        if (*referenceCounter == 0) {
            mem::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return it->next_states; } )
            mem::delete(referenceCounter)
        }
    }

    // Only the pattern text is serialized; the graph is rebuilt on load.
    fun serialize(): vector::vector<char> {
        return serialize::serialize(regexString)
    }

    // Read the pattern text starting at pos, recompile it, and return the
    // position following the consumed data.
    fun unserialize(it: ref vector::vector<char>, pos: int): int {
        var temp = string::string()
        util::unpack(temp, pos) = serialize::unserialize<string::string>(it, pos)
        construct(temp)
        return pos
    }

    // Two regexes are equal when their pattern text is equal.
    fun operator==(other: regex):bool {
        return regexString == other.regexString
    }

    // `other` arrives by value, so it already holds its own reference and
    // releasing ours first is safe even for self-assignment.
    fun operator=(other: regex):void {
        destruct()
        copy_construct(&other)
    }

    // Build the state graph for regex_string.
    // Returns the synthetic start state paired with the set of states the
    // pattern can finish in.  Handles `*`, `+`, `?`, `|`, parenthesised groups
    // (compiled recursively) and `\` as an escape for the next character.
    // previous_* / current_* track the begin and end states of the fragment
    // before the most recent one and of the most recent one, which is what
    // the postfix operators and alternation rewire.
    fun compile(regex_string: string::string): util::pair<*regexState, set::set<*regexState>> {
        var first = mem::new<regexState>()->construct()
        var previous_begin = set::set<*regexState>()
        var previous_end = set::set<*regexState>()
        var current_begin = set::set(first)
        var current_end = set::set(first)
        var alternating = false
        var escapeing = false

        for (var i = 0; i < regex_string.length(); i++;) {
            if (regex_string[i] == '*' && !escapeing) {
                // loop back for repetition, and allow skipping the fragment entirely
                current_end.for_each(fun(item: *regexState) item->next_states.add_all(current_begin);)
                current_begin.add_all(previous_begin)
                current_end.add_all(previous_end)

            } else if (regex_string[i] == '+' && !escapeing) {
                // loop back for repetition only
                current_end.for_each(fun(item: *regexState) item->next_states.add_all(current_begin);)

            } else if (regex_string[i] == '?' && !escapeing) {
                // allow skipping the fragment
                current_begin.add_all(previous_begin)
                current_end.add_all(previous_end)

            } else if (regex_string[i] == '|' && !escapeing) {
                alternating = true

            } else if (regex_string[i] == '(' && !escapeing) {
                // note that we don't have a ')' case, as we skip past it with our indicies
                var perenEnd = i + 1
                var depth = 1
                // Bounded by the pattern length so an unmatched '(' cannot
                // index past the end of the string.
                // NOTE(review): an escaped paren inside the group is still
                // counted by this scan.
                while (depth > 0 && perenEnd < regex_string.length()) {
                    if (regex_string[perenEnd] == '(')
                        depth++
                    else if (regex_string[perenEnd] == ')')
                        depth--
                    perenEnd++
                }
                // NOTE: with a matching close peren, perenEnd is one past it.
                // Without one, the group is taken to run to the end of the
                // pattern, so nothing is trimmed from the slice.
                var innerStop = perenEnd - 1
                if (depth > 0)
                    innerStop = perenEnd
                var innerBeginEnd = compile(regex_string.slice(i+1, innerStop))
                i = perenEnd-1

                // NOTE(review): only the successors of innerBeginEnd.first are
                // spliced in; the head state itself does not appear to be
                // released anywhere in this function -- confirm.
                if (alternating) {
                    previous_end.for_each(fun(it: *regexState):void { it->next_states.add_all(innerBeginEnd.first->next_states); } )
                    current_begin.add_all(innerBeginEnd.first->next_states)
                    current_end.add_all(innerBeginEnd.second)
                } else {
                    current_end.for_each(fun(it: *regexState):void { it->next_states.add_all(innerBeginEnd.first->next_states); } )
                    previous_begin = current_begin
                    previous_end = current_end
                    current_begin = innerBeginEnd.first->next_states
                    current_end = innerBeginEnd.second
                }
                alternating = false

            } else if (regex_string[i] == '\\' && !escapeing) {
                escapeing = true

            } else {
                // literal character (including any character following an escape)
                var next = mem::new<regexState>()->construct(regex_string[i])
                if (alternating) {
                    previous_end.for_each(fun(it: *regexState):void { it->next_states.add(next); })
                    current_begin.add(next)
                    current_end.add(next)
                } else {
                    current_end.for_each(fun(it: *regexState):void { it->next_states.add(next); })
                    previous_begin = current_begin
                    previous_end = current_end
                    current_begin = set::set(next)
                    current_end = set::set(next)
                }
                escapeing = false
                alternating = false
            }
        }
        var beginAndEnd = util::make_pair(first, current_end)
        return beginAndEnd
    }

    // C-string overload of long_match.
    fun long_match(to_match: *char): int { return long_match(string::string(to_match)); }

    // Returns the length of the longest prefix of to_match that the regex
    // accepts, or -1 when no prefix (not even the empty one) is accepted.
    fun long_match(to_match: string::string): int {
        var next = set::set(begin)
        var longest = -1
        for (var i = 0; i < to_match.length(); i++;) {
            // no live states left: nothing longer can match
            if (next.size() == 0)
                return longest
            // the i characters consumed so far form an accepted prefix
            if (next.any_true(fun(state: *regexState):bool { return state->is_end(); }))
                longest = i
            //next = next.flatten_map<*regexState>(fun(state: *regexState): vector::vector<*regexState> { return state->match_char(to_match[i]); })
            next = next.flatten_map(fun(state: *regexState): set::set<*regexState> { return state->match_char(to_match[i]); })
        }
        // the whole input was consumed; check whether all of it is accepted
        if (next.any_true(fun(state: *regexState):bool { return state->is_end(); }))
            return to_match.length()
        return longest
    }
}