import io
import vector
import string
import mem
import set
import util
import conversions

// Builds a regex from a C-string pattern by wrapping it in a string::string.
fun regex(in: char*):regex {
    return regex(string::string(in))
}

// Builds a regex from a string pattern; the pattern is compiled by regex::construct.
fun regex(in: string::string):regex {
    var out.construct(in):regex
    return out
}

// One node of the state graph built by regex::compile.
// `character` is the input character that leads INTO this state.
// Two values are used as sentinels by this file:
//   0 - the start state created by the no-argument construct()
//   1 - the accepting state appended by regex::construct (see is_end)
obj regexState (Object) {
    var character: char
    var next_states: vector::vector<regexState*>

    // Initializes the state with the character that enters it and no successors.
    fun construct(charIn:char): regexState* {
        character = charIn
        next_states.construct()
        return this
    }

    // Initializes a start state (sentinel character 0).
    fun construct(): regexState* {
        return construct(conversions::to_char(0))
    }

    // Copies the character and the successor vector itself; the successor
    // states are pointers and are not duplicated.
    fun copy_construct(old:regexState*): void {
        character = old->character
        next_states.copy_construct(&old->next_states)
    }

    // Destroys only the successor vector, not the states it points to.
    fun destruct():void {
        next_states.destruct()
    }

    // Returns the successors that are entered on `input`.
    fun match(input: char): vector::vector<regexState*> {
        return next_states.filter(fun(it:regexState*):bool { return it->character == input; })
    }

    // True when one of the successors is the accepting sentinel (character 1).
    fun is_end():bool {
        return next_states.any_true(fun(state: regexState*):bool { return state->character == 1; })
    }
}

// A compiled regular expression: the pattern text plus the start state of its graph.
// Operators handled by compile: * + ? | ( ) and \ as the escape character.
obj regex (Object) {
    var regexString: string::string
    var begin: regexState*

    // Stores a copy of the pattern, compiles it, and links every final state
    // of the compiled graph to a freshly allocated accepting state.
    fun construct(regexStringIn: string::string): regex* {
        regexString.copy_construct(&regexStringIn)
        var beginningAndEnd = compile(regexStringIn)
        // init our begin, and the end state as the next state of each end
        begin = beginningAndEnd.first
        var end = mem::new<regexState>()->construct(conversions::to_char(1))
        beginningAndEnd.second.for_each(fun(it: regexState*): void { it->next_states.add(end); })
        return this
    }

    // Copies by recompiling from the old pattern text, so the two regexes
    // do not share state nodes.
    fun copy_construct(old:regex*):void {
        construct(old->regexString)
    }

    // Releases the pattern string and every state reachable from `begin`.
    fun destruct():void {
        regexString.destruct()
        mem::safe_recursive_delete(begin, fun(it: regexState*): set::set<regexState*> { return set::from_vector(it->next_states); } )
    }

    // Assignment: tear down the current graph and recompile from the other pattern.
    fun operator=(other: regex):void {
        destruct()
        construct(other.regexString)
    }

    // Compiles `regex_string` into a state graph.
    // Returns a pair of (start state, vector of final states); the caller is
    // responsible for attaching an accepting state to the final states.
    //
    // Bookkeeping while scanning the pattern:
    //   current_begin / current_end   - entry and exit states of the most recent item
    //   previous_begin / previous_end - the same for the item before it; used to let
    //                                   '*' and '?' skip the current item and to let
    //                                   '|' attach an alternative at the same point
    fun compile(regex_string: string::string): util::pair<regexState*, vector::vector<regexState*>> {
        var first = mem::new<regexState>()->construct()
        var previous_begin = vector::vector<regexState*>()
        var previous_end = vector::vector<regexState*>()
        var current_begin = vector::vector(first)
        var current_end = vector::vector(first)
        var alternating = false
        var escapeing = false

        for (var i = 0; i < regex_string.length(); i++;) {
            if (regex_string[i] == '*' && !escapeing) {
                // zero or more: loop the item back on itself, and allow skipping it
                for (var j = 0; j < current_end.size; j++;)
                    current_end[j]->next_states.add_all(current_begin)
                current_begin.add_all(previous_begin)
                current_end.add_all(previous_end)

            } else if (regex_string[i] == '+' && !escapeing) {
                // one or more: loop the item back on itself only
                for (var j = 0; j < current_end.size; j++;)
                    current_end[j]->next_states.add_all(current_begin)

            } else if (regex_string[i] == '?' && !escapeing) {
                // zero or one: allow skipping the item only
                current_begin.add_all(previous_begin)
                current_end.add_all(previous_end)

            } else if (regex_string[i] == '|' && !escapeing) {
                // the next item becomes an alternative to the current one
                alternating = true

            } else if (regex_string[i] == '(' && !escapeing) {
                // note that we don't have a ')' case, as we skip past it with our indicies
                var perenEnd = i + 1
                var depth = 1
                // Stop at the end of the pattern so an unbalanced '(' cannot
                // index past the string.
                while (depth > 0 && perenEnd < regex_string.length()) {
                    if (regex_string[perenEnd] == '(') {
                        depth++
                    } else if (regex_string[perenEnd] == ')') {
                        depth--
                    }
                    perenEnd++
                }
                // Balanced: perenEnd is one past the close peren, so the group body
                // ends at perenEnd-1. Unbalanced: the group is treated as running
                // to the end of the pattern.
                var innerEnd = perenEnd - 1
                if (depth > 0) {
                    innerEnd = perenEnd
                }
                var innerBeginEnd = compile(regex_string.slice(i+1, innerEnd))
                // the loop's i++ then lands on the first character after the group
                i = perenEnd-1

                // NOTE(review): only innerBeginEnd.first->next_states is spliced into
                // this graph; the inner start node itself does not appear to be
                // reachable from `begin` afterwards, so destruct() may never free it.
                // Confirm and free it here if so.
                if (alternating) {
                    previous_end.for_each(fun(it: regexState*):void { it->next_states.add_all(innerBeginEnd.first->next_states); } )
                    current_begin.add_all(innerBeginEnd.first->next_states)
                    current_end.add_all(innerBeginEnd.second)
                } else {
                    current_end.for_each(fun(it: regexState*):void { it->next_states.add_all(innerBeginEnd.first->next_states); } )
                    previous_begin = current_begin
                    previous_end = current_end
                    current_begin = innerBeginEnd.first->next_states
                    current_end = innerBeginEnd.second
                }
                alternating = false

            } else if (regex_string[i] == '\\' && !escapeing) {
                // the next character is taken literally
                escapeing = true

            } else {
                // literal character (or an escaped operator)
                var next = mem::new<regexState>()->construct(regex_string[i])
                if (alternating) {
                    previous_end.for_each(fun(it: regexState*):void { it->next_states.add(next); })
                    current_begin.add(next)
                    current_end.add(next)
                } else {
                    current_end.for_each(fun(it: regexState*):void { it->next_states.add(next); })
                    previous_begin = current_begin
                    previous_end = current_end
                    current_begin = vector::vector(next)
                    current_end = vector::vector(next)
                }
                escapeing = false
                alternating = false
            }
        }

        var beginAndEnd = util::make_pair(first, current_end)
        return beginAndEnd
    }

    // C-string convenience overload of long_match.
    fun long_match(to_match: char*): int {
        return long_match(string::string(to_match));
    }

    // Returns the length of the longest prefix of `to_match` accepted by this
    // regex, or -1 if no prefix (including the empty one) is accepted.
    fun long_match(to_match: string::string): int {
        var next = vector::vector(begin)
        var longest = -1
        for (var i = 0; i < to_match.length(); i++;) {
            // no live states left: nothing longer can match
            if (next.size == 0)
                return longest
            // some live state can accept here, so the first i characters match
            if (next.any_true(fun(state: regexState*):bool { return state->is_end(); }))
                longest = i
            next = next.flatten_map(fun(state: regexState*): vector::vector<regexState*> { return state->match(to_match[i]); })
        }
        // the whole input was consumed; check whether we ended in an accepting position
        if (next.any_true(fun(state: regexState*):bool { return state->is_end(); }))
            return to_match.length()
        return longest
    }
}