import io
import vector
import string
import mem
import util
import conversions

// Convenience constructor: build a compiled regex from a C string pattern.
fun regex(in: char*):regex {
    return regex(string::string(in))
}

// Convenience constructor: build a compiled regex from a string pattern.
fun regex(in: string::string):regex {
    var out.construct(in):regex
    return out
}

// One node of the matching automaton.
// A node is labelled with the character that must be consumed to enter it and
// holds pointers to the nodes that may follow it. Two labels are reserved by
// this file: character code 0 labels the start node created by compile(), and
// character code 1 labels the accepting node appended by regex::construct().
obj regexState(Object) {
    var character: char
    var next_states: vector::vector<regexState*>

    // Initialise a node labelled with charIn and no successors.
    fun construct(charIn:char): regexState* {
        character = charIn
        next_states.construct()
        return this
    }

    // Initialise a node labelled with character code 0 (used for start nodes).
    fun construct(): regexState* {
        return construct(conversions::to_char(0))
    }

    // Copy the label and the successor list of old; the successor nodes
    // themselves are shared by pointer, not duplicated.
    fun copy_construct(old:regexState*): void {
        character = old->character
        next_states.copy_construct(&old->next_states)
    }

    fun destruct():void {
        next_states.destruct()
    }

    // Return the successors of this node whose label equals input.
    fun match(input: char): vector::vector<regexState*> {
        return next_states.filter(fun(it:regexState*, input:char):bool { return it->character == input; }, input)
    }

    // True when one of this node's successors is the accepting node
    // (the node labelled with character code 1).
    fun is_end():bool {
        return next_states.any_true(fun(state: regexState*):bool { return state->character == 1; })
    }
}

// A compiled regular expression.
// Supported syntax (see compile): literal characters, the postfix operators
// '*', '+' and '?', alternation with '|', grouping with '(' ... ')', and
// '\' to take the following character literally.
// NOTE(review): nodes are allocated with mem::new and nothing in this object
// frees them; destruct() only releases regexString.
obj regex(Object) {
    var regexString: string::string
    var begin: regexState*

    // Compile regexStringIn and attach a single accepting node (label 1) as a
    // successor of every end node returned by compile().
    fun construct(regexStringIn: string::string): regex* {
        regexString.copy_construct(&regexStringIn)
        var beginningAndEnd = compile(regexStringIn)
        // init our begin, and the end state as the next state of each end
        begin = beginningAndEnd.first
        beginningAndEnd.second.do(fun(it: regexState*, end: regexState*): void { it->next_states.add(end); }, mem::new<regexState>()->construct(conversions::to_char(1)))
        return this
    }

    // Copy the pattern text and share old's node graph by pointer.
    fun copy_construct(old:regex*):void {
        begin = old->begin
        regexString.copy_construct(&old->regexString)
    }

    fun destruct():void {
        regexString.destruct()
    }

    // Assignment recompiles from other's pattern text rather than sharing its graph.
    fun operator=(other: regex):void {
        destruct()
        construct(other.regexString)
    }

    // Build the node graph for regex_string.
    // Returns a pair of (start node, list of nodes at which the pattern may end).
    //
    // While scanning, current_begin/current_end are the entry and exit nodes of
    // the most recently parsed item, and previous_begin/previous_end are those
    // of the item before it. The postfix operators are expressed with them:
    //   '*'  loop the item's exits back to its entries and also make it skippable
    //   '+'  loop the item's exits back to its entries
    //   '?'  make the item skippable (merge in the previous entries/exits)
    //   '|'  the next item is attached to previous_end instead of current_end
    fun compile(regex_string: string::string): util::pair<regexState*, vector::vector<regexState*>> {
        var first = mem::new<regexState>()->construct()
        var previous_begin = vector::vector<regexState*>()
        var previous_end = vector::vector<regexState*>()
        var current_begin = vector::vector(first)
        var current_end = vector::vector(first)
        var alternating = false
        var escapeing = false

        for (var i = 0; i < regex_string.length(); i++;) {
            if (regex_string[i] == '*' && !escapeing) {
                for (var j = 0; j < current_end.size; j++;)
                    current_end[j]->next_states.add_all(current_begin)
                current_begin.add_all(previous_begin)
                current_end.add_all(previous_end)

            } else if (regex_string[i] == '+' && !escapeing) {
                for (var j = 0; j < current_end.size; j++;)
                    current_end[j]->next_states.add_all(current_begin)

            } else if (regex_string[i] == '?' && !escapeing) {
                current_begin.add_all(previous_begin)
                current_end.add_all(previous_end)

            } else if (regex_string[i] == '|' && !escapeing) {
                alternating = true

            } else if (regex_string[i] == '(' && !escapeing) {
                // note that we don't have a ')' case, as we skip past it with our indicies
                var perenEnd = i + 1
                var depth = 1
                // Stop at the end of the pattern so an unbalanced '(' cannot
                // index past the string.
                while (depth > 0 && perenEnd < regex_string.length()) {
                    if (regex_string[perenEnd] == '(')
                        depth++
                    else if (regex_string[perenEnd] == ')')
                        depth--
                    perenEnd++
                }
                // Balanced: perenEnd is one past the close peren, so the group
                // body ends at perenEnd-1. Unbalanced: there is no close peren,
                // so the group body runs to the end of the pattern.
                var innerEnd = perenEnd - 1
                if (depth > 0)
                    innerEnd = perenEnd
                var innerBeginEnd = compile(regex_string.slice(i+1, innerEnd))
                // the loop's i++ then resumes just after the group
                i = perenEnd-1

                // The inner start node is only a placeholder; its successors
                // are the real entry nodes of the group.
                if (alternating) {
                    previous_end.do(fun(it: regexState*, innerBegin: vector::vector<regexState*>):void { it->next_states.add_all(innerBegin); }, innerBeginEnd.first->next_states)
                    current_begin.add_all(innerBeginEnd.first->next_states)
                    current_end.add_all(innerBeginEnd.second)
                } else {
                    current_end.do(fun(it: regexState*, innerBegin: vector::vector<regexState*>):void { it->next_states.add_all(innerBegin); }, innerBeginEnd.first->next_states)
                    previous_begin = current_begin
                    previous_end = current_end
                    current_begin = innerBeginEnd.first->next_states
                    current_end = innerBeginEnd.second
                }
                alternating = false

            } else if (regex_string[i] == '\\' && !escapeing) {
                escapeing = true

            } else {
                // Literal character (including any operator character that was escaped).
                var next = mem::new<regexState>()->construct(regex_string[i])
                if (alternating) {
                    previous_end.do(fun(it: regexState*, next: regexState*):void { it->next_states.add(next); }, next)
                    current_begin.add(next)
                    current_end.add(next)
                } else {
                    current_end.do(fun(it: regexState*, next: regexState*):void { it->next_states.add(next); }, next)
                    previous_begin = current_begin
                    previous_end = current_end
                    current_begin = vector::vector(next)
                    current_end = vector::vector(next)
                }
                escapeing = false
                alternating = false
            }
        }
        var beginAndEnd = util::make_pair(first, current_end)
        return beginAndEnd
    }

    // C string overload of long_match.
    fun long_match(to_match: char*): int {
        return long_match(string::string(to_match));
    }

    // Return the length of the longest prefix of to_match accepted by this
    // regex, or -1 when no prefix (not even the empty one) is accepted.
    fun long_match(to_match: string::string): int {
        var next = vector::vector(begin)
        var longest = -1
        for (var i = 0; i < to_match.length(); i++;) {
            // no live states left: nothing longer can match
            if (next.size == 0)
                return longest
            // some live state can accept here, so the first i characters match
            if (next.any_true(fun(state: regexState*):bool { return state->is_end(); }))
                longest = i
            // the current character is handed to the lambda as an explicit argument
            next = next.flatten_map(fun(state: regexState*, c:char): vector::vector<regexState*> { return state->match(c); }, to_match[i])
        }
        // whole input consumed: it matches in full if any live state can accept
        if (next.any_true(fun(state: regexState*):bool { return state->is_end(); }))
            return to_match.length()
        return longest
    }
}