2015-06-08 21:47:02 -04:00
|
|
|
import io
|
|
|
|
|
import vector
|
|
|
|
|
import string
|
2015-06-09 20:02:02 -04:00
|
|
|
import mem
|
2015-06-27 18:06:02 -04:00
|
|
|
import set
|
2015-06-14 11:13:30 -04:00
|
|
|
import util
|
2015-06-09 20:02:02 -04:00
|
|
|
import conversions
|
2015-06-08 21:47:02 -04:00
|
|
|
|
2015-07-04 17:02:51 -04:00
|
|
|
// Convenience overload: wraps the raw character pointer in a string::string
// and hands it to the string::string overload of regex().
fun regex(in: *char):regex {
    var pattern = string::string(in)
    return regex(pattern)
}
|
|
|
|
|
// Builds a regex object from the given pattern string by running the
// object's string constructor, then returns it by value.
fun regex(in: string::string):regex {
    var compiled.construct(in):regex
    return compiled
}
|
|
|
|
|
|
2015-06-30 02:40:46 -04:00
|
|
|
// A single node in the compiled regex state graph: the character this state
// is labelled with, plus pointers to every state that may follow it.
obj regexState (Object) {
    var character: char
    var next_states: vector::vector<*regexState>

    // Sets up a state labelled with charIn and an empty successor list.
    // Returns this so the call can be chained off mem::new.
    fun construct(charIn:char): *regexState {
        next_states.construct()
        character = charIn
        return this
    }

    // Default construction: same as above, labelled with character code 0.
    fun construct(): *regexState {
        return construct(conversions::to_char(0))
    }

    // Copies the label and the successor vector from old.
    // NOTE(review): the vector holds pointers, so the copy presumably shares
    // the successor states with old rather than cloning them - confirm.
    fun copy_construct(old:*regexState): void {
        next_states.copy_construct(&old->next_states)
        character = old->character
    }

    // Destructs the successor vector itself; this method does not delete the
    // states it points at.
    fun destruct():void {
        next_states.destruct()
    }

    // Returns the successors whose label equals input.
    fun match(input: char): vector::vector<*regexState> {
        return next_states.filter(fun(candidate:*regexState):bool { return candidate->character == input; })
    }

    // True when at least one successor is labelled with character code 1,
    // the label regex::construct gives to its end-marker state.
    fun is_end():bool {
        return next_states.any_true(fun(successor: *regexState):bool { return successor->character == 1; })
    }
}
|
|
|
|
|
|
2015-06-30 02:40:46 -04:00
|
|
|
// A compiled regular expression.
//
// The pattern is compiled into a graph of regexState nodes starting at begin.
// Copies of a regex share that graph; referenceCounter points at a shared
// count of live copies, and the graph is deleted when the count reaches zero.
obj regex (Object) {
    // the pattern text this regex was built from (used by operator==)
    var regexString: string::string
    // start state of the compiled graph
    var begin: *regexState
    // shared count of regex objects that point at the same graph
    var referenceCounter: *int

    // Default construction: an empty pattern string and a lone start state
    // with no successors, so long_match() returns -1 for every input.
    //
    // begin and referenceCounter are initialized here because destruct(),
    // copy_construct(), operator= and long_match() all dereference them;
    // leaving them unset made every one of those calls on a
    // default-constructed regex read through an uninitialized pointer.
    fun construct(): *regex {
        regexString.construct()
        begin = mem::new<regexState>()->construct()
        referenceCounter = mem::new<int>()
        *referenceCounter = 1
        return this
    }

    // Compiles regexStringIn and takes sole ownership of the resulting graph.
    fun construct(regexStringIn: string::string): *regex {
        regexString.copy_construct(&regexStringIn)
        referenceCounter = mem::new<int>()
        *referenceCounter = 1

        var beginningAndEnd = compile(regexStringIn)
        // init our begin, and the end state as the next state of each end
        begin = beginningAndEnd.first
        // the end marker is a state labelled with character code 1, which is
        // what regexState::is_end() looks for
        var end = mem::new<regexState>()->construct(conversions::to_char(1))
        beginningAndEnd.second.for_each(fun(it: *regexState): void { it->next_states.add(end); })
        return this
    }

    // Copy shares the compiled graph with old and bumps the shared count.
    fun copy_construct(old:*regex):void {
        regexString.copy_construct(&old->regexString)
        begin = old->begin
        referenceCounter = old->referenceCounter
        *referenceCounter += 1
        // Earlier deep-copy implementations, kept for reference:
        /*construct(old->regexString)*/
        /*begin = mem::safe_recursive_clone(old->begin, fun(it: *regexState, cloner: fun(*regexState):*regexState, register: fun(*regexState):void): void {*/
            /*var newOne = mem::new<regexState>()->construct(it->character)*/
            /*register(newOne)*/
            /*it->next_states.for_each(fun(next_state: *regexState) {*/
                /*newOne->next_states.add(cloner(next_state))*/
            /*})*/
        /*})*/
    }

    // Drops this object's share of the graph; the last sharer deletes every
    // state reachable from begin through next_states, then the counter.
    fun destruct():void {
        regexString.destruct()
        *referenceCounter -= 1
        if (*referenceCounter == 0) {
            mem::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return set::from_vector(it->next_states); } )
            mem::delete(referenceCounter)
        }
    }

    // Two regexes are equal when their pattern strings are equal.
    fun operator==(other: regex):bool {
        return regexString == other.regexString
    }

    // Assignment: release what we hold, then share other's graph.
    fun operator=(other: regex):void {
        destruct()
        copy_construct(&other)
    }

    // Compiles regex_string into a state graph.
    //
    // Returns a pair of (placeholder start state, vector of the states the
    // pattern can currently end on). Handles the operators * + ? | and
    // parenthesised groups (by recursion); a backslash makes the following
    // character literal.
    //
    // current_begin / current_end are the entry and exit states of the piece
    // most recently added; previous_begin / previous_end are those of the
    // piece before it, which the optional and alternation operators need.
    fun compile(regex_string: string::string): util::pair<*regexState, vector::vector<*regexState>> {
        var first = mem::new<regexState>()->construct()
        var previous_begin = vector::vector<*regexState>()
        var previous_end = vector::vector<*regexState>()
        var current_begin = vector::vector(first)
        var current_end = vector::vector(first)
        var alternating = false
        var escapeing = false

        for (var i = 0; i < regex_string.length(); i++;) {
            if (regex_string[i] == '*' && !escapeing) {
                // zero or more: loop every end back to the begins, then merge
                // in the previous begin/end sets so the piece can be skipped
                for (var j = 0; j < current_end.size; j++;)
                    current_end[j]->next_states.add_all(current_begin)
                current_begin.add_all(previous_begin)
                current_end.add_all(previous_end)

            } else if (regex_string[i] == '+' && !escapeing) {
                // one or more: loop every end back to the begins
                for (var j = 0; j < current_end.size; j++;)
                    current_end[j]->next_states.add_all(current_begin)

            } else if (regex_string[i] == '?' && !escapeing) {
                // optional: merge in the previous begin/end sets
                current_begin.add_all(previous_begin)
                current_end.add_all(previous_end)

            } else if (regex_string[i] == '|' && !escapeing) {
                // the next piece is attached as an alternative to the current one
                alternating = true

            } else if (regex_string[i] == '(' && !escapeing) {
                // note that we don't have a ')' case, as we skip past it with our indices
                // NOTE(review): this scan has no bounds check and does not
                // skip escaped parens, so it assumes balanced, unescaped
                // parentheses - confirm callers guarantee that.
                var perenEnd = i + 1
                for (var depth = 1; depth > 0; perenEnd++;)
                    if (regex_string[perenEnd] == '(')
                        depth++
                    else if (regex_string[perenEnd] == ')')
                        depth--
                var innerBeginEnd = compile(regex_string.slice(i+1, perenEnd-1))
                // NOTE: perenEnd is one past the close paren
                i = perenEnd-1

                // Only the successors of the inner placeholder start state are
                // spliced into this graph.
                // NOTE(review): innerBeginEnd.first itself is never linked in,
                // so it looks unreachable from begin when destruct() walks the
                // graph - confirm whether it should be freed here.
                if (alternating) {
                    previous_end.for_each(fun(it: *regexState):void { it->next_states.add_all(innerBeginEnd.first->next_states); } )
                    current_begin.add_all(innerBeginEnd.first->next_states)
                    current_end.add_all(innerBeginEnd.second)
                } else {
                    current_end.for_each(fun(it: *regexState):void { it->next_states.add_all(innerBeginEnd.first->next_states); } )
                    previous_begin = current_begin
                    previous_end = current_end
                    current_begin = innerBeginEnd.first->next_states
                    current_end = innerBeginEnd.second
                }
                alternating = false

            } else if (regex_string[i] == '\\' && !escapeing) {
                // the next character is taken literally
                escapeing = true

            } else {
                // literal character (or an escaped operator character)
                var next = mem::new<regexState>()->construct(regex_string[i])
                if (alternating) {
                    previous_end.for_each(fun(it: *regexState):void { it->next_states.add(next); })
                    current_begin.add(next)
                    current_end.add(next)
                } else {
                    current_end.for_each(fun(it: *regexState):void { it->next_states.add(next); })
                    previous_begin = current_begin
                    previous_end = current_end
                    current_begin = vector::vector(next)
                    current_end = vector::vector(next)
                }
                escapeing = false
                alternating = false
            }
        }
        var beginAndEnd = util::make_pair(first, current_end)
        return beginAndEnd
    }

    // Convenience overload for raw character pointers.
    fun long_match(to_match: *char): int { return long_match(string::string(to_match)); }

    // Returns the length of the longest prefix of to_match that this regex
    // accepts, or -1 when no prefix (not even the empty one) is accepted.
    fun long_match(to_match: string::string): int {
        // the set of states we are currently in
        var next = vector::vector(begin)
        var longest = -1
        for (var i = 0; i < to_match.length(); i++;) {
            // no live states left: nothing longer can match
            if (next.size == 0)
                return longest
            // some live state can reach the end marker: the first i chars match
            if (next.any_true(fun(state: *regexState):bool { return state->is_end(); }))
                longest = i
            //next = next.flatten_map<*regexState>(fun(state: *regexState): vector::vector<*regexState> { return state->match(to_match[i]); })
            next = next.flatten_map(fun(state: *regexState): vector::vector<*regexState> { return state->match(to_match[i]); })
        }
        // all input consumed: check whether the whole string matches
        if (next.any_true(fun(state: *regexState):bool { return state->is_end(); }))
            return to_match.length()
        return longest
    }
}
|
|
|
|
|
|