Added straight-string optimization to regex, converted a bit more of the grammar
This commit is contained in:
@@ -154,8 +154,8 @@ augmented_alpha_alphanumeric = alpha_alphanumeric augmented_alpha_alphanumeric |
|
|||||||
|
|
||||||
numeric = "[0-9]+" ;
|
numeric = "[0-9]+" ;
|
||||||
# note the hacks around \things. Hmm, I feel like it actually shouldn't be like this. Added \\\* because I want to come back later
|
# note the hacks around \things. Hmm, I feel like it actually shouldn't be like this. Added \\\* because I want to come back later
|
||||||
string = triple_quoted_string | "\"(`|[0-9]|-|=| |[a-z]|\[|]|(\\\\)|(\\n)|(\\t)|(\\\*)|(\\0)|;|'|
|
string = triple_quoted_string | "\"([#-[]| |[]-~]|(\\\\)|(\\n)|(\\t)|(\\\*)|(\\0)|
|
||||||
|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|[A-Z]|{|}|\||:|<|>|\?| |(\\\"))*\"" ;
|
|[ -!]|(\\\"))*\"" ;
|
||||||
comment = cpp_comment | c_comment ;
|
comment = cpp_comment | c_comment ;
|
||||||
cpp_comment = "//[ -~]*
|
cpp_comment = "//[ -~]*
|
||||||
" ;
|
" ;
|
||||||
|
|||||||
@@ -53,6 +53,7 @@ obj regex (Object, Serializable) {
|
|||||||
var regexString: string::string
|
var regexString: string::string
|
||||||
var begin: *regexState
|
var begin: *regexState
|
||||||
var referenceCounter: *int
|
var referenceCounter: *int
|
||||||
|
var is_straight_string: bool
|
||||||
|
|
||||||
fun construct(): *regex {
|
fun construct(): *regex {
|
||||||
regexString.construct()
|
regexString.construct()
|
||||||
@@ -60,32 +61,46 @@ obj regex (Object, Serializable) {
|
|||||||
}
|
}
|
||||||
fun construct(regexStringIn: string::string): *regex {
|
fun construct(regexStringIn: string::string): *regex {
|
||||||
regexString.copy_construct(&regexStringIn)
|
regexString.copy_construct(&regexStringIn)
|
||||||
|
is_straight_string = true
|
||||||
|
for (var i = 0; i < regexString.length(); i++;) {
|
||||||
|
// simple implementation doesn't count escaped characters as straight string
|
||||||
|
if (regexString[i] == '\\' || regexString[i] == '(' || regexString[i] == ')' || regexString[i] == '[' || regexString[i] == '*' || regexString[i] == '+' || regexString[i] == '?' || regexString[i] == '|') {
|
||||||
|
is_straight_string = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!is_straight_string) {
|
||||||
referenceCounter = mem::new<int>()
|
referenceCounter = mem::new<int>()
|
||||||
*referenceCounter = 1
|
*referenceCounter = 1
|
||||||
|
|
||||||
var beginningAndEnd = compile(regexStringIn)
|
var beginningAndEnd = compile(regexStringIn)
|
||||||
// init our begin, and the end state as the next state of each end
|
// init our begin, and the end state as the next state of each end
|
||||||
begin = beginningAndEnd.first
|
begin = beginningAndEnd.first
|
||||||
var end = mem::new<regexState>()->construct((1) cast char)
|
var end = mem::new<regexState>()->construct((1) cast char)
|
||||||
beginningAndEnd.second.for_each(fun(it: *regexState): void { it->next_states.add(end); })
|
beginningAndEnd.second.for_each(fun(it: *regexState): void { it->next_states.add(end); })
|
||||||
|
}
|
||||||
return this
|
return this
|
||||||
}
|
}
|
||||||
|
|
||||||
fun copy_construct(old:*regex):void {
|
fun copy_construct(old:*regex):void {
|
||||||
regexString.copy_construct(&old->regexString)
|
regexString.copy_construct(&old->regexString)
|
||||||
|
is_straight_string = old->is_straight_string
|
||||||
|
if (!is_straight_string) {
|
||||||
begin = old->begin
|
begin = old->begin
|
||||||
referenceCounter = old->referenceCounter
|
referenceCounter = old->referenceCounter
|
||||||
*referenceCounter += 1
|
*referenceCounter += 1
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fun destruct():void {
|
fun destruct():void {
|
||||||
regexString.destruct()
|
regexString.destruct()
|
||||||
|
if (!is_straight_string) {
|
||||||
*referenceCounter -= 1
|
*referenceCounter -= 1
|
||||||
if (*referenceCounter == 0) {
|
if (*referenceCounter == 0) {
|
||||||
util::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return it->next_states; } )
|
util::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return it->next_states; } )
|
||||||
mem::delete(referenceCounter)
|
mem::delete(referenceCounter)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
fun serialize(): vector::vector<char> {
|
fun serialize(): vector::vector<char> {
|
||||||
return serialize::serialize(regexString)
|
return serialize::serialize(regexString)
|
||||||
}
|
}
|
||||||
@@ -195,6 +210,14 @@ obj regex (Object, Serializable) {
|
|||||||
fun long_match(to_match: *char): int { return long_match(string::string(to_match)); }
|
fun long_match(to_match: *char): int { return long_match(string::string(to_match)); }
|
||||||
fun long_match(to_match: string::string): int return long_match(to_match.getBackingMemory(), 0, to_match.length())
|
fun long_match(to_match: string::string): int return long_match(to_match.getBackingMemory(), 0, to_match.length())
|
||||||
fun long_match(to_match: *char, position: int, end: int): int {
|
fun long_match(to_match: *char, position: int, end: int): int {
|
||||||
|
if (is_straight_string) {
|
||||||
|
if (regexString.length() > end-position)
|
||||||
|
return -1
|
||||||
|
for (var i = 0; i < regexString.length(); i++;)
|
||||||
|
if (regexString[i] != to_match[position+i])
|
||||||
|
return -1
|
||||||
|
return regexString.length();
|
||||||
|
}
|
||||||
var next = set::set(begin)
|
var next = set::set(begin)
|
||||||
var longest = -1
|
var longest = -1
|
||||||
for (var i = 0; i < end-position; i++;) {
|
for (var i = 0; i < end-position; i++;) {
|
||||||
|
|||||||
Reference in New Issue
Block a user