Added straight-string optimization to regex, converted a bit more of the grammar

This commit is contained in:
Nathan Braswell
2016-05-10 01:23:37 -04:00
parent 4d31ca8b0f
commit 133bf29cdf
2 changed files with 40 additions and 17 deletions

View File

@@ -154,8 +154,8 @@ augmented_alpha_alphanumeric = alpha_alphanumeric augmented_alpha_alphanumeric |
numeric = "[0-9]+" ; numeric = "[0-9]+" ;
# note the hacks around \things. Hmm, I feel like it actually shouldn't be like this. Added \\\* because I want to come back later # note the hacks around \things. Hmm, I feel like it actually shouldn't be like this. Added \\\* because I want to come back later
string = triple_quoted_string | "\"(`|[0-9]|-|=| |[a-z]|\[|]|(\\\\)|(\\n)|(\\t)|(\\\*)|(\\0)|;|'| string = triple_quoted_string | "\"([#-[]| |[]-~]|(\\\\)|(\\n)|(\\t)|(\\\*)|(\\0)|
|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|[A-Z]|{|}|\||:|<|>|\?| |(\\\"))*\"" ; |[ -!]|(\\\"))*\"" ;
comment = cpp_comment | c_comment ; comment = cpp_comment | c_comment ;
cpp_comment = "//[ -~]* cpp_comment = "//[ -~]*
" ; " ;

View File

@@ -53,6 +53,7 @@ obj regex (Object, Serializable) {
var regexString: string::string var regexString: string::string
var begin: *regexState var begin: *regexState
var referenceCounter: *int var referenceCounter: *int
var is_straight_string: bool
fun construct(): *regex { fun construct(): *regex {
regexString.construct() regexString.construct()
@@ -60,30 +61,44 @@ obj regex (Object, Serializable) {
} }
fun construct(regexStringIn: string::string): *regex { fun construct(regexStringIn: string::string): *regex {
regexString.copy_construct(&regexStringIn) regexString.copy_construct(&regexStringIn)
referenceCounter = mem::new<int>() is_straight_string = true
*referenceCounter = 1 for (var i = 0; i < regexString.length(); i++;) {
// simple implementation doesn't count escaped characters as straight string
var beginningAndEnd = compile(regexStringIn) if (regexString[i] == '\\' || regexString[i] == '(' || regexString[i] == ')' || regexString[i] == '[' || regexString[i] == '*' || regexString[i] == '+' || regexString[i] == '?' || regexString[i] == '|') {
// init our begin, and the end state as the next state of each end is_straight_string = false
begin = beginningAndEnd.first break
var end = mem::new<regexState>()->construct((1) cast char) }
beginningAndEnd.second.for_each(fun(it: *regexState): void { it->next_states.add(end); }) }
if (!is_straight_string) {
referenceCounter = mem::new<int>()
*referenceCounter = 1
var beginningAndEnd = compile(regexStringIn)
// init our begin, and the end state as the next state of each end
begin = beginningAndEnd.first
var end = mem::new<regexState>()->construct((1) cast char)
beginningAndEnd.second.for_each(fun(it: *regexState): void { it->next_states.add(end); })
}
return this return this
} }
fun copy_construct(old:*regex):void { fun copy_construct(old:*regex):void {
regexString.copy_construct(&old->regexString) regexString.copy_construct(&old->regexString)
begin = old->begin is_straight_string = old->is_straight_string
referenceCounter = old->referenceCounter if (!is_straight_string) {
*referenceCounter += 1 begin = old->begin
referenceCounter = old->referenceCounter
*referenceCounter += 1
}
} }
fun destruct():void { fun destruct():void {
regexString.destruct() regexString.destruct()
*referenceCounter -= 1 if (!is_straight_string) {
if (*referenceCounter == 0) { *referenceCounter -= 1
util::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return it->next_states; } ) if (*referenceCounter == 0) {
mem::delete(referenceCounter) util::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return it->next_states; } )
mem::delete(referenceCounter)
}
} }
} }
fun serialize(): vector::vector<char> { fun serialize(): vector::vector<char> {
@@ -195,6 +210,14 @@ obj regex (Object, Serializable) {
fun long_match(to_match: *char): int { return long_match(string::string(to_match)); } fun long_match(to_match: *char): int { return long_match(string::string(to_match)); }
fun long_match(to_match: string::string): int return long_match(to_match.getBackingMemory(), 0, to_match.length()) fun long_match(to_match: string::string): int return long_match(to_match.getBackingMemory(), 0, to_match.length())
fun long_match(to_match: *char, position: int, end: int): int { fun long_match(to_match: *char, position: int, end: int): int {
if (is_straight_string) {
if (regexString.length() > end-position)
return -1
for (var i = 0; i < regexString.length(); i++;)
if (regexString[i] != to_match[position+i])
return -1
return regexString.length();
}
var next = set::set(begin) var next = set::set(begin)
var longest = -1 var longest = -1
for (var i = 0; i < end-position; i++;) { for (var i = 0; i < end-position; i++;) {