Optimizations, regex character ranges

This commit is contained in:
Nathan Braswell
2016-05-05 04:51:10 -04:00
parent 02c77899b8
commit 9d7a65294f
8 changed files with 100 additions and 76 deletions

View File

@@ -638,8 +638,7 @@ obj ast_transformation (Object) {
if (factor_part->children.size == 1) {
/*println("Factor has only one child!")*/
var inner_unarad = get_node("unarad", factor_part)
if (get_node("\"[\"", inner_unarad)) {
/*println("Inner Unarad has [!")*/
if (get_node("\"]\"", inner_unarad)) {
var assign_to = transform(get_node("unarad", inner_unarad), scope, template_replacements)
var assign_idx = transform(get_node("expression", inner_unarad), scope, template_replacements)
var possible_bracket_assign = find_and_make_any_operator_overload_call(string("[]="), vector(assign_to, assign_idx, to_assign), scope, template_replacements)

View File

@@ -179,22 +179,22 @@ obj grammer (Object, Serializable) {
}
// Computes the FIRST set of the symbol sequence rhs.
// Walks rhs left to right, merging each symbol's first_set_map entry into toRet,
// and stops after the first symbol whose entry lacks the null symbol. The null
// symbol is stripped from an entry unless that symbol is the last one in rhs.
// An empty rhs yields a set holding only the null symbol.
// NOTE(review): the body text appears twice below; this looks like a diff view
// with its +/- markers stripped (presumably a re-indentation) - confirm which
// copy is live before editing.
fun first_vector(rhs: ref vector::vector<symbol::symbol>): set::set<symbol::symbol> {
var toRet = set::set<symbol::symbol>()
if (rhs.size) {
for (var i = 0; i < rhs.size; i++;) {
// NOTE(review): lookahead is mutated below; presumably it is a copy of the map entry - confirm
var lookahead = first_set_map[rhs[i]]
if (lookahead.contains(symbol::null_symbol())) {
// remove the null if this is not the last in the rule
if (i != rhs.size-1)
lookahead.remove(symbol::null_symbol())
toRet.add(lookahead)
} else {
toRet.add(lookahead)
break
}
if (rhs.size) {
for (var i = 0; i < rhs.size; i++;) {
var lookahead = first_set_map[rhs[i]]
if (lookahead.contains(symbol::null_symbol())) {
// remove the null if this is not the last in the rule
if (i != rhs.size-1)
lookahead.remove(symbol::null_symbol())
toRet.add(lookahead)
} else {
toRet.add(lookahead)
break
}
} else {
toRet.add(symbol::null_symbol())
}
} else {
toRet.add(symbol::null_symbol())
}
return toRet
}

View File

@@ -19,6 +19,7 @@ obj parser (Object) {
var to_shift: stack< pair<*tree<int>, int> >
var SPPFStepNodes: vector< pair<*tree<symbol>, int> >
var packed_map: map<*tree<symbol>, bool>
var reduces_to_null_map: map<vector<symbol>, bool>
fun construct(grammerIn: grammer): *parser {
input.construct()
@@ -28,6 +29,7 @@ obj parser (Object) {
to_shift.construct()
SPPFStepNodes.construct()
packed_map.construct()
reduces_to_null_map.construct()
return this
}
fun copy_construct(old: *parser) {
@@ -38,6 +40,7 @@ obj parser (Object) {
to_shift.copy_construct(&old->to_shift)
SPPFStepNodes.copy_construct(&old->SPPFStepNodes)
packed_map.copy_construct(&old->packed_map)
reduces_to_null_map.copy_construct(&old->reduces_to_null_map)
}
fun operator=(old: ref parser) {
destruct()
@@ -51,6 +54,7 @@ obj parser (Object) {
to_shift.destruct()
SPPFStepNodes.destruct()
packed_map.destruct()
reduces_to_null_map.destruct()
}
fun parse_input(inputStr: string, name: string): *tree<symbol> {
@@ -386,7 +390,9 @@ obj parser (Object) {
return r.position == 0 && reduces_to_null(r)
}
// Returns whether the first set of r.rhs (as computed by gram.first_vector)
// contains the null symbol.
// The guarded form memoizes the answer in reduces_to_null_map, keyed by r.rhs,
// so gram.first_vector is only called the first time a given rhs is seen.
// NOTE(review): both the direct return and the memoized form are listed; this
// looks like a diff view with +/- markers stripped, so presumably the direct
// return is the replaced line - confirm.
fun reduces_to_null(r: ref rule): bool {
return gram.first_vector(r.rhs).contains(null_symbol())
if (!reduces_to_null_map.contains_key(r.rhs))
reduces_to_null_map[r.rhs] = gram.first_vector(r.rhs).contains(null_symbol())
return reduces_to_null_map[r.rhs]
}
fun get_nullable_parts(r: ref rule): *tree<symbol> {
if (reduces_to_null(r))

View File

@@ -1,4 +1,6 @@
import io
import string
import ast_transformation
import vector
import string
import mem
@@ -15,10 +17,16 @@ fun regex(in: string::string):regex {
}
obj regexState (Object) {
var character: char
// if only one character, both are the same
var characterBegin: char
var characterEnd: char
var next_states: set::set<*regexState>
// Single-character constructor.
// The delegating form forwards to construct(charIn, charIn), so characterBegin
// and characterEnd both equal charIn.
// NOTE(review): the assignment to `character` and the delegating return are
// both listed; this looks like a diff view with +/- markers stripped, so
// presumably the assignment is the replaced line - confirm.
fun construct(charIn:char): *regexState {
character = charIn
return construct(charIn, charIn)
}
// Range constructor: stores charFirst in characterBegin and charSecond in
// characterEnd, constructs the (empty) next_states set, and returns this so
// calls can be chained after allocation.
// No ordering check is made between charFirst and charSecond.
fun construct(charFirst:char, charSecond:char): *regexState {
characterBegin = charFirst
characterEnd = charSecond
next_states.construct()
return this
}
@@ -26,17 +34,18 @@ obj regexState (Object) {
return construct((0) cast char)
}
// Copy constructor: copies the character fields from old and copy-constructs
// next_states from old's next_states.
// next_states holds *regexState pointers, so the successor states themselves
// are not duplicated by this function.
// NOTE(review): assignments to `character` and to characterBegin/characterEnd
// are both listed; this looks like a diff view with +/- markers stripped, so
// presumably the `character` line is the replaced one - confirm.
fun copy_construct(old:*regexState): void {
character = old->character
characterBegin = old->characterBegin
characterEnd = old->characterEnd
next_states.copy_construct(&old->next_states)
}
// Destructor: destructs the next_states set. Nothing else is released here.
fun destruct():void {
next_states.destruct()
}
// Returns the subset of next_states that accept input.
// In the range form a state accepts input when
// characterBegin <= input <= characterEnd (both ends inclusive).
// NOTE(review): an exact-match filter on `character` and the range filter are
// both listed; this looks like a diff view with +/- markers stripped, so
// presumably the exact-match line is the replaced one - confirm.
fun match_char(input: char): set::set<*regexState> {
return next_states.filter(fun(it:*regexState):bool { return it->character == input; })
return next_states.filter(fun(it:*regexState):bool { return it->characterBegin <= input && input <= it->characterEnd; })
}
// Returns true when at least one state in next_states carries the value 1 in
// its character field (characterBegin in the range form).
// NOTE(review): 1 is presumably the sentinel for an accepting/end state - verify
// against where such states are created.
// NOTE(review): checks on `character` and on `characterBegin` are both listed;
// this looks like a diff view with +/- markers stripped, so presumably the
// `character` line is the replaced one - confirm.
fun is_end():bool {
return next_states.any_true(fun(state: *regexState):bool { return state->character == 1; })
return next_states.any_true(fun(state: *regexState):bool { return state->characterBegin == 1; })
}
}
@@ -67,14 +76,6 @@ obj regex (Object, Serializable) {
begin = old->begin
referenceCounter = old->referenceCounter
*referenceCounter += 1
/*construct(old->regexString)*/
/*begin = mem::safe_recursive_clone(old->begin, fun(it: *regexState, cloner: fun(*regexState):*regexState, register: fun(*regexState):void): void {*/
/*var newOne = mem::new<regexState>()->construct(it->character)*/
/*register(newOne)*/
/*it->next_states.for_each(fun(next_state: *regexState) {*/
/*newOne->next_states.add(cloner(next_state))*/
/*})*/
/*})*/
}
fun destruct():void {
@@ -105,6 +106,7 @@ obj regex (Object, Serializable) {
}
fun compile(regex_string: string::string): util::pair<*regexState, set::set<*regexState>> {
/*io::println(regex_string)*/
var first = mem::new<regexState>()->construct()
var previous_begin = set::set<*regexState>()
var previous_end = set::set<*regexState>()
@@ -132,11 +134,14 @@ obj regex (Object, Serializable) {
} else if (regex_string[i] == '(' && !escapeing) {
// note that we don't have a ')' case, as we skip past it with our indices
var perenEnd = i + 1
for (var depth = 1; depth > 0; perenEnd++;)
for (var depth = 1; depth > 0; perenEnd++;) {
if (perenEnd >= regex_string.length())
ast_transformation::error(string::string("can't find matching peren in: ") + regex_string)
if (regex_string[perenEnd] == '(')
depth++
else if (regex_string[perenEnd] == ')')
depth--
}
var innerBeginEnd = compile(regex_string.slice(i+1, perenEnd-1))
// NOTE: perenEnd is one past the close paren
i = perenEnd-1
@@ -158,7 +163,13 @@ obj regex (Object, Serializable) {
escapeing = true
} else {
var next = mem::new<regexState>()->construct(regex_string[i])
var next: *regexState
if (regex_string[i] == '[' && !escapeing) {
next = mem::new<regexState>()->construct(regex_string[i+1], regex_string[i+3])
i += 4 // [a-b] is 5, i++ adds one
} else {
next = mem::new<regexState>()->construct(regex_string[i])
}
if (alternating) {
previous_end.for_each(fun(it: *regexState):void { it->next_states.add(next); })
current_begin.add(next)

View File

@@ -34,8 +34,9 @@ obj set<T> (Object, Serializable) {
data.copy_construct(&old->data)
}
// Assignment from another set.
// The active statement assigns rhs.data to data, delegating to the assignment
// operator of data's type; the destruct()/copy_construct() pair is kept only as
// commented-out text.
// NOTE(review): the uncommented destruct()/copy_construct() lines are also
// listed; this looks like a diff view with +/- markers stripped, so presumably
// they are the replaced lines - confirm.
fun operator=(rhs: ref set<T>) {
destruct()
copy_construct(&rhs)
/*destruct()*/
/*copy_construct(&rhs)*/
data = rhs.data
}
fun serialize(): vector::vector<char> {
return serialize::serialize(data)
@@ -43,12 +44,12 @@ obj set<T> (Object, Serializable) {
// Restores this set from serialized bytes by delegating to data.unserialize
// with the same buffer and position, and returns that call's int result.
// NOTE(review): the result is presumably the position after the consumed bytes - confirm.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
return data.unserialize(it, pos)
}
// Set equality: false when the sizes differ; otherwise true exactly when no
// element of this set's data is missing from rhs.
// NOTE(review): two signature lines are listed (rhs by value, then rhs by ref);
// this looks like a diff view with +/- markers stripped, so presumably the ref
// form is the one kept - confirm.
fun operator==(rhs: set<T>): bool {
fun operator==(rhs: ref set<T>): bool {
if (size() != rhs.size())
return false
return !data.any_true( fun(item: T): bool return !rhs.contains(item); )
}
// Set inequality: the negation of operator== applied to this set and rhs.
// NOTE(review): two signature lines are listed (rhs by value, then rhs by ref);
// this looks like a diff view with +/- markers stripped, so presumably the ref
// form is the one kept - confirm.
fun operator!=(rhs: set<T>): bool {
fun operator!=(rhs: ref set<T>): bool {
return ! (*this == rhs)
}
fun destruct() {
@@ -57,10 +58,10 @@ obj set<T> (Object, Serializable) {
// Returns the number of stored elements, read from data.size.
fun size():int {
return data.size
}
// Subset test: true when items is empty, or when no element of items is
// missing from this set (each element is checked with the single-item contains).
// NOTE(review): two signature lines are listed (items by value, then by ref);
// this looks like a diff view with +/- markers stripped, so presumably the ref
// form is the one kept - confirm.
fun contains(items: set<T>): bool {
fun contains(items: ref set<T>): bool {
return items.size() == 0 || !items.any_true( fun(item: T): bool return !contains(item); )
}
// Membership test: true when data.find(item) returns something other than -1.
// NOTE(review): two signature lines are listed (item by value, then by ref);
// this looks like a diff view with +/- markers stripped, so presumably the ref
// form is the one kept - confirm.
fun contains(item: T): bool {
fun contains(item: ref T): bool {
return data.find(item) != -1
}
fun operator+=(item: ref T) {
@@ -84,7 +85,7 @@ obj set<T> (Object, Serializable) {
// Adds every element of items to this set by calling the single-item add on
// each one in turn.
fun add(items: ref set<T>) {
items.for_each( fun(item: ref T) add(item); )
}
fun remove(item: T) {
fun remove(item: ref T) {
var idx = data.find(item)
if (idx == -1) {
/*io::println("CANNOT FIND ITEM TO REMOVE")*/

View File

@@ -94,8 +94,9 @@ obj string (Object, Serializable) {
}
// Assignment from another string.
// The active statement assigns str.data to data, delegating to the assignment
// operator of data's type; the destruct()/copy_construct() pair is kept only as
// commented-out text.
// NOTE(review): the uncommented destruct()/copy_construct() lines are also
// listed; this looks like a diff view with +/- markers stripped, so presumably
// they are the replaced lines - confirm.
fun operator=(str: ref string): void {
destruct();
data.copy_construct(&str.data)
/*destruct();*/
/*data.copy_construct(&str.data)*/
data = str.data
}
fun destruct():void {
@@ -147,8 +148,9 @@ obj string (Object, Serializable) {
}
// Concatenation: returns a new string constructed from data + str.data.
// Neither this string nor str is modified by the statements shown.
// The active form reads str.data directly; the variant that first builds a
// temporary newStr from str is kept only as commented-out text.
// NOTE(review): the uncommented newStr variant is also listed; this looks like
// a diff view with +/- markers stripped, so presumably it is the replaced
// version - confirm.
fun operator+(str: ref string): string {
var newStr.construct(str):string
var ret.construct(data + newStr.data):string
/*var newStr.construct(str):string*/
/*var ret.construct(data + newStr.data):string*/
var ret.construct(data + str.data):string
return ret
}
@@ -210,7 +212,7 @@ obj string (Object, Serializable) {
out.add(current)
return out
}
fun join(to_join: vector::vector<string>): string {
fun join(to_join: ref vector::vector<string>): string {
var to_ret = to_join.first()
for (var i = 1; i < to_join.size; i++;)
to_ret += *this + to_join[i]

View File

@@ -65,31 +65,36 @@ obj vector<T> (Object, Serializable) {
data = 0
}
// Assignment: makes this vector hold the same elements as other.
// When this vector currently has fewer elements than other it is destructed and
// copy-constructed from other; otherwise it is cleared and other's elements are
// appended one at a time with addEnd.
// NOTE(review): a by-value signature with an unguarded destruct()/copy_construct()
// pair is listed before the by-ref signature; this looks like a diff view with
// +/- markers stripped, so presumably those first lines are the replaced ones - confirm.
// NOTE(review): other is taken by ref. If it aliases this vector, the else
// branch runs clear() before other.size is read, which presumably drops the
// contents on self-assignment - confirm callers never do v = v, or add a guard.
fun operator=(other:vector<T>):void {
destruct()
copy_construct(&other)
fun operator=(other:ref vector<T>):void {
if (size < other.size) {
destruct()
copy_construct(&other)
} else {
clear()
for (var i = 0; i < other.size; i++;)
addEnd(other.get(i))
}
}
// Concatenation: returns a new vector holding this vector's elements followed
// by other's elements. Neither operand is modified by the statements shown.
// The by-ref form constructs newVec with size+other.size and appends both
// operands element by element.
// NOTE(review): construct(int) presumably reserves capacity rather than
// creating elements - confirm, since addEnd is called for every element afterwards.
// NOTE(review): a by-value signature that copy-constructs newVec from this is
// listed before the by-ref signature; this looks like a diff view with +/-
// markers stripped, so presumably those first lines are the replaced ones - confirm.
fun operator+(other:vector<T>):vector<T> {
// lets be at least a little bit smarter by copy_constructing our copy.
// We could get a lot better than this by initially creating enough space
// for both and copy_constructing all of them, but this is just a quick fix
var newVec.copy_construct(this):vector<T>
fun operator+(other: ref vector<T>):vector<T> {
var newVec.construct(size+other.size):vector<T>
for (var i = 0; i < size; i++;)
newVec.addEnd(get(i))
for (var i = 0; i < other.size; i++;)
newVec.addEnd(other.get(i))
return newVec
}
// Returns a copy of this vector with other appended at the end; this vector
// itself is not modified by the statements shown.
// NOTE(review): two signature lines are listed (other by value, then by ref);
// this looks like a diff view with +/- markers stripped, so presumably the ref
// form is the one kept - confirm.
fun operator+(other: T):vector<T> {
fun operator+(other: ref T):vector<T> {
var newVec.copy_construct(this):vector<T>
newVec.addEnd(other)
return newVec
}
// Appends other to the end of this vector via addEnd.
// NOTE(review): two signature lines are listed (other by value, then by ref);
// this looks like a diff view with +/- markers stripped, so presumably the ref
// form is the one kept - confirm.
fun operator+=(other: T):void {
fun operator+=(other: ref T):void {
addEnd(other)
}
// Appends every element of other, in order, to the end of this vector.
// NOTE(review): two signature lines are listed (other by value, then by ref);
// this looks like a diff view with +/- markers stripped, so presumably the ref
// form is the one kept - confirm.
// NOTE(review): with other taken by ref, v += v makes the loop bound other.size
// track this vector's own size, which presumably grows on every addEnd - confirm
// callers never alias, or snapshot the size before looping.
fun operator+=(other:vector<T>):void {
fun operator+=(other: ref vector<T>):void {
for (var i = 0; i < other.size; i++;)
addEnd(other.get(i))
}
@@ -180,7 +185,7 @@ obj vector<T> (Object, Serializable) {
return true
}
fun set(index: int, dataIn: T): void {
fun set(index: int, dataIn: ref T): void {
if (index < 0 || index >= size)
return;
data[index] = dataIn;
@@ -190,7 +195,7 @@ obj vector<T> (Object, Serializable) {
addEnd(dataIn[i]);
}
// same darn trick
// Appends dataIn with addEnd only when contains(dataIn) reports it is not
// already present; otherwise does nothing.
// NOTE(review): two signature lines are listed (dataIn by value, then by ref);
// this looks like a diff view with +/- markers stripped, so presumably the ref
// form is the one kept - confirm.
fun add_unique<U>(dataIn: U): void {
fun add_unique<U>(dataIn: ref U): void {
if (!contains(dataIn))
addEnd(dataIn)
}