Optimizations, regex character ranges

2016-05-05 04:51:10 -04:00
parent 02c77899b8
commit 9d7a65294f
8 changed files with 100 additions and 76 deletions
--- a/stdlib/ast_transformation.krak
+++ b/stdlib/ast_transformation.krak
@@ -638,8 +638,7 @@ obj ast_transformation (Object) {
            if (factor_part->children.size == 1) {
                /*println("Factor has only one child!")*/
                var inner_unarad = get_node("unarad", factor_part)
-                if (get_node("\"[\"", inner_unarad)) {
-                    /*println("Inner Unarad has [!")*/
+                if (get_node("\"]\"", inner_unarad)) {
                    var assign_to = transform(get_node("unarad", inner_unarad), scope, template_replacements)
                    var assign_idx = transform(get_node("expression", inner_unarad), scope, template_replacements)
                    var possible_bracket_assign = find_and_make_any_operator_overload_call(string("[]="), vector(assign_to, assign_idx, to_assign), scope, template_replacements)
--- a/stdlib/grammer.krak
+++ b/stdlib/grammer.krak
@@ -179,22 +179,22 @@ obj grammer (Object, Serializable) {
    }
    fun first_vector(rhs: ref vector::vector<symbol::symbol>): set::set<symbol::symbol> { // unions the first_set_map entries of rhs's symbols left to right, stopping after the first entry that does not contain the null symbol
        var toRet = set::set<symbol::symbol>()
-            if (rhs.size) {
-                for (var i = 0; i < rhs.size; i++;) {
-                    var lookahead = first_set_map[rhs[i]]
-                        if (lookahead.contains(symbol::null_symbol())) {
-                            // remove the null if this is not the last in the rule
-                            if (i != rhs.size-1)
-                                lookahead.remove(symbol::null_symbol())
-                                    toRet.add(lookahead)
-                        } else {
-                            toRet.add(lookahead)
-                                break
-                        }
+        if (rhs.size) {
+            for (var i = 0; i < rhs.size; i++;) {
+                var lookahead = first_set_map[rhs[i]] // presumably a copy, so the remove() below does not alter first_set_map - TODO confirm
+                if (lookahead.contains(symbol::null_symbol())) {
+                    // a null from a non-final symbol is dropped; only the last symbol of rhs may contribute null to the result
+                    if (i != rhs.size-1)
+                        lookahead.remove(symbol::null_symbol())
+                    toRet.add(lookahead)
+                } else {
+                    toRet.add(lookahead)
+                    break // this entry has no null symbol, so later symbols of rhs are not consulted
                }
-            } else {
-                toRet.add(symbol::null_symbol())
            }
+        } else {
+            toRet.add(symbol::null_symbol()) // empty rhs: the result is only the null symbol
+        }
        return toRet
    }

--- a/stdlib/parser.krak
+++ b/stdlib/parser.krak
@@ -19,6 +19,7 @@ obj parser (Object) {
    var to_shift: stack< pair<*tree<int>, int> >
    var SPPFStepNodes: vector< pair<*tree<symbol>, int> >
    var packed_map: map<*tree<symbol>, bool>
+    var reduces_to_null_map: map<vector<symbol>, bool>

    fun construct(grammerIn: grammer): *parser {
        input.construct()
@@ -28,6 +29,7 @@ obj parser (Object) {
        to_shift.construct()
        SPPFStepNodes.construct()
        packed_map.construct()
+        reduces_to_null_map.construct()
        return this
    }
    fun copy_construct(old: *parser) {
@@ -38,6 +40,7 @@ obj parser (Object) {
        to_shift.copy_construct(&old->to_shift)
        SPPFStepNodes.copy_construct(&old->SPPFStepNodes)
        packed_map.copy_construct(&old->packed_map)
+        reduces_to_null_map.copy_construct(&old->reduces_to_null_map)
    }
    fun operator=(old: ref parser) {
        destruct()
@@ -51,6 +54,7 @@ obj parser (Object) {
        to_shift.destruct()
        SPPFStepNodes.destruct()
        packed_map.destruct()
+        reduces_to_null_map.destruct()
    }

    fun parse_input(inputStr: string, name: string): *tree<symbol> {
@@ -386,7 +390,9 @@ obj parser (Object) {
        return r.position == 0 && reduces_to_null(r)
    }
    fun reduces_to_null(r: ref rule): bool { // memoized: whether gram.first_vector(r.rhs) contains the null symbol, cached per rhs in reduces_to_null_map
-        return gram.first_vector(r.rhs).contains(null_symbol())
+        if (!reduces_to_null_map.contains_key(r.rhs))
+            reduces_to_null_map[r.rhs] = gram.first_vector(r.rhs).contains(null_symbol()) // first query for this rhs: compute once and store
+        return reduces_to_null_map[r.rhs] // NOTE(review): no invalidation is visible here; assumes gram does not change after construct - confirm
    }
    fun get_nullable_parts(r: ref rule): *tree<symbol> {
        if (reduces_to_null(r))
--- a/stdlib/regex.krak
+++ b/stdlib/regex.krak
@@ -1,4 +1,6 @@
 import io
+import string
+import ast_transformation
 import vector
 import string
 import mem
@@ -15,10 +17,16 @@ fun regex(in: string::string):regex {
 }

 obj regexState (Object) {
-    var character: char
+    // if only one character, both are the same
+    var characterBegin: char
+    var characterEnd: char
    var next_states: set::set<*regexState>
    fun construct(charIn:char): *regexState { // single-character state: delegates to the range overload with begin == end
-        character = charIn
+        return construct(charIn, charIn)
+    }
+    fun construct(charFirst:char, charSecond:char): *regexState { // state for the range charFirst..charSecond; match_char compares against both bounds inclusively
+        characterBegin = charFirst
+        characterEnd = charSecond
        next_states.construct() // initialise the successor set
        return this // returned so calls can be chained, as in mem::new<regexState>()->construct(...)
    }
@@ -26,17 +34,18 @@ obj regexState (Object) {
        return construct((0) cast char)
    }
    fun copy_construct(old:*regexState): void { // copies both range bounds and the successor set from old
-        character = old->character
+        characterBegin = old->characterBegin
+        characterEnd = old->characterEnd
        next_states.copy_construct(&old->next_states) // the set holds *regexState pointers, so the successor states themselves are shared, not cloned
    }
    fun destruct():void { // tears down the successor set only; nothing here frees the states it points to
        next_states.destruct()
    }
    fun match_char(input: char): set::set<*regexState> { // returns the successor states whose inclusive range [characterBegin, characterEnd] contains input
-        return next_states.filter(fun(it:*regexState):bool { return it->character == input; })
+        return next_states.filter(fun(it:*regexState):bool { return it->characterBegin <= input && input <= it->characterEnd; })
    }
    fun is_end():bool { // true when any successor state has characterBegin == 1 (presumably the end-of-regex sentinel - confirm where that state is constructed)
-        return next_states.any_true(fun(state: *regexState):bool { return state->character == 1; })
+        return next_states.any_true(fun(state: *regexState):bool { return state->characterBegin == 1; }) // NOTE(review): characterEnd is not checked, so a range state starting at char 1 would also count as an end
    }
 }

@@ -67,14 +76,6 @@ obj regex (Object, Serializable) {
        begin = old->begin
        referenceCounter = old->referenceCounter
        *referenceCounter += 1
-        /*construct(old->regexString)*/
-        /*begin = mem::safe_recursive_clone(old->begin, fun(it: *regexState, cloner: fun(*regexState):*regexState, register: fun(*regexState):void): void {*/
-            /*var newOne = mem::new<regexState>()->construct(it->character)*/
-            /*register(newOne)*/
-            /*it->next_states.for_each(fun(next_state: *regexState) {*/
-                /*newOne->next_states.add(cloner(next_state))*/
-            /*})*/
-        /*})*/
    }

    fun destruct():void {
@@ -105,6 +106,7 @@ obj regex (Object, Serializable) {
    }

    fun compile(regex_string: string::string): util::pair<*regexState, set::set<*regexState>> {
+        /*io::println(regex_string)*/
        var first = mem::new<regexState>()->construct()
        var previous_begin = set::set<*regexState>()
        var previous_end = set::set<*regexState>()
@@ -132,11 +134,14 @@ obj regex (Object, Serializable) {
            } else if (regex_string[i] == '(' && !escapeing) {
                // note that we don't have a ')' case, as we skip past it with our indicies
                var perenEnd = i + 1
-                for (var depth = 1; depth > 0; perenEnd++;)
+                for (var depth = 1; depth > 0; perenEnd++;) {
+                    if (perenEnd >= regex_string.length())
+                        ast_transformation::error(string::string("can't find matching peren in: ") + regex_string)
                    if (regex_string[perenEnd] == '(')
                        depth++
                    else if (regex_string[perenEnd] == ')')
                        depth--
+                }
                var innerBeginEnd = compile(regex_string.slice(i+1, perenEnd-1))
                // NOTE: perenEnd is one past the close peren
                i = perenEnd-1
@@ -158,7 +163,13 @@ obj regex (Object, Serializable) {
                escapeing = true

            } else {
-                var next = mem::new<regexState>()->construct(regex_string[i])
+                var next: *regexState
+                if (regex_string[i] == '[' && !escapeing) {
+                    next = mem::new<regexState>()->construct(regex_string[i+1], regex_string[i+3])
+                    i += 4 // [a-b] is 5, i++ adds one
+                } else {
+                    next = mem::new<regexState>()->construct(regex_string[i])
+                }
                if (alternating) {
                    previous_end.for_each(fun(it: *regexState):void { it->next_states.add(next); })
                    current_begin.add(next)
--- a/stdlib/set.krak
+++ b/stdlib/set.krak
@@ -34,8 +34,9 @@ obj set<T> (Object, Serializable) {
        data.copy_construct(&old->data)
    }
    fun operator=(rhs: ref set<T>) { // copy-assignment: replaces this set's contents with rhs's
-        destruct()
-        copy_construct(&rhs)
+        /*destruct()*/
+        /*copy_construct(&rhs)*/
+        data = rhs.data // delegates to data's own operator=; NOTE(review): self-assignment (s = s) is only safe if that operator handles aliasing - confirm
    }
    fun serialize(): vector::vector<char> {
        return serialize::serialize(data)
@@ -43,12 +44,12 @@ obj set<T> (Object, Serializable) {
    fun unserialize(it: ref vector::vector<char>, pos: int): int {
        return data.unserialize(it, pos)
    }
-    fun operator==(rhs: set<T>): bool {
+    fun operator==(rhs: ref set<T>): bool { // equal when both sets have the same size and no element of this set is missing from rhs; element order is not compared
        if (size() != rhs.size())
            return false
        return !data.any_true( fun(item: T): bool return !rhs.contains(item); )
    }
-    fun operator!=(rhs: set<T>): bool {
+    fun operator!=(rhs: ref set<T>): bool { // exact negation of operator==
        return ! (*this == rhs)
    }
    fun destruct() {
@@ -57,10 +58,10 @@ obj set<T> (Object, Serializable) {
    fun size():int { // element count, read from the backing data member
        return data.size
    }
-    fun contains(items: set<T>): bool {
+    fun contains(items: ref set<T>): bool { // superset test: true when every element of items is in this set; an empty items is always contained
        return items.size() == 0 || !items.any_true( fun(item: T): bool return !contains(item); )
    }
-    fun contains(item: T): bool {
+    fun contains(item: ref T): bool { // membership test for a single element
        return data.find(item) != -1 // find reports -1 for "not found" (remove() checks the same value)
    }
    fun operator+=(item: ref T) {
@@ -84,7 +85,7 @@ obj set<T> (Object, Serializable) {
    fun add(items: ref set<T>) { // adds each element of items by calling add(item) on it in turn
        items.for_each( fun(item: ref T) add(item); )
    }
-    fun remove(item: T) {
+    fun remove(item: ref T) {
        var idx = data.find(item)
        if (idx == -1) {
            /*io::println("CANNOT FIND ITEM TO REMOVE")*/
--- a/stdlib/string.krak
+++ b/stdlib/string.krak
@@ -94,8 +94,9 @@ obj string (Object, Serializable) {
    }

    fun operator=(str: ref string): void { // copy-assignment: replaces this string's data with str's
-        destruct();
-        data.copy_construct(&str.data)
+        /*destruct();*/
+        /*data.copy_construct(&str.data)*/
+        data = str.data // delegates to data's own operator=; NOTE(review): self-assignment (s = s) is only safe if that operator handles aliasing - confirm
    }

    fun destruct():void {
@@ -147,8 +148,9 @@ obj string (Object, Serializable) {
    }

    fun operator+(str: ref string): string { // returns a new string built from data + str.data; neither operand is assigned to
-        var newStr.construct(str):string
-        var ret.construct(data + newStr.data):string
+        /*var newStr.construct(str):string*/
+        /*var ret.construct(data + newStr.data):string*/
+        var ret.construct(data + str.data):string // uses str.data directly; the commented-out version first made a temporary copy of str
        return ret
    }

@@ -210,7 +212,7 @@ obj string (Object, Serializable) {
        out.add(current)
        return out
    }
-    fun join(to_join: vector::vector<string>): string {
+    fun join(to_join: ref vector::vector<string>): string {
        var to_ret = to_join.first()
        for (var i = 1; i < to_join.size; i++;)
            to_ret += *this + to_join[i]
--- a/stdlib/vector.krak
+++ b/stdlib/vector.krak
@@ -65,31 +65,36 @@ obj vector<T> (Object, Serializable) {
        data = 0
    }

-    fun operator=(other:vector<T>):void {
-        destruct()
-        copy_construct(&other)
+    // Copy-assignment: makes this vector hold copies of other's elements.
+    // When this vector currently holds fewer elements than other it is rebuilt
+    // via destruct + copy_construct; otherwise it is cleared and refilled
+    // element by element (presumably to reuse the existing allocation - confirm
+    // against clear()/addEnd()).
+    fun operator=(other:ref vector<T>):void {
+        // other is taken by reference, so `v = v` aliases this vector. Without
+        // this guard the clear() below would also empty other, the copy loop
+        // would run zero times, and every element would be silently dropped.
+        if (this == &other)
+            return;
+        if (size < other.size) {
+            destruct()
+            copy_construct(&other)
+        } else {
+            clear()
+            for (var i = 0; i < other.size; i++;)
+                addEnd(other.get(i))
+        }
    }

-    fun operator+(other:vector<T>):vector<T> {
-        // lets be at least a little bit smarter by copy_constructing our copy.
-        // We could get a lot better than this by initially creating enough space
-        // for both and copy_constructing all of them, but this is just a quick fix
-        var newVec.copy_construct(this):vector<T>
+    fun operator+(other: ref vector<T>):vector<T> { // concatenation: returns a new vector with this vector's elements followed by other's
+        var newVec.construct(size+other.size):vector<T> // presumably reserves room for both operands up front - confirm against construct(int)
+        for (var i = 0; i < size; i++;)
+            newVec.addEnd(get(i))
        for (var i = 0; i < other.size; i++;)
            newVec.addEnd(other.get(i))
        return newVec
    }
-    fun operator+(other: T):vector<T> {
+    fun operator+(other: ref T):vector<T> { // returns a copy of this vector with other appended; this vector is not modified
        var newVec.copy_construct(this):vector<T>
        newVec.addEnd(other)
        return newVec
    }

-    fun operator+=(other: T):void {
+    fun operator+=(other: ref T):void { // appends other in place
        addEnd(other) // NOTE(review): other may now refer to an element of this vector (v += v[0]); safe only if addEnd copies before any reallocation - confirm
    }

-    fun operator+=(other:vector<T>):void {
+    // Appends a copy of every element of other to the end of this vector.
+    fun operator+=(other: ref vector<T>):void {
+        // other is taken by reference, so in `v += v` each addEnd also grows
+        // other.size and a loop bounded by the live size would never finish.
+        // Snapshot the element count before appending anything.
+        // NOTE(review): in the aliased case other.get(i) still refers into this
+        // vector's own storage - confirm addEnd copies before reallocating.
+        var count = other.size
-        for (var i = 0; i < other.size; i++;)
+        for (var i = 0; i < count; i++;)
            addEnd(other.get(i))
    }
@@ -180,7 +185,7 @@ obj vector<T> (Object, Serializable) {
        return true
    }

-    fun set(index: int, dataIn: T): void {
+    fun set(index: int, dataIn: ref T): void {
        if (index < 0 || index  >= size)
            return;
        data[index] = dataIn;
@@ -190,7 +195,7 @@ obj vector<T> (Object, Serializable) {
            addEnd(dataIn[i]);
    }
    // same darn trick
-    fun add_unique<U>(dataIn: U): void {
+    fun add_unique<U>(dataIn: ref U): void { // appends dataIn only when contains(dataIn) is false, so an element already present is not added again
        if (!contains(dataIn))
            addEnd(dataIn)
    }