Speed up parsing something like 10 times, Kalypso runs so much faster now

This commit is contained in:
Nathan Braswell
2016-02-06 23:09:46 -05:00
parent 7a2cef08e8
commit 6aeb5c33f5
4 changed files with 35 additions and 5 deletions

View File

@@ -63,6 +63,7 @@ obj lexer (Object) {
fun next(): symbol::symbol {
if (position >= input.length())
return symbol::eof_symbol()
/*
var max = regs.map(fun(reg_pair: util::pair<string::string,regex::regex>): util::pair<int, string::string> {
return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); })
.max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool
@@ -71,6 +72,20 @@ obj lexer (Object) {
return symbol::invalid_symbol()
position += max.first
return symbol::symbol(max.second, true, input.slice(position-max.first, position))
*/
var max = -1
var max_length = -1
for (var i = 0; i < regs.size; i++;) {
var new_length = regs[i].second.long_match(input.getBackingMemory(), position, input.length())
if (new_length > max_length) {
max = i
max_length = new_length
}
}
if (max < 0)
return symbol::invalid_symbol()
position += max_length
return symbol::symbol(regs[max].first, true, input.slice(position-max_length, position))
}
}

View File

@@ -180,19 +180,20 @@ obj regex (Object, Serializable) {
}
// Convenience overload: wraps the raw character pointer in a string::string
// and forwards to the string-based long_match.
fun long_match(to_match: *char): int return long_match(string::string(to_match))
fun long_match(to_match: string::string): int {
// Longest match over an entire string: forwards to the buffer-based overload
// with the string's backing memory and the range [0, to_match.length()).
fun long_match(to_match: string::string): int {
    return long_match(to_match.getBackingMemory(), 0, to_match.length())
}
// Steps the regex's state set over the characters to_match[position .. end)
// and returns the length of the longest prefix of that range after which some
// live state reports is_end(), or -1 when no prefix (not even the empty one)
// does.
//   to_match: raw character buffer to read from
//   position: index of the first character to examine
//   end:      index one past the last character to examine
fun long_match(to_match: *char, position: int, end: int): int {
var next = set::set(begin)
var longest = -1
for (var i = 0; i < to_match.length(); i++;) {
for (var i = 0; i < end-position; i++;) {
// No live states remain, so no longer match is possible.
if (next.size() == 0)
return longest
// Some live state is an end state after consuming i characters: record i.
if (next.any_true(fun(state: *regexState):bool { return state->is_end(); }))
longest = i
//next = next.flatten_map<*regexState>(fun(state: *regexState): vector::vector<*regexState> { return state->match_char(to_match[i]); })
next = next.flatten_map(fun(state: *regexState): set::set<*regexState> { return state->match_char(to_match[i]); })
// Advance every live state on the next character of the range.
next = next.flatten_map(fun(state: *regexState): set::set<*regexState> { return state->match_char(to_match[position+i]); })
}
// The whole range was consumed; if an end state is live, the full range matched.
if (next.any_true(fun(state: *regexState):bool { return state->is_end(); }))
return to_match.length()
return end-position
return longest
}
}

View File

@@ -148,6 +148,7 @@ obj string (Object, Serializable) {
out[data.size] = 0
return out;
}
// Returns the character pointer handed back by the underlying `data`
// container's getBackingMemory(); no copy is made in this method.
// NOTE(review): nothing here appends a terminator, so callers presumably need
// to pair the pointer with length() - confirm against vector's implementation.
fun getBackingMemory(): *char {
    return data.getBackingMemory();
}
fun split(delim: *char): vector::vector<string> return split(string(delim))
fun split(delim: string): vector::vector<string> {

View File

@@ -121,11 +121,11 @@ obj vector<T> (Object, Serializable) {
}
fun slice(start: int, end: int): vector<T> {
var new.construct(): vector<T>
if (start < 0)
start += size + 1
if (end < 0)
end += size + 1
var new.construct(end-start): vector<T>
for (var i = start; i < end; i++;)
new.add(data[i])
return new
@@ -229,6 +229,12 @@ obj vector<T> (Object, Serializable) {
for (var i = 0; i < size; i++;)
data[i] = func(data[i])
}
// Builds a new vector<U> by calling func on each element of this vector, in
// index order. This overload passes every element to func by reference.
// The result is constructed with `size` as its argument (presumably a capacity
// hint - confirm against construct) and filled with addEnd.
fun map<U>(func: fun(ref T):U):vector<U> {
    var result.construct(size): vector<U>
    for (var idx = 0; idx < size; idx++;) {
        result.addEnd(func(data[idx]))
    }
    return result
}
fun map<U>(func: fun(T):U):vector<U> {
var newVec.construct(size): vector<U>
for (var i = 0; i < size; i++;)
@@ -257,6 +263,13 @@ obj vector<T> (Object, Serializable) {
return true
return false
}
// Returns the element selected as the maximum under func, which receives its
// arguments by reference. func(current, candidate) returning true makes the
// candidate the new current maximum, so func acts as a "less than" test.
// Scanning starts from index 0; there is no guard for an empty vector, in
// which case data[0] is still read.
fun max(func: fun(ref T, ref T):bool): T {
    var best = 0
    for (var candidate = 1; candidate < size; candidate++;) {
        if (func(data[best], data[candidate])) {
            best = candidate
        }
    }
    return data[best]
}
fun max(func: fun(T,T):bool): T {
var maxIdx = 0
for (var i = 1; i < size; i++;)