Speed up parsing by roughly a factor of 10; Kalypso runs much faster now

This commit is contained in:
Nathan Braswell
2016-02-06 23:09:46 -05:00
parent 7a2cef08e8
commit 6aeb5c33f5
4 changed files with 35 additions and 5 deletions

View File

@@ -63,6 +63,7 @@ obj lexer (Object) {
fun next(): symbol::symbol { fun next(): symbol::symbol {
if (position >= input.length()) if (position >= input.length())
return symbol::eof_symbol() return symbol::eof_symbol()
/*
var max = regs.map(fun(reg_pair: util::pair<string::string,regex::regex>): util::pair<int, string::string> { var max = regs.map(fun(reg_pair: util::pair<string::string,regex::regex>): util::pair<int, string::string> {
return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); }) return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); })
.max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool .max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool
@@ -71,6 +72,20 @@ obj lexer (Object) {
return symbol::invalid_symbol() return symbol::invalid_symbol()
position += max.first position += max.first
return symbol::symbol(max.second, true, input.slice(position-max.first, position)) return symbol::symbol(max.second, true, input.slice(position-max.first, position))
*/
var max = -1
var max_length = -1
for (var i = 0; i < regs.size; i++;) {
var new_length = regs[i].second.long_match(input.getBackingMemory(), position, input.length())
if (new_length > max_length) {
max = i
max_length = new_length
}
}
if (max < 0)
return symbol::invalid_symbol()
position += max_length
return symbol::symbol(regs[max].first, true, input.slice(position-max_length, position))
} }
} }

View File

@@ -180,19 +180,20 @@ obj regex (Object, Serializable) {
} }
fun long_match(to_match: *char): int { return long_match(string::string(to_match)); } fun long_match(to_match: *char): int { return long_match(string::string(to_match)); }
fun long_match(to_match: string::string): int { fun long_match(to_match: string::string): int return long_match(to_match.getBackingMemory(), 0, to_match.length())
fun long_match(to_match: *char, position: int, end: int): int {
var next = set::set(begin) var next = set::set(begin)
var longest = -1 var longest = -1
for (var i = 0; i < to_match.length(); i++;) { for (var i = 0; i < end-position; i++;) {
if (next.size() == 0) if (next.size() == 0)
return longest return longest
if (next.any_true(fun(state: *regexState):bool { return state->is_end(); })) if (next.any_true(fun(state: *regexState):bool { return state->is_end(); }))
longest = i longest = i
//next = next.flatten_map<*regexState>(fun(state: *regexState): vector::vector<*regexState> { return state->match_char(to_match[i]); }) //next = next.flatten_map<*regexState>(fun(state: *regexState): vector::vector<*regexState> { return state->match_char(to_match[i]); })
next = next.flatten_map(fun(state: *regexState): set::set<*regexState> { return state->match_char(to_match[i]); }) next = next.flatten_map(fun(state: *regexState): set::set<*regexState> { return state->match_char(to_match[position+i]); })
} }
if (next.any_true(fun(state: *regexState):bool { return state->is_end(); })) if (next.any_true(fun(state: *regexState):bool { return state->is_end(); }))
return to_match.length() return end-position
return longest return longest
} }
} }

View File

@@ -148,6 +148,7 @@ obj string (Object, Serializable) {
out[data.size] = 0 out[data.size] = 0
return out; return out;
} }
// Returns the *char produced by data.getBackingMemory(), i.e. the pointer
// exposed by the underlying data member, without building a new buffer here.
// NOTE(review): nothing in this function appends a terminator, so callers
// presumably need to pair the pointer with length() - confirm.
fun getBackingMemory(): *char return data.getBackingMemory();
fun split(delim: *char): vector::vector<string> return split(string(delim)) fun split(delim: *char): vector::vector<string> return split(string(delim))
fun split(delim: string): vector::vector<string> { fun split(delim: string): vector::vector<string> {

View File

@@ -121,11 +121,11 @@ obj vector<T> (Object, Serializable) {
} }
fun slice(start: int, end: int): vector<T> { fun slice(start: int, end: int): vector<T> {
var new.construct(): vector<T>
if (start < 0) if (start < 0)
start += size + 1 start += size + 1
if (end < 0) if (end < 0)
end += size + 1 end += size + 1
var new.construct(end-start): vector<T>
for (var i = start; i < end; i++;) for (var i = start; i < end; i++;)
new.add(data[i]) new.add(data[i])
return new return new
@@ -229,6 +229,12 @@ obj vector<T> (Object, Serializable) {
for (var i = 0; i < size; i++;) for (var i = 0; i < size; i++;)
data[i] = func(data[i]) data[i] = func(data[i])
} }
// Builds a new vector<U> by calling func on each element, in index order
// from 0 to size-1, and appending each result with addEnd.
// This overload takes a func whose parameter is declared `ref T`.
// NOTE(review): presumably this exists so elements are not copied when
// handed to func - confirm against the language's ref semantics.
fun map<U>(func: fun(ref T):U):vector<U> {
// size is passed to construct; presumably a capacity hint, since results
// are still appended one by one below - TODO confirm.
var newVec.construct(size): vector<U>
for (var i = 0; i < size; i++;)
newVec.addEnd(func(data[i]))
return newVec
}
fun map<U>(func: fun(T):U):vector<U> { fun map<U>(func: fun(T):U):vector<U> {
var newVec.construct(size): vector<U> var newVec.construct(size): vector<U>
for (var i = 0; i < size; i++;) for (var i = 0; i < size; i++;)
@@ -257,6 +263,13 @@ obj vector<T> (Object, Serializable) {
return true return true
return false return false
} }
// Picks one element of the vector using the caller-supplied comparison and
// returns it. Starts with index 0 as the current choice, then for every
// later index i calls func(data[current], data[i]); when func returns true,
// i becomes the current choice. When func returns false the earlier choice
// is kept.
// This overload takes a func whose parameters are declared `ref T`.
// NOTE(review): with size == 0 the loop body never runs and data[0] is
// still read - looks like callers must guarantee a non-empty vector; confirm.
fun max(func: fun(ref T, ref T):bool): T {
var maxIdx = 0
// Index 0 is already the initial choice, so comparison starts at 1.
for (var i = 1; i < size; i++;)
if (func(data[maxIdx], data[i]))
maxIdx = i
return data[maxIdx]
}
fun max(func: fun(T,T):bool): T { fun max(func: fun(T,T):bool): T {
var maxIdx = 0 var maxIdx = 0
for (var i = 1; i < size; i++;) for (var i = 1; i < size; i++;)