Speed up parsing by roughly 10x; Kalypso runs much faster now
This commit is contained in:
@@ -63,6 +63,7 @@ obj lexer (Object) {
|
||||
// Returns the next symbol found in `input` at the current `position`.
// Tries every (name, regex) pair in `regs`, keeps the longest match, advances
// `position` past it and returns a symbol named after the winning pair.
// Returns symbol::eof_symbol() once `position` has reached the end of the
// input, and symbol::invalid_symbol() when no regex reports a match.
fun next(): symbol::symbol {
|
||||
// Nothing left to read.
if (position >= input.length())
|
||||
return symbol::eof_symbol()
|
||||
// Previous map/max based implementation, kept commented out. It handed
// input.slice(position, -1) to each regex's long_match.
/*
|
||||
var max = regs.map(fun(reg_pair: util::pair<string::string,regex::regex>): util::pair<int, string::string> {
|
||||
return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); })
|
||||
.max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool
|
||||
@@ -71,6 +72,20 @@ obj lexer (Object) {
|
||||
return symbol::invalid_symbol()
|
||||
position += max.first
|
||||
return symbol::symbol(max.second, true, input.slice(position-max.first, position))
|
||||
*/
|
||||
// Index into regs of the best match so far; -1 means nothing has matched yet.
var max = -1
|
||||
// Length reported for that best match; starts below any value that can win.
var max_length = -1
|
||||
for (var i = 0; i < regs.size; i++;) {
|
||||
// Match against the backing memory directly, passing the start offset and
// the end offset instead of a sliced string.
var new_length = regs[i].second.long_match(input.getBackingMemory(), position, input.length())
|
||||
// Strict comparison: on equal lengths the earlier entry in regs is kept.
if (new_length > max_length) {
|
||||
max = i
|
||||
max_length = new_length
|
||||
}
|
||||
}
|
||||
// No regex returned a length above -1.
if (max < 0)
|
||||
return symbol::invalid_symbol()
|
||||
// NOTE(review): if the winning length is 0, position does not move, so a
// repeated call would produce the same result - confirm whether any regex in
// regs can report an empty match.
position += max_length
|
||||
// The symbol carries the winning pair's name and the matched text; the meaning
// of the `true` argument is not visible here - presumably a terminal flag, TODO confirm.
return symbol::symbol(regs[max].first, true, input.slice(position-max_length, position))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -180,19 +180,20 @@ obj regex (Object, Serializable) {
|
||||
}
|
||||
|
||||
fun long_match(to_match: *char): int { return long_match(string::string(to_match)); }
|
||||
fun long_match(to_match: string::string): int {
|
||||
fun long_match(to_match: string::string): int return long_match(to_match.getBackingMemory(), 0, to_match.length())
|
||||
fun long_match(to_match: *char, position: int, end: int): int {
|
||||
var next = set::set(begin)
|
||||
var longest = -1
|
||||
for (var i = 0; i < to_match.length(); i++;) {
|
||||
for (var i = 0; i < end-position; i++;) {
|
||||
if (next.size() == 0)
|
||||
return longest
|
||||
if (next.any_true(fun(state: *regexState):bool { return state->is_end(); }))
|
||||
longest = i
|
||||
//next = next.flatten_map<*regexState>(fun(state: *regexState): vector::vector<*regexState> { return state->match_char(to_match[i]); })
|
||||
next = next.flatten_map(fun(state: *regexState): set::set<*regexState> { return state->match_char(to_match[i]); })
|
||||
next = next.flatten_map(fun(state: *regexState): set::set<*regexState> { return state->match_char(to_match[position+i]); })
|
||||
}
|
||||
if (next.any_true(fun(state: *regexState):bool { return state->is_end(); }))
|
||||
return to_match.length()
|
||||
return end-position
|
||||
return longest
|
||||
}
|
||||
}
|
||||
|
||||
@@ -148,6 +148,7 @@ obj string (Object, Serializable) {
|
||||
out[data.size] = 0
|
||||
return out;
|
||||
}
|
||||
// Returns the *char obtained from data.getBackingMemory(), without building a new array.
// NOTE(review): presumably not null-terminated, so callers should pass an
// explicit length alongside it - confirm against the vector implementation.
fun getBackingMemory(): *char return data.getBackingMemory();
|
||||
|
||||
fun split(delim: *char): vector::vector<string> return split(string(delim))
|
||||
fun split(delim: string): vector::vector<string> {
|
||||
|
||||
@@ -121,11 +121,11 @@ obj vector<T> (Object, Serializable) {
|
||||
}
|
||||
|
||||
fun slice(start: int, end: int): vector<T> {
|
||||
var new.construct(): vector<T>
|
||||
if (start < 0)
|
||||
start += size + 1
|
||||
if (end < 0)
|
||||
end += size + 1
|
||||
var new.construct(end-start): vector<T>
|
||||
for (var i = start; i < end; i++;)
|
||||
new.add(data[i])
|
||||
return new
|
||||
@@ -229,6 +229,12 @@ obj vector<T> (Object, Serializable) {
|
||||
for (var i = 0; i < size; i++;)
|
||||
data[i] = func(data[i])
|
||||
}
|
||||
// Builds a new vector<U> by calling func on every element of this vector, in
// index order. This overload takes a func whose parameter is declared `ref T`.
// Returns the new vector; this vector's data is not assigned to here.
fun map<U>(func: fun(ref T):U):vector<U> {
|
||||
// construct(size) presumably reserves room for `size` elements - TODO confirm.
var newVec.construct(size): vector<U>
|
||||
for (var i = 0; i < size; i++;)
|
||||
newVec.addEnd(func(data[i]))
|
||||
return newVec
|
||||
}
|
||||
fun map<U>(func: fun(T):U):vector<U> {
|
||||
var newVec.construct(size): vector<U>
|
||||
for (var i = 0; i < size; i++;)
|
||||
@@ -257,6 +263,13 @@ obj vector<T> (Object, Serializable) {
|
||||
return true
|
||||
return false
|
||||
}
|
||||
// Returns the element selected by a linear scan using func as the comparison.
// func(current, candidate) returning true makes the candidate the new current
// choice; when it returns false the earlier element is kept.
// This overload takes a func whose parameters are declared `ref T`.
// NOTE(review): data[0] is read even when size is 0 - confirm that callers
// never invoke this on an empty vector.
fun max(func: fun(ref T, ref T):bool): T {
|
||||
// Start with the first element as the current choice.
var maxIdx = 0
|
||||
for (var i = 1; i < size; i++;)
|
||||
if (func(data[maxIdx], data[i]))
|
||||
maxIdx = i
|
||||
return data[maxIdx]
|
||||
}
|
||||
fun max(func: fun(T,T):bool): T {
|
||||
var maxIdx = 0
|
||||
for (var i = 1; i < size; i++;)
|
||||
|
||||
Reference in New Issue
Block a user