# Patch summary (commentary only: text placed before the first "diff --git"
# header is not part of any hunk).
#
# stdlib/lexer.krak, lexer.next():
#   The previous longest-match selection (regs.map(...).max(...) over
#   input.slice(position, -1)) is wrapped in a /* ... */ comment. It is
#   replaced by an index loop over regs that calls
#   long_match(input.getBackingMemory(), position, input.length()) for each
#   entry and remembers the index and length of the longest result.
#   The comparison is a strict ">", so on equal lengths the earlier entry in
#   regs is kept. If no entry returns a length above -1, max stays -1 and
#   symbol::invalid_symbol() is returned.
#   NOTE(review): a match length of 0 would leave position unchanged
#   (position += 0) - confirm callers cannot spin on an empty match.
#
# stdlib/regex.krak, regex.long_match:
#   The string::string overload becomes a one-line forwarder to a new overload
#   long_match(to_match: *char, position: int, end: int). The new overload
#   iterates end-position times, reads to_match[position+i], and returns
#   end-position when an end state is reached after the last character;
#   otherwise it returns the longest prefix length recorded, or -1.
#
# stdlib/string.krak:
#   Adds getBackingMemory(): *char, which returns data.getBackingMemory().
#
# stdlib/vector.krak:
#   slice(): construction of the result vector moves below the negative-index
#   adjustment of start/end and now passes end-start to construct().
#   NOTE(review): end-start is not checked for being negative - confirm what
#   construct() does in that case.
#   Adds a map overload taking fun(ref T):U and a max overload taking
#   fun(ref T, ref T):bool, alongside the existing by-value versions.
#   NOTE(review): max reads data[0] even when size is 0 - presumably callers
#   guarantee a non-empty vector; verify.
#
# NOTE(review): this copy of the patch appears to have lost its line breaks
# and some generic type arguments (e.g. "vector" / "util::pair" without
# parameters); it probably will not apply as-is - compare with the original
# commit before use.
diff --git a/stdlib/lexer.krak b/stdlib/lexer.krak index d966a0d..626ed15 100644 --- a/stdlib/lexer.krak +++ b/stdlib/lexer.krak @@ -63,6 +63,7 @@ obj lexer (Object) { fun next(): symbol::symbol { if (position >= input.length()) return symbol::eof_symbol() + /* var max = regs.map(fun(reg_pair: util::pair): util::pair { return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); }) .max(fun(first: util::pair, second: util::pair): bool @@ -71,6 +72,20 @@ obj lexer (Object) { return symbol::invalid_symbol() position += max.first return symbol::symbol(max.second, true, input.slice(position-max.first, position)) + */ + var max = -1 + var max_length = -1 + for (var i = 0; i < regs.size; i++;) { + var new_length = regs[i].second.long_match(input.getBackingMemory(), position, input.length()) + if (new_length > max_length) { + max = i + max_length = new_length + } + } + if (max < 0) + return symbol::invalid_symbol() + position += max_length + return symbol::symbol(regs[max].first, true, input.slice(position-max_length, position)) } } diff --git a/stdlib/regex.krak b/stdlib/regex.krak index abefee1..afc8f41 100644 --- a/stdlib/regex.krak +++ b/stdlib/regex.krak @@ -180,19 +180,20 @@ obj regex (Object, Serializable) { } fun long_match(to_match: *char): int { return long_match(string::string(to_match)); } - fun long_match(to_match: string::string): int { + fun long_match(to_match: string::string): int return long_match(to_match.getBackingMemory(), 0, to_match.length()) + fun long_match(to_match: *char, position: int, end: int): int { var next = set::set(begin) var longest = -1 - for (var i = 0; i < to_match.length(); i++;) { + for (var i = 0; i < end-position; i++;) { if (next.size() == 0) return longest if (next.any_true(fun(state: *regexState):bool { return state->is_end(); })) longest = i //next = next.flatten_map<*regexState>(fun(state: *regexState): vector::vector<*regexState> { return state->match_char(to_match[i]); }) - next = 
next.flatten_map(fun(state: *regexState): set::set<*regexState> { return state->match_char(to_match[i]); }) + next = next.flatten_map(fun(state: *regexState): set::set<*regexState> { return state->match_char(to_match[position+i]); }) } if (next.any_true(fun(state: *regexState):bool { return state->is_end(); })) - return to_match.length() + return end-position return longest } } diff --git a/stdlib/string.krak b/stdlib/string.krak index 29344ac..b9ae803 100644 --- a/stdlib/string.krak +++ b/stdlib/string.krak @@ -148,6 +148,7 @@ obj string (Object, Serializable) { out[data.size] = 0 return out; } + fun getBackingMemory(): *char return data.getBackingMemory(); fun split(delim: *char): vector::vector return split(string(delim)) fun split(delim: string): vector::vector { diff --git a/stdlib/vector.krak b/stdlib/vector.krak index e5ea470..f9ccc9c 100644 --- a/stdlib/vector.krak +++ b/stdlib/vector.krak @@ -121,11 +121,11 @@ obj vector (Object, Serializable) { } fun slice(start: int, end: int): vector { - var new.construct(): vector if (start < 0) start += size + 1 if (end < 0) end += size + 1 + var new.construct(end-start): vector for (var i = start; i < end; i++;) new.add(data[i]) return new @@ -229,6 +229,12 @@ obj vector (Object, Serializable) { for (var i = 0; i < size; i++;) data[i] = func(data[i]) } + fun map(func: fun(ref T):U):vector { + var newVec.construct(size): vector + for (var i = 0; i < size; i++;) + newVec.addEnd(func(data[i])) + return newVec + } fun map(func: fun(T):U):vector { var newVec.construct(size): vector for (var i = 0; i < size; i++;) @@ -257,6 +263,13 @@ obj vector (Object, Serializable) { return true return false } + fun max(func: fun(ref T, ref T):bool): T { + var maxIdx = 0 + for (var i = 1; i < size; i++;) + if (func(data[maxIdx], data[i])) + maxIdx = i + return data[maxIdx] + } fun max(func: fun(T,T):bool): T { var maxIdx = 0 for (var i = 1; i < size; i++;)