Tons of stuff. Regex still a work in progress, along with related template member function scoping bugs

This commit is contained in:
Nathan Braswell
2015-06-09 20:02:02 -04:00
parent 47bc52f00c
commit d90cb4b6db
14 changed files with 78 additions and 28 deletions

View File

@@ -4,6 +4,7 @@ project(Kraken)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)

View File

@@ -61,6 +61,8 @@ class RNGLRParser: public Parser {
std::vector<NodeTree<Symbol>*> nullableParts; std::vector<NodeTree<Symbol>*> nullableParts;
std::map<NodeTree<Symbol>, bool> packedMap; std::map<NodeTree<Symbol>, bool> packedMap;
std::map<ParseRule*, bool> reduceToNullMap;
}; };
#endif #endif

View File

@@ -15,7 +15,7 @@ class RegExState {
void addNext(RegExState* nextState); void addNext(RegExState* nextState);
bool characterIs(char inCharacter); bool characterIs(char inCharacter);
std::vector<RegExState*>* advance(char advanceCharacter); std::vector<RegExState*> advance(char advanceCharacter);
std::vector<RegExState*> getNextStates(); std::vector<RegExState*> getNextStates();
bool isGoal(); bool isGoal();

View File

@@ -108,9 +108,9 @@ expression = expression WS "<<" WS term | expression WS right_shift WS shiftand
shiftand = shiftand WS "-" WS term | shiftand WS "\+" WS term | term ; shiftand = shiftand WS "-" WS term | shiftand WS "\+" WS term | term ;
term = term WS "/" WS factor | term WS "\*" WS factor | term WS "%" WS factor | factor ; term = term WS "/" WS factor | term WS "\*" WS factor | term WS "%" WS factor | factor ;
factor = "\+\+" WS unarad | unarad WS "\+\+" | "--" WS unarad | unarad WS "--" | "\+" WS unarad | "-" WS unarad | "!" WS unarad | "~" WS unarad | "\(" WS type WS "\)" WS unarad | "\*" WS unarad | "&" WS unarad | unarad ; factor = "\+\+" WS unarad | unarad WS "\+\+" | "--" WS unarad | unarad WS "--" | "\+" WS unarad | "-" WS unarad | "!" WS unarad | "~" WS unarad | "\(" WS type WS "\)" WS unarad | "\*" WS unarad | "&" WS unarad | unarad ;
unarad = number | scoped_identifier | scoped_identifier WS template_inst | function_call | bool | string | character | "\(" WS boolean_expression WS "\)" | access_operation | unarad WS "[" WS expression WS "]" | lambda ; unarad = number | scoped_identifier | scoped_identifier WS template_inst | access_operation | function_call | bool | string | character | "\(" WS boolean_expression WS "\)" | unarad WS "[" WS expression WS "]" | lambda ;
number = integer | floating_literal ; number = integer | floating_literal ;
access_operation = unarad "." identifier | unarad "->" identifier ; access_operation = unarad "." identifier | unarad "->" identifier | unarad "." identifier WS template_inst | unarad "->" identifier WS template_inst ;
assignment_statement = factor WS "=" WS boolean_expression | factor WS "\+=" WS boolean_expression | factor WS "-=" WS boolean_expression | factor WS "\*=" WS boolean_expression | factor WS "/=" WS boolean_expression ; assignment_statement = factor WS "=" WS boolean_expression | factor WS "\+=" WS boolean_expression | factor WS "-=" WS boolean_expression | factor WS "\*=" WS boolean_expression | factor WS "/=" WS boolean_expression ;
# if it's being assigned to, we allow type inferencing # if it's being assigned to, we allow type inferencing

View File

@@ -267,7 +267,7 @@ void ASTTransformation::thirdPass(NodeTree<ASTData>* ast, NodeTree<Symbol>* pars
//Note that this pass can instantiate class AND function templates //Note that this pass can instantiate class AND function templates
for (NodeTree<Symbol>* i : children) { for (NodeTree<Symbol>* i : children) {
if (i->getDataRef()->getName() == "type_def") { if (i->getDataRef()->getName() == "type_def") {
if (i->getChildren()[1]->getData().getName() == "template_dec") // It's a template if (i->getChildren().size() > 1 && i->getChildren()[1]->getData().getName() == "template_dec") // It's a template
continue; //We've already set up the class templates continue; //We've already set up the class templates
std::vector<NodeTree<Symbol>*> typedefChildren = i->getChildren(); std::vector<NodeTree<Symbol>*> typedefChildren = i->getChildren();
std::string name = concatSymbolTree(typedefChildren[0]); std::string name = concatSymbolTree(typedefChildren[0]);
@@ -751,6 +751,7 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from, NodeTree
std::cerr << "///////////////////////////////////////////////////////////////////////////////" << std::endl; std::cerr << "///////////////////////////////////////////////////////////////////////////////" << std::endl;
std::cerr << "Ambigious program when parsed by this grammer! This is a bug, please report it." << std::endl; std::cerr << "Ambigious program when parsed by this grammer! This is a bug, please report it." << std::endl;
std::cerr << "///////////////////////////////////////////////////////////////////////////////" << std::endl; std::cerr << "///////////////////////////////////////////////////////////////////////////////" << std::endl;
std::cerr << concatSymbolTree(from) << std::endl;
throw "Ambigious parse!"; throw "Ambigious parse!";
} else { } else {
// Should get rid of this eventually. Right now it handles cases like sign, alpha, a comma, etc // Should get rid of this eventually. Right now it handles cases like sign, alpha, a comma, etc
@@ -961,6 +962,8 @@ NodeTree<ASTData>* ASTTransformation::templateClassLookup(NodeTree<ASTData>* sco
auto possibleMatches = scopeLookup(scope, lookup); auto possibleMatches = scopeLookup(scope, lookup);
std::cout << "Template Class instantiation has " << possibleMatches.size() << " possible matches." << std::endl; std::cout << "Template Class instantiation has " << possibleMatches.size() << " possible matches." << std::endl;
for (auto i : possibleMatches) { for (auto i : possibleMatches) {
if (i->getDataRef()->type != type_def)
continue;
NodeTree<Symbol>* templateSyntaxTree = i->getDataRef()->valueType->templateDefinition; NodeTree<Symbol>* templateSyntaxTree = i->getDataRef()->valueType->templateDefinition;
auto nameTraitsPairs = makeTemplateNameTraitPairs(templateSyntaxTree->getChildren()[1]); auto nameTraitsPairs = makeTemplateNameTraitPairs(templateSyntaxTree->getChildren()[1]);
@@ -1108,6 +1111,8 @@ NodeTree<ASTData>* ASTTransformation::templateFunctionLookup(NodeTree<ASTData>*
std::cout << "Template Function instantiation has " << possibleMatches.size() << " possible matches." << std::endl; std::cout << "Template Function instantiation has " << possibleMatches.size() << " possible matches." << std::endl;
int index = 1; int index = 1;
for (auto i : possibleMatches) { for (auto i : possibleMatches) {
if (i->getDataRef()->type != function)
continue;
std::cout << "Possibility " << index++ << std::endl; std::cout << "Possibility " << index++ << std::endl;
NodeTree<Symbol>* templateSyntaxTree = i->getDataRef()->valueType->templateDefinition; NodeTree<Symbol>* templateSyntaxTree = i->getDataRef()->valueType->templateDefinition;
if (!templateSyntaxTree) { if (!templateSyntaxTree) {
@@ -1239,6 +1244,10 @@ std::vector<NodeTree<ASTData>*> ASTTransformation::scopeLookup(NodeTree<ASTData>
std::vector<NodeTree<ASTData>*> ASTTransformation::scopeLookup(NodeTree<ASTData>* scope, std::string lookup, bool includeModules, std::set<NodeTree<ASTData>*> visited) { std::vector<NodeTree<ASTData>*> ASTTransformation::scopeLookup(NodeTree<ASTData>* scope, std::string lookup, bool includeModules, std::set<NodeTree<ASTData>*> visited) {
std::cout << "Scp]|[e looking up " << lookup << std::endl; std::cout << "Scp]|[e looking up " << lookup << std::endl;
std::cout << "current: " << scope->getDataRef()->toString() << std::endl;
for (auto i : scope->getDataRef()->scope)
std::cout << "\t" << i.first << std::endl;
//std::cout << i.first << " : " << i.second->toString() << std::endl;
// Don't visit this node again when looking for the same lookup. Note that we don't prevent coming back for the scope operator, as that should be able to come back. // Don't visit this node again when looking for the same lookup. Note that we don't prevent coming back for the scope operator, as that should be able to come back.
if (visited.find(scope) != visited.end()) if (visited.find(scope) != visited.end())
return std::vector<NodeTree<ASTData>*>(); return std::vector<NodeTree<ASTData>*>();
@@ -1559,7 +1568,8 @@ NodeTree<ASTData>* ASTTransformation::findOrInstantiateFunctionTemplate(std::vec
std::cout << ", " << i << " : " << templateChildren[i]->getDataRef()->getName(); std::cout << ", " << i << " : " << templateChildren[i]->getDataRef()->getName();
std::cout << std::endl; std::cout << std::endl;
instantiatedFunction = new NodeTree<ASTData>("function", ASTData(function, Symbol(scopelessFullyInstantiatedName, true), typeFromTypeNode(templateChildren[templateChildren.size()-2], scope, newTemplateTypeReplacement))); // return type should be looked up in template's scope
instantiatedFunction = new NodeTree<ASTData>("function", ASTData(function, Symbol(scopelessFullyInstantiatedName, true), typeFromTypeNode(templateChildren[templateChildren.size()-2], templateDefinition, newTemplateTypeReplacement)));
addToScope("~enclosing_scope", templateDefinition->getDataRef()->scope["~enclosing_scope"][0], instantiatedFunction); addToScope("~enclosing_scope", templateDefinition->getDataRef()->scope["~enclosing_scope"][0], instantiatedFunction);
addToScope(scopelessFullyInstantiatedName, instantiatedFunction, templateDefinition->getDataRef()->scope["~enclosing_scope"][0]); addToScope(scopelessFullyInstantiatedName, instantiatedFunction, templateDefinition->getDataRef()->scope["~enclosing_scope"][0]);
templateDefinition->getDataRef()->scope["~enclosing_scope"][0]->addChild(instantiatedFunction); // Add this object the the highest scope's templateDefinition->getDataRef()->scope["~enclosing_scope"][0]->addChild(instantiatedFunction); // Add this object the the highest scope's

View File

@@ -426,8 +426,13 @@ bool RNGLRParser::fullyReducesToNull(ParseRule* rule) {
} }
bool RNGLRParser::reducesToNull(ParseRule* rule) { bool RNGLRParser::reducesToNull(ParseRule* rule) {
auto itr = reduceToNullMap.find(rule);
if (itr != reduceToNullMap.end())
return itr->second;
std::vector<Symbol> avoidList; std::vector<Symbol> avoidList;
return reducesToNull(rule, avoidList); auto val = reducesToNull(rule, avoidList);
reduceToNullMap[rule] = val;
return val;
} }
bool RNGLRParser::reducesToNull(ParseRule* rule, std::vector<Symbol> avoidList) { bool RNGLRParser::reducesToNull(ParseRule* rule, std::vector<Symbol> avoidList) {

View File

@@ -148,15 +148,10 @@ int RegEx::longMatch(std::string stringToMatch) {
//Go through every current state. Check to see if it is goal, if so update last goal. //Go through every current state. Check to see if it is goal, if so update last goal.
//Also, add each state's advance to nextStates //Also, add each state's advance to nextStates
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) { for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
if (currentStates[j]->isGoal()) { if (currentStates[j]->isGoal())
lastMatch = i; lastMatch = i;
//std::cout << "Hit goal at " << i << " character: " << stringToMatch[i-1] << std::endl; std::vector<RegExState*> addStates = currentStates[j]->advance(stringToMatch.at(i));
} else { nextStates.insert(nextStates.end(), addStates.begin(), addStates.end());
//std::cout << "currentState " << j << ", " << currentStates[j]->toString() << " is not goal" <<std::endl;
}
std::vector<RegExState*>* addStates = currentStates[j]->advance(stringToMatch.at(i));
nextStates.insert(nextStates.end(), addStates->begin(), addStates->end());
delete addStates;
} }
//Now, clear our current states and add each one of our addStates if it is not already in current states //Now, clear our current states and add each one of our addStates if it is not already in current states

View File

@@ -20,11 +20,11 @@ bool RegExState::characterIs(char inCharacter) {
return character == inCharacter; return character == inCharacter;
} }
std::vector<RegExState*>* RegExState::advance(char advanceCharacter) { std::vector<RegExState*> RegExState::advance(char advanceCharacter) {
std::vector<RegExState*>* advanceStates = new std::vector<RegExState*>(); std::vector<RegExState*> advanceStates;
for (std::vector<RegExState*>::size_type i = 0; i < nextStates.size(); i++) { for (std::vector<RegExState*>::size_type i = 0; i < nextStates.size(); i++) {
if (nextStates[i] != NULL && nextStates[i]->characterIs(advanceCharacter)) if (nextStates[i] != NULL && nextStates[i]->characterIs(advanceCharacter))
advanceStates->push_back(nextStates[i]); advanceStates.push_back(nextStates[i]);
} }
return advanceStates; return advanceStates;
} }

11
stdlib/conversions.krak Normal file
View File

@@ -0,0 +1,11 @@
// Converts a value of template type T to a char and returns it.
// The conversion is written as a C cast inside the raw passthrough text below,
// and that text is only emitted under the __C__ compile condition.
// NOTE(review): no other compile target assigns `out` here, so the result is
// presumably unset when not compiling to C - confirm.
// NOTE(review): the passthrough header appears to bind Kraken `in`/`out` to the
// C names `in`/`out`, while the C text also declares `char out` itself - confirm
// this is the intended way to produce the output value.
fun to_char<T>(in: T) : char {
var out:char
__if_comp__ __C__ {
simple_passthrough(in = in: out = out:) """
char out = (char) in;
"""
}
return out;
}

View File

@@ -1,6 +1,8 @@
import io import io
import vector import vector
import string import string
import mem
import conversions
fun regex(in: char*):regex { fun regex(in: char*):regex {
return regex(string::string(in)) return regex(string::string(in))
@@ -19,11 +21,11 @@ obj regexState(Object) {
return this return this
} }
fun construct(): regexState* { fun construct(): regexState* {
return construct(0) return construct(conversions::to_char(0))
} }
fun copy_construct(old:regexState*): void { fun copy_construct(old:regexState*): void {
character = regexState->character character = old->character
next_states.copy_construct(&regexState->next_states) next_states.copy_construct(&old->next_states)
} }
fun destruct():void { fun destruct():void {
next_states.destruct() next_states.destruct()
@@ -37,17 +39,16 @@ obj regex(Object) {
var regexString: string::string var regexString: string::string
var begin: regexState var begin: regexState
fun construct(regexStringIn: string::string): regex* { fun construct(regexStringIn: string::string): regex* {
regexState.construct() begin.construct()
regexString.copy_construct(&regexStringIn) regexString.copy_construct(&regexStringIn)
var traverse = &begin var traverse = &begin
for (var i = 0; i < regexString.length(); i++;) { for (var i = 0; i < regexString.length(); i++;) {
var next = new<regexState>()->construct(regexString[i]) var next = mem::new<regexState>()->construct(regexString[i])
traverse->next_states->add(next) traverse->next_states->add(next)
traverse = next traverse = next
} }
traverse->next_states->add(new<regexState>()->construct(1)) traverse->next_states->add(mem::new<regexState>()->construct(conversions::to_char(1)))
return this return this
} }
fun copy_construct(old:regex*):void { fun copy_construct(old:regex*):void {
@@ -61,13 +62,16 @@ obj regex(Object) {
fun long_match(to_match: char*): int { return long_match(string::string(to_match)); } fun long_match(to_match: char*): int { return long_match(string::string(to_match)); }
fun long_match(to_match: string::string): int { fun long_match(to_match: string::string): int {
var next = vector::vector(&begin) var next = vector::vector(&begin)
//var next.construct() :vector::vector<regexState*>
var longest = 0 var longest = 0
for (var i = 0; i < to_match.length(); i++;) { for (var i = 0; i < to_match.length(); i++;) {
if (next.size == 0) if (next.size == 0)
return longest return longest
if (next.any_true(fun(state: regexState*):bool { return state->character == 1; })) if (next.any_true(fun(state: regexState*):bool { return state->character == 1; }))
longest = i longest = i
next = next.flatten_map(fun(state: regexState*): vector::vector<regexState*> { return state->match(to_match[i]); }) //next = next.flatten_map<regexState*>(fun(state: regexState*): vector::vector<regexState*> { return state->match(to_match[i]); })
next = next.flatten_map<regexState*>(fun(state: regexState*): vector::vector<regexState*> { return state->match('a'); })
//next = next.flatten_map(fun(state: regexState*): vector::vector<regexState*> { return state->match('a'); })
} }
if (next.any_true(fun(state: regexState*):bool { return state->character == 1; })) if (next.any_true(fun(state: regexState*):bool { return state->character == 1; }))
return to_match.length() return to_match.length()

View File

@@ -1,5 +1,5 @@
import vector; import vector
import mem; import mem
fun string(in:char*):string { fun string(in:char*):string {
var out:string = in var out:string = in

View File

@@ -2,6 +2,12 @@ import mem:*;
import util:*; import util:*;
import io:*; import io:*;
// Convenience free function: builds a vector<T> from a single value.
// It declares `out` with construct(), passes `in` to out.add, and returns `out`.
fun vector<T>(in:T):vector<T> {
var out.construct():vector<T>
out.add(in)
return out
}
obj vector<T> (Object) { obj vector<T> (Object) {
var data: T*; var data: T*;
var size: int; var size: int;
@@ -125,6 +131,13 @@ obj vector<T> (Object) {
} }
return newVec return newVec
} }
// Returns a new vector<T> built from the elements that satisfy `func`.
// Walks data[0] .. data[size-1] in increasing index order and hands every
// element for which func(data[i]) is true to newVec.addEnd.
// This vector's own data and size are only read here, not assigned.
// NOTE(review): addEnd presumably appends, which would keep the original
// relative order of the kept elements - confirm against its definition.
fun filter(func: fun(T):bool):vector<T> {
var newVec.construct(): vector<T>
for (var i = 0; i < size; i++;)
if (func(data[i]))
newVec.addEnd(data[i])
return newVec
}
fun any_true(func: fun(T):bool):bool { fun any_true(func: fun(T):bool):bool {
for (var i = 0; i < size; i++;) for (var i = 0; i < size; i++;)
if (func(data[i])) if (func(data[i]))

View File

@@ -0,0 +1 @@
A

View File

@@ -0,0 +1,8 @@
import io:*
import conversions:*
// Test entry point: passes the result of to_char(65) to println, then returns 0.
// 65 is the ASCII code for 'A'.
fun main():int {
println(to_char(65))
return 0
}