diff --git a/include/ASTData.h b/include/ASTData.h index 7daf15d..b025ea1 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -12,7 +12,7 @@ enum ASTType {undef, translation_unit, interpreter_directive, import, identifier, function, code_block, typed_parameter, expression, boolean_expression, statement, - if_statement, return_statement, assignment_statement, declaration_statement, + if_statement, while_loop, for_loop, return_statement, assignment_statement, declaration_statement, function_call, value}; enum ValueType {none, boolean, integer, floating, double_percision, char_string }; diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 27c818d..386b738 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -27,30 +27,38 @@ typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_pa typed_parameter = type WS parameter ; opt_parameter_list = parameter_list | ; -parameter_list = parameter_list WS parameter | parameter ; +parameter_list = parameter_list WS "," WS parameter | parameter ; parameter = boolean_expression ; -code_block = "{" WS statement_list WS "}" ; -statement_list = statement_list WS statement | statement ; -statement = if_statement | return_statement | boolean_expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ; -function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ; -scope = scope identifier "::" | ; +if_statement = "if" WS "\(" WS boolean_expression WS "\)" WS statement ; -if_statement = "if" WS boolean_expression WS statement ; +while_loop = "while" WS boolean_expression WS statement ; + +for_update = "\(" WS statement_list WS "\)" ; +for_loop = "for" WS for_update WS statement ; return_statement = "return" WS boolean_expression WS ";" ; +code_block = "{" WS statement_list WS "}" ; + +statement_list = statement_list WS statement | statement ; +statement = if_statement | while_loop | for_loop | return_statement | boolean_expression WS 
";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ; +function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ; +scope = scope identifier "::" | ; + boolean_expression = boolean_expression WS "\|\|" WS and_boolean_expression | and_boolean_expression ; and_boolean_expression = and_boolean_expression "&&" bool_exp | bool_exp ; bool_exp = "!" WS bool_exp | expression WS comparator WS expression | bool | expression ; -comparator = "==" | "<=" | ">=" | "!=" ; +comparator = "==" | "<=" | ">=" | "!=" | "<" | ">" ; -expression = expression WS "-" WS term | expression WS "\+" WS term | term ; -term = term WS forward_slash WS factor | term WS "\*" WS factor | factor ; -factor = number | identifier | function_call | bool | string | "\(" WS boolean_expression WS "\)" ; +expression = expression WS "<<" WS shiftand | expression WS ">>" WS shiftand | shiftand ; +shiftand = shiftand WS "-" WS term | shiftand WS "\+" WS term | term ; +term = term WS forward_slash WS factor | term WS "\*" WS factor | term WS "%" WS factor | factor ; +factor = "\+\+" WS unarad | unarad WS "\+\+" | "--" WS unarad | unarad WS "--" | "\+" WS unarad | "-" WS unarad | "!" WS unarad | "~" WS unarad | "\(" WS type WS "\)" WS unarad | "\*" WS unarad | "&" WS unarad | unarad ; +unarad = number | identifier | function_call | bool | string | "\(" WS boolean_expression WS "\)" ; number = integer | float | double ; -assignment_statement = identifier WS "=" WS boolean_expression ; +assignment_statement = identifier WS "=" WS boolean_expression | identifier WS "\+=" WS boolean_expression | identifier WS "-=" WS boolean_expression | identifier WS "\*=" WS boolean_expression | identifier WS "/=" WS boolean_expression ; declaration_statement = type WS identifier WS "=" WS boolean_expression ; alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ; @@ -60,6 +68,6 @@ integer = sign numeric | sign hexadecimal | "null" ; float = sign numeric "."
numeric "f" ; double = sign numeric "." numeric | sign numeric "." numeric "d" ; bool = "true" | "false" | "True" | "False" ; -alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|_)+" ; +alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|_)+" ; numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; string = "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|!|\?|_|-| | |\\|/|\||0|1|2|3|4|5|6|7|8|9)+\"" ; \ No newline at end of file diff --git a/main.cpp b/main.cpp index a072818..0cce3f8 100644 --- a/main.cpp +++ b/main.cpp @@ -88,7 +88,7 @@ int main(int argc, char* argv[]) { //std::cout << "Doing stateSetToString from Main" << std::endl; // std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl; // std::cout << parser.stateSetToString() << std::endl; - // std::cout << "finished stateSetToString from Main" << std::endl; + // std::cout << "finished stateSetToString from Main" << std::endl; // std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl; // std::cout << parser.tableToString() << std::endl; // std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl; diff --git a/src/ASTData.cpp b/src/ASTData.cpp index 7a4bb39..81c0bda 100644 --- a/src/ASTData.cpp +++ b/src/ASTData.cpp @@ -66,52 +66,40 @@ std::string ASTData::ASTTypeToString(ASTType type) { switch (type) { case translation_unit: return "translation_unit"; - break; case interpreter_directive: return "interpreter_directive"; - break; case identifier: return "identifier"; - break; case import: return "import"; - break; case function: return "function"; - break; case code_block: return "code_block"; - break; case typed_parameter: return "typed_parameter"; - break; case expression: return "expression"; - break; case boolean_expression: return "boolean_expression"; - break; case statement: return "statement"; - break; case 
if_statement: return "if_statement"; - break; + case while_loop: + return "while_loop"; + case for_loop: + return "for_loop"; case return_statement: return "return_statement"; - break; case assignment_statement: return "assignment_statement"; - break; case declaration_statement: return "declaration_statement"; - break; case function_call: return "function_call"; - break; case value: return "value"; - break; default: return "unknown_ASTType"; } diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 36cca8a..2802b20 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -61,23 +61,15 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { } else { return transform(children[0]); //Just a promoted bool_exp, so do child } - } else if (name == "expression") { - //If this is an actual part of an expression, not just a premoted term + //Here's the order of ops stuff + } else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad") { + //If this is an actual part of an expression, not just a premoted child if (children.size() > 1) { std::string functionCallName = concatSymbolTree(children[1]); newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); skipChildren.insert(1); } else { - return transform(children[0]); //Just a promoted term, so do child - } - } else if (name == "term") { - //If this is an actual part of an expression, not just a premoted factor - if (children.size() > 1) { - std::string functionCallName = concatSymbolTree(children[1]); - newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); - skipChildren.insert(1); - } else { - return transform(children[0]); //Just a promoted factor, so do child + return transform(children[0]); //Just a promoted child, so do it instead } } else if (name == "factor") { return transform(children[0]); //Just a premoted number or function call or something, so use it instead @@ 
-85,10 +77,29 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { newNode = new NodeTree(name, ASTData(statement)); } else if (name == "if_statement") { newNode = new NodeTree(name, ASTData(if_statement)); + } else if (name == "while_loop") { + newNode = new NodeTree(name, ASTData(while_loop)); + } else if (name == "for_loop") { + newNode = new NodeTree(name, ASTData(for_loop)); } else if (name == "return_statement") { newNode = new NodeTree(name, ASTData(return_statement)); } else if (name == "assignment_statement") { newNode = new NodeTree(name, ASTData(assignment_statement)); + std::string assignFuncName = concatSymbolTree(children[1]); + if (assignFuncName == "=") { + newNode->addChild(transform(children[0])); + newNode->addChild(transform(children[2])); + } else { + //For assignments like += or *=, expand the syntactic sugar: a += b becomes a = a + b. + std::string operatorName = assignFuncName.substr(0, assignFuncName.size()-1); //Drop the trailing '=' so the call is named after the plain operator + NodeTree* lhs = transform(children[0]); + NodeTree* childCall = new NodeTree(operatorName, ASTData(function_call, Symbol(operatorName, true))); + childCall->addChild(lhs); + childCall->addChild(transform(children[2])); + newNode->addChild(lhs); + newNode->addChild(childCall); + } + return newNode; } else if (name == "declaration_statement") { newNode = new NodeTree(name, ASTData(declaration_statement)); NodeTree* newIdentifier = transform(children[1]); //Transform the identifier diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index abef745..45e25e3 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -27,10 +27,8 @@ std::string CGenerator::generate(NodeTree* from) { break; case import: return "#include <" + data.symbol.getName() + ">\n"; - break; case identifier: return data.symbol.getName(); - break; case function: output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "("; for (int i = 0; i < children.size()-1; i++) { @@ -40,7 +38,6 @@ std::string CGenerator::generate(NodeTree* from) { } output+= ")\n" + generate(children[children.size()-1]); return output; - break; case code_block:
output += "{\n"; tabLevel++; @@ -49,22 +46,25 @@ std::string CGenerator::generate(NodeTree* from) { tabLevel--; output += tabs() + "}"; return output; - break; case expression: output += " " + data.symbol.getName() + ", "; break; case boolean_expression: output += " " + data.symbol.getName() + " "; break; case statement: return tabs() + generate(children[0]) + ";\n"; - break; case if_statement: output += "if (" + generate(children[0]) + ")\n\t" + generate(children[1]); if (children.size() > 2) output += " else " + generate(children[2]); return output; - break; + case while_loop: //Emits a C while loop; assumes children[0] is the condition and children[1] the body, as for if_statement + output += "while (" + generate(children[0]) + ")\n\t" + generate(children[1]); + return output; + case for_loop: //NOTE(review): placeholder that still emits an "if"; real for-loop output needs the for_update child layout, which is not visible here + output += "if (" + generate(children[0]) + ")\n\t" + generate(children[1]); + return output; case return_statement: return "return " + generate(children[0]); case assignment_statement: @@ -76,7 +76,7 @@ std::string CGenerator::generate(NodeTree* from) { //Handle operators specially for now. Will later replace with //Inlined functions in the standard library std::string name = data.symbol.getName(); - if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=") { + if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" || name == "%" || name == "<" || name == ">" || name == "<<" || name == ">>") { return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))"; } output += data.symbol.getName() + "("; diff --git a/src/ParseRule.cpp b/src/ParseRule.cpp index ea96c50..e27208a 100644 --- a/src/ParseRule.cpp +++ b/src/ParseRule.cpp @@ -29,7 +29,12 @@ const bool ParseRule::operator!=(const ParseRule &other) { } ParseRule* ParseRule::clone() { - return( new ParseRule(leftHandle, pointerIndex, rightSide, lookahead) ); + std::vector* newLookahead = NULL; + if (lookahead) { + newLookahead = new std::vector(); + *newLookahead = *lookahead; + } + return( new ParseRule(leftHandle, pointerIndex,
rightSide, newLookahead) ); } void ParseRule::setLeftHandle(Symbol leftHandle) { diff --git a/src/Parser.cpp b/src/Parser.cpp index 83cbfaa..f8236c5 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -88,7 +88,7 @@ void Parser::createStateSet() { std::queue* toDo = new std::queue(); toDo->push(zeroState); //std::cout << "Begining for main set for loop" << std::endl; - while (toDo->front()) { + while (toDo->size()) { //closure closure(toDo->front()); //Add the new states @@ -181,7 +181,7 @@ std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { } } followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end()); - //delete symbolFirstSet; + delete symbolFirstSet; rule->advancePointer(); } if (rule->isAtEnd()) { @@ -209,10 +209,13 @@ void Parser::closure(State* state) { std::vector* stateTotal = state->getTotal(); for (std::vector::size_type i = 0; i < stateTotal->size(); i++) { ParseRule* currentStateRule = (*stateTotal)[i]; + //If it's at it's end, move on. We can't advance it. + if(currentStateRule->isAtEnd()) + continue; for (std::vector::size_type j = 0; j < loadedGrammer.size(); j++) { //If the current symbol in the rule is not null (rule completed) and it equals a grammer's left side ParseRule* currentGramRule = loadedGrammer[j]->clone(); - if ( !currentStateRule->isAtEnd() && currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) { + if (currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) { //std::cout << (*stateTotal)[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl; //Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set. 
//std::cout << "Setting lookahead for " << currentGramRule->toString() << " in state " << state->toString() << std::endl; @@ -225,6 +228,7 @@ void Parser::closure(State* state) { //std::cout << (*stateTotal)[k]->toString() << std::endl; (*stateTotal)[k]->addLookahead(currentGramRule->getLookahead()); isAlreadyInState = true; + delete currentGramRule; break; } } @@ -311,7 +315,7 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queu std::string Parser::stateSetToString() { std::string concat = ""; for (std::vector< State *>::size_type i = 0; i < stateSets.size(); i++) { - concat += stateSets[i]->toString(); + concat += intToString(i) + " is " + stateSets[i]->toString(); } return concat; } diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index b413321..ec64df5 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -81,7 +81,10 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { std::cout << "Nearby is:" << std::endl; int range = 5; for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++) - std::cout << input[j].toString() << " "; + if (j == i) + std::cout << "||*||*||" << input[j].toString() << "||*||*|| "; + else + std::cout << input[j].toString() << " "; std::cout << std::endl; break; } @@ -339,11 +342,13 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std: //if (newStates[i]->basisEquals(*((*stateSets)[j]))) { stateAlreadyInAllStates = true; //If it does exist, we should add it as the shift/goto in the action table + //std::cout << "newStates[" << i << "] == stateSets[" << j << "]" << std::endl; if (!((*stateSets)[j]->basisEquals(*(newStates[i])))) toDo->push((*stateSets)[j]); (*stateSets)[j]->combineStates(*(newStates[i])); + //std::cout << j << "\t Hay, doing an inside loop state reductions!" 
<< std::endl; addStateReductionsToTable((*stateSets)[j]); table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j)); @@ -368,8 +373,9 @@ void RNGLRParser::addStateReductionsToTable(State* state) { //Also, this really only needs to be done for the state's basis, but we're already iterating through, so... std::vector* lookahead = (*currStateTotal)[i]->getLookahead(); if ((*currStateTotal)[i]->isAtEnd()) { - for (std::vector::size_type j = 0; j < lookahead->size(); j++) + for (std::vector::size_type j = 0; j < lookahead->size(); j++) { table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); + } //If this has an appropriate ruduction to null, get the reduce trees out } else if (reducesToNull((*currStateTotal)[i])) { //std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl; diff --git a/src/State.cpp b/src/State.cpp index b77caf1..012d468 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -79,12 +79,9 @@ void State::combineStates(State &other) { std::vector* State::getTotal() { total.clear(); - for (std::vector::size_type i = 0; i < basis.size(); i++) { - total.push_back(basis[i]); - } - for (std::vector::size_type i = 0; i < remaining.size(); i++) { - total.push_back(remaining[i]); - } + //std::cout << "Vector will be " << basis.size() << " + " << remaining.size() << std::endl; + total.insert(total.begin(), basis.begin(), basis.end()); + total.insert(total.end(), remaining.begin(), remaining.end()); return(&total); } std::vector* State::getBasis() { @@ -111,6 +108,7 @@ void State::addRuleCombineLookahead(ParseRule* rule) { if (rule->equalsExceptLookahead(*(total[i]))) { total[i]->addLookahead(rule->getLookahead()); alreadyIn = true; + break; } } if (!alreadyIn) diff --git a/src/Table.cpp b/src/Table.cpp index 690ebd2..a94d6b2 100644 --- a/src/Table.cpp +++ b/src/Table.cpp @@ -128,7 +128,7 @@ std::vector* Table::get(int state, Symbol token) { action->push_back(new 
ParseAction(ParseAction::ACCEPT)); } - //If ourside the symbol range of this state (same as NULL), reject + //If outside the symbol range of this state (same as NULL), reject if ( symbolIndex >= table[state]->size() ) { action = new std::vector(); action->push_back(new ParseAction(ParseAction::REJECT)); @@ -141,7 +141,7 @@ std::vector* Table::get(int state, Symbol token) { } //Otherwise, we have something, so return it - return (action); + return action; } ParseAction* Table::getShift(int state, Symbol token) { @@ -163,8 +163,9 @@ std::string Table::toString() { concat += "\n"; for (std::vector< std::vector< std::vector< ParseRule* >* >* >::size_type i = 0; i < table.size(); i++) { - concat += intToString(i) + "\t"; + concat += intToString(i) + " is the state\t"; for (std::vector< std::vector< ParseRule* >* >::size_type j = 0; j < table[i]->size(); j++) { + concat += "for " + symbolIndexVec[j].toString() + " do "; if ( (*(table[i]))[j] != NULL) { for (std::vector< ParseRule* >::size_type k = 0; k < (*(table[i]))[j]->size(); k++) { concat += (*((*(table[i]))[j]))[k]->toString() + "\t";