From b62c3e729f25f44a3cbfb6115bce5cbb65974cca Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Fri, 3 Jul 2015 18:34:46 -0400 Subject: [PATCH] Some more work, and a --parse-only option to support the new kraken.vim vim plugin that adds Syntastic support (and syntax highlighting) --- .gitignore | 2 +- include/Importer.h | 3 +- include/Parser.h | 2 +- include/RNGLRParser.h | 2 +- main.cpp | 35 +++++++++------ src/Importer.cpp | 68 +++++++++++++++++------------- src/RNGLRParser.cpp | 23 ++++++++-- stdlib/grammer.krak | 31 ++++++++++++-- stdlib/io.krak | 30 ++++++++++++- tests/syntax_error.krak | 7 +++ tests/test_fileio.expected_results | 3 ++ tests/test_fileio.krak | 7 +++ 12 files changed, 155 insertions(+), 58 deletions(-) create mode 100644 tests/syntax_error.krak create mode 100644 tests/test_fileio.expected_results create mode 100644 tests/test_fileio.krak diff --git a/.gitignore b/.gitignore index daa570b..cc19a32 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,4 @@ stats *krakout* kraklist.txt .*.un~ -tests/test_topLevelVarInit/ +RNGLR.pdf diff --git a/include/Importer.h b/include/Importer.h index 48e933e..644c3b4 100644 --- a/include/Importer.h +++ b/include/Importer.h @@ -20,7 +20,7 @@ class ASTTransformation; class Importer { public: - Importer(Parser* parserIn, std::vector includePaths, std::string outputName); + Importer(Parser* parserIn, std::vector includePaths, std::string outputName, bool only_parseIn = false); ~Importer(); void import(std::string fileName); NodeTree* getUnit(std::string fileName); @@ -36,6 +36,7 @@ class Importer { NodeTree* ast; NodeTree* syntaxTree; }; + bool only_parse; std::vector importedTrips; std::vector includePaths; Parser* parser; diff --git a/include/Parser.h b/include/Parser.h index 73f878d..2a4eba5 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -28,7 +28,7 @@ class Parser { virtual void loadGrammer(std::string grammerInputString); virtual void createStateSet(); virtual std::string stateSetToString(); - virtual NodeTree* parseInput(std::string inputString) = 0; + virtual NodeTree* parseInput(std::string inputString, std::string filename) = 0; // filename for error reporting virtual std::string grammerToString(); virtual std::string grammerToDOT(); diff --git a/include/RNGLRParser.h b/include/RNGLRParser.h index d1a4dd3..1dad38e 100644 --- a/include/RNGLRParser.h +++ b/include/RNGLRParser.h @@ -17,7 +17,7 @@ class RNGLRParser: public Parser { public: RNGLRParser(); ~RNGLRParser(); - NodeTree* parseInput(std::string inputString); + NodeTree* parseInput(std::string inputString, std::string filename); // filename for error reporting void printReconstructedFrontier(int frontier); private: diff --git a/main.cpp b/main.cpp index 6f51cb9..6febef3 100644 --- a/main.cpp +++ b/main.cpp @@ -29,6 +29,7 @@ int main(int argc, char* argv[]) { std::cerr << "Or for testing do: kraken --test [optional list of names of file (.krak .expected_results) without extentions to run]" << std::endl; return 0; } + std::string grammerFileString = "../krakenGrammer.kgm"; if (argc >= 2 && std::string(argv[1]) == "--test") { StringReader::test(); @@ -40,7 +41,7 @@ int main(int argc, char* argv[]) { if (argc >= 3) { std::string testResults, line; int passed = 0, failed = 0; - Tester test(argv[0], "../krakenGrammer.kgm"); + Tester test(argv[0], grammerFileString); // find the max length so we can pad the string and align the results unsigned int maxLineLength = 0; for (int i = 2; i < argc; i++) { @@ -66,10 +67,16 @@ int main(int argc, char* argv[]) { krakenDir = strSlice(krakenDir, 0, -(std::string("kraken").length()+1)); includePaths.push_back(krakenDir + "stdlib/"); //Add the stdlib directory that exists in the same directory as the kraken executable to the path - std::string grammerFileString = "../krakenGrammer.kgm"; std::string programName; std::string outputName; - if (argc > 3) { + bool parse_only = false; + //std::cout << "argv[1] == " << argv[1] << std::endl; + if (std::string(argv[1]) == "--parse-only") { + parse_only = true; + grammerFileString = argv[2]; + programName = argv[3]; + //outputName = argv[3]; + } else if (argc > 3) { grammerFileString = argv[1]; programName = argv[2]; outputName = argv[3]; @@ -103,14 +110,13 @@ int main(int argc, char* argv[]) { } grammerInFile.close(); - //LALRParser parser; RNGLRParser parser; parser.loadGrammer(grammerInputFileString); //Start binary stuff bool compGramGood = false; if (compiledGrammerInFile.is_open()) { - std::cout << "Compiled grammer file exists, reading it in" << std::endl; + //std::cout << "Compiled grammer file exists, reading it in" << std::endl; std::streampos compGramSize = compiledGrammerInFile.tellg(); char* binaryTablePointer = new char [compGramSize]; compiledGrammerInFile.seekg(0, std::ios::beg); @@ -118,7 +124,7 @@ int main(int argc, char* argv[]) { compiledGrammerInFile.close(); //Check magic number if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') { - std::cout << "Valid Kraken Compiled Grammer File" << std::endl; + //std::cout << "Valid Kraken Compiled Grammer File" << std::endl; int gramStringLength = *((int*)(binaryTablePointer+4)); //std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is " //<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl; @@ -128,7 +134,7 @@ int main(int argc, char* argv[]) { std::cout << "The Grammer has been changed, will re-create" << std::endl; } else { compGramGood = true; - std::cout << "Grammer file is up to date." << std::endl; + //std::cout << "Grammer file is up to date." << std::endl; parser.importTable(binaryTablePointer + 4 + sizeof(int) + gramStringLength); //Load table starting at the table section } } else { @@ -158,17 +164,18 @@ int main(int argc, char* argv[]) { } //End binary stuff - std::cout << "\nParsing" << std::endl; + //std::cout << "\nParsing" << std::endl; + //std::cout << "\toutput name: " << outputName << std::endl; + //std::cout << "\tprogram name: " << programName << std::endl; + Importer importer(&parser, includePaths, outputName, parse_only); // Output name for directory to put stuff in - std::cout << "\n output name: " << outputName << std::endl; - std::cout << "\n program name: " << programName << std::endl; - Importer importer(&parser, includePaths, outputName); // Output name for directory to put stuff in - - for (auto i : includePaths) - std::cout << i << std::endl; + //for (auto i : includePaths) + //std::cout << i << std::endl; importer.import(programName); std::map*> ASTs = importer.getASTMap(); + if (parse_only) + return 0; //Do optimization, etc. here. //None at this time, instead going straight to C in this first (more naive) version diff --git a/src/Importer.cpp b/src/Importer.cpp index 9f450fa..09b4142 100644 --- a/src/Importer.cpp +++ b/src/Importer.cpp @@ -1,12 +1,15 @@ #include "Importer.h" -Importer::Importer(Parser* parserIn, std::vector includePaths, std::string outputNameIn) { +Importer::Importer(Parser* parserIn, std::vector includePaths, std::string outputNameIn, bool only_parseIn) { + only_parse = only_parseIn; //constructor outputName = outputNameIn; - if (mkdir(("./" + outputName).c_str(), 0755)) { - std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl; - std::cerr << "Could not make directory " << outputName << std::endl; + if (!only_parse) { + if (mkdir(("./" + outputName).c_str(), 0755)) { + //std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl; + //std::cerr << "Could not make directory " << outputName << std::endl; + } } parser = parserIn; @@ -69,13 +72,13 @@ void Importer::registerAST(std::string name, NodeTree* ast, NodeTree* Importer::getUnit(std::string fileName) { - std::cout << "\n\nImporting " << fileName << " "; + //std::cout << "\n\nImporting " << fileName << " "; //Check to see if we've already done it if (imported.find(fileName) != imported.end()) { - std::cout << "Already Imported!" << std::endl; + //std::cout << "Already Imported!" << std::endl; return imported[fileName]; } - std::cout << "Not yet imported" << std::endl; + //std::cout << "Not yet imported" << std::endl; return NULL; } @@ -87,7 +90,8 @@ NodeTree* Importer::importFirstPass(std::string fileName) { if (!parseTree) return NULL; //Call with ourself to allow the transformation to call us to import files that it needs - ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself + if (!only_parse) + ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself } return ast; } @@ -97,8 +101,10 @@ void Importer::import(std::string fileName) { //Start the ball rolling by importing and running the first pass on the first file. //This will import, first pass and register all the other files too. - std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl; + //std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl; importFirstPass(fileName); //First pass defines all objects + if (only_parse) + return; std::cout << "\n\n =====SECOND PASS===== \n\n" << std::endl; for (importTriplet i : importedTrips) //Second pass defines data inside objects, outside declaration statements, @@ -141,39 +147,41 @@ void Importer::import(std::string fileName) { NodeTree* Importer::parseAndTrim(std::string fileName) { std::ifstream programInFile; - std::ofstream outFile, outFileTransformed; + //std::ofstream outFile, outFileTransformed; - - std::cout << "outputName " << outputName << std::endl; - std::cout << "fileName " << fileName << std::endl; + //std::cout << "outputName " << outputName << std::endl; + //std::cout << "fileName " << fileName << std::endl; auto pathPieces = split(fileName, '/'); std::string outputFileName = outputName + "/" + pathPieces[pathPieces.size()-1] + "out"; - std::cout << "outputFileName " << outputFileName << std::endl; + //std::cout << "outputFileName " << outputFileName << std::endl; + std::string inputFileName; for (auto i : includePaths) { programInFile.open(i+fileName); - if (programInFile.is_open()) + if (programInFile.is_open()) { + inputFileName = i+fileName; break; - else + } else { std::cout << i+fileName << " is no good" << std::endl; + } } if (!programInFile.is_open()) { std::cout << "Problem opening programInFile " << fileName << "\n"; return NULL; } - outFile.open(outputFileName); - if (!outFile.is_open()) { - std::cout << "Probelm opening output file " << outputFileName << "\n"; - return NULL; - } + //outFile.open(outputFileName); + //if (!outFile.is_open()) { + //std::cout << "Probelm opening output file " << outputFileName << "\n"; + //return NULL; + //} - outFileTransformed.open((outputFileName + ".transformed.dot").c_str()); - if (!outFileTransformed.is_open()) { - std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n"; - return NULL; - } + //outFileTransformed.open((outputFileName + ".transformed.dot").c_str()); + //if (!outFileTransformed.is_open()) { + //std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n"; + //return NULL; + //} std::string programInputFileString, line; while(programInFile.good()) { @@ -183,18 +191,18 @@ NodeTree* Importer::parseAndTrim(std::string fileName) { programInFile.close(); //std::cout << programInputFileString << std::endl; - NodeTree* parseTree = parser->parseInput(programInputFileString); + NodeTree* parseTree = parser->parseInput(programInputFileString, inputFileName); if (parseTree) { //std::cout << parseTree->DOTGraphString() << std::endl; //outFile << parseTree->DOTGraphString() << std::endl; } else { std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl; - outFile.close(); outFileTransformed.close(); + //outFile.close(); outFileTransformed.close(); throw "unexceptablblllll"; return NULL; } - outFile.close(); + //outFile.close(); //Remove Transformations @@ -211,7 +219,7 @@ NodeTree* Importer::parseAndTrim(std::string fileName) { } else { std::cout << "Tree returned from transformation is NULL!" << std::endl; } - outFileTransformed.close(); + //outFileTransformed.close(); std::cout << "Returning parse tree" << std::endl; return parseTree; diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index b7bdd6b..869f8cf 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -22,7 +22,7 @@ void RNGLRParser::printReconstructedFrontier(int frontier) { } } -NodeTree* RNGLRParser::parseInput(std::string inputString) { +NodeTree* RNGLRParser::parseInput(std::string inputString, std::string filename) { input.clear(); gss.clear(); while(!toReduce.empty()) toReduce.pop(); @@ -30,6 +30,7 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { SPPFStepNodes.clear(); nullableParts.clear(); packedMap.clear(); + bool errord = false; //Check for no tokens bool accepting = false; @@ -52,16 +53,21 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { lexer.setInput(inputString); //Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation. //It could be converted to on-line later. + int tokenNum = 1; Symbol currentToken = lexer.next(); input.push_back(currentToken); while (currentToken != EOFSymbol) { currentToken = lexer.next(); //std::cout << "CurrentToken is " << currentToken.toString() << std::endl; if (currentToken == invalidSymbol) { + std::cerr << filename << ":" << findLine(tokenNum) << std::endl; + errord = true; + std::cerr << "lex error" << std::endl; std::cerr << "Invalid Symbol!" << std::endl; throw "Invalid Symbol, cannot lex"; } input.push_back(currentToken); + tokenNum++; } // std::cout << "\nDone with Lexing, length:" << input.size() << std::endl; @@ -99,8 +105,11 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { // std::cout << "Checking if frontier " << i << " is empty" << std::endl; if (gss.frontierIsEmpty(i)) { //std::cout << "Frontier " << i << " is empty." << std::endl; - std::cerr << "Parsing failed on " << input[i].toString() << std::endl; - std::cerr << "Problem is on line: " << findLine(i) << std::endl; + //std::cerr << "Parsing failed on " << input[i].toString() << std::endl; + //std::cerr << "Problem is on line: " << findLine(i) << std::endl; + std::cerr << filename << ":" << findLine(i) << std::endl; + errord = true; + std::cerr << "parse error" << std::endl; std::cerr << "Nearby is:" << std::endl; int range = 10; for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++) @@ -138,6 +147,12 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { return gss.getEdge(accState, v0); } + if (!errord) { + std::cerr << filename << ":" << findLine(input.size())-2 << std::endl; + std::cerr << "parse error" << std::endl; + std::cerr << "Nearby is:" << std::endl; + } + std::cerr << "Rejected!" << std::endl; // std::cout << "GSS:\n" << gss.toString() << std::endl; return NULL; @@ -522,7 +537,7 @@ std::vector*> RNGLRParser::getPathEdges(std::vector + var regexs: set::set + + fun construct(): grammer* { + rules.construct() + regexs.construct() + } + fun copy_construct(old: grammer*) { + rules.copy_construct(&old->rules) + regexs.copy_construct(&old->regexs) + } + fun operator=(other: grammer) { + destruct() + copy_construct(&other) + } + fun destruct() { + rules.destruct() + regexs.destruct() + } +} obj rule (Object) { var lhs: symbol::symbol @@ -16,10 +39,10 @@ obj rule (Object) { lookahead.construct() } fun copy_construct(old: rule*) { - lhs.copy_construct(&rule->lhs) - rhs.copy_construct(&rule->rhs) - position = rule->position - lookahead.copy_construct(&rule->lookahead) + lhs.copy_construct(&other->lhs) + rhs.copy_construct(&other->rhs) + position = other->position + lookahead.copy_construct(&other->lookahead) } fun operator=(other: rule) { destruct() diff --git a/stdlib/io.krak b/stdlib/io.krak index 86e471a..f74865e 100644 --- a/stdlib/io.krak +++ b/stdlib/io.krak @@ -1,4 +1,4 @@ -import string:*; +import string; import mem:* __if_comp__ __C__ simple_passthrough """ @@ -32,7 +32,7 @@ fun print(toPrint: char) : void { return; } -fun print(toPrint: string) : void { +fun print(toPrint: string::string) : void { var charArr = toPrint.toCharArray() defer delete(charArr) print(charArr); @@ -73,3 +73,29 @@ fun print(toPrint: double) : void{ return; } +// Ok, just some DEAD simple file io for now +fun read_file(path: string::string): string::string { + var char_path = path.toCharArray() + defer delete(char_path) + var data: char* + __if_comp__ __C__ { + simple_passthrough(char_path = char_path:data = data:) """ + FILE *fp = fopen(char_path, "r"); + fseek(fp, 0L, SEEK_END); + long size = ftell(fp); + fseek(fp, 0L, SEEK_SET); + char *data = malloc(size+1); + size_t readSize = fread(data, 1, size, fp); + data[readSize] = 0; + fclose(fp); + """ + } + var toRet = string::string(data) + __if_comp__ __C__ { + simple_passthrough(data = data::) """ + free(data) + """ + } + return toRet +} + diff --git a/tests/syntax_error.krak b/tests/syntax_error.krak new file mode 100644 index 0000000..2018115 --- /dev/null +++ b/tests/syntax_error.krak @@ -0,0 +1,7 @@ +import io + +fun main():int { + return 0 +} + + diff --git a/tests/test_fileio.expected_results b/tests/test_fileio.expected_results new file mode 100644 index 0000000..f2b3c31 --- /dev/null +++ b/tests/test_fileio.expected_results @@ -0,0 +1,3 @@ +this can be anything +because it reads itself +beautiful diff --git a/tests/test_fileio.krak b/tests/test_fileio.krak new file mode 100644 index 0000000..0d98486 --- /dev/null +++ b/tests/test_fileio.krak @@ -0,0 +1,7 @@ +import io:* +import string:* + +fun main():int { + print(read_file(string("test_fileio.expected_results"))) + return 0 +}