Some more work, and a --parse-only option to support the new kraken.vim vim plugin that adds Syntastic support (and syntax highlighting)

2015-07-03 18:34:46 -04:00
parent 2fcace72ed
commit b62c3e729f
12 changed files with 155 additions and 58 deletions
@@ -10,4 +10,4 @@ stats
 *krakout*
 kraklist.txt
 .*.un~
-tests/test_topLevelVarInit/
+RNGLR.pdf
@@ -20,7 +20,7 @@ class ASTTransformation;

 class Importer {
 	public:
-		Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName);
+		Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName, bool only_parseIn = false);
 		~Importer();
 		void import(std::string fileName);
 		NodeTree<ASTData>* getUnit(std::string fileName);
@@ -36,6 +36,7 @@ class Importer {
 			NodeTree<ASTData>* ast;
 			NodeTree<Symbol>* syntaxTree;
 		};
+        bool only_parse;
 		std::vector<importTriplet> importedTrips;
 		std::vector<std::string> includePaths;
 		Parser* parser;
@@ -28,7 +28,7 @@ class Parser {
 		virtual void loadGrammer(std::string grammerInputString);
 		virtual void createStateSet();
 		virtual std::string stateSetToString();
-		virtual NodeTree<Symbol>* parseInput(std::string inputString) = 0;
+		virtual NodeTree<Symbol>* parseInput(std::string inputString, std::string filename) = 0; // filename for error reporting
 		virtual std::string grammerToString();
 		virtual std::string grammerToDOT();

@@ -17,7 +17,7 @@ class RNGLRParser: public Parser {
 	public:
 		RNGLRParser();
 		~RNGLRParser();
-		NodeTree<Symbol>* parseInput(std::string inputString);
+		NodeTree<Symbol>* parseInput(std::string inputString, std::string filename); // filename for error reporting
        void printReconstructedFrontier(int frontier);

 	private:
@@ -29,6 +29,7 @@ int main(int argc, char* argv[]) {
        std::cerr << "Or for testing do: kraken --test [optional list of names of file (.krak .expected_results) without extentions to run]" << std::endl;
        return 0;
    }
+	std::string grammerFileString = "../krakenGrammer.kgm";

 	if (argc >= 2 && std::string(argv[1]) == "--test") {
 		StringReader::test();
@@ -40,7 +41,7 @@ int main(int argc, char* argv[]) {
 		if (argc >= 3) {
 			std::string testResults, line;
 			int passed = 0, failed = 0;
-			Tester test(argv[0], "../krakenGrammer.kgm");
+			Tester test(argv[0], grammerFileString);
            // find the max length so we can pad the string and align the results
            unsigned int maxLineLength = 0;
 			for (int i = 2; i < argc; i++) {
@@ -66,10 +67,16 @@ int main(int argc, char* argv[]) {
 	krakenDir = strSlice(krakenDir, 0, -(std::string("kraken").length()+1));
 	includePaths.push_back(krakenDir + "stdlib/"); //Add the stdlib directory that exists in the same directory as the kraken executable to the path

-	std::string grammerFileString = "../krakenGrammer.kgm";
 	std::string programName;
 	std::string outputName;
-    if (argc > 3) {
+    bool parse_only = false;
+    //std::cout << "argv[1] == " << argv[1] << std::endl;
+    if (std::string(argv[1]) == "--parse-only") {
+        parse_only = true;
+        grammerFileString = argv[2];
+        programName = argv[3];
+        //outputName = argv[3];
+    } else if (argc > 3) {
        grammerFileString = argv[1];
        programName = argv[2];
        outputName = argv[3];
@@ -103,14 +110,13 @@ int main(int argc, char* argv[]) {
 	}
 	grammerInFile.close();

-	//LALRParser parser;
 	RNGLRParser parser;
 	parser.loadGrammer(grammerInputFileString);

 	//Start binary stuff
 	bool compGramGood = false;
 	if (compiledGrammerInFile.is_open()) {
-		std::cout << "Compiled grammer file exists, reading it in" << std::endl;
+		//std::cout << "Compiled grammer file exists, reading it in" << std::endl;
 		std::streampos compGramSize = compiledGrammerInFile.tellg();
 		char* binaryTablePointer = new char [compGramSize];
 		compiledGrammerInFile.seekg(0, std::ios::beg);
@@ -118,7 +124,7 @@ int main(int argc, char* argv[]) {
 		compiledGrammerInFile.close();
 		//Check magic number
 		if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') {
-			std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
+			//std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
 			int gramStringLength = *((int*)(binaryTablePointer+4));
 			//std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is "
 			//<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl;
@@ -128,7 +134,7 @@ int main(int argc, char* argv[]) {
 				std::cout << "The Grammer has been changed, will re-create" << std::endl;
 			} else {
 				compGramGood = true;
-				std::cout << "Grammer file is up to date." << std::endl;
+				//std::cout << "Grammer file is up to date." << std::endl;
 				parser.importTable(binaryTablePointer + 4 + sizeof(int) + gramStringLength); //Load table starting at the table section
 			}
 		} else {
@@ -158,17 +164,18 @@ int main(int argc, char* argv[]) {
 	}
 	//End binary stuff

-	std::cout << "\nParsing" << std::endl;
+    //std::cout << "\nParsing" << std::endl;
+	//std::cout << "\toutput name: " << outputName << std::endl;
+	//std::cout << "\tprogram name: " << programName << std::endl;
+	Importer importer(&parser, includePaths, outputName, parse_only); // Output name for directory to put stuff in

-	std::cout << "\n output name: " << outputName << std::endl;
-	std::cout << "\n program name: " << programName << std::endl;
-	Importer importer(&parser, includePaths, outputName); // Output name for directory to put stuff in
-
-	for (auto i : includePaths)
-		std::cout << i << std::endl;
+	//for (auto i : includePaths)
+		//std::cout << i << std::endl;

 	importer.import(programName);
 	std::map<std::string, NodeTree<ASTData>*> ASTs = importer.getASTMap();
+    if (parse_only)
+        return 0;

 	//Do optimization, etc. here.
 	//None at this time, instead going straight to C in this first (more naive) version
@@ -1,12 +1,15 @@
 #include "Importer.h"

-Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn) {
+Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn, bool only_parseIn) {
+    only_parse = only_parseIn;
 	//constructor
    outputName = outputNameIn;

-    if (mkdir(("./" + outputName).c_str(), 0755)) {
-        std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl;
-        std::cerr << "Could not make directory " << outputName << std::endl;
+    if (!only_parse) {
+        if (mkdir(("./" + outputName).c_str(), 0755)) {
+            //std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl;
+            //std::cerr << "Could not make directory " << outputName << std::endl;
+        }
    }

 	parser = parserIn;
@@ -69,13 +72,13 @@ void Importer::registerAST(std::string name, NodeTree<ASTData>* ast, NodeTree<Sy
 }

 NodeTree<ASTData>* Importer::getUnit(std::string fileName) { // Looks fileName up in `imported`; only reads the map, does not start an import itself
-	std::cout << "\n\nImporting " << fileName << " ";
+	//std::cout << "\n\nImporting " << fileName << " ";
 	//Check to see if we've already done it
 	if (imported.find(fileName) != imported.end()) {
-		std::cout << "Already Imported!" << std::endl;
+		//std::cout << "Already Imported!" << std::endl;
 		return imported[fileName]; // hit: return the tree stored under this file name
 	}
-	std::cout << "Not yet imported" << std::endl;
+	//std::cout << "Not yet imported" << std::endl;

 	return NULL; // miss: no entry for fileName in `imported`
 }
@@ -87,7 +90,8 @@ NodeTree<ASTData>* Importer::importFirstPass(std::string fileName) {
        if (!parseTree)
            return NULL;
 		//Call with ourself to allow the transformation to call us to import files that it needs
-		ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself
+        if (!only_parse)
+            ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself
 	}
 	return ast;
 }
@@ -97,8 +101,10 @@ void Importer::import(std::string fileName) {
 	//Start the ball rolling by importing and running the first pass on the first file.
 	//This will import, first pass and register all the other files too.

-	std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl;
+	//std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl;
 	importFirstPass(fileName);	//First pass defines all objects
+    if (only_parse)
+        return;

 	std::cout << "\n\n =====SECOND PASS===== \n\n" << std::endl;
 	for (importTriplet i : importedTrips)					//Second pass defines data inside objects, outside declaration statements,
@@ -141,39 +147,41 @@ void Importer::import(std::string fileName) {
 NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {

 	std::ifstream programInFile;
-	std::ofstream outFile, outFileTransformed;
+	//std::ofstream outFile, outFileTransformed;

-
-    std::cout << "outputName " << outputName << std::endl;
-    std::cout << "fileName " << fileName << std::endl;
+    //std::cout << "outputName " << outputName << std::endl;
+    //std::cout << "fileName " << fileName << std::endl;

    auto pathPieces = split(fileName, '/');
    std::string outputFileName = outputName + "/" + pathPieces[pathPieces.size()-1] + "out";
-    std::cout << "outputFileName " << outputFileName << std::endl;
+    //std::cout << "outputFileName " << outputFileName << std::endl;

+    std::string inputFileName;
 	for (auto i : includePaths) {
 		programInFile.open(i+fileName);
-		if (programInFile.is_open())
+		if (programInFile.is_open()) {
+            inputFileName = i+fileName;
 			break;
-		else
+        } else {
 			std::cout << i+fileName << " is no good" << std::endl;
+        }
 	}
 	if (!programInFile.is_open()) {
 		std::cout << "Problem opening programInFile " << fileName << "\n";
 		return NULL;
 	}

-	outFile.open(outputFileName);
-	if (!outFile.is_open()) {
-		std::cout << "Probelm opening output file " << outputFileName << "\n";
-		return NULL;
-	}
+	//outFile.open(outputFileName);
+	//if (!outFile.is_open()) {
+		//std::cout << "Probelm opening output file " << outputFileName << "\n";
+		//return NULL;
+	//}

-	outFileTransformed.open((outputFileName + ".transformed.dot").c_str());
-	if (!outFileTransformed.is_open()) {
-		std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n";
-		return NULL;
-	}
+	//outFileTransformed.open((outputFileName + ".transformed.dot").c_str());
+	//if (!outFileTransformed.is_open()) {
+		//std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n";
+		//return NULL;
+	//}

 	std::string programInputFileString, line;
 	while(programInFile.good()) {
@@ -183,18 +191,18 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
 	programInFile.close();

 	//std::cout << programInputFileString << std::endl;
-	NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString);
+	NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString, inputFileName);

 	if (parseTree) {
        //std::cout << parseTree->DOTGraphString() << std::endl;
 		//outFile << parseTree->DOTGraphString() << std::endl;
 	} else {
 		std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl;
-	    outFile.close(); outFileTransformed.close();
+		//outFile.close(); outFileTransformed.close();
        throw "unexceptablblllll";
        return NULL;
    }
-	outFile.close();
+	//outFile.close();

 	//Remove Transformations

@@ -211,7 +219,7 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
 	} else {
 		std::cout << "Tree returned from transformation is NULL!" << std::endl;
 	}
-	outFileTransformed.close();
+	//outFileTransformed.close();

    std::cout << "Returning parse tree" << std::endl;
 	return parseTree;
@@ -22,7 +22,7 @@ void RNGLRParser::printReconstructedFrontier(int frontier) {
    }
 }

-NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
+NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString, std::string filename) {
 	input.clear();
 	gss.clear();
 	while(!toReduce.empty()) toReduce.pop();
@@ -30,6 +30,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
 	SPPFStepNodes.clear();
 	nullableParts.clear();
 	packedMap.clear();
+    bool errord = false;

 	//Check for no tokens
 	bool accepting = false;
@@ -52,16 +53,21 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
 	lexer.setInput(inputString);
 	//Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
 	//It could be converted to on-line later.
+    int tokenNum = 1;
 	Symbol currentToken = lexer.next();
 	input.push_back(currentToken);
 	while (currentToken != EOFSymbol) {
 		currentToken = lexer.next();
 		//std::cout << "CurrentToken is " << currentToken.toString() << std::endl;
 		if (currentToken == invalidSymbol) {
+			std::cerr << filename << ":" << findLine(tokenNum) << std::endl;
+            errord = true;
+            std::cerr << "lex error" << std::endl;
 			std::cerr << "Invalid Symbol!" << std::endl;
 			throw "Invalid Symbol, cannot lex";
 		}
 		input.push_back(currentToken);
+        tokenNum++;
 	}

 	// std::cout << "\nDone with Lexing, length:" << input.size() << std::endl;
@@ -99,8 +105,11 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
 		// std::cout << "Checking if frontier " << i << " is empty" << std::endl;
 		if (gss.frontierIsEmpty(i)) {
 			//std::cout << "Frontier " << i << " is empty." << std::endl;
-			std::cerr << "Parsing failed on " << input[i].toString() << std::endl;
-			std::cerr << "Problem is on line: " << findLine(i) << std::endl;
+			//std::cerr << "Parsing failed on " << input[i].toString() << std::endl;
+			//std::cerr << "Problem is on line: " << findLine(i) << std::endl;
+			std::cerr << filename << ":" << findLine(i) << std::endl;
+            errord = true;
+            std::cerr << "parse error" << std::endl;
 			std::cerr << "Nearby is:" << std::endl;
 			int range = 10;
 			for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++)
@@ -138,6 +147,12 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
 		return gss.getEdge(accState, v0);
 	}

+    if (!errord) {
+        std::cerr << filename << ":" << findLine(input.size())-2 << std::endl;
+        std::cerr << "parse error" << std::endl;
+        std::cerr << "Nearby is:" << std::endl;
+    }
+
 	std::cerr << "Rejected!" << std::endl;
 	// std::cout << "GSS:\n" << gss.toString() << std::endl;
 	return NULL;
@@ -522,7 +537,7 @@ std::vector<NodeTree<Symbol>*> RNGLRParser::getPathEdges(std::vector<NodeTree<in
 }

 int RNGLRParser::findLine(int tokenNum) {
-	int lineNo = 0;
+	int lineNo = 1;
 	for (int i = 0; i < tokenNum; i++) {
 		std::string tokenString = input[i].getValue();
 		for (int j = 0; j < tokenString.size(); j++)
@@ -2,6 +2,29 @@ import string
 import vector
 import set
 import symbol
+import regex
+
+obj grammer (Object) { // Groups a vector of rules together with a set of regexes
+    var rules: vector::vector<rule>
+    var regexs: set::set<regex>
+
+    fun construct(): grammer* { // NOTE(review): declared to return grammer* but has no return statement - confirm whether `return this` is expected here
+        rules.construct()
+        regexs.construct()
+    }
+    fun copy_construct(old: grammer*) { // copy-constructs each member from the matching member of `old`
+        rules.copy_construct(&old->rules)
+        regexs.copy_construct(&old->regexs)
+    }
+    fun operator=(other: grammer) { // assignment: destruct the current members, then copy-construct them from `other`
+        destruct()
+        copy_construct(&other)
+    }
+    fun destruct() { // destructs both members
+        rules.destruct()
+        regexs.destruct()
+    }
+}

 obj rule (Object) {
    var lhs: symbol::symbol
@@ -16,10 +39,10 @@ obj rule (Object) {
        lookahead.construct()
    }
    fun copy_construct(old: rule*) {
-        lhs.copy_construct(&rule->lhs)
-        rhs.copy_construct(&rule->rhs)
-        position = rule->position
-        lookahead.copy_construct(&rule->lookahead)
+        // Copy every field from the source rule. The source pointer is the
+        // parameter `old`; no `other` or `rule` variable is declared in this function.
+        lhs.copy_construct(&old->lhs)
+        rhs.copy_construct(&old->rhs)
+        position = old->position
+        lookahead.copy_construct(&old->lookahead)
    }
    fun operator=(other: rule) {
        destruct()
@@ -1,4 +1,4 @@
-import string:*;
+import string;
 import mem:*

 __if_comp__ __C__ simple_passthrough """
@@ -32,7 +32,7 @@ fun print(toPrint: char) : void {
 	return;
 }

-fun print(toPrint: string) : void {
+fun print(toPrint: string::string) : void {
    var charArr = toPrint.toCharArray()
    defer delete(charArr)
    print(charArr);
@@ -73,3 +73,29 @@ fun print(toPrint: double) : void{
 	return;
 }

+// Ok, just some DEAD simple file io for now
+// Reads the whole file at `path` and returns its contents as a string.
+// If the file cannot be opened, or its size cannot be determined, an empty
+// string is returned instead of passing a NULL FILE* to the C library.
+fun read_file(path: string::string): string::string {
+    var char_path = path.toCharArray()
+    defer delete(char_path)
+    var data: char*
+	__if_comp__ __C__ {
+		simple_passthrough(char_path = char_path:data = data:) """
+            /* fopen returns NULL on failure, so every use of fp below is guarded */
+			FILE *fp = fopen(char_path, "r");
+            long size = 0;
+            if (fp) {
+                fseek(fp, 0L, SEEK_END);
+                size = ftell(fp);
+                if (size < 0) size = 0; /* ftell reports failure as -1 */
+                fseek(fp, 0L, SEEK_SET);
+            }
+            char *data = malloc(size+1);
+            size_t readSize = fp ? fread(data, 1, size, fp) : 0;
+            data[readSize] = 0;
+            if (fp) fclose(fp);
+		"""
+	}
+    var toRet = string::string(data)
+	__if_comp__ __C__ {
+		simple_passthrough(data = data::) """
+            free(data);
+		"""
+	}
+    return toRet
+}
+
@@ -0,0 +1,7 @@
+import io
+
+fun main():int { // empty program: does nothing and returns 0
+    return 0
+}
+
+
@@ -0,0 +1,3 @@
+this can be anything
+because it reads itself
+beautiful
@@ -0,0 +1,7 @@
+import io:*
+import string:*
+
+fun main():int { // prints the contents of one file via read_file, then returns 0
+    print(read_file(string("test_fileio.expected_results"))) // the file read is named test_fileio.expected_results
+    return 0
+}