From b62c3e729f25f44a3cbfb6115bce5cbb65974cca Mon Sep 17 00:00:00 2001
From: Nathan Braswell <miloignis@gmail.com>
Date: Fri, 3 Jul 2015 18:34:46 -0400
Subject: [PATCH] Some more work, and a --parse-only option to support the new
 kraken.vim vim plugin that adds Syntastic support (and syntax highlighting)

---
 .gitignore                         |  2 +-
 include/Importer.h                 |  3 +-
 include/Parser.h                   |  2 +-
 include/RNGLRParser.h              |  2 +-
 main.cpp                           | 35 +++++++++------
 src/Importer.cpp                   | 68 +++++++++++++++++-------------
 src/RNGLRParser.cpp                | 23 ++++++++--
 stdlib/grammer.krak                | 31 ++++++++++++--
 stdlib/io.krak                     | 30 ++++++++++++-
 tests/syntax_error.krak            |  7 +++
 tests/test_fileio.expected_results |  3 ++
 tests/test_fileio.krak             |  7 +++
 12 files changed, 155 insertions(+), 58 deletions(-)
 create mode 100644 tests/syntax_error.krak
 create mode 100644 tests/test_fileio.expected_results
 create mode 100644 tests/test_fileio.krak
diff --git a/.gitignore b/.gitignore
index daa570b..cc19a32 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,4 @@ stats
 *krakout*
 kraklist.txt
 .*.un~
-tests/test_topLevelVarInit/
+RNGLR.pdf
diff --git a/include/Importer.h b/include/Importer.h
index 48e933e..644c3b4 100644
--- a/include/Importer.h
+++ b/include/Importer.h
@@ -20,7 +20,7 @@ class ASTTransformation;
 
 class Importer {
 	public:
-		Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName);
+		Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName, bool only_parseIn = false);
 		~Importer();
 		void import(std::string fileName);
 		NodeTree<ASTData>* getUnit(std::string fileName);
@@ -36,6 +36,7 @@ class Importer {
 			NodeTree<ASTData>* ast;
 			NodeTree<Symbol>* syntaxTree;
 		};
+        bool only_parse;
 		std::vector<importTriplet> importedTrips;
 		std::vector<std::string> includePaths;
 		Parser* parser;
diff --git a/include/Parser.h b/include/Parser.h
index 73f878d..2a4eba5 100644
--- a/include/Parser.h
+++ b/include/Parser.h
@@ -28,7 +28,7 @@ class Parser {
 		virtual void loadGrammer(std::string grammerInputString);
 		virtual void createStateSet();
 		virtual std::string stateSetToString();
-		virtual NodeTree<Symbol>* parseInput(std::string inputString) = 0;
+		virtual NodeTree<Symbol>* parseInput(std::string inputString, std::string filename) = 0; // filename for error reporting
 		virtual std::string grammerToString();
 		virtual std::string grammerToDOT();
 
diff --git a/include/RNGLRParser.h b/include/RNGLRParser.h
index d1a4dd3..1dad38e 100644
--- a/include/RNGLRParser.h
+++ b/include/RNGLRParser.h
@@ -17,7 +17,7 @@ class RNGLRParser: public Parser {
 	public:
 		RNGLRParser();
 		~RNGLRParser();
-		NodeTree<Symbol>* parseInput(std::string inputString);
+		NodeTree<Symbol>* parseInput(std::string inputString, std::string filename); // filename for error reporting
         void printReconstructedFrontier(int frontier);
 
 	private:
diff --git a/main.cpp b/main.cpp
index 6f51cb9..6febef3 100644
--- a/main.cpp
+++ b/main.cpp
@@ -29,6 +29,7 @@ int main(int argc, char* argv[]) {
         std::cerr << "Or for testing do: kraken --test [optional list of names of file (.krak .expected_results) without extentions to run]" << std::endl;
         return 0;
     }
+	std::string grammerFileString = "../krakenGrammer.kgm";
 
 	if (argc >= 2 && std::string(argv[1]) == "--test") {
 		StringReader::test();
@@ -40,7 +41,7 @@ int main(int argc, char* argv[]) {
 		if (argc >= 3) {
 			std::string testResults, line;
 			int passed = 0, failed = 0;
-			Tester test(argv[0], "../krakenGrammer.kgm");
+			Tester test(argv[0], grammerFileString);
             // find the max length so we can pad the string and align the results
             unsigned int maxLineLength = 0;
 			for (int i = 2; i < argc; i++) {
@@ -66,10 +67,16 @@ int main(int argc, char* argv[]) {
 	krakenDir = strSlice(krakenDir, 0, -(std::string("kraken").length()+1));
 	includePaths.push_back(krakenDir + "stdlib/"); //Add the stdlib directory that exists in the same directory as the kraken executable to the path
 
-	std::string grammerFileString = "../krakenGrammer.kgm";
 	std::string programName;
 	std::string outputName;
-    if (argc > 3) {
+    bool parse_only = false;
+    // --parse-only needs a grammer file and a program file after it, so check argc before reading argv[2]/argv[3]
+    if (argc > 3 && std::string(argv[1]) == "--parse-only") {
+        parse_only = true;
+        grammerFileString = argv[2];
+        programName = argv[3];
+        //outputName = argv[3];
+    } else if (argc > 3) {
         grammerFileString = argv[1];
         programName = argv[2];
         outputName = argv[3];
@@ -103,14 +110,13 @@ int main(int argc, char* argv[]) {
 	}
 	grammerInFile.close();
 
-	//LALRParser parser;
 	RNGLRParser parser;
 	parser.loadGrammer(grammerInputFileString);
 
 	//Start binary stuff
 	bool compGramGood = false;
 	if (compiledGrammerInFile.is_open()) {
-		std::cout << "Compiled grammer file exists, reading it in" << std::endl;
+		//std::cout << "Compiled grammer file exists, reading it in" << std::endl;
 		std::streampos compGramSize = compiledGrammerInFile.tellg();
 		char* binaryTablePointer = new char [compGramSize];
 		compiledGrammerInFile.seekg(0, std::ios::beg);
@@ -118,7 +124,7 @@ int main(int argc, char* argv[]) {
 		compiledGrammerInFile.close();
 		//Check magic number
 		if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') {
-			std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
+			//std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
 			int gramStringLength = *((int*)(binaryTablePointer+4));
 			//std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is "
 			//<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl;
@@ -128,7 +134,7 @@ int main(int argc, char* argv[]) {
 				std::cout << "The Grammer has been changed, will re-create" << std::endl;
 			} else {
 				compGramGood = true;
-				std::cout << "Grammer file is up to date." << std::endl;
+				//std::cout << "Grammer file is up to date." << std::endl;
 				parser.importTable(binaryTablePointer + 4 + sizeof(int) + gramStringLength); //Load table starting at the table section
 			}
 		} else {
@@ -158,17 +164,18 @@ int main(int argc, char* argv[]) {
 	}
 	//End binary stuff
 
-	std::cout << "\nParsing" << std::endl;
+    //std::cout << "\nParsing" << std::endl;
+	//std::cout << "\toutput name: " << outputName << std::endl;
+	//std::cout << "\tprogram name: " << programName << std::endl;
+	Importer importer(&parser, includePaths, outputName, parse_only); // Output name for directory to put stuff in
 
-	std::cout << "\n output name: " << outputName << std::endl;
-	std::cout << "\n program name: " << programName << std::endl;
-	Importer importer(&parser, includePaths, outputName); // Output name for directory to put stuff in
-
-	for (auto i : includePaths)
-		std::cout << i << std::endl;
+	//for (auto i : includePaths)
+		//std::cout << i << std::endl;
 
 	importer.import(programName);
 	std::map<std::string, NodeTree<ASTData>*> ASTs = importer.getASTMap();
+    if (parse_only)
+        return 0;
 
 	//Do optimization, etc. here.
 	//None at this time, instead going straight to C in this first (more naive) version
diff --git a/src/Importer.cpp b/src/Importer.cpp
index 9f450fa..09b4142 100644
--- a/src/Importer.cpp
+++ b/src/Importer.cpp
@@ -1,12 +1,15 @@
 #include "Importer.h"
 
-Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn) {
+Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn, bool only_parseIn) {
+    only_parse = only_parseIn;
 	//constructor
     outputName = outputNameIn;
 
-    if (mkdir(("./" + outputName).c_str(), 0755)) {
-        std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl;
-        std::cerr << "Could not make directory " << outputName << std::endl;
+    if (!only_parse) {
+        if (mkdir(("./" + outputName).c_str(), 0755)) {
+            //std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl;
+            //std::cerr << "Could not make directory " << outputName << std::endl;
+        }
     }
 
 	parser = parserIn;
@@ -69,13 +72,13 @@ void Importer::registerAST(std::string name, NodeTree<ASTData>* ast, NodeTree<Sy
 }
 
 NodeTree<ASTData>* Importer::getUnit(std::string fileName) {
-	std::cout << "\n\nImporting " << fileName << " ";
+	//std::cout << "\n\nImporting " << fileName << " ";
 	//Check to see if we've already done it
 	if (imported.find(fileName) != imported.end()) {
-		std::cout << "Already Imported!" << std::endl;
+		//std::cout << "Already Imported!" << std::endl;
 		return imported[fileName];
 	}
-	std::cout << "Not yet imported" << std::endl;
+	//std::cout << "Not yet imported" << std::endl;
 
 	return NULL;
 }
@@ -87,7 +90,8 @@ NodeTree<ASTData>* Importer::importFirstPass(std::string fileName) {
         if (!parseTree)
             return NULL;
 		//Call with ourself to allow the transformation to call us to import files that it needs
-		ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself
+        if (!only_parse)
+            ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself
 	}
 	return ast;
 }
@@ -97,8 +101,10 @@ void Importer::import(std::string fileName) {
 	//Start the ball rolling by importing and running the first pass on the first file.
 	//This will import, first pass and register all the other files too.
 
-	std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl;
+	//std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl;
 	importFirstPass(fileName);	//First pass defines all objects
+    if (only_parse)
+        return;
 
 	std::cout << "\n\n =====SECOND PASS===== \n\n" << std::endl;
 	for (importTriplet i : importedTrips)					//Second pass defines data inside objects, outside declaration statements,
@@ -141,39 +147,41 @@ void Importer::import(std::string fileName) {
 NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
 
 	std::ifstream programInFile;
-	std::ofstream outFile, outFileTransformed;
+	//std::ofstream outFile, outFileTransformed;
 
-
-    std::cout << "outputName " << outputName << std::endl;
-    std::cout << "fileName " << fileName << std::endl;
+    //std::cout << "outputName " << outputName << std::endl;
+    //std::cout << "fileName " << fileName << std::endl;
 
     auto pathPieces = split(fileName, '/');
     std::string outputFileName = outputName + "/" + pathPieces[pathPieces.size()-1] + "out";
-    std::cout << "outputFileName " << outputFileName << std::endl;
+    //std::cout << "outputFileName " << outputFileName << std::endl;
 
+    std::string inputFileName;
 	for (auto i : includePaths) {
 		programInFile.open(i+fileName);
-		if (programInFile.is_open())
+		if (programInFile.is_open()) {
+            inputFileName = i+fileName;
 			break;
-		else
+        } else {
 			std::cout << i+fileName << " is no good" << std::endl;
+        }
 	}
 	if (!programInFile.is_open()) {
 		std::cout << "Problem opening programInFile " << fileName << "\n";
 		return NULL;
 	}
 
-	outFile.open(outputFileName);
-	if (!outFile.is_open()) {
-		std::cout << "Probelm opening output file " << outputFileName << "\n";
-		return NULL;
-	}
+	//outFile.open(outputFileName);
+	//if (!outFile.is_open()) {
+		//std::cout << "Probelm opening output file " << outputFileName << "\n";
+		//return NULL;
+	//}
 
-	outFileTransformed.open((outputFileName + ".transformed.dot").c_str());
-	if (!outFileTransformed.is_open()) {
-		std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n";
-		return NULL;
-	}
+	//outFileTransformed.open((outputFileName + ".transformed.dot").c_str());
+	//if (!outFileTransformed.is_open()) {
+		//std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n";
+		//return NULL;
+	//}
 
 	std::string programInputFileString, line;
 	while(programInFile.good()) {
@@ -183,18 +191,18 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
 	programInFile.close();
 
 	//std::cout << programInputFileString << std::endl;
-	NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString);
+	NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString, inputFileName);
 
 	if (parseTree) {
         //std::cout << parseTree->DOTGraphString() << std::endl;
 		//outFile << parseTree->DOTGraphString() << std::endl;
 	} else {
 		std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl;
-	    outFile.close(); outFileTransformed.close();
+		//outFile.close(); outFileTransformed.close();
         throw "unexceptablblllll";
         return NULL;
     }
-	outFile.close();
+	//outFile.close();
 
 	//Remove Transformations
 
@@ -211,7 +219,7 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
 	} else {
 		std::cout << "Tree returned from transformation is NULL!" << std::endl;
 	}
-	outFileTransformed.close();
+	//outFileTransformed.close();
 
     std::cout << "Returning parse tree" << std::endl;
 	return parseTree;
diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp
index b7bdd6b..869f8cf 100644
--- a/src/RNGLRParser.cpp
+++ b/src/RNGLRParser.cpp
@@ -22,7 +22,7 @@ void RNGLRParser::printReconstructedFrontier(int frontier) {
     }
 }
 
-NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
+NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString, std::string filename) {
 	input.clear();
 	gss.clear();
 	while(!toReduce.empty()) toReduce.pop();
@@ -30,6 +30,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
 	SPPFStepNodes.clear();
 	nullableParts.clear();
 	packedMap.clear();
+    bool errord = false;
 
 	//Check for no tokens
 	bool accepting = false;
@@ -52,16 +53,21 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
 	lexer.setInput(inputString);
 	//Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
 	//It could be converted to on-line later.
+    int tokenNum = 1;
 	Symbol currentToken = lexer.next();
 	input.push_back(currentToken);
 	while (currentToken != EOFSymbol) {
 		currentToken = lexer.next();
 		//std::cout << "CurrentToken is " << currentToken.toString() << std::endl;
 		if (currentToken == invalidSymbol) {
+			std::cerr << filename << ":" << findLine(tokenNum) << std::endl;
+            errord = true;
+            std::cerr << "lex error" << std::endl;
 			std::cerr << "Invalid Symbol!" << std::endl;
 			throw "Invalid Symbol, cannot lex";
 		}
 		input.push_back(currentToken);
+        tokenNum++;
 	}
 
 	// std::cout << "\nDone with Lexing, length:" << input.size() << std::endl;
@@ -99,8 +105,11 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
 		// std::cout << "Checking if frontier " << i << " is empty" << std::endl;
 		if (gss.frontierIsEmpty(i)) {
 			//std::cout << "Frontier " << i << " is empty." << std::endl;
-			std::cerr << "Parsing failed on " << input[i].toString() << std::endl;
-			std::cerr << "Problem is on line: " << findLine(i) << std::endl;
+			//std::cerr << "Parsing failed on " << input[i].toString() << std::endl;
+			//std::cerr << "Problem is on line: " << findLine(i) << std::endl;
+			std::cerr << filename << ":" << findLine(i) << std::endl;
+            errord = true;
+            std::cerr << "parse error" << std::endl;
 			std::cerr << "Nearby is:" << std::endl;
 			int range = 10;
 			for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++)
@@ -138,6 +147,12 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
 		return gss.getEdge(accState, v0);
 	}
 
+    if (!errord) {
+        std::cerr << filename << ":" << findLine(input.size())-2 << std::endl;
+        std::cerr << "parse error" << std::endl;
+        std::cerr << "Nearby is:" << std::endl;
+    }
+
 	std::cerr << "Rejected!" << std::endl;
 	// std::cout << "GSS:\n" << gss.toString() << std::endl;
 	return NULL;
@@ -522,7 +537,7 @@ std::vector<NodeTree<Symbol>*> RNGLRParser::getPathEdges(std::vector<NodeTree<in
 }
 
 int RNGLRParser::findLine(int tokenNum) {
-	int lineNo = 0;
+	int lineNo = 1;
 	for (int i = 0; i < tokenNum; i++) {
 		std::string tokenString = input[i].getValue();
 		for (int j = 0; j < tokenString.size(); j++)
diff --git a/stdlib/grammer.krak b/stdlib/grammer.krak
index 8062431..6cfcadf 100644
--- a/stdlib/grammer.krak
+++ b/stdlib/grammer.krak
@@ -2,6 +2,29 @@ import string
 import vector
 import set
 import symbol
+import regex
+
+obj grammer (Object) {
+    var rules: vector::vector<rule>
+    var regexs: set::set<regex>
+
+    fun construct(): grammer* {
+        rules.construct()
+        regexs.construct()
+    }
+    fun copy_construct(old: grammer*) {
+        rules.copy_construct(&old->rules)
+        regexs.copy_construct(&old->regexs)
+    }
+    fun operator=(other: grammer) {
+        destruct()
+        copy_construct(&other)
+    }
+    fun destruct() {
+        rules.destruct()
+        regexs.destruct()
+    }
+}
 
 obj rule (Object) {
     var lhs: symbol::symbol
@@ -16,10 +39,10 @@ obj rule (Object) {
         lookahead.construct()
     }
     fun copy_construct(old: rule*) {
-        lhs.copy_construct(&rule->lhs)
-        rhs.copy_construct(&rule->rhs)
-        position = rule->position
-        lookahead.copy_construct(&rule->lookahead)
+        lhs.copy_construct(&old->lhs)
+        rhs.copy_construct(&old->rhs)
+        position = old->position
+        lookahead.copy_construct(&old->lookahead)
     }
     fun operator=(other: rule) {
         destruct()
diff --git a/stdlib/io.krak b/stdlib/io.krak
index 86e471a..f74865e 100644
--- a/stdlib/io.krak
+++ b/stdlib/io.krak
@@ -1,4 +1,4 @@
-import string:*;
+import string;
 import mem:*
 
 __if_comp__ __C__ simple_passthrough """
@@ -32,7 +32,7 @@ fun print(toPrint: char) : void {
 	return;
 }
 
-fun print(toPrint: string) : void {
+fun print(toPrint: string::string) : void {
     var charArr = toPrint.toCharArray()
     defer delete(charArr)
     print(charArr);
@@ -73,3 +73,29 @@ fun print(toPrint: double) : void{
 	return;
 }
 
+// Ok, just some DEAD simple file io for now: read_file returns the whole file at path as a string (the fopen result is not checked)
+fun read_file(path: string::string): string::string {
+    var char_path = path.toCharArray()
+    defer delete(char_path)
+    var data: char*
+	__if_comp__ __C__ {
+		simple_passthrough(char_path = char_path:data = data:) """
+			FILE *fp = fopen(char_path, "r");
+            fseek(fp, 0L, SEEK_END);
+            long size = ftell(fp);
+            fseek(fp, 0L, SEEK_SET);
+            char *data = malloc(size+1);
+            size_t readSize = fread(data, 1, size, fp);
+            data[readSize] = 0;
+            fclose(fp);
+		"""
+	}
+    var toRet = string::string(data)
+	__if_comp__ __C__ {
+		simple_passthrough(data = data::) """
+            free(data);
+		"""
+	}
+    return toRet
+}
+
diff --git a/tests/syntax_error.krak b/tests/syntax_error.krak
new file mode 100644
index 0000000..2018115
--- /dev/null
+++ b/tests/syntax_error.krak
@@ -0,0 +1,7 @@
+import io
+
+fun main():int {
+    return 0
+}
+
+
diff --git a/tests/test_fileio.expected_results b/tests/test_fileio.expected_results
new file mode 100644
index 0000000..f2b3c31
--- /dev/null
+++ b/tests/test_fileio.expected_results
@@ -0,0 +1,3 @@
+this can be anything
+because it reads itself
+beautiful
diff --git a/tests/test_fileio.krak b/tests/test_fileio.krak
new file mode 100644
index 0000000..0d98486
--- /dev/null
+++ b/tests/test_fileio.krak
@@ -0,0 +1,7 @@
+import io:*
+import string:*
+
+fun main():int {
+    print(read_file(string("test_fileio.expected_results")))
+    return 0
+}