Some more work, plus a --parse-only option to support the new kraken.vim Vim plugin, which adds Syntastic support (and syntax highlighting)

This commit is contained in:
Nathan Braswell
2015-07-03 18:34:46 -04:00
parent 2fcace72ed
commit b62c3e729f
12 changed files with 155 additions and 58 deletions

2
.gitignore vendored
View File

@@ -10,4 +10,4 @@ stats
*krakout* *krakout*
kraklist.txt kraklist.txt
.*.un~ .*.un~
tests/test_topLevelVarInit/ RNGLR.pdf

View File

@@ -20,7 +20,7 @@ class ASTTransformation;
class Importer { class Importer {
public: public:
Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName); Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName, bool only_parseIn = false);
~Importer(); ~Importer();
void import(std::string fileName); void import(std::string fileName);
NodeTree<ASTData>* getUnit(std::string fileName); NodeTree<ASTData>* getUnit(std::string fileName);
@@ -36,6 +36,7 @@ class Importer {
NodeTree<ASTData>* ast; NodeTree<ASTData>* ast;
NodeTree<Symbol>* syntaxTree; NodeTree<Symbol>* syntaxTree;
}; };
bool only_parse;
std::vector<importTriplet> importedTrips; std::vector<importTriplet> importedTrips;
std::vector<std::string> includePaths; std::vector<std::string> includePaths;
Parser* parser; Parser* parser;

View File

@@ -28,7 +28,7 @@ class Parser {
virtual void loadGrammer(std::string grammerInputString); virtual void loadGrammer(std::string grammerInputString);
virtual void createStateSet(); virtual void createStateSet();
virtual std::string stateSetToString(); virtual std::string stateSetToString();
virtual NodeTree<Symbol>* parseInput(std::string inputString) = 0; virtual NodeTree<Symbol>* parseInput(std::string inputString, std::string filename) = 0; // filename for error reporting
virtual std::string grammerToString(); virtual std::string grammerToString();
virtual std::string grammerToDOT(); virtual std::string grammerToDOT();

View File

@@ -17,7 +17,7 @@ class RNGLRParser: public Parser {
public: public:
RNGLRParser(); RNGLRParser();
~RNGLRParser(); ~RNGLRParser();
NodeTree<Symbol>* parseInput(std::string inputString); NodeTree<Symbol>* parseInput(std::string inputString, std::string filename); // filename for error reporting
void printReconstructedFrontier(int frontier); void printReconstructedFrontier(int frontier);
private: private:

View File

@@ -29,6 +29,7 @@ int main(int argc, char* argv[]) {
std::cerr << "Or for testing do: kraken --test [optional list of names of file (.krak .expected_results) without extentions to run]" << std::endl; std::cerr << "Or for testing do: kraken --test [optional list of names of file (.krak .expected_results) without extentions to run]" << std::endl;
return 0; return 0;
} }
std::string grammerFileString = "../krakenGrammer.kgm";
if (argc >= 2 && std::string(argv[1]) == "--test") { if (argc >= 2 && std::string(argv[1]) == "--test") {
StringReader::test(); StringReader::test();
@@ -40,7 +41,7 @@ int main(int argc, char* argv[]) {
if (argc >= 3) { if (argc >= 3) {
std::string testResults, line; std::string testResults, line;
int passed = 0, failed = 0; int passed = 0, failed = 0;
Tester test(argv[0], "../krakenGrammer.kgm"); Tester test(argv[0], grammerFileString);
// find the max length so we can pad the string and align the results // find the max length so we can pad the string and align the results
unsigned int maxLineLength = 0; unsigned int maxLineLength = 0;
for (int i = 2; i < argc; i++) { for (int i = 2; i < argc; i++) {
@@ -66,10 +67,16 @@ int main(int argc, char* argv[]) {
krakenDir = strSlice(krakenDir, 0, -(std::string("kraken").length()+1)); krakenDir = strSlice(krakenDir, 0, -(std::string("kraken").length()+1));
includePaths.push_back(krakenDir + "stdlib/"); //Add the stdlib directory that exists in the same directory as the kraken executable to the path includePaths.push_back(krakenDir + "stdlib/"); //Add the stdlib directory that exists in the same directory as the kraken executable to the path
std::string grammerFileString = "../krakenGrammer.kgm";
std::string programName; std::string programName;
std::string outputName; std::string outputName;
if (argc > 3) { bool parse_only = false;
//std::cout << "argv[1] == " << argv[1] << std::endl;
if (std::string(argv[1]) == "--parse-only") {
parse_only = true;
grammerFileString = argv[2];
programName = argv[3];
//outputName = argv[3];
} else if (argc > 3) {
grammerFileString = argv[1]; grammerFileString = argv[1];
programName = argv[2]; programName = argv[2];
outputName = argv[3]; outputName = argv[3];
@@ -103,14 +110,13 @@ int main(int argc, char* argv[]) {
} }
grammerInFile.close(); grammerInFile.close();
//LALRParser parser;
RNGLRParser parser; RNGLRParser parser;
parser.loadGrammer(grammerInputFileString); parser.loadGrammer(grammerInputFileString);
//Start binary stuff //Start binary stuff
bool compGramGood = false; bool compGramGood = false;
if (compiledGrammerInFile.is_open()) { if (compiledGrammerInFile.is_open()) {
std::cout << "Compiled grammer file exists, reading it in" << std::endl; //std::cout << "Compiled grammer file exists, reading it in" << std::endl;
std::streampos compGramSize = compiledGrammerInFile.tellg(); std::streampos compGramSize = compiledGrammerInFile.tellg();
char* binaryTablePointer = new char [compGramSize]; char* binaryTablePointer = new char [compGramSize];
compiledGrammerInFile.seekg(0, std::ios::beg); compiledGrammerInFile.seekg(0, std::ios::beg);
@@ -118,7 +124,7 @@ int main(int argc, char* argv[]) {
compiledGrammerInFile.close(); compiledGrammerInFile.close();
//Check magic number //Check magic number
if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') { if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') {
std::cout << "Valid Kraken Compiled Grammer File" << std::endl; //std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
int gramStringLength = *((int*)(binaryTablePointer+4)); int gramStringLength = *((int*)(binaryTablePointer+4));
//std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is " //std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is "
//<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl; //<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl;
@@ -128,7 +134,7 @@ int main(int argc, char* argv[]) {
std::cout << "The Grammer has been changed, will re-create" << std::endl; std::cout << "The Grammer has been changed, will re-create" << std::endl;
} else { } else {
compGramGood = true; compGramGood = true;
std::cout << "Grammer file is up to date." << std::endl; //std::cout << "Grammer file is up to date." << std::endl;
parser.importTable(binaryTablePointer + 4 + sizeof(int) + gramStringLength); //Load table starting at the table section parser.importTable(binaryTablePointer + 4 + sizeof(int) + gramStringLength); //Load table starting at the table section
} }
} else { } else {
@@ -158,17 +164,18 @@ int main(int argc, char* argv[]) {
} }
//End binary stuff //End binary stuff
std::cout << "\nParsing" << std::endl; //std::cout << "\nParsing" << std::endl;
//std::cout << "\toutput name: " << outputName << std::endl;
//std::cout << "\tprogram name: " << programName << std::endl;
Importer importer(&parser, includePaths, outputName, parse_only); // Output name for directory to put stuff in
std::cout << "\n output name: " << outputName << std::endl; //for (auto i : includePaths)
std::cout << "\n program name: " << programName << std::endl; //std::cout << i << std::endl;
Importer importer(&parser, includePaths, outputName); // Output name for directory to put stuff in
for (auto i : includePaths)
std::cout << i << std::endl;
importer.import(programName); importer.import(programName);
std::map<std::string, NodeTree<ASTData>*> ASTs = importer.getASTMap(); std::map<std::string, NodeTree<ASTData>*> ASTs = importer.getASTMap();
if (parse_only)
return 0;
//Do optimization, etc. here. //Do optimization, etc. here.
//None at this time, instead going straight to C in this first (more naive) version //None at this time, instead going straight to C in this first (more naive) version

View File

@@ -1,12 +1,15 @@
#include "Importer.h" #include "Importer.h"
Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn) { Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn, bool only_parseIn) {
only_parse = only_parseIn;
//constructor //constructor
outputName = outputNameIn; outputName = outputNameIn;
if (mkdir(("./" + outputName).c_str(), 0755)) { if (!only_parse) {
std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl; if (mkdir(("./" + outputName).c_str(), 0755)) {
std::cerr << "Could not make directory " << outputName << std::endl; //std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl;
//std::cerr << "Could not make directory " << outputName << std::endl;
}
} }
parser = parserIn; parser = parserIn;
@@ -69,13 +72,13 @@ void Importer::registerAST(std::string name, NodeTree<ASTData>* ast, NodeTree<Sy
} }
NodeTree<ASTData>* Importer::getUnit(std::string fileName) { NodeTree<ASTData>* Importer::getUnit(std::string fileName) {
std::cout << "\n\nImporting " << fileName << " "; //std::cout << "\n\nImporting " << fileName << " ";
//Check to see if we've already done it //Check to see if we've already done it
if (imported.find(fileName) != imported.end()) { if (imported.find(fileName) != imported.end()) {
std::cout << "Already Imported!" << std::endl; //std::cout << "Already Imported!" << std::endl;
return imported[fileName]; return imported[fileName];
} }
std::cout << "Not yet imported" << std::endl; //std::cout << "Not yet imported" << std::endl;
return NULL; return NULL;
} }
@@ -87,7 +90,8 @@ NodeTree<ASTData>* Importer::importFirstPass(std::string fileName) {
if (!parseTree) if (!parseTree)
return NULL; return NULL;
//Call with ourself to allow the transformation to call us to import files that it needs //Call with ourself to allow the transformation to call us to import files that it needs
ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself if (!only_parse)
ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself
} }
return ast; return ast;
} }
@@ -97,8 +101,10 @@ void Importer::import(std::string fileName) {
//Start the ball rolling by importing and running the first pass on the first file. //Start the ball rolling by importing and running the first pass on the first file.
//This will import, first pass and register all the other files too. //This will import, first pass and register all the other files too.
std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl; //std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl;
importFirstPass(fileName); //First pass defines all objects importFirstPass(fileName); //First pass defines all objects
if (only_parse)
return;
std::cout << "\n\n =====SECOND PASS===== \n\n" << std::endl; std::cout << "\n\n =====SECOND PASS===== \n\n" << std::endl;
for (importTriplet i : importedTrips) //Second pass defines data inside objects, outside declaration statements, for (importTriplet i : importedTrips) //Second pass defines data inside objects, outside declaration statements,
@@ -141,39 +147,41 @@ void Importer::import(std::string fileName) {
NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) { NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
std::ifstream programInFile; std::ifstream programInFile;
std::ofstream outFile, outFileTransformed; //std::ofstream outFile, outFileTransformed;
//std::cout << "outputName " << outputName << std::endl;
std::cout << "outputName " << outputName << std::endl; //std::cout << "fileName " << fileName << std::endl;
std::cout << "fileName " << fileName << std::endl;
auto pathPieces = split(fileName, '/'); auto pathPieces = split(fileName, '/');
std::string outputFileName = outputName + "/" + pathPieces[pathPieces.size()-1] + "out"; std::string outputFileName = outputName + "/" + pathPieces[pathPieces.size()-1] + "out";
std::cout << "outputFileName " << outputFileName << std::endl; //std::cout << "outputFileName " << outputFileName << std::endl;
std::string inputFileName;
for (auto i : includePaths) { for (auto i : includePaths) {
programInFile.open(i+fileName); programInFile.open(i+fileName);
if (programInFile.is_open()) if (programInFile.is_open()) {
inputFileName = i+fileName;
break; break;
else } else {
std::cout << i+fileName << " is no good" << std::endl; std::cout << i+fileName << " is no good" << std::endl;
}
} }
if (!programInFile.is_open()) { if (!programInFile.is_open()) {
std::cout << "Problem opening programInFile " << fileName << "\n"; std::cout << "Problem opening programInFile " << fileName << "\n";
return NULL; return NULL;
} }
outFile.open(outputFileName); //outFile.open(outputFileName);
if (!outFile.is_open()) { //if (!outFile.is_open()) {
std::cout << "Probelm opening output file " << outputFileName << "\n"; //std::cout << "Probelm opening output file " << outputFileName << "\n";
return NULL; //return NULL;
} //}
outFileTransformed.open((outputFileName + ".transformed.dot").c_str()); //outFileTransformed.open((outputFileName + ".transformed.dot").c_str());
if (!outFileTransformed.is_open()) { //if (!outFileTransformed.is_open()) {
std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n"; //std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n";
return NULL; //return NULL;
} //}
std::string programInputFileString, line; std::string programInputFileString, line;
while(programInFile.good()) { while(programInFile.good()) {
@@ -183,18 +191,18 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
programInFile.close(); programInFile.close();
//std::cout << programInputFileString << std::endl; //std::cout << programInputFileString << std::endl;
NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString); NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString, inputFileName);
if (parseTree) { if (parseTree) {
//std::cout << parseTree->DOTGraphString() << std::endl; //std::cout << parseTree->DOTGraphString() << std::endl;
//outFile << parseTree->DOTGraphString() << std::endl; //outFile << parseTree->DOTGraphString() << std::endl;
} else { } else {
std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl; std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl;
outFile.close(); outFileTransformed.close(); //outFile.close(); outFileTransformed.close();
throw "unexceptablblllll"; throw "unexceptablblllll";
return NULL; return NULL;
} }
outFile.close(); //outFile.close();
//Remove Transformations //Remove Transformations
@@ -211,7 +219,7 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
} else { } else {
std::cout << "Tree returned from transformation is NULL!" << std::endl; std::cout << "Tree returned from transformation is NULL!" << std::endl;
} }
outFileTransformed.close(); //outFileTransformed.close();
std::cout << "Returning parse tree" << std::endl; std::cout << "Returning parse tree" << std::endl;
return parseTree; return parseTree;

View File

@@ -22,7 +22,7 @@ void RNGLRParser::printReconstructedFrontier(int frontier) {
} }
} }
NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) { NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString, std::string filename) {
input.clear(); input.clear();
gss.clear(); gss.clear();
while(!toReduce.empty()) toReduce.pop(); while(!toReduce.empty()) toReduce.pop();
@@ -30,6 +30,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
SPPFStepNodes.clear(); SPPFStepNodes.clear();
nullableParts.clear(); nullableParts.clear();
packedMap.clear(); packedMap.clear();
bool errord = false;
//Check for no tokens //Check for no tokens
bool accepting = false; bool accepting = false;
@@ -52,16 +53,21 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
lexer.setInput(inputString); lexer.setInput(inputString);
//Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation. //Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
//It could be converted to on-line later. //It could be converted to on-line later.
int tokenNum = 1;
Symbol currentToken = lexer.next(); Symbol currentToken = lexer.next();
input.push_back(currentToken); input.push_back(currentToken);
while (currentToken != EOFSymbol) { while (currentToken != EOFSymbol) {
currentToken = lexer.next(); currentToken = lexer.next();
//std::cout << "CurrentToken is " << currentToken.toString() << std::endl; //std::cout << "CurrentToken is " << currentToken.toString() << std::endl;
if (currentToken == invalidSymbol) { if (currentToken == invalidSymbol) {
std::cerr << filename << ":" << findLine(tokenNum) << std::endl;
errord = true;
std::cerr << "lex error" << std::endl;
std::cerr << "Invalid Symbol!" << std::endl; std::cerr << "Invalid Symbol!" << std::endl;
throw "Invalid Symbol, cannot lex"; throw "Invalid Symbol, cannot lex";
} }
input.push_back(currentToken); input.push_back(currentToken);
tokenNum++;
} }
// std::cout << "\nDone with Lexing, length:" << input.size() << std::endl; // std::cout << "\nDone with Lexing, length:" << input.size() << std::endl;
@@ -99,8 +105,11 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
// std::cout << "Checking if frontier " << i << " is empty" << std::endl; // std::cout << "Checking if frontier " << i << " is empty" << std::endl;
if (gss.frontierIsEmpty(i)) { if (gss.frontierIsEmpty(i)) {
//std::cout << "Frontier " << i << " is empty." << std::endl; //std::cout << "Frontier " << i << " is empty." << std::endl;
std::cerr << "Parsing failed on " << input[i].toString() << std::endl; //std::cerr << "Parsing failed on " << input[i].toString() << std::endl;
std::cerr << "Problem is on line: " << findLine(i) << std::endl; //std::cerr << "Problem is on line: " << findLine(i) << std::endl;
std::cerr << filename << ":" << findLine(i) << std::endl;
errord = true;
std::cerr << "parse error" << std::endl;
std::cerr << "Nearby is:" << std::endl; std::cerr << "Nearby is:" << std::endl;
int range = 10; int range = 10;
for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++) for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++)
@@ -138,6 +147,12 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
return gss.getEdge(accState, v0); return gss.getEdge(accState, v0);
} }
if (!errord) {
std::cerr << filename << ":" << findLine(input.size())-2 << std::endl;
std::cerr << "parse error" << std::endl;
std::cerr << "Nearby is:" << std::endl;
}
std::cerr << "Rejected!" << std::endl; std::cerr << "Rejected!" << std::endl;
// std::cout << "GSS:\n" << gss.toString() << std::endl; // std::cout << "GSS:\n" << gss.toString() << std::endl;
return NULL; return NULL;
@@ -522,7 +537,7 @@ std::vector<NodeTree<Symbol>*> RNGLRParser::getPathEdges(std::vector<NodeTree<in
} }
int RNGLRParser::findLine(int tokenNum) { int RNGLRParser::findLine(int tokenNum) {
int lineNo = 0; int lineNo = 1;
for (int i = 0; i < tokenNum; i++) { for (int i = 0; i < tokenNum; i++) {
std::string tokenString = input[i].getValue(); std::string tokenString = input[i].getValue();
for (int j = 0; j < tokenString.size(); j++) for (int j = 0; j < tokenString.size(); j++)

View File

@@ -2,6 +2,29 @@ import string
import vector import vector
import set import set
import symbol import symbol
import regex
// A grammer holds a vector of rules and a set of regexes.
// It follows the construct / copy_construct / operator= / destruct
// protocol used by the other objects in this file.
obj grammer (Object) {
    var rules: vector::vector<rule>
    // regex is imported without ":*", so the type must be module-qualified,
    // the same way symbol::symbol and vector::vector are.
    var regexs: set::set<regex::regex>

    // Initialises both containers and returns this object, matching the
    // declared grammer* return type.
    fun construct(): grammer* {
        rules.construct()
        regexs.construct()
        return this
    }

    // Initialises this object as a copy of *old by copy-constructing each member.
    fun copy_construct(old: grammer*) {
        rules.copy_construct(&old->rules)
        regexs.copy_construct(&old->regexs)
    }

    // Assignment: release the current contents, then copy from other.
    // other is taken by value, so destruct() cannot invalidate the source.
    fun operator=(other: grammer) {
        destruct()
        copy_construct(&other)
    }

    // Releases both member containers.
    fun destruct() {
        rules.destruct()
        regexs.destruct()
    }
}
obj rule (Object) { obj rule (Object) {
var lhs: symbol::symbol var lhs: symbol::symbol
@@ -16,10 +39,10 @@ obj rule (Object) {
lookahead.construct() lookahead.construct()
} }
fun copy_construct(old: rule*) { fun copy_construct(old: rule*) {
lhs.copy_construct(&rule->lhs) lhs.copy_construct(&other->lhs)
rhs.copy_construct(&rule->rhs) rhs.copy_construct(&other->rhs)
position = rule->position position = other->position
lookahead.copy_construct(&rule->lookahead) lookahead.copy_construct(&other->lookahead)
} }
fun operator=(other: rule) { fun operator=(other: rule) {
destruct() destruct()

View File

@@ -1,4 +1,4 @@
import string:*; import string;
import mem:* import mem:*
__if_comp__ __C__ simple_passthrough """ __if_comp__ __C__ simple_passthrough """
@@ -32,7 +32,7 @@ fun print(toPrint: char) : void {
return; return;
} }
fun print(toPrint: string) : void { fun print(toPrint: string::string) : void {
var charArr = toPrint.toCharArray() var charArr = toPrint.toCharArray()
defer delete(charArr) defer delete(charArr)
print(charArr); print(charArr);
@@ -73,3 +73,29 @@ fun print(toPrint: double) : void{
return; return;
} }
// Ok, just some DEAD simple file io for now
// Reads the whole file at path and returns its contents as a string.
// If the file cannot be opened, an empty string is returned instead of
// handing a NULL FILE* to fseek/ftell/fread.
fun read_file(path: string::string): string::string {
    var char_path = path.toCharArray()
    defer delete(char_path)
    var data: char*
    __if_comp__ __C__ {
        // NOTE(review): presumably the "data = data:" output mapping copies the
        // C local `data` back into the Kraken variable - confirm against the
        // code generator.
        simple_passthrough(char_path = char_path:data = data:) """
            char *data = NULL;
            FILE *fp = fopen(char_path, "r");
            if (fp) {
                fseek(fp, 0L, SEEK_END);
                long size = ftell(fp);
                fseek(fp, 0L, SEEK_SET);
                if (size < 0)
                    size = 0; /* ftell reports errors as -1 */
                data = malloc(size+1);
                size_t readSize = fread(data, 1, size, fp);
                data[readSize] = 0;
                fclose(fp);
            } else {
                /* could not open the file: hand back an empty C string */
                data = malloc(1);
                data[0] = 0;
            }
        """
    }
    // Build the result from the C buffer before the buffer is freed below.
    var toRet = string::string(data)
    __if_comp__ __C__ {
        simple_passthrough(data = data::) """
            free(data);
        """
    }
    return toRet
}

7
tests/syntax_error.krak Normal file
View File

@@ -0,0 +1,7 @@
import io
// Entry point of the syntax_error test program: does no work and returns 0.
// NOTE(review): only part of this file is visible here - confirm where the
// intended syntax error lives before relying on this fixture.
fun main():int {
return 0
}

View File

@@ -0,0 +1,3 @@
this can be anything
because it reads itself
beautiful

7
tests/test_fileio.krak Normal file
View File

@@ -0,0 +1,7 @@
import io:*
import string:*
// Entry point of the file io test: reads "test_fileio.expected_results" with
// read_file and prints what it read, then returns 0.
// NOTE(review): the path is relative, so this presumably has to be run from
// the directory containing the expected-results file - confirm with the test runner.
fun main():int {
print(read_file(string("test_fileio.expected_results")))
return 0
}