Some more work, and a --parse-only option to support the new kraken.vim vim plugin that adds Syntastic support (and syntax highlighting)

This commit is contained in:
Nathan Braswell
2015-07-03 18:34:46 -04:00
parent 2fcace72ed
commit b62c3e729f
12 changed files with 155 additions and 58 deletions

2
.gitignore vendored
View File

@@ -10,4 +10,4 @@ stats
*krakout*
kraklist.txt
.*.un~
tests/test_topLevelVarInit/
RNGLR.pdf

View File

@@ -20,7 +20,7 @@ class ASTTransformation;
class Importer {
public:
Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName);
Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName, bool only_parseIn = false);
~Importer();
void import(std::string fileName);
NodeTree<ASTData>* getUnit(std::string fileName);
@@ -36,6 +36,7 @@ class Importer {
NodeTree<ASTData>* ast;
NodeTree<Symbol>* syntaxTree;
};
bool only_parse;
std::vector<importTriplet> importedTrips;
std::vector<std::string> includePaths;
Parser* parser;

View File

@@ -28,7 +28,7 @@ class Parser {
virtual void loadGrammer(std::string grammerInputString);
virtual void createStateSet();
virtual std::string stateSetToString();
virtual NodeTree<Symbol>* parseInput(std::string inputString) = 0;
virtual NodeTree<Symbol>* parseInput(std::string inputString, std::string filename) = 0; // filename for error reporting
virtual std::string grammerToString();
virtual std::string grammerToDOT();

View File

@@ -17,7 +17,7 @@ class RNGLRParser: public Parser {
public:
RNGLRParser();
~RNGLRParser();
NodeTree<Symbol>* parseInput(std::string inputString);
NodeTree<Symbol>* parseInput(std::string inputString, std::string filename); // filename for error reporting
void printReconstructedFrontier(int frontier);
private:

View File

@@ -29,6 +29,7 @@ int main(int argc, char* argv[]) {
std::cerr << "Or for testing do: kraken --test [optional list of names of file (.krak .expected_results) without extentions to run]" << std::endl;
return 0;
}
std::string grammerFileString = "../krakenGrammer.kgm";
if (argc >= 2 && std::string(argv[1]) == "--test") {
StringReader::test();
@@ -40,7 +41,7 @@ int main(int argc, char* argv[]) {
if (argc >= 3) {
std::string testResults, line;
int passed = 0, failed = 0;
Tester test(argv[0], "../krakenGrammer.kgm");
Tester test(argv[0], grammerFileString);
// find the max length so we can pad the string and align the results
unsigned int maxLineLength = 0;
for (int i = 2; i < argc; i++) {
@@ -66,10 +67,16 @@ int main(int argc, char* argv[]) {
krakenDir = strSlice(krakenDir, 0, -(std::string("kraken").length()+1));
includePaths.push_back(krakenDir + "stdlib/"); //Add the stdlib directory that exists in the same directory as the kraken executable to the path
std::string grammerFileString = "../krakenGrammer.kgm";
std::string programName;
std::string outputName;
if (argc > 3) {
bool parse_only = false;
//std::cout << "argv[1] == " << argv[1] << std::endl;
if (std::string(argv[1]) == "--parse-only") {
parse_only = true;
grammerFileString = argv[2];
programName = argv[3];
//outputName = argv[3];
} else if (argc > 3) {
grammerFileString = argv[1];
programName = argv[2];
outputName = argv[3];
@@ -103,14 +110,13 @@ int main(int argc, char* argv[]) {
}
grammerInFile.close();
//LALRParser parser;
RNGLRParser parser;
parser.loadGrammer(grammerInputFileString);
//Start binary stuff
bool compGramGood = false;
if (compiledGrammerInFile.is_open()) {
std::cout << "Compiled grammer file exists, reading it in" << std::endl;
//std::cout << "Compiled grammer file exists, reading it in" << std::endl;
std::streampos compGramSize = compiledGrammerInFile.tellg();
char* binaryTablePointer = new char [compGramSize];
compiledGrammerInFile.seekg(0, std::ios::beg);
@@ -118,7 +124,7 @@ int main(int argc, char* argv[]) {
compiledGrammerInFile.close();
//Check magic number
if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') {
std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
//std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
int gramStringLength = *((int*)(binaryTablePointer+4));
//std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is "
//<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl;
@@ -128,7 +134,7 @@ int main(int argc, char* argv[]) {
std::cout << "The Grammer has been changed, will re-create" << std::endl;
} else {
compGramGood = true;
std::cout << "Grammer file is up to date." << std::endl;
//std::cout << "Grammer file is up to date." << std::endl;
parser.importTable(binaryTablePointer + 4 + sizeof(int) + gramStringLength); //Load table starting at the table section
}
} else {
@@ -158,17 +164,18 @@ int main(int argc, char* argv[]) {
}
//End binary stuff
std::cout << "\nParsing" << std::endl;
//std::cout << "\nParsing" << std::endl;
//std::cout << "\toutput name: " << outputName << std::endl;
//std::cout << "\tprogram name: " << programName << std::endl;
Importer importer(&parser, includePaths, outputName, parse_only); // Output name for directory to put stuff in
std::cout << "\n output name: " << outputName << std::endl;
std::cout << "\n program name: " << programName << std::endl;
Importer importer(&parser, includePaths, outputName); // Output name for directory to put stuff in
for (auto i : includePaths)
std::cout << i << std::endl;
//for (auto i : includePaths)
//std::cout << i << std::endl;
importer.import(programName);
std::map<std::string, NodeTree<ASTData>*> ASTs = importer.getASTMap();
if (parse_only)
return 0;
//Do optimization, etc. here.
//None at this time, instead going straight to C in this first (more naive) version

View File

@@ -1,12 +1,15 @@
#include "Importer.h"
Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn) {
Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn, bool only_parseIn) {
only_parse = only_parseIn;
//constructor
outputName = outputNameIn;
if (mkdir(("./" + outputName).c_str(), 0755)) {
std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl;
std::cerr << "Could not make directory " << outputName << std::endl;
if (!only_parse) {
if (mkdir(("./" + outputName).c_str(), 0755)) {
//std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl;
//std::cerr << "Could not make directory " << outputName << std::endl;
}
}
parser = parserIn;
@@ -69,13 +72,13 @@ void Importer::registerAST(std::string name, NodeTree<ASTData>* ast, NodeTree<Sy
}
//Looks fileName up in the `imported` map.
//Returns the stored NodeTree<ASTData>* when an entry exists, or NULL when it does not.
//Only performs the lookup; nothing in this function starts an import.
//NOTE(review): this listing is a rendered diff without +/- markers, so every progress message
//shows up twice (once live, once commented out) - confirm which form the working tree carries.
NodeTree<ASTData>* Importer::getUnit(std::string fileName) {
std::cout << "\n\nImporting " << fileName << " ";
//std::cout << "\n\nImporting " << fileName << " ";
//Check to see if we've already done it
if (imported.find(fileName) != imported.end()) {
std::cout << "Already Imported!" << std::endl;
//std::cout << "Already Imported!" << std::endl;
return imported[fileName];
}
std::cout << "Not yet imported" << std::endl;
//std::cout << "Not yet imported" << std::endl;
//No entry for this file: signal "not imported" to the caller with NULL
return NULL;
}
@@ -87,7 +90,8 @@ NodeTree<ASTData>* Importer::importFirstPass(std::string fileName) {
if (!parseTree)
return NULL;
//Call with ourself to allow the transformation to call us to import files that it needs
ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself
if (!only_parse)
ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself
}
return ast;
}
@@ -97,8 +101,10 @@ void Importer::import(std::string fileName) {
//Start the ball rolling by importing and running the first pass on the first file.
//This will import, first pass and register all the other files too.
std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl;
//std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl;
importFirstPass(fileName); //First pass defines all objects
if (only_parse)
return;
std::cout << "\n\n =====SECOND PASS===== \n\n" << std::endl;
for (importTriplet i : importedTrips) //Second pass defines data inside objects, outside declaration statements,
@@ -141,39 +147,41 @@ void Importer::import(std::string fileName) {
NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
std::ifstream programInFile;
std::ofstream outFile, outFileTransformed;
//std::ofstream outFile, outFileTransformed;
std::cout << "outputName " << outputName << std::endl;
std::cout << "fileName " << fileName << std::endl;
//std::cout << "outputName " << outputName << std::endl;
//std::cout << "fileName " << fileName << std::endl;
auto pathPieces = split(fileName, '/');
std::string outputFileName = outputName + "/" + pathPieces[pathPieces.size()-1] + "out";
std::cout << "outputFileName " << outputFileName << std::endl;
//std::cout << "outputFileName " << outputFileName << std::endl;
std::string inputFileName;
for (auto i : includePaths) {
programInFile.open(i+fileName);
if (programInFile.is_open())
if (programInFile.is_open()) {
inputFileName = i+fileName;
break;
else
} else {
std::cout << i+fileName << " is no good" << std::endl;
}
}
if (!programInFile.is_open()) {
std::cout << "Problem opening programInFile " << fileName << "\n";
return NULL;
}
outFile.open(outputFileName);
if (!outFile.is_open()) {
std::cout << "Probelm opening output file " << outputFileName << "\n";
return NULL;
}
//outFile.open(outputFileName);
//if (!outFile.is_open()) {
//std::cout << "Probelm opening output file " << outputFileName << "\n";
//return NULL;
//}
outFileTransformed.open((outputFileName + ".transformed.dot").c_str());
if (!outFileTransformed.is_open()) {
std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n";
return NULL;
}
//outFileTransformed.open((outputFileName + ".transformed.dot").c_str());
//if (!outFileTransformed.is_open()) {
//std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n";
//return NULL;
//}
std::string programInputFileString, line;
while(programInFile.good()) {
@@ -183,18 +191,18 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
programInFile.close();
//std::cout << programInputFileString << std::endl;
NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString);
NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString, inputFileName);
if (parseTree) {
//std::cout << parseTree->DOTGraphString() << std::endl;
//outFile << parseTree->DOTGraphString() << std::endl;
} else {
std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl;
outFile.close(); outFileTransformed.close();
//outFile.close(); outFileTransformed.close();
throw "unexceptablblllll";
return NULL;
}
outFile.close();
//outFile.close();
//Remove Transformations
@@ -211,7 +219,7 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
} else {
std::cout << "Tree returned from transformation is NULL!" << std::endl;
}
outFileTransformed.close();
//outFileTransformed.close();
std::cout << "Returning parse tree" << std::endl;
return parseTree;

View File

@@ -22,7 +22,7 @@ void RNGLRParser::printReconstructedFrontier(int frontier) {
}
}
NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString, std::string filename) {
input.clear();
gss.clear();
while(!toReduce.empty()) toReduce.pop();
@@ -30,6 +30,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
SPPFStepNodes.clear();
nullableParts.clear();
packedMap.clear();
bool errord = false;
//Check for no tokens
bool accepting = false;
@@ -52,16 +53,21 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
lexer.setInput(inputString);
//Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
//It could be converted to on-line later.
int tokenNum = 1;
Symbol currentToken = lexer.next();
input.push_back(currentToken);
while (currentToken != EOFSymbol) {
currentToken = lexer.next();
//std::cout << "CurrentToken is " << currentToken.toString() << std::endl;
if (currentToken == invalidSymbol) {
std::cerr << filename << ":" << findLine(tokenNum) << std::endl;
errord = true;
std::cerr << "lex error" << std::endl;
std::cerr << "Invalid Symbol!" << std::endl;
throw "Invalid Symbol, cannot lex";
}
input.push_back(currentToken);
tokenNum++;
}
// std::cout << "\nDone with Lexing, length:" << input.size() << std::endl;
@@ -99,8 +105,11 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
// std::cout << "Checking if frontier " << i << " is empty" << std::endl;
if (gss.frontierIsEmpty(i)) {
//std::cout << "Frontier " << i << " is empty." << std::endl;
std::cerr << "Parsing failed on " << input[i].toString() << std::endl;
std::cerr << "Problem is on line: " << findLine(i) << std::endl;
//std::cerr << "Parsing failed on " << input[i].toString() << std::endl;
//std::cerr << "Problem is on line: " << findLine(i) << std::endl;
std::cerr << filename << ":" << findLine(i) << std::endl;
errord = true;
std::cerr << "parse error" << std::endl;
std::cerr << "Nearby is:" << std::endl;
int range = 10;
for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++)
@@ -138,6 +147,12 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
return gss.getEdge(accState, v0);
}
if (!errord) {
std::cerr << filename << ":" << findLine(input.size())-2 << std::endl;
std::cerr << "parse error" << std::endl;
std::cerr << "Nearby is:" << std::endl;
}
std::cerr << "Rejected!" << std::endl;
// std::cout << "GSS:\n" << gss.toString() << std::endl;
return NULL;
@@ -522,7 +537,7 @@ std::vector<NodeTree<Symbol>*> RNGLRParser::getPathEdges(std::vector<NodeTree<in
}
int RNGLRParser::findLine(int tokenNum) {
int lineNo = 0;
int lineNo = 1;
for (int i = 0; i < tokenNum; i++) {
std::string tokenString = input[i].getValue();
for (int j = 0; j < tokenString.size(); j++)

View File

@@ -2,6 +2,29 @@ import string
import vector
import set
import symbol
import regex
// grammer bundles a vector of rules with a set of regexs.
// construct, copy_construct and destruct simply forward to the same operation on both members;
// operator= destructs the current members and then copy-constructs from its by-value argument.
// NOTE(review): construct() is declared to return grammer* but no return statement is visible - confirm whether one is required.
// NOTE(review): `regex` is imported without `:*`, yet the member type is written as bare `regex`
// (compare `symbol::symbol` in rule) - confirm whether this should be `regex::regex`.
obj grammer (Object) {
var rules: vector::vector<rule>
var regexs: set::set<regex>
fun construct(): grammer* {
rules.construct()
regexs.construct()
}
fun copy_construct(old: grammer*) {
rules.copy_construct(&old->rules)
regexs.copy_construct(&old->regexs)
}
fun operator=(other: grammer) {
destruct()
copy_construct(&other)
}
fun destruct() {
rules.destruct()
regexs.destruct()
}
}
obj rule (Object) {
var lhs: symbol::symbol
@@ -16,10 +39,10 @@ obj rule (Object) {
lookahead.construct()
}
fun copy_construct(old: rule*) {
    // Copy-initialize every member of this rule from the rule pointed to by `old`.
    // The source must be reached through the parameter name `old`: `rule` is the
    // type name and `other` is not declared in this method.
    lhs.copy_construct(&old->lhs)
    rhs.copy_construct(&old->rhs)
    // position is copied by plain assignment rather than through copy_construct
    position = old->position
    lookahead.copy_construct(&old->lookahead)
}
fun operator=(other: rule) {
destruct()

View File

@@ -1,4 +1,4 @@
import string:*;
import string;
import mem:*
__if_comp__ __C__ simple_passthrough """
@@ -32,7 +32,7 @@ fun print(toPrint: char) : void {
return;
}
fun print(toPrint: string) : void {
fun print(toPrint: string::string) : void {
var charArr = toPrint.toCharArray()
defer delete(charArr)
print(charArr);
@@ -73,3 +73,29 @@ fun print(toPrint: double) : void{
return;
}
// Ok, just some DEAD simple file io for now
// read_file returns the whole contents of the file at `path` as a string.
// If the file cannot be opened, or its size cannot be determined, an empty string is
// returned instead of handing a NULL FILE* / negative size to the C library.
fun read_file(path: string::string): string::string {
    var char_path = path.toCharArray()
    defer delete(char_path)
    var data: char*
    __if_comp__ __C__ {
        simple_passthrough(char_path = char_path:data = data:) """
            char *data = NULL;
            FILE *fp = fopen(char_path, "r");
            if (fp) {
                long size = -1;
                if (fseek(fp, 0L, SEEK_END) == 0)
                    size = ftell(fp);
                if (size >= 0 && fseek(fp, 0L, SEEK_SET) == 0) {
                    data = malloc(size+1);
                    if (data) {
                        size_t readSize = fread(data, 1, size, fp);
                        data[readSize] = 0;
                    }
                }
                fclose(fp);
            }
            if (!data) {
                /* open/seek/tell/alloc failed: fall back to an empty, NUL-terminated buffer */
                data = malloc(1);
                if (data)
                    data[0] = 0;
            }
        """
    }
    var toRet = string::string(data)
    __if_comp__ __C__ {
        simple_passthrough(data = data::) """
            free(data)
        """
    }
    return toRet
}

7
tests/syntax_error.krak Normal file
View File

@@ -0,0 +1,7 @@
import io
fun main():int {
return 0
}

View File

@@ -0,0 +1,3 @@
this can be anything
because it reads itself
beautiful

7
tests/test_fileio.krak Normal file
View File

@@ -0,0 +1,7 @@
import io:*
import string:*
// Reads test_fileio.expected_results through read_file and prints what was read.
fun main():int {
print(read_file(string("test_fileio.expected_results")))
return 0
}