Some more work, and a --parse-only option to support the new kraken.vim vim plugin that adds Syntastic support (and syntax highlighting)
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -10,4 +10,4 @@ stats
|
||||
*krakout*
|
||||
kraklist.txt
|
||||
.*.un~
|
||||
tests/test_topLevelVarInit/
|
||||
RNGLR.pdf
|
||||
|
||||
@@ -20,7 +20,7 @@ class ASTTransformation;
|
||||
|
||||
class Importer {
|
||||
public:
|
||||
Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName);
|
||||
Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName, bool only_parseIn = false);
|
||||
~Importer();
|
||||
void import(std::string fileName);
|
||||
NodeTree<ASTData>* getUnit(std::string fileName);
|
||||
@@ -36,6 +36,7 @@ class Importer {
|
||||
NodeTree<ASTData>* ast;
|
||||
NodeTree<Symbol>* syntaxTree;
|
||||
};
|
||||
bool only_parse;
|
||||
std::vector<importTriplet> importedTrips;
|
||||
std::vector<std::string> includePaths;
|
||||
Parser* parser;
|
||||
|
||||
@@ -28,7 +28,7 @@ class Parser {
|
||||
virtual void loadGrammer(std::string grammerInputString);
|
||||
virtual void createStateSet();
|
||||
virtual std::string stateSetToString();
|
||||
virtual NodeTree<Symbol>* parseInput(std::string inputString) = 0;
|
||||
virtual NodeTree<Symbol>* parseInput(std::string inputString, std::string filename) = 0; // filename for error reporting
|
||||
virtual std::string grammerToString();
|
||||
virtual std::string grammerToDOT();
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ class RNGLRParser: public Parser {
|
||||
public:
|
||||
RNGLRParser();
|
||||
~RNGLRParser();
|
||||
NodeTree<Symbol>* parseInput(std::string inputString);
|
||||
NodeTree<Symbol>* parseInput(std::string inputString, std::string filename); // filename for error reporting
|
||||
void printReconstructedFrontier(int frontier);
|
||||
|
||||
private:
|
||||
|
||||
35
main.cpp
35
main.cpp
@@ -29,6 +29,7 @@ int main(int argc, char* argv[]) {
|
||||
std::cerr << "Or for testing do: kraken --test [optional list of names of file (.krak .expected_results) without extentions to run]" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
std::string grammerFileString = "../krakenGrammer.kgm";
|
||||
|
||||
if (argc >= 2 && std::string(argv[1]) == "--test") {
|
||||
StringReader::test();
|
||||
@@ -40,7 +41,7 @@ int main(int argc, char* argv[]) {
|
||||
if (argc >= 3) {
|
||||
std::string testResults, line;
|
||||
int passed = 0, failed = 0;
|
||||
Tester test(argv[0], "../krakenGrammer.kgm");
|
||||
Tester test(argv[0], grammerFileString);
|
||||
// find the max length so we can pad the string and align the results
|
||||
unsigned int maxLineLength = 0;
|
||||
for (int i = 2; i < argc; i++) {
|
||||
@@ -66,10 +67,16 @@ int main(int argc, char* argv[]) {
|
||||
krakenDir = strSlice(krakenDir, 0, -(std::string("kraken").length()+1));
|
||||
includePaths.push_back(krakenDir + "stdlib/"); //Add the stdlib directory that exists in the same directory as the kraken executable to the path
|
||||
|
||||
std::string grammerFileString = "../krakenGrammer.kgm";
|
||||
std::string programName;
|
||||
std::string outputName;
|
||||
if (argc > 3) {
|
||||
bool parse_only = false;
|
||||
//std::cout << "argv[1] == " << argv[1] << std::endl;
|
||||
if (std::string(argv[1]) == "--parse-only") {
|
||||
parse_only = true;
|
||||
grammerFileString = argv[2];
|
||||
programName = argv[3];
|
||||
//outputName = argv[3];
|
||||
} else if (argc > 3) {
|
||||
grammerFileString = argv[1];
|
||||
programName = argv[2];
|
||||
outputName = argv[3];
|
||||
@@ -103,14 +110,13 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
grammerInFile.close();
|
||||
|
||||
//LALRParser parser;
|
||||
RNGLRParser parser;
|
||||
parser.loadGrammer(grammerInputFileString);
|
||||
|
||||
//Start binary stuff
|
||||
bool compGramGood = false;
|
||||
if (compiledGrammerInFile.is_open()) {
|
||||
std::cout << "Compiled grammer file exists, reading it in" << std::endl;
|
||||
//std::cout << "Compiled grammer file exists, reading it in" << std::endl;
|
||||
std::streampos compGramSize = compiledGrammerInFile.tellg();
|
||||
char* binaryTablePointer = new char [compGramSize];
|
||||
compiledGrammerInFile.seekg(0, std::ios::beg);
|
||||
@@ -118,7 +124,7 @@ int main(int argc, char* argv[]) {
|
||||
compiledGrammerInFile.close();
|
||||
//Check magic number
|
||||
if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') {
|
||||
std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
|
||||
//std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
|
||||
int gramStringLength = *((int*)(binaryTablePointer+4));
|
||||
//std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is "
|
||||
//<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl;
|
||||
@@ -128,7 +134,7 @@ int main(int argc, char* argv[]) {
|
||||
std::cout << "The Grammer has been changed, will re-create" << std::endl;
|
||||
} else {
|
||||
compGramGood = true;
|
||||
std::cout << "Grammer file is up to date." << std::endl;
|
||||
//std::cout << "Grammer file is up to date." << std::endl;
|
||||
parser.importTable(binaryTablePointer + 4 + sizeof(int) + gramStringLength); //Load table starting at the table section
|
||||
}
|
||||
} else {
|
||||
@@ -158,17 +164,18 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
//End binary stuff
|
||||
|
||||
std::cout << "\nParsing" << std::endl;
|
||||
//std::cout << "\nParsing" << std::endl;
|
||||
//std::cout << "\toutput name: " << outputName << std::endl;
|
||||
//std::cout << "\tprogram name: " << programName << std::endl;
|
||||
Importer importer(&parser, includePaths, outputName, parse_only); // Output name for directory to put stuff in
|
||||
|
||||
std::cout << "\n output name: " << outputName << std::endl;
|
||||
std::cout << "\n program name: " << programName << std::endl;
|
||||
Importer importer(&parser, includePaths, outputName); // Output name for directory to put stuff in
|
||||
|
||||
for (auto i : includePaths)
|
||||
std::cout << i << std::endl;
|
||||
//for (auto i : includePaths)
|
||||
//std::cout << i << std::endl;
|
||||
|
||||
importer.import(programName);
|
||||
std::map<std::string, NodeTree<ASTData>*> ASTs = importer.getASTMap();
|
||||
if (parse_only)
|
||||
return 0;
|
||||
|
||||
//Do optimization, etc. here.
|
||||
//None at this time, instead going straight to C in this first (more naive) version
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
#include "Importer.h"
|
||||
|
||||
Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn) {
|
||||
Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn, bool only_parseIn) {
|
||||
only_parse = only_parseIn;
|
||||
//constructor
|
||||
outputName = outputNameIn;
|
||||
|
||||
if (mkdir(("./" + outputName).c_str(), 0755)) {
|
||||
std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl;
|
||||
std::cerr << "Could not make directory " << outputName << std::endl;
|
||||
if (!only_parse) {
|
||||
if (mkdir(("./" + outputName).c_str(), 0755)) {
|
||||
//std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl;
|
||||
//std::cerr << "Could not make directory " << outputName << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
parser = parserIn;
|
||||
@@ -69,13 +72,13 @@ void Importer::registerAST(std::string name, NodeTree<ASTData>* ast, NodeTree<Sy
|
||||
}
|
||||
|
||||
NodeTree<ASTData>* Importer::getUnit(std::string fileName) {
|
||||
std::cout << "\n\nImporting " << fileName << " ";
|
||||
//std::cout << "\n\nImporting " << fileName << " ";
|
||||
//Check to see if we've already done it
|
||||
if (imported.find(fileName) != imported.end()) {
|
||||
std::cout << "Already Imported!" << std::endl;
|
||||
//std::cout << "Already Imported!" << std::endl;
|
||||
return imported[fileName];
|
||||
}
|
||||
std::cout << "Not yet imported" << std::endl;
|
||||
//std::cout << "Not yet imported" << std::endl;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@@ -87,7 +90,8 @@ NodeTree<ASTData>* Importer::importFirstPass(std::string fileName) {
|
||||
if (!parseTree)
|
||||
return NULL;
|
||||
//Call with ourself to allow the transformation to call us to import files that it needs
|
||||
ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself
|
||||
if (!only_parse)
|
||||
ast = ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself
|
||||
}
|
||||
return ast;
|
||||
}
|
||||
@@ -97,8 +101,10 @@ void Importer::import(std::string fileName) {
|
||||
//Start the ball rolling by importing and running the first pass on the first file.
|
||||
//This will import, first pass and register all the other files too.
|
||||
|
||||
std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl;
|
||||
//std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl;
|
||||
importFirstPass(fileName); //First pass defines all objects
|
||||
if (only_parse)
|
||||
return;
|
||||
|
||||
std::cout << "\n\n =====SECOND PASS===== \n\n" << std::endl;
|
||||
for (importTriplet i : importedTrips) //Second pass defines data inside objects, outside declaration statements,
|
||||
@@ -141,39 +147,41 @@ void Importer::import(std::string fileName) {
|
||||
NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
|
||||
|
||||
std::ifstream programInFile;
|
||||
std::ofstream outFile, outFileTransformed;
|
||||
//std::ofstream outFile, outFileTransformed;
|
||||
|
||||
|
||||
std::cout << "outputName " << outputName << std::endl;
|
||||
std::cout << "fileName " << fileName << std::endl;
|
||||
//std::cout << "outputName " << outputName << std::endl;
|
||||
//std::cout << "fileName " << fileName << std::endl;
|
||||
|
||||
auto pathPieces = split(fileName, '/');
|
||||
std::string outputFileName = outputName + "/" + pathPieces[pathPieces.size()-1] + "out";
|
||||
std::cout << "outputFileName " << outputFileName << std::endl;
|
||||
//std::cout << "outputFileName " << outputFileName << std::endl;
|
||||
|
||||
std::string inputFileName;
|
||||
for (auto i : includePaths) {
|
||||
programInFile.open(i+fileName);
|
||||
if (programInFile.is_open())
|
||||
if (programInFile.is_open()) {
|
||||
inputFileName = i+fileName;
|
||||
break;
|
||||
else
|
||||
} else {
|
||||
std::cout << i+fileName << " is no good" << std::endl;
|
||||
}
|
||||
}
|
||||
if (!programInFile.is_open()) {
|
||||
std::cout << "Problem opening programInFile " << fileName << "\n";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
outFile.open(outputFileName);
|
||||
if (!outFile.is_open()) {
|
||||
std::cout << "Probelm opening output file " << outputFileName << "\n";
|
||||
return NULL;
|
||||
}
|
||||
//outFile.open(outputFileName);
|
||||
//if (!outFile.is_open()) {
|
||||
//std::cout << "Probelm opening output file " << outputFileName << "\n";
|
||||
//return NULL;
|
||||
//}
|
||||
|
||||
outFileTransformed.open((outputFileName + ".transformed.dot").c_str());
|
||||
if (!outFileTransformed.is_open()) {
|
||||
std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n";
|
||||
return NULL;
|
||||
}
|
||||
//outFileTransformed.open((outputFileName + ".transformed.dot").c_str());
|
||||
//if (!outFileTransformed.is_open()) {
|
||||
//std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n";
|
||||
//return NULL;
|
||||
//}
|
||||
|
||||
std::string programInputFileString, line;
|
||||
while(programInFile.good()) {
|
||||
@@ -183,18 +191,18 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
|
||||
programInFile.close();
|
||||
|
||||
//std::cout << programInputFileString << std::endl;
|
||||
NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString);
|
||||
NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString, inputFileName);
|
||||
|
||||
if (parseTree) {
|
||||
//std::cout << parseTree->DOTGraphString() << std::endl;
|
||||
//outFile << parseTree->DOTGraphString() << std::endl;
|
||||
} else {
|
||||
std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl;
|
||||
outFile.close(); outFileTransformed.close();
|
||||
//outFile.close(); outFileTransformed.close();
|
||||
throw "unexceptablblllll";
|
||||
return NULL;
|
||||
}
|
||||
outFile.close();
|
||||
//outFile.close();
|
||||
|
||||
//Remove Transformations
|
||||
|
||||
@@ -211,7 +219,7 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
|
||||
} else {
|
||||
std::cout << "Tree returned from transformation is NULL!" << std::endl;
|
||||
}
|
||||
outFileTransformed.close();
|
||||
//outFileTransformed.close();
|
||||
|
||||
std::cout << "Returning parse tree" << std::endl;
|
||||
return parseTree;
|
||||
|
||||
@@ -22,7 +22,7 @@ void RNGLRParser::printReconstructedFrontier(int frontier) {
|
||||
}
|
||||
}
|
||||
|
||||
NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString, std::string filename) {
|
||||
input.clear();
|
||||
gss.clear();
|
||||
while(!toReduce.empty()) toReduce.pop();
|
||||
@@ -30,6 +30,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
SPPFStepNodes.clear();
|
||||
nullableParts.clear();
|
||||
packedMap.clear();
|
||||
bool errord = false;
|
||||
|
||||
//Check for no tokens
|
||||
bool accepting = false;
|
||||
@@ -52,16 +53,21 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
lexer.setInput(inputString);
|
||||
//Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
|
||||
//It could be converted to on-line later.
|
||||
int tokenNum = 1;
|
||||
Symbol currentToken = lexer.next();
|
||||
input.push_back(currentToken);
|
||||
while (currentToken != EOFSymbol) {
|
||||
currentToken = lexer.next();
|
||||
//std::cout << "CurrentToken is " << currentToken.toString() << std::endl;
|
||||
if (currentToken == invalidSymbol) {
|
||||
std::cerr << filename << ":" << findLine(tokenNum) << std::endl;
|
||||
errord = true;
|
||||
std::cerr << "lex error" << std::endl;
|
||||
std::cerr << "Invalid Symbol!" << std::endl;
|
||||
throw "Invalid Symbol, cannot lex";
|
||||
}
|
||||
input.push_back(currentToken);
|
||||
tokenNum++;
|
||||
}
|
||||
|
||||
// std::cout << "\nDone with Lexing, length:" << input.size() << std::endl;
|
||||
@@ -99,8 +105,11 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
// std::cout << "Checking if frontier " << i << " is empty" << std::endl;
|
||||
if (gss.frontierIsEmpty(i)) {
|
||||
//std::cout << "Frontier " << i << " is empty." << std::endl;
|
||||
std::cerr << "Parsing failed on " << input[i].toString() << std::endl;
|
||||
std::cerr << "Problem is on line: " << findLine(i) << std::endl;
|
||||
//std::cerr << "Parsing failed on " << input[i].toString() << std::endl;
|
||||
//std::cerr << "Problem is on line: " << findLine(i) << std::endl;
|
||||
std::cerr << filename << ":" << findLine(i) << std::endl;
|
||||
errord = true;
|
||||
std::cerr << "parse error" << std::endl;
|
||||
std::cerr << "Nearby is:" << std::endl;
|
||||
int range = 10;
|
||||
for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++)
|
||||
@@ -138,6 +147,12 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
return gss.getEdge(accState, v0);
|
||||
}
|
||||
|
||||
if (!errord) {
|
||||
std::cerr << filename << ":" << findLine(input.size())-2 << std::endl;
|
||||
std::cerr << "parse error" << std::endl;
|
||||
std::cerr << "Nearby is:" << std::endl;
|
||||
}
|
||||
|
||||
std::cerr << "Rejected!" << std::endl;
|
||||
// std::cout << "GSS:\n" << gss.toString() << std::endl;
|
||||
return NULL;
|
||||
@@ -522,7 +537,7 @@ std::vector<NodeTree<Symbol>*> RNGLRParser::getPathEdges(std::vector<NodeTree<in
|
||||
}
|
||||
|
||||
int RNGLRParser::findLine(int tokenNum) {
|
||||
int lineNo = 0;
|
||||
int lineNo = 1;
|
||||
for (int i = 0; i < tokenNum; i++) {
|
||||
std::string tokenString = input[i].getValue();
|
||||
for (int j = 0; j < tokenString.size(); j++)
|
||||
|
||||
@@ -2,6 +2,29 @@ import string
|
||||
import vector
|
||||
import set
|
||||
import symbol
|
||||
import regex
|
||||
|
||||
// A grammar: the list of production rules plus the set of regexes it uses.
// NOTE(review): `regex` is imported without `:*`; other imported types in this
// file are written module-qualified (e.g. symbol::symbol), so this member type
// may need to be regex::regex -- confirm against regex.krak.
obj grammer (Object) {
    var rules: vector::vector<rule>
    var regexs: set::set<regex>

    // Initializes both member containers and returns this object, as the
    // declared grammer* return type requires (the return was missing before).
    fun construct(): grammer* {
        rules.construct()
        regexs.construct()
        return this
    }
    // Initializes this grammer as a copy of *old by copy-constructing each member.
    fun copy_construct(old: grammer*) {
        rules.copy_construct(&old->rules)
        regexs.copy_construct(&old->regexs)
    }
    // Assignment: destruct the current members, then rebuild them from other.
    fun operator=(other: grammer) {
        destruct()
        copy_construct(&other)
    }
    // Destructs both member containers.
    fun destruct() {
        rules.destruct()
        regexs.destruct()
    }
}
|
||||
|
||||
obj rule (Object) {
|
||||
var lhs: symbol::symbol
|
||||
@@ -16,10 +39,10 @@ obj rule (Object) {
|
||||
lookahead.construct()
|
||||
}
|
||||
// Initializes this rule as a member-by-member copy of *old.
// The source must be referenced through the parameter name `old`; the earlier
// bodies dereferenced `rule` / `other`, neither of which is a name in scope here.
fun copy_construct(old: rule*) {
    lhs.copy_construct(&old->lhs)
    rhs.copy_construct(&old->rhs)
    position = old->position
    lookahead.copy_construct(&old->lookahead)
}
|
||||
fun operator=(other: rule) {
|
||||
destruct()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import string:*;
|
||||
import string;
|
||||
import mem:*
|
||||
|
||||
__if_comp__ __C__ simple_passthrough """
|
||||
@@ -32,7 +32,7 @@ fun print(toPrint: char) : void {
|
||||
return;
|
||||
}
|
||||
|
||||
fun print(toPrint: string) : void {
|
||||
fun print(toPrint: string::string) : void {
|
||||
var charArr = toPrint.toCharArray()
|
||||
defer delete(charArr)
|
||||
print(charArr);
|
||||
@@ -73,3 +73,29 @@ fun print(toPrint: double) : void{
|
||||
return;
|
||||
}
|
||||
|
||||
// Ok, just some DEAD simple file io for now
|
||||
// Reads the whole file at `path` and returns its contents as a string.
// If the file cannot be opened, an empty string is returned instead of handing
// a NULL FILE* to fseek/ftell/fread. A failed ftell (negative size) is treated
// as an empty file. The C buffer is freed once it has been copied into toRet.
fun read_file(path: string::string): string::string {
    var char_path = path.toCharArray()
    defer delete(char_path)
    var data: char*
    __if_comp__ __C__ {
        // The C snippet declares `data` itself, exactly as the original did.
        simple_passthrough(char_path = char_path:data = data:) """
            char *data = NULL;
            FILE *fp = fopen(char_path, "r");
            if (fp) {
                fseek(fp, 0L, SEEK_END);
                long size = ftell(fp);
                fseek(fp, 0L, SEEK_SET);
                if (size < 0) size = 0;
                data = malloc(size+1);
                size_t readSize = fread(data, 1, size, fp);
                data[readSize] = 0;
                fclose(fp);
            } else {
                data = malloc(1);
                data[0] = 0;
            }
        """
    }
    var toRet = string::string(data)
    __if_comp__ __C__ {
        simple_passthrough(data = data::) """
            free(data);
        """
    }
    return toRet
}
|
||||
|
||||
|
||||
7
tests/syntax_error.krak
Normal file
7
tests/syntax_error.krak
Normal file
@@ -0,0 +1,7 @@
|
||||
import io
|
||||
|
||||
fun main():int {
|
||||
return 0
|
||||
}
|
||||
|
||||
|
||||
3
tests/test_fileio.expected_results
Normal file
3
tests/test_fileio.expected_results
Normal file
@@ -0,0 +1,3 @@
|
||||
this can be anything
|
||||
because it reads itself
|
||||
beautiful
|
||||
7
tests/test_fileio.krak
Normal file
7
tests/test_fileio.krak
Normal file
@@ -0,0 +1,7 @@
|
||||
import io:*
|
||||
import string:*
|
||||
|
||||
fun main():int {
|
||||
print(read_file(string("test_fileio.expected_results")))
|
||||
return 0
|
||||
}
|
||||
Reference in New Issue
Block a user