This commit is contained in:
Nathan Braswell
2014-03-14 16:55:33 -04:00
39 changed files with 1734 additions and 506 deletions

View File

@@ -2,9 +2,11 @@ cmake_minimum_required (VERSION 2.6)
project(Kraken)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp )
set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp src/Type.cpp src/Importer.cpp )
include_directories( ${MY_INCLUDES} )

View File

@@ -0,0 +1,41 @@
Kraken Compiled Grammar file format (.kgm.comp)
This file is generated on the first run, and regenerated every time the grammar changes.
It contains the RNGLR table generated from the specified grammar so that it does not
have to be rebuilt every time Kraken is run, which saves a lot of time.
(At the time of writing, non-cached: ~30 seconds, cached: <1 second.)
This is a binary format. The first four bytes are a magic number (KRAK in ASCII).
The next bytes are an integer indicating how many characters follow (the count includes the terminating null character).
Next are these characters, which are the grammar file as one long, null-terminated string.
Next is the parse table length, followed by the table itself, exported with the table's export method.
It can be imported with the table's import method.
Note that within the parse table's data are parse actions, and within that, Symbols.
The format: (more or less)
____________________
|KRAK
|length_of_grammer_text
|GRAMMER_TEXT
|PARSE_TABLE
|-|length_of_symbol_index_vector
|-|SYMBOL_INDEX_VECTOR
|-|length_of_out_table_vector
|-|OUT_TABLE_VECTOR
|-|-|length_of_mid_table_vector
|-|-|MID_TABLE_VECTOR
|-|-|-|length_of_in_table_vector
|-|-|-|IN_TABLE_VECTOR
|-|-|-|-|length_of_parse_action
|-|-|-|-|PARSE_ACTION
|-|-|-|-|-|ActionType
|-|-|-|-|-|ParseRule__if_exists
|-|-|-|-|-|-|pointerIndex
|-|-|-|-|-|-|Symbol_left_handel
|-|-|-|-|-|-|rightside_vector_symbol
|-|-|-|-|-|shiftState
____________________

View File

@@ -4,20 +4,26 @@ Kraken
The Kraken Programming Language
The Kraken Programming Language is in its infancy.
Currently, it consists of a RNGLALR parser written in C++ and a very experimental grammer that is evolving quickly.
When compiled, the kraken program (as it is not yet a compiler) will take in a text file to be parsed, the grammer file to use, and a filename to output a DOT file to.
Kraken will then generate the RN parsing tables from the grammer and then parse the input and export a DOT file that can be renderd into a graph using Graphviz.
Currently, it consists of an RNGLALR parser written in C++, an experimental grammar that is evolving, and a C code generator.
When compiled, the kraken compiler will take in a text file to be parsed, the grammar file to use, and an output file name.
Kraken will then generate the RN parsing tables from the grammar OR load them from a binary file if Kraken has been run with this exact version of the grammar before. Then it will parse the input and export DOT files for every .krak file in the project (these can be rendered into a graph using Graphviz), a C file for every file in the project, and a .sh script containing the compiler command to compile the C files together into a binary.
It is invoked in this way:
kraken inputTextFile inputGrammerFile outputFile.dot
kraken inputTextFile inputGrammerFile outputName
Dependencies
============
It is built using CMake, which is also its only dependency.
Goals
=====
It has the following design goals:
-Compiled
-Clean
-Fast (both running and writing)
-Good for Systems (including Operating Systems) programming
-Minimal "magic" code. (no runtime, other libraries automatically included)
* Compiled
* Clean
* Fast (both running and writing)
* Good for Systems (including Operating Systems) programming
* Minimal "magic" code. (no runtime, other libraries automatically included)
It is inspired by C/C++, Python, and Go.

View File

@@ -2,34 +2,34 @@
#define ASTDATA_H
#include <vector>
#include <set>
#include <map>
#include "Symbol.h"
//Circular dependency
class Type;
#include "Type.h"
#ifndef NULL
#define NULL 0
#define NULL ((void*)0)
#endif
enum ASTType {undef, translation_unit, interpreter_directive, import, identifier,
function, code_block,
typed_parameter, expression, boolean_expression, statement,
if_statement, return_statement, assignment_statement, declaration_statement,
function_call, value};
enum ValueType {none, boolean, integer, floating, double_percision, char_string };
enum ASTType {undef, translation_unit, interpreter_directive, import, identifier, type_def,
function, code_block, typed_parameter, expression, boolean_expression, statement,
if_statement, while_loop, for_loop, return_statement, assignment_statement, declaration_statement,
if_comp, simple_passthrough, function_call, value};
class ASTData {
public:
ASTData();
ASTData(ASTType type, ValueType valueType = none);
ASTData(ASTType type, Symbol symbol, ValueType valueType = none);
ASTData(ASTType type, Type *valueType = NULL);
ASTData(ASTType type, Symbol symbol, Type *valueType = NULL);
~ASTData();
std::string toString();
static std::string ASTTypeToString(ASTType type);
static std::string ValueTypeToString(ValueType type);
static ValueType strToType(std::string type);
ASTType type;
ValueType valueType;
Type* valueType;
Symbol symbol;
std::map<std::string, std::vector<NodeTree<ASTData>*>> scope;
private:
};

View File

@@ -1,17 +1,31 @@
#ifndef ASTTRANSFORMATION_H
#define ASTTRANSFORMATION_H
#include <set>
#include <map>
#include "Type.h"
#include "ASTData.h"
#include "NodeTransformation.h"
#include "Importer.h"
class Importer;
class ASTTransformation: public NodeTransformation<Symbol,ASTData> {
public:
ASTTransformation();
ASTTransformation(Importer* importerIn);
~ASTTransformation();
virtual NodeTree<ASTData>* transform(NodeTree<Symbol>* from);
NodeTree<ASTData>* transform(NodeTree<Symbol>* from, NodeTree<ASTData>* scope, std::vector<Type> types = std::vector<Type>());
std::vector<NodeTree<ASTData>*> transformChildren(std::vector<NodeTree<Symbol>*> children, std::set<int> skipChildren, NodeTree<ASTData>* scope, std::vector<Type> types);
std::vector<Type> mapNodesToTypes(std::vector<NodeTree<ASTData>*> nodes);
std::string concatSymbolTree(NodeTree<Symbol>* root);
NodeTree<ASTData>* scopeLookup(NodeTree<ASTData>* scope, std::string lookup, std::vector<NodeTree<ASTData>*> nodes);
NodeTree<ASTData>* scopeLookup(NodeTree<ASTData>* scope, std::string lookup, std::vector<Type> types = std::vector<Type>());
Type* typeFromString(std::string type, NodeTree<ASTData>* scope);
private:
//Nothing
Importer * importer;
std::map<std::string, std::vector<NodeTree<ASTData>*>> languageLevelScope;
};
#endif

View File

@@ -3,17 +3,27 @@
#include <string>
#include <iostream>
#include <fstream>
#include "NodeTree.h"
#include "ASTData.h"
#include "Type.h"
#include "util.h"
class CGenerator {
public:
CGenerator();
~CGenerator();
std::string generate(NodeTree<ASTData>* from);
static std::string ValueTypeToCType(ValueType type);
void generateCompSet(std::map<std::string, NodeTree<ASTData>*> ASTs, std::string outputName);
std::string generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enclosingObject = NULL);
static std::string ValueTypeToCType(Type *type);
static std::string ValueTypeToCTypeDecoration(Type *type);
static std::string CifyFunctionName(std::string name);
std::string generateObjectMethod(NodeTree<ASTData>* enclosingObject, NodeTree<ASTData>* from);
std::string generatorString;
private:
std::string tabs();
int tabLevel;

View File

@@ -25,6 +25,7 @@ class GraphStructuredStack {
bool hasEdge(NodeTree<int>* start, NodeTree<int>* end);
NodeTree<Symbol>* getEdge(NodeTree<int>* start, NodeTree<int>* end);
void addEdge(NodeTree<int>* start, NodeTree<int>* end, NodeTree<Symbol>* edge);
void clear();
std::string toString();
private:

30
include/Importer.h Normal file
View File

@@ -0,0 +1,30 @@
#ifndef __IMPORTER__H_
#define __IMPORTER__H_
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include "Parser.h"
#include "NodeTree.h"
#include "ASTData.h"
#include "Symbol.h"
#include "RemovalTransformation.h"
#include "CollapseTransformation.h"
#include "ASTTransformation.h"
// Front end that turns source files into ASTs. It is constructed with a pointer
// to a Parser, import() is called with a file name, and getASTMap() exposes a
// string-keyed map of AST roots.
// NOTE(review): only the declaration is visible here; statements marked
// "presumably" below should be confirmed against Importer.cpp.
class Importer {
public:
// parserIn: the parser this importer works with (presumably kept in `parser`).
Importer(Parser* parserIn);
~Importer();
// Returns the AST root for fileName.
// NOTE(review): presumably parses the file and records the result in `imported` — confirm.
NodeTree<ASTData>* import(std::string fileName);
// Returns a map from string to AST root, by value (presumably the contents of `imported`).
std::map<std::string, NodeTree<ASTData>*> getASTMap();
private:
// Parser handed in at construction; ownership is not visible from this header.
Parser* parser;
// Presumably the symbols given to RemovalTransformation / CollapseTransformation
// before AST construction — TODO confirm.
std::vector<Symbol> removeSymbols;
std::vector<Symbol> collapseSymbols;
// AST roots keyed by string (presumably the imported file name — verify).
std::map<std::string, NodeTree<ASTData>*> imported;
};
#endif

View File

@@ -16,6 +16,7 @@ class Lexer {
void addRegEx(std::string regExString);
void setInput(std::string inputString);
Symbol next();
void reset();
static void test();
private:
std::vector<RegEx*> regExs;

View File

@@ -4,7 +4,7 @@
#include "NodeTree.h"
#ifndef NULL
#define NULL 0
#define NULL ((void*)0)
#endif
template <class FROM, class TO>

View File

@@ -2,7 +2,7 @@
#define NODETREE_H
#ifndef NULL
#define NULL 0
#define NULL ((void*)0)
#endif
#include <vector>

View File

@@ -2,7 +2,7 @@
#define PARSE_ACTION_H
#ifndef NULL
#define NULL 0
#define NULL ((void*)0)
#endif
#include "util.h"

View File

@@ -2,7 +2,7 @@
#define PARSERULE_H
#ifndef NULL
#define NULL 0
#define NULL ((void*)0)
#endif
#include "Symbol.h"

View File

@@ -32,6 +32,8 @@ class Parser {
virtual std::string grammerToDOT();
std::string tableToString();
void exportTable(std::ofstream &file);
void importTable(char* tableData);
protected:
std::vector<Symbol>* firstSet(Symbol token);

View File

@@ -39,6 +39,8 @@ class RNGLRParser: public Parser {
std::vector<NodeTree<Symbol>*> getPathEdges(std::vector<NodeTree<int>*> path);
int findLine(int tokenNum); //Get the line number for a token, used for error reporting
std::vector<Symbol> input;
GraphStructuredStack gss;
//start node, lefthand side of the reduction, reduction length

View File

@@ -16,8 +16,7 @@ class RegEx {
RegEx(std::string inPattern);
~RegEx();
void construct();
void deperenthesize();
RegExState* construct(std::vector<RegExState*>* ending, std::string pattern);
int longMatch(std::string stringToMatch);
std::string getPattern();
std::string toString();

View File

@@ -2,7 +2,7 @@
#define STATE_H
#ifndef NULL
#define NULL 0
#define NULL ((void*)0)
#endif
#include "util.h"
@@ -33,6 +33,7 @@ class State {
void addParents(std::vector<State*>* parents);
std::vector<State*>* getParents();
std::vector<State*>* getDeepParents(int depth);
int getNumber();
std::vector<ParseRule*> basis;

View File

@@ -2,7 +2,7 @@
#define SYMBOL_H
#ifndef NULL
#define NULL 0
#define NULL ((void*)0)
#endif
#include "NodeTree.h"

View File

@@ -1,3 +1,5 @@
#include <fstream>
#include "util.h"
#include "ParseRule.h"
#include "ParseAction.h"
@@ -11,6 +13,8 @@ class Table {
public:
Table();
~Table();
void exportTable(std::ofstream &file);
void importTable(char* tableData);
void setSymbols(Symbol EOFSymbol, Symbol nullSymbol);
void add(int stateNum, Symbol tranSymbol, ParseAction* action);
void remove(int stateNum, Symbol tranSymbol);

37
include/Type.h Normal file
View File

@@ -0,0 +1,37 @@
#ifndef TYPE_H
#define TYPE_H
// Fallback NULL for translation units that reach this header before any
// standard header has defined it. It must be an integer null pointer constant:
// in C++ a void* does not convert implicitly to other pointer types, so
// ((void*)0) would make `Type* p = NULL` ill-formed.
#ifndef NULL
#define NULL 0
#endif
#include <string>
#include <iostream>
//Circular dependency
class ASTData;
#include "ASTData.h"
#include "util.h"
// Built-in value categories; Type::baseType holds one of these.
enum ValueType {none, void_type, boolean, integer, floating, double_percision, character };
// Describes the type attached to an AST node: a built-in ValueType and/or a
// pointer to a type-definition AST node, together with an indirection count.
// All three data members are public.
class Type {
public:
// Constructors accept any combination of base type, type-definition node and
// indirection count. NOTE(review): the values used for omitted arguments are
// set in Type.cpp, which is not visible here.
Type();
Type(ValueType typeIn, int indirectionIn);
Type(ValueType typeIn);
Type(NodeTree<ASTData>* typeDefinitionIn);
Type(NodeTree<ASTData>* typeDefinitionIn, int indirectionIn);
Type(ValueType typeIn, NodeTree<ASTData>* typeDefinitionIn, int indirectionIn);
~Type();
// Equality / inequality against another Type.
// NOTE(review): the top-level const on the bool return type has no effect for callers.
bool const operator==(const Type &other)const;
bool const operator!=(const Type &other)const;
// Returns a string form of this type.
std::string toString();
// Built-in kind of this type.
ValueType baseType;
// Presumably the type_def AST node for user-defined types — TODO confirm.
NodeTree<ASTData>* typeDefinition;
// Presumably the pointer depth (number of '*' on the type) — TODO confirm.
int indirection;
private:
};
#endif

View File

@@ -2,14 +2,38 @@
#define UTIL_H
#ifndef NULL
#define NULL 0
#define NULL ((void*)0)
#endif
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
std::string intToString(int theInt);
std::string replaceExEscape(std::string first, std::string search, std::string replace);
std::string strSlice(std::string str, int begin, int end);
int findPerenEnd(std::string str, int i);
std::vector<std::string> split(const std::string &str, char delim);
std::string join(const std::vector<std::string> &strVec, std::string joinStr);
// Returns true when `item` compares equal (operator==) to at least one element
// of `vec`, false otherwise (including for an empty vector).
// The vector is taken by const reference so a call does not copy the whole
// container, and elements are inspected in place. `item` stays by value so
// template deduction for existing callers is unchanged.
template <typename T>
bool contains(const std::vector<T>& vec, T item) {
    for (const auto& element : vec)
        if (element == item)
            return true;
    return false;
}
// Returns a copy of the elements of `vec` in the half-open range [begin, end).
// Negative indices count from one past the end: -1 means vec.size(), so
// slice(v, 0, -1) returns the whole vector and slice(v, -3, -1) the last two
// elements. Indices that still fall outside the vector after that adjustment
// are clamped to [0, vec.size()], and an empty vector is returned when the
// range is empty or inverted, so the function never indexes out of bounds.
template <typename T>
std::vector<T> slice(const std::vector<T>& vec, int begin, int end) {
    const int size = static_cast<int>(vec.size());
    if (begin < 0)
        begin += size + 1;
    if (end < 0)
        end += size + 1;
    // Clamp whatever is left outside the valid range.
    if (begin < 0)
        begin = 0;
    if (end > size)
        end = size;
    if (begin >= end)
        return std::vector<T>();
    return std::vector<T>(vec.begin() + begin, vec.begin() + end);
}
#endif

View File

@@ -1,10 +1,9 @@
Goal = translation_unit ;
translation_unit = interpreter_directive WS opt_import_list WS function_list WS ;
translation_unit = interpreter_directive WS unorderd_list_part WS ;
unorderd_list_part = import WS unorderd_list_part | function WS unorderd_list_part | type_def WS ";" WS unorderd_list_part | if_comp WS unorderd_list_part | simple_passthrough WS unorderd_list_part | declaration_statement WS ";" WS unorderd_list_part | import | function | type_def WS ";" | if_comp | simple_passthrough | declaration_statement WS ";" ;
type = "\*" WS type | "void" | "int" | "float" | "double" | "char" | identifier ;
type = type WS "\*" | "void" | "int" | "float" | "double" | "char" | identifier ;
opt_import_list = import_list | ;
import_list = import_list WS import | import ;
import = "import" WS identifier WS ";" ;
interpreter_directive = "#!" WS path | ;
@@ -14,43 +13,68 @@ forward_slash = "/" ;
back_slash = "\\" ;
WS = "( | |
)+" | ;
)+" | WS comment WS | ;
if_comp = "__if_comp__" WS identifier WS if_comp_pred ;
if_comp_pred = code_block | simple_passthrough ;
simple_passthrough = "__simple_passthrough__" WS triple_quoted_string ;
triple_quoted_string = "\"\"\"((\"\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+)|(\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+))*(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*(((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+\"\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+))*\"\"\"" ;
identifier = alpha | alpha alphanumeric ;
function_list = function_list WS function | function ;
function = type WS identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS code_block ;
overloadable_operator = "\+" | "-" | "\*" | "/" | "%" | "^" | "&" | "\|" | "~" | "\!" | "," | "=" | "\+\+" | "--" | "<<" | ">>" | "==" | "!=" | "&&" | "\|\|" | "\+=" | "-=" | "/=" | "%=" | "^=" | "&=" | "\|=" | "\*=" | "<<=" | ">>=" | "->" ;
func_identifier = identifier | identifier overloadable_operator ;
function = type WS func_identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS code_block ;
opt_typed_parameter_list = typed_parameter_list | ;
typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_parameter ;
typed_parameter = type WS parameter ;
typed_parameter = type WS identifier ;
opt_parameter_list = parameter_list | ;
parameter_list = parameter_list WS parameter | parameter ;
parameter = expression ;
parameter_list = parameter_list WS "," WS parameter | parameter ;
parameter = boolean_expression ;
type_def = "typedef" WS identifier WS type | "typedef" WS identifier WS "{" WS class_innerds WS "}" | "typedef" WS identifier WS "{" WS declaration_block WS "}" ;
class_innerds = visibility_block WS class_innerds | visibility_block ;
visibility_block = "public:" WS declaration_block | "protected:" WS declaration_block | "private:" WS declaration_block ;
declaration_block = declaration_statement WS ";" WS declaration_block | function WS declaration_block | declaration_statement WS ";" | function ;
if_statement = "if" WS "\(" WS boolean_expression WS "\)" WS statement ;
while_loop = "while" WS boolean_expression WS statement ;
for_loop = "for" WS "\(" WS statement WS boolean_expression WS ";" WS statement WS "\)" WS statement ;
return_statement = "return" | "return" WS boolean_expression ;
code_block = "{" WS statement_list WS "}" ;
statement_list = statement_list WS statement | statement ;
statement = if_statement | return_statement | expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ;
function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ;
scope = scope identifier "::" | ;
if_statement = "if" WS boolean_expression WS statement | "if" WS "\(" WS boolean_expression WS "\)" WS statement ;
statement_list = statement_list WS statement | statement ;
statement = if_statement | while_loop | for_loop | return_statement WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block | if_comp | simple_passthrough ;
function_call = unarad "\(" WS opt_parameter_list WS "\)" ;
boolean_expression = boolean_expression WS "\|\|" WS and_boolean_expression | and_boolean_expression ;
and_boolean_expression = and_boolean_expression "&&" bool_exp | bool_exp ;
bool_exp = "!" WS bool_exp | expression WS "==" WS expression | bool ;
bool_exp = "!" WS bool_exp | expression WS comparator WS expression | bool | expression ;
comparator = "==" | "<=" | ">=" | "!=" | "<" | ">" ;
return_statement = "return" WS "\(" WS expression WS "\)" WS ";" | "return" WS expression WS ";" ;
expression = expression WS "-" WS term | expression WS "\+" WS term | term ;
term = term WS forward_slash WS factor | term WS "\*" WS factor | factor ;
factor = number | identifier | function_call | bool | string ;
expression = expression WS "<<" WS term | expression WS ">>" WS shiftand | shiftand ;
shiftand = shiftand WS "-" WS term | shiftand WS "\+" WS term | term ;
term = term WS forward_slash WS factor | term WS "\*" WS factor | term WS "%" WS factor | factor ;
factor = "\+\+" WS unarad | unarad WS "\+\+" | "--" WS unarad | unarad WS "--" | "\+" WS unarad | "-" WS unarad | "!" WS unarad | "~" WS unarad | "\(" WS type WS "\)" WS unarad | "\*" WS unarad | "&" WS unarad | unarad ;
unarad = number | identifier | function_call | bool | string | character | "\(" WS boolean_expression WS "\)" | access_operation ;
number = integer | float | double ;
access_operation = unarad "." identifier | unarad "->" identifier ;
assignment_statement = identifier WS "=" WS expression ;
declaration_statement = type WS identifier WS "=" WS expression ;
assignment_statement = factor WS "=" WS boolean_expression | factor WS "\+=" WS boolean_expression | factor WS "-=" WS boolean_expression | factor WS "\*=" WS boolean_expression | factor WS "/=" WS boolean_expression ;
declaration_statement = type WS identifier WS "=" WS boolean_expression | type WS identifier ;
alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ;
hexadecimal = "0x(1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)+" ;
@@ -59,6 +83,13 @@ integer = sign numeric | sign hexadecimal | "null" ;
float = sign numeric "." numeric "f" ;
double = sign numeric "." numeric | sign numeric "." numeric "d" ;
bool = "true" | "false" | "True" | "False" ;
alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|_)+" ;
character = "'(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )'" ;
alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|_)+" ;
numeric = "(0|1|2|3|4|5|6|7|8|9)+" ;
string = "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|!|_|-| | |\\|/|\||0|1|2|3|4|5|6|7|8|9)+\"" ;
string = triple_quoted_string | "\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*\"" ;
comment = "//(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )*
" | "(/\*+((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|~|!|@|#|$|%|^|&|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )/*\**)+\*/)|(/\*\*/)" ;

188
main.cpp
View File

@@ -3,68 +3,56 @@
#include <fstream>
#include <vector>
#include <cstring>
#include "NodeTree.h"
#include "Symbol.h"
#include "Lexer.h"
#include "LALRParser.h"
#include "RNGLRParser.h"
#include "NodeTransformation.h"
#include "RemovalTransformation.h"
#include "CollapseTransformation.h"
#include "ASTTransformation.h"
#include "Importer.h"
#include "ASTData.h"
#include "CGenerator.h"
#include "util.h"
int main(int argc, char* argv[]) {
if (argc == 2 && std::string(argv[1]) == "--test") {
StringReader::test();
RegEx::test();
Lexer::test();
//std::cout << strSlice("123", 0, -1) << std::endl;
return 0;
}
std::ifstream programInFile, grammerInFile;
std::ofstream outFile, outFileTransformed, outFileAST, outFileC;
std::string programName = argv[1];
std::string grammerFileString = argv[2];
std::string outputName = argv[3];
programInFile.open(argv[1]);
if (!programInFile.is_open()) {
std::cout << "Problem opening programInFile " << argv[1] << "\n";
return(1);
}
std::ifstream grammerInFile, compiledGrammerInFile;
std::ofstream /*outFileC,*/ compiledGrammerOutFile;
grammerInFile.open(argv[2]);
grammerInFile.open(grammerFileString);
if (!grammerInFile.is_open()) {
std::cout << "Problem opening grammerInFile " << argv[2] << "\n";
std::cout << "Problem opening grammerInFile " << grammerFileString << "\n";
return(1);
}
outFile.open(argv[3]);
if (!outFile.is_open()) {
std::cout << "Probelm opening output file " << argv[3] << "\n";
return(1);
compiledGrammerInFile.open(grammerFileString + ".comp", std::ios::binary | std::ios::ate);
if (!compiledGrammerInFile.is_open()) {
std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n";
//return(1);
}
outFileTransformed.open((std::string(argv[3]) + ".transformed.dot").c_str());
if (!outFileTransformed.is_open()) {
std::cout << "Probelm opening second output file " << std::string(argv[3]) + ".transformed.dot" << "\n";
return(1);
}
outFileAST.open((std::string(argv[3]) + ".AST.dot").c_str());
if (!outFileAST.is_open()) {
std::cout << "Probelm opening second output file " << std::string(argv[3]) + ".AST.dot" << "\n";
return(1);
}
outFileC.open((std::string(argv[3]) + ".c").c_str());
/*
outFileC.open((outputName + ".c").c_str());
if (!outFileC.is_open()) {
std::cout << "Probelm opening third output file " << std::string(argv[3]) + ".c" << "\n";
std::cout << "Probelm opening third output file " << outputName + ".c" << "\n";
return(1);
}
*/
//Read the input file into a string
std::string programInputFileString, grammerInputFileString;
std::string grammerInputFileString;
std::string line;
while(grammerInFile.good()) {
getline(grammerInFile, line);
@@ -72,23 +60,69 @@ int main(int argc, char* argv[]) {
}
grammerInFile.close();
while(programInFile.good()) {
getline(programInFile, line);
programInputFileString.append(line+"\n");
}
programInFile.close();
//LALRParser parser;
RNGLRParser parser;
parser.loadGrammer(grammerInputFileString);
//std::cout << "Creating State Set from Main" << std::endl;
std::cout << "\nState Set" << std::endl;
parser.createStateSet();
//std::cout << "\nState Set" << std::endl;
//Start binary stuff
// Try to reuse the cached parse table stored in "<grammar file>.comp".
// Layout read and written here: the 4-byte magic "KRAK", an int holding the
// length of the grammar text including its null terminator, the grammar text,
// then the table as written by parser.exportTable().
bool compGramGood = false;
if (compiledGrammerInFile.is_open()) {
    std::cout << "Compiled grammer file exists, reading it in" << std::endl;
    // The stream was opened with std::ios::ate, so tellg() reports the file size.
    const std::streamoff compGramSize = compiledGrammerInFile.tellg();
    // A vector owns the buffer so it is released on every path, including the early return below.
    std::vector<char> binaryTable(compGramSize > 0 ? static_cast<std::size_t>(compGramSize) : 0);
    compiledGrammerInFile.seekg(0, std::ios::beg);
    if (!binaryTable.empty())
        compiledGrammerInFile.read(binaryTable.data(), static_cast<std::streamsize>(binaryTable.size()));
    compiledGrammerInFile.close();
    const std::size_t magicLength = 4;
    const std::size_t headerLength = magicLength + sizeof(int);
    //Check magic number
    if (binaryTable.size() >= magicLength && std::memcmp(binaryTable.data(), "KRAK", magicLength) == 0) {
        std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
        // The file may hold only the magic if an earlier run stopped while the table
        // was being generated, so check sizes before reading the length and the text.
        int gramStringLength = 0;
        const bool headerComplete = binaryTable.size() >= headerLength;
        if (headerComplete)
            std::memcpy(&gramStringLength, binaryTable.data() + magicLength, sizeof(int)); // no unaligned int* read
        // The stored length counts the null terminator, and table data must follow the text.
        const bool grammerMatches = headerComplete
            && gramStringLength > 0
            && static_cast<std::size_t>(gramStringLength) < binaryTable.size() - headerLength
            && grammerInputFileString.length() + 1 == static_cast<std::size_t>(gramStringLength)
            && std::memcmp(grammerInputFileString.c_str(), binaryTable.data() + headerLength, static_cast<std::size_t>(gramStringLength)) == 0;
        if (!grammerMatches) {
            std::cout << "The Grammer has been changed, will re-create" << std::endl;
        } else {
            compGramGood = true;
            std::cout << "Grammer file is up to date." << std::endl;
            parser.importTable(binaryTable.data() + headerLength + gramStringLength); //Load table starting at the table section
        }
    } else {
        std::cout << grammerFileString << ".comp is NOT A Valid Kraken Compiled Grammer File, aborting" << std::endl;
        return -1;
    }
}
if (!compGramGood) {
    //The load failed because either the file does not exist or it is not up-to-date.
    std::cout << "Compiled grammer file does not exist or is not up-to-date, generating table and writing it out" << std::endl;
    compiledGrammerOutFile.open(grammerFileString + ".comp", std::ios::binary);
    const bool canWriteCache = compiledGrammerOutFile.is_open();
    if (!canWriteCache)
        std::cout << "Could not open compiled file to write either!" << std::endl;
    if (canWriteCache) {
        // Write only the magic before building the table, so that a failure while
        // building leaves a file that does not look like a complete cache.
        compiledGrammerOutFile.write("KRAK", sizeof(char)*4);
        compiledGrammerOutFile.flush();
    }
    // The table is always built, even when the cache file cannot be written.
    parser.createStateSet();
    if (canWriteCache) {
        const int storedLength = static_cast<int>(grammerInputFileString.length() + 1); //Don't forget null terminator
        compiledGrammerOutFile.write(reinterpret_cast<const char*>(&storedLength), sizeof(int));
        compiledGrammerOutFile.write(grammerInputFileString.c_str(), storedLength);
        parser.exportTable(compiledGrammerOutFile);
        compiledGrammerOutFile.close();
    }
}
//End binary stuff
//std::cout << "finished State Set from Main" << std::endl;
//std::cout << "Doing stateSetToString from Main" << std::endl;
// std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl;
// std::cout << parser.stateSetToString() << std::endl;
// std::cout << "finished stateSetToString from Main" << std::endl;
// std::cout << "finished stateSetToString from Main" << std::endl;
// std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl;
// std::cout << parser.tableToString() << std::endl;
// std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl;
@@ -100,72 +134,24 @@ int main(int argc, char* argv[]) {
//outFile << parser.grammerToDOT() << std::endl;
std::cout << "\nParsing" << std::endl;
std::cout << programInputFileString << std::endl;
NodeTree<Symbol>* parseTree = parser.parseInput(programInputFileString);
Importer importer(&parser);
if (parseTree) {
//std::cout << parseTree->DOTGraphString() << std::endl;
outFile << parseTree->DOTGraphString() << std::endl;
} else {
std::cout << "ParseTree returned from parser is NULL!" << std::endl;
}
outFile.close();
/*NodeTree<ASTData>* AST =*/
importer.import(programName);
std::map<std::string, NodeTree<ASTData>*> ASTs = importer.getASTMap();
//Pre AST Transformations
std::vector<NodeTransformation<Symbol, Symbol>*> preASTTransforms;
//Remove Transformations
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol("WS", false)));
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol("\\(", true)));
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol("\\)", true)));
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol("::", true)));
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol(";", true)));
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol("{", true)));
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol("}", true)));
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol("import", true))); //Don't need the actual text of the symbol
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol("interpreter_directive", false)));
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol("if", true)));
preASTTransforms.push_back(new RemovalTransformation<Symbol>(Symbol("while", true)));
//Collapse Transformations
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("opt_typed_parameter_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("opt_parameter_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("opt_import_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("import_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("function_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("statement_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("parameter_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("typed_parameter_list", false)));
for (int i = 0; i < preASTTransforms.size(); i++) {
parseTree = preASTTransforms[i]->transform(parseTree);
}
preASTTransforms.erase(preASTTransforms.begin(), preASTTransforms.end());
NodeTree<ASTData>* AST = ASTTransformation().transform(parseTree);
//NodeTree<ASTData>* AST = (new ASTTransformation())->transform(parseTree);
if (parseTree) {
outFileTransformed << parseTree->DOTGraphString() << std::endl;
} else {
std::cout << "Tree returned from transformation is NULL!" << std::endl;
}
outFileTransformed.close();
if (AST) {
outFileAST << AST->DOTGraphString() << std::endl;
} else {
std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl;
}
outFileAST.close();
//Do type checking, scope creation, etc. here.
//None at this time, instead going strait to C in this first (more naive) version
//Do optomization, etc. here.
//None at this time, instead going straight to C in this first (more naive) version
//Code generation
//For right now, just C
CGenerator().generateCompSet(ASTs, outputName);
/*
std::string c_code = CGenerator().generate(AST);
outFileC << c_code << std::endl;
outFileC.close();
outFileC.close();
*/
return(0);
}

View File

@@ -2,14 +2,15 @@
//Default node data: an undefined AST type with no value type attached.
ASTData::ASTData() {
    valueType = NULL;
    type = undef;
}
ASTData::ASTData(ASTType type, ValueType valueType) {
ASTData::ASTData(ASTType type, Type *valueType) {
this->type = type;
this->valueType = valueType;
}
ASTData::ASTData(ASTType type, Symbol symbol, ValueType valueType) {
ASTData::ASTData(ASTType type, Symbol symbol, Type *valueType) {
this->type = type;
this->valueType = valueType;
this->symbol = symbol;
@@ -20,98 +21,55 @@ ASTData::~ASTData() {
}
std::string ASTData::toString() {
return ASTTypeToString(type) + (symbol.isTerminal() ? " " + symbol.toString() : "") + (valueType ? " " + ValueTypeToString(valueType) : "");
}
ValueType ASTData::strToType(std::string type) {
if (type == "bool")
return boolean;
else if (type == "int")
return integer;
else if (type == "float")
return floating;
else if (type == "double")
return double_percision;
else if (type == "string")
return char_string;
else return none;
}
std::string ASTData::ValueTypeToString(ValueType type) {
switch (type) {
case none:
return "none";
break;
case boolean:
return "bool";
break;
case integer:
return "int";
break;
case floating:
return "float";
break;
case double_percision:
return "double";
break;
case char_string:
return "string";
break;
default:
return "unknown_ValueType";
}
return ASTTypeToString(type) + " " +
(symbol.isTerminal() ? " " + symbol.toString() : "") + " " +
(valueType ? valueType->toString() : "no_type");
}
std::string ASTData::ASTTypeToString(ASTType type) {
switch (type) {
case translation_unit:
return "translation_unit";
break;
case interpreter_directive:
return "interpreter_directive";
break;
case identifier:
return "identifier";
break;
case import:
return "import";
break;
case function:
return "function";
break;
case type_def:
return "type_def";
case code_block:
return "code_block";
break;
case typed_parameter:
return "typed_parameter";
break;
case expression:
return "expression";
break;
case boolean_expression:
return "boolean_expression";
break;
case statement:
return "statement";
break;
case if_statement:
return "if_statement";
break;
case while_loop:
return "while_loop";
case for_loop:
return "for_loop";
case return_statement:
return "return_statement";
break;
case assignment_statement:
return "assignment_statement";
break;
case declaration_statement:
return "declaration_statement";
break;
case if_comp:
return "if_comp";
case simple_passthrough:
return "simple_passthrough";
case function_call:
return "function_call";
break;
case value:
return "value";
break;
default:
return "unknown_ASTType";
}

View File

@@ -1,7 +1,28 @@
#include "ASTTransformation.h"
ASTTransformation::ASTTransformation() {
//
ASTTransformation::ASTTransformation(Importer *importerIn) {
importer = importerIn;
//Set up language level special scope. (the final scope checked)
//Note the NULL type
languageLevelScope["+"].push_back( new NodeTree<ASTData>("function", ASTData(function, Symbol("+", true), NULL)));
languageLevelScope["-"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("-", true), NULL)));
languageLevelScope["*"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("*", true), NULL)));
languageLevelScope["&"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("&", true), NULL)));
languageLevelScope["--"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("--", true), NULL)));
languageLevelScope["++"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("++", true), NULL)));
languageLevelScope["=="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("==", true), NULL)));
languageLevelScope["<="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("<=", true), NULL)));
languageLevelScope[">="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol(">=", true), NULL)));
languageLevelScope["<"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("<", true), NULL)));
languageLevelScope[">"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol(">", true), NULL)));
languageLevelScope["&&"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("&&", true), NULL)));
languageLevelScope["||"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("||", true), NULL)));
languageLevelScope["!"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("!", true), NULL)));
languageLevelScope["*="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("*=", true), NULL)));
languageLevelScope["+="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("+=", true), NULL)));
languageLevelScope["-="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("-=", true), NULL)));
languageLevelScope["."].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol(".", true), NULL)));
languageLevelScope["->"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("->", true), NULL)));
}
ASTTransformation::~ASTTransformation() {
@@ -9,113 +30,455 @@ ASTTransformation::~ASTTransformation() {
}
//Entry point for turning a parse tree into an AST.
NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
    //Set up top scope: no enclosing scope yet, and no argument types to match against
    NodeTree<ASTData>* topScope = NULL;
    std::vector<Type> noTypes;
    return transform(from, topScope, noTypes);
}
NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from, NodeTree<ASTData>* scope, std::vector<Type> types) {
Symbol current = from->getData();
std::string name = current.getName();
NodeTree<ASTData>* newNode;
NodeTree<ASTData>* newNode = NULL;
std::vector<NodeTree<Symbol>*> children = from->getChildren();
std::set<int> skipChildren;
if (name == "translation_unit") {
newNode = new NodeTree<ASTData>(name, ASTData(translation_unit));
scope = newNode;
} else if (name == "interpreter_directive") {
newNode = new NodeTree<ASTData>(name, ASTData(interpreter_directive));
} else if (name == "import" && !current.isTerminal()) {
newNode = new NodeTree<ASTData>(name, ASTData(import, Symbol(concatSymbolTree(children[0]), true)));
std::string toImport = concatSymbolTree(children[0]);
newNode = new NodeTree<ASTData>(name, ASTData(import, Symbol(toImport, true)));
//Do the imported file too
NodeTree<ASTData>* outsideTranslationUnit = importer->import(toImport + ".krak");
scope->getDataRef()->scope[toImport].push_back(outsideTranslationUnit); //Put this transation_unit in the scope as it's files name
//Now add it to scope
for (auto i = outsideTranslationUnit->getDataRef()->scope.begin(); i != outsideTranslationUnit->getDataRef()->scope.end(); i++)
for (auto j : i->second)
scope->getDataRef()->scope[i->first].push_back(j);
return newNode; // Don't need children of import
} else if (name == "identifier") {
newNode = new NodeTree<ASTData>(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true)));
//Make sure we get the entire name
std::string lookupName = concatSymbolTree(from);
std::cout << "Looking up: " << lookupName << std::endl;
newNode = scopeLookup(scope, lookupName, types);
if (newNode == NULL) {
std::cout << "scope lookup error! Could not find " << lookupName << " in identifier " << std::endl;
throw "LOOKUP ERROR: " + lookupName;
} else if (newNode->getDataRef()->symbol.getName() !=lookupName) {
//This happens when the lookup name denotes a member of an object, i.e. obj.foo
//The newNode points to obj, not foo.
}
//newNode = new NodeTree<ASTData>(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true)));
} else if (name == "type_def") {
std::string typeAlias = concatSymbolTree(children[0]);
//If it is an alisis of a type
if (children[1]->getData().getName() == "type") {
newNode = new NodeTree<ASTData>(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias), typeFromString(concatSymbolTree(children[1]), scope)));
skipChildren.insert(1); //Don't want any children, it's unnecessary for ailising
} else { //Is a struct or class
newNode = new NodeTree<ASTData>(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias)));
newNode->getDataRef()->valueType = new Type(newNode); //Type is self-referential since this is the definition
}
scope->getDataRef()->scope[typeAlias].push_back(newNode);
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
scope = newNode;
skipChildren.insert(0); //Identifier lookup will be ourselves, as we just added ourselves to the scope
//return newNode;
} else if (name == "function") {
newNode = new NodeTree<ASTData>(name, ASTData(function, Symbol(concatSymbolTree(children[1]), true), ASTData::strToType(concatSymbolTree(children[0]))));
std::string functionName = concatSymbolTree(children[1]);
newNode = new NodeTree<ASTData>(name, ASTData(function, Symbol(functionName, true), typeFromString(concatSymbolTree(children[0]), scope)));
skipChildren.insert(0);
skipChildren.insert(1);
scope->getDataRef()->scope[functionName].push_back(newNode);
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
scope = newNode;
// auto transChildren = transformChildren(children, skipChildren, scope, types);
// std::cout << functionName << " ";
// for (auto i : transChildren)
// std::cout << "||" << i->getDataRef()->toString() << "|| ";
// std::cout << "??||" << std::endl;
// newNode->addChildren(transChildren);
// return newNode;
std::cout << "finished function " << functionName << std::endl;
} else if (name == "code_block") {
newNode = new NodeTree<ASTData>(name, ASTData(code_block));
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
scope = newNode;
} else if (name == "typed_parameter") {
newNode = transform(children[1]); //Transform to get the identifier
newNode->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0])); //Get the type (left child) and set our new identifer to be that type
//newNode = transform(children[1]); //Transform to get the identifier
std::string parameterName = concatSymbolTree(children[1]);
std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type
newNode = new NodeTree<ASTData>("identifier", ASTData(identifier, Symbol(parameterName, true), typeFromString(typeString, scope)));
scope->getDataRef()->scope[parameterName].push_back(newNode);
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
return newNode;
} else if (name == "expression") {
} else if (name == "boolean_expression" || name == "and_boolean_expression" || name == "bool_exp") {
//If this is an actual part of an expression, not just a premoted term
if (children.size() > 1) {
std::string functionCallName = concatSymbolTree(children[1]);
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
//We do children first so we can do appropriate scope searching with types (yay operator overloading!)
skipChildren.insert(1);
std::vector<NodeTree<ASTData>*> transformedChildren = transformChildren(children, skipChildren, scope, types);
std::string functionCallString = concatSymbolTree(children[1]);
NodeTree<ASTData>* function = scopeLookup(scope, functionCallString, transformedChildren);
if (function == NULL) {
std::cout << "scope lookup error! Could not find " << functionCallString << " in boolean stuff " << std::endl;
throw "LOOKUP ERROR: " + functionCallString;
}
newNode = new NodeTree<ASTData>(functionCallString, ASTData(function_call, function->getDataRef()->valueType));
newNode->addChild(function); // First child of function call is a link to the function
newNode->addChildren(transformedChildren);
} else {
return transform(children[0]); //Just a promoted term, so do child
//std::cout << children.size() << std::endl;
if (children.size() == 0)
return new NodeTree<ASTData>();
return transform(children[0], scope, types); //Just a promoted term, so do child
}
} else if (name == "term") {
//If this is an actual part of an expression, not just a premoted factor
if (children.size() > 1) {
//Here's the order of ops stuff
} else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad" || name == "access_operation") { //unarad can ride through, it should always just be a promoted child
//If this is an actual part of an expression, not just a premoted child
if (children.size() > 2) {
NodeTree<ASTData>* lhs = transform(children[0], scope); //LHS does not inherit types
NodeTree<ASTData>* rhs;
if (name == "access_operation")
rhs = transform(children[2], lhs->getDataRef()->valueType->typeDefinition, types); //If an access operation, then the right side will be in the lhs's type's scope
else
rhs = transform(children[2], scope, types);
std::string functionCallName = concatSymbolTree(children[1]);
//std::cout << "scope lookup from expression or similar" << std::endl;
std::vector<NodeTree<ASTData>*> transformedChildren; transformedChildren.push_back(lhs); transformedChildren.push_back(rhs);
NodeTree<ASTData>* function = scopeLookup(scope, functionCallName, transformedChildren);
if (function == NULL) {
std::cout << "scope lookup error! Could not find " << functionCallName << " in expression " << std::endl;
throw "LOOKUP ERROR: " + functionCallName;
}
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
skipChildren.insert(1);
newNode->addChild(function); // First child of function call is a link to the function definition
newNode->addChild(lhs);
newNode->addChild(rhs);
if (name == "access_operation")
std::cout << "Access Operation: " << lhs->getDataRef()->symbol.getName() << " : " << rhs->getDataRef()->symbol.getName() << std::endl;
std::cout << functionCallName << " - " << function->getName() << " has value type " << function->getDataRef()->valueType << " and rhs " << rhs->getDataRef()->valueType << std::endl;
//Set the value of this function call
if (function->getDataRef()->valueType)
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
else if (rhs->getDataRef()->valueType)
newNode->getDataRef()->valueType = rhs->getDataRef()->valueType;
else
newNode->getDataRef()->valueType = NULL;
std::cout << "function call to " << functionCallName << " - " << function->getName() << " is now " << newNode->getDataRef()->valueType << std::endl;
return newNode;
//skipChildren.insert(1);
} else {
return transform(children[0]); //Just a promoted factor, so do child
return transform(children[0], scope, types); //Just a promoted child, so do it instead
}
} else if (name == "factor") { //Do factor here, as it has all the weird unary operators
//If this is an actual part of an expression, not just a premoted child
//NO SUPPORT FOR CASTING YET
if (children.size() == 2) {
std::string funcName = concatSymbolTree(children[0]);
NodeTree<ASTData>* param;
if (funcName == "*" || funcName == "&" || funcName == "++" || funcName == "--" || funcName == "-" || funcName == "!" || funcName == "~")
param = transform(children[1], scope, types);
else
funcName = concatSymbolTree(children[1]), param = transform(children[0], scope, types);
//std::cout << "scope lookup from factor" << std::endl;
std::vector<NodeTree<ASTData>*> transformedChildren; transformedChildren.push_back(param);
NodeTree<ASTData>* function = scopeLookup(scope, funcName, transformedChildren);
if (function == NULL) {
std::cout << "scope lookup error! Could not find " << funcName << " in factor " << std::endl;
throw "LOOKUP ERROR: " + funcName;
}
newNode = new NodeTree<ASTData>(funcName, ASTData(function_call, Symbol(funcName, true)));
newNode->addChild(function);
newNode->addChild(param);
if (function->getDataRef()->valueType)
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
else
newNode->getDataRef()->valueType = param->getDataRef()->valueType;
return newNode;
} else {
return transform(children[0], scope, types); //Just a promoted child, so do it instead
}
} else if (name == "factor") {
return transform(children[0]); //Just a premoted number or function call or something, so use it instead
} else if (name == "boolean_expression") {
newNode = new NodeTree<ASTData>(name, ASTData(boolean_expression));
} else if (name == "statement") {
newNode = new NodeTree<ASTData>(name, ASTData(statement));
} else if (name == "if_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(if_statement));
} else if (name == "while_loop") {
newNode = new NodeTree<ASTData>(name, ASTData(while_loop));
} else if (name == "for_loop") {
newNode = new NodeTree<ASTData>(name, ASTData(for_loop));
} else if (name == "return_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(return_statement));
} else if (name == "assignment_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(assignment_statement));
std::string assignFuncName = concatSymbolTree(children[1]);
if (assignFuncName == "=") {
newNode->addChild(transform(children[0], scope, types));
newNode->addChild(transform(children[2], scope, types));
} else {
//For assignments like += or *=, expand the syntatic sugar.
NodeTree<ASTData>* lhs = transform(children[0], scope, types);
NodeTree<ASTData>* rhs = transform(children[2], scope, types);
std::vector<NodeTree<ASTData>*> transformedChildren; transformedChildren.push_back(lhs); transformedChildren.push_back(rhs);
std::string functionName = assignFuncName.substr(0,1);
NodeTree<ASTData>* childCall = new NodeTree<ASTData>(functionName, ASTData(function_call, Symbol(functionName, true)));
NodeTree<ASTData>* functionDef = scopeLookup(scope, functionName, transformedChildren);
if (functionDef == NULL) {
std::cout << "scope lookup error! Could not find " << functionName << " in assignment_statement " << std::endl;
throw "LOOKUP ERROR: " + functionName;
}
childCall->addChild(functionDef); //First child of function call is definition of the function
childCall->addChild(lhs);
childCall->addChild(rhs);
newNode->addChild(lhs);
newNode->addChild(childCall);
}
return newNode;
} else if (name == "declaration_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(declaration_statement));
NodeTree<ASTData>* newIdentifier = transform(children[1]); //Transform the identifier
newIdentifier->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0]));//set the type of the identifier
// NodeTree<ASTData>* newIdentifier = transform(children[1], scope); //Transform the identifier
// newIdentifier->getDataRef()->valueType = Type(concatSymbolTree(children[0]));//set the type of the identifier
std::string newIdentifierStr = concatSymbolTree(children[1]);
std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type
Type* identifierType = typeFromString(typeString, scope);
NodeTree<ASTData>* newIdentifier = new NodeTree<ASTData>("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), identifierType));
scope->getDataRef()->scope[newIdentifierStr].push_back(newIdentifier);
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
//Now we don't do this thing
// if (identifierType->typeDefinition) {
// //Is a custom type. Populate this declaration's scope with it's inner declarations
// std::vector<NodeTree<ASTData>*> definitions = identifierType->typeDefinition->getChildren();
// for (auto i : definitions) {
// //Point to the identifier. May need to change so it points to the declaration or something, with new declarations.....
// newIdentifier->getDataRef()->scope[i->get(0)->getDataRef()->symbol.getName()] = i->get(0); //make each declaration's name point to it's definition, like above
// }
// }
newNode->addChild(newIdentifier);
skipChildren.insert(0); //These, the type and the identifier, have been taken care of.
skipChildren.insert(1);
} else if (name == "if_comp") {
newNode = new NodeTree<ASTData>(name, ASTData(if_comp));
newNode->addChild(new NodeTree<ASTData>("identifier", ASTData(identifier, Symbol(concatSymbolTree(children[0]),true))));
skipChildren.insert(0); //Don't do the identifier. The identifier lookup will fail. That's why we do it here.
} else if (name == "simple_passthrough") {
newNode = new NodeTree<ASTData>(name, ASTData(simple_passthrough));
} else if (name == "function_call") {
//children[0] is scope
std::string functionCallName = concatSymbolTree(children[1]);
std::string functionCallName = concatSymbolTree(children[0]);
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
skipChildren.insert(1);
// if (function == NULL) {
// std::cout << "scope lookup error! Could not find " << functionCallName << " in function_call " << std::endl;
// throw "LOOKUP ERROR: " + functionCallName;
// }
skipChildren.insert(0);
std::vector<NodeTree<ASTData>*> transformedChildren = transformChildren(children, skipChildren, scope, types);
std::cout << "scope lookup from function_call: " << functionCallName << std::endl;
for (auto i : children)
std::cout << i << " : " << i->getName() << " : " << i->getDataRef()->getName() << std::endl;
NodeTree<ASTData>* function = transform(children[0], scope, mapNodesToTypes(transformedChildren));
std::cout << "The thing: " << function << " : " << function->getName() << std::endl;
for (auto i : function->getChildren())
std::cout << i->getName() << " ";
std::cout << std::endl;
newNode->addChild(function);
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
newNode->addChildren(transformedChildren);
return newNode;
} else if (name == "parameter") {
return transform(children[0]); //Don't need a parameter node, just the value
} else if (name == "bool") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), boolean));
return transform(children[0], scope, types); //Don't need a parameter node, just the value
} else if (name == "type") {
std::string theConcat = concatSymbolTree(from); //We have no symbol, so this will concat our children
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(theConcat, true), typeFromString(theConcat, scope)));
} else if (name == "number") {
return transform(children[0]);
return transform(children[0], scope, types);
} else if (name == "integer") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), integer));
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(integer)));
} else if (name == "float") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), floating));
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(floating)));
} else if (name == "double") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), double_percision));
} else if (name == "string") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), char_string));
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(double_percision)));
} else if (name == "char") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), new Type(character, 1))); //Indirection of 1 for array
} else if (name == "string" || name == "triple_quoted_string") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), new Type(character, 1))); //Indirection of 1 for array
} else {
return new NodeTree<ASTData>();
}
// In general, iterate through children and do them. Might not do this for all children.
//Do all children but the ones we skip
for (int i = 0; i < children.size(); i++) {
if (skipChildren.find(i) == skipChildren.end()) {
NodeTree<ASTData>* transChild = transform(children[i]);
if (transChild->getData().type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData.
NodeTree<ASTData>* transChild = transform(children[i], scope, types);
if (transChild->getDataRef()->type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData.
newNode->addChild(transChild);
else
delete transChild;
}
}
return newNode;
}
//Shared helper used in several places: transform every child whose index is not in
//skipChildren and return the resulting AST nodes in their original order.
std::vector<NodeTree<ASTData>*> ASTTransformation::transformChildren(std::vector<NodeTree<Symbol>*> children, std::set<int> skipChildren, NodeTree<ASTData>* scope, std::vector<Type> types) {
    std::vector<NodeTree<ASTData>*> results;
    int position = 0;
    for (auto child = children.begin(); child != children.end(); ++child, ++position) {
        //Children the caller has already dealt with are left alone
        if (skipChildren.count(position))
            continue;
        NodeTree<ASTData>* converted = transform(*child, scope, types);
        //Keep only nodes that carry a real ASTData::ASTType; the rest are discarded
        if (converted->getDataRef()->type)
            results.push_back(converted);
        else
            delete converted;
    }
    return results;
}
std::vector<Type> ASTTransformation::mapNodesToTypes(std::vector<NodeTree<ASTData>*> nodes) {
std::vector<Type> types;
for (auto i : nodes)
types.push_back(*(i->getDataRef()->valueType));
return types;
}
std::string ASTTransformation::concatSymbolTree(NodeTree<Symbol>* root) {
std::string concatString;
std::string ourValue = root->getData().getValue();
std::string ourValue = root->getDataRef()->getValue();
if (ourValue != "NoValue")
concatString += ourValue;
std::vector<NodeTree<Symbol>*> children = root->getChildren();
for (int i = 0; i < children.size(); i++) {
concatString = concatSymbolTree(children[i]);
concatString += concatSymbolTree(children[i]);
}
return concatString;
}
//Overloaded with the actual children to allow us to handle operator methods
//Resolves `lookup` for a call whose already-transformed arguments are `nodes`.
//When `lookup` is a language level operator, the type definition of the first argument is
//searched first for a method named "operator" + lookup. Whatever that search finds, the value
//returned is the result of the type-based scopeLookup overload called with the value types of `nodes`.
NodeTree<ASTData>* ASTTransformation::scopeLookup(NodeTree<ASTData>* scope, std::string lookup, std::vector<NodeTree<ASTData>*> nodes) {
//
auto LLElementIterator = languageLevelScope.find(lookup);
if (LLElementIterator != languageLevelScope.end()) {
std::cout << "Checking for early method level operator overload" << std::endl;
std::string lookupOp = "operator" + lookup;
//Debug output: dump every argument node
for (auto i : nodes)
std::cout << i->getDataRef()->toString() << " ";
std::cout << std::endl;
NodeTree<ASTData>* operatorMethod = NULL;
//Only a first argument with a user defined type (non-NULL typeDefinition) can supply an operator method
//NOTE(review): nodes[0] is read without checking that nodes is non-empty - confirm every caller passes at least one node
//presumably slice(nodes,1,-1) yields every node after the first - verify against the slice helper
if (nodes[0]->getDataRef()->valueType && nodes[0]->getDataRef()->valueType->typeDefinition)
operatorMethod = scopeLookup(nodes[0]->getDataRef()->valueType->typeDefinition, lookupOp, mapNodesToTypes(slice(nodes,1,-1)));
if (operatorMethod) {
//Ok, so we construct
std::cout << "Early method level operator was found" << std::endl;
//return operatorMethod;
//NOTE(review): newNode, functionCallName, function, lhs and rhs are not declared in this function;
//the statements below look like they were pasted from transform() and still need adapting
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
newNode->addChild(function); // First child of function call is a link to the function definition
newNode->addChild(lhs);
newNode->addChild(rhs);
//Set the value of this function call
if (function->getDataRef()->valueType)
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
else if (rhs->getDataRef()->valueType)
newNode->getDataRef()->valueType = rhs->getDataRef()->valueType;
else
newNode->getDataRef()->valueType = NULL;
}
//NOTE(review): printed for every language level operator, even right after the "was found" message above
std::cout << "Early method level operator was NOT found" << std::endl;
}
//Fall through to the ordinary type-based lookup; operatorMethod itself is never returned
return scopeLookup(scope, lookup, mapNodesToTypes(nodes));
}
//Find the node that `lookup` refers to, starting in `scope` and walking outwards through
//each "~enclosing_scope" entry.
//  scope  - node whose scope map is searched first; a NULL scope skips straight to the language level check
//  lookup - name to resolve; a language level operator is searched for as "operator" + name
//  types  - argument types used to pick between overloads (not consulted for a type_def)
//Returns the matching node, the language level operator node when nothing in scope overloads it,
//or NULL when the name cannot be found at all.
NodeTree<ASTData>* ASTTransformation::scopeLookup(NodeTree<ASTData>* scope, std::string lookup, std::vector<Type> types) {
    //We first search the languageLevelScope to see if it's an operator. If so, we modify the lookup with a preceding "operator"
    auto LLElementIterator = languageLevelScope.find(lookup);
    if (LLElementIterator != languageLevelScope.end())
        lookup = "operator" + lookup;

    if (scope != NULL) {
        //Bind by reference: the map is only read, and copying it at every level of the recursion is wasted work
        const auto& scopeMap = scope->getDataRef()->scope;
        //Debug output: every name visible in this scope
        for (const auto& entry : scopeMap)
            std::cout << entry.first << " ";
        std::cout << std::endl;

        //Search the map
        auto elementIterator = scopeMap.find(lookup);
        if (elementIterator != scopeMap.end()) {
            for (auto candidate : elementIterator->second) {
                //Types and functions cannot have the same name, and types very apparently do not have parameter types, so check and short-circuit
                if (candidate->getDataRef()->type == type_def)
                    return candidate;
                std::vector<NodeTree<ASTData>*> children = candidate->getChildren();
                //All children but the last are compared as parameters
                size_t parameterCount = (children.size() > 0) ? children.size()-1 : 0;
                if (types.size() != parameterCount) {
                    std::cout << "Type sizes do not match between two " << lookup << "(" << types.size() << "," << parameterCount << "), types are: ";
                    for (auto& argType : types)
                        std::cout << argType.toString() << " ";
                    std::cout << std::endl;
                    continue;
                }
                bool typesMatch = true;
                for (size_t j = 0; j < types.size(); j++) {
                    Type* parameterType = children[j]->getDataRef()->valueType;
                    //A parameter without a value type cannot match anything; treat it as a mismatch rather than dereferencing NULL
                    if (parameterType == NULL || types[j] != *parameterType) {
                        typesMatch = false;
                        std::cout << "Types do not match between two " << lookup << std::endl;
                        break;
                    }
                }
                if (typesMatch)
                    return candidate;
            }
        }

        //if it doesn't exist, try the enclosing scope if it exists.
        auto enclosingIterator = scopeMap.find("~enclosing_scope");
        if (enclosingIterator != scopeMap.end()) {
            NodeTree<ASTData>* upperResult = scopeLookup(enclosingIterator->second[0], lookup, types);
            if (upperResult)
                return upperResult;
        }
    }

    std::cout << "could not find " << lookup << " in standard scope, checking for operator" << std::endl;
    //Note that we don't check for types. At some point we should, as we don't know how to add objects/structs without overloaded operators, etc
    //Also, we've already searched for the element because this is also how we keep track of operator overloading
    if (LLElementIterator != languageLevelScope.end()) {
        std::cout << "found it at language level as operator." << std::endl;
        return LLElementIterator->second[0];
    }
    std::cout << "Did not find, returning NULL" << std::endl;
    return NULL;
}
//Build a Type from its source spelling, e.g. "int" or "char**".
//  typeIn - the type name, optionally followed by '*'s; each trailing '*' adds one level of indirection
//  scope  - scope used to look up the name when it is not one of the built in types
//Returns a newly allocated Type. For a name that is not built in, baseType is none and
//typeDefinition is whatever scopeLookup returned for the name.
Type* ASTTransformation::typeFromString(std::string typeIn, NodeTree<ASTData>* scope) {
    int indirection = 0;
    ValueType baseType;
    NodeTree<ASTData>* typeDefinition = NULL;
    //Count the trailing '*'s, stopping at the start of the string so that an empty
    //or all-'*' name is never indexed out of range
    const int length = static_cast<int>(typeIn.size());
    while (indirection < length && typeIn[length - indirection - 1] == '*')
        indirection++;
    //The name with its trailing '*'s removed
    std::string edited = strSlice(typeIn, 0, -(indirection + 1));
    if (edited == "void")
        baseType = void_type;
    else if (edited == "bool")
        baseType = boolean;
    else if (edited == "int")
        baseType = integer;
    else if (edited == "float")
        baseType = floating;
    else if (edited == "double")
        baseType = double_percision;
    else if (edited == "char")
        baseType = character;
    else {
        //Not built in: resolve the name through the scope
        baseType = none;
        typeDefinition = scopeLookup(scope, edited);
        //std::cout << "scopeLookup of type " << edited << " returned " << typeDefinition << std::endl;
    }
    return new Type(baseType, typeDefinition, indirection);
}

View File

@@ -1,12 +1,33 @@
#include "CGenerator.h"
CGenerator::CGenerator() {
CGenerator::CGenerator() : generatorString("__C__") {
tabLevel = 0;
}
//Empty destructor body: no explicit cleanup is done here.
CGenerator::~CGenerator() {
}
//Generate one C file per AST plus a shell script that compiles them all together.
//  ASTs       - maps a name to its AST; each AST is generated into "<name>.c"
//  outputName - passed to cc via -o; the script itself is written to "<outputName>.sh"
//A file that cannot be opened is reported on stdout and nothing is written to it.
void CGenerator::generateCompSet(std::map<std::string, NodeTree<ASTData>*> ASTs, std::string outputName) {
    //Generate an entire set of files
    std::string buildString = "#!/bin/sh\ncc -std=c99 ";
    for (auto i = ASTs.begin(); i != ASTs.end(); i++) {
        //Every file is listed in the build command, whether or not it could be written
        buildString += i->first + ".c ";
        std::ofstream outputCFile;
        outputCFile.open(i->first + ".c");
        if (outputCFile.is_open()) {
            outputCFile << generate(i->second);
        } else {
            std::cout << "Cannot open file " << i->first << ".c" << std::endl;
        }
        outputCFile.close();
    }
    buildString += "-o " + outputName;
    std::ofstream outputBuild;
    outputBuild.open(outputName + ".sh");
    //Report a failure here the same way as for the C files instead of silently dropping the script
    if (outputBuild.is_open()) {
        outputBuild << buildString;
    } else {
        std::cout << "Cannot open file " << outputName << ".sh" << std::endl;
    }
    outputBuild.close();
}
std::string CGenerator::tabs() {
std::string returnTabs;
for (int i = 0; i < tabLevel; i++)
@@ -14,76 +35,229 @@ std::string CGenerator::tabs() {
return returnTabs;
}
std::string CGenerator::generate(NodeTree<ASTData>* from) {
//The enclosing object is for when we're generating the inside of object methods. They allow us to check scope lookups against the object we're in
std::string CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enclosingObject) {
ASTData data = from->getData();
std::vector<NodeTree<ASTData>*> children = from->getChildren();
std::string output = "";
std::string output = "";
switch (data.type) {
case translation_unit:
//Do nothing
//Do here because we may need the typedefs before the declarations of variables
for (int i = 0; i < children.size(); i++)
if (children[i]->getDataRef()->type == type_def)
output += generate(children[i], enclosingObject) + "\n";
//Declare everything in translation unit scope here. (allows stuff from other files, automatic forward declarations)
for (auto i = data.scope.begin(); i != data.scope.end(); i++) {
for (auto overloadedMembers : i->second) {
NodeTree<ASTData>* declaration = overloadedMembers;
std::vector<NodeTree<ASTData>*> decChildren = declaration->getChildren();
ASTData declarationData = declaration->getData();
switch(declarationData.type) {
case identifier:
output += ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "; /*identifier*/\n";
break;
case function:
{
if (decChildren.size() == 0) { //Not a real function, must be a built in passthrough {
output += "/* built in function: " + declarationData.toString() + " */\n";
break;
}
output += "\n" + ValueTypeToCType(declarationData.valueType) + " ";
std::string nameDecoration, parameters;
for (int j = 0; j < decChildren.size()-1; j++) {
if (j > 0)
parameters += ", ";
parameters += ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j], enclosingObject);
nameDecoration += "_" + ValueTypeToCTypeDecoration(decChildren[j]->getData().valueType);
}
output += CifyFunctionName(declarationData.symbol.getName()) + nameDecoration + "(" + parameters + "); /*func*/\n";
break;
}
case type_def:
//type
output += "/*typedef " + declarationData.symbol.getName() + " */\n";
break;
default:
//std::cout << "Declaration? named " << declaration->getName() << " of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl;
output += "/*unknown declaration named " + declaration->getName() + "*/\n";
}
}
}
//Do here because we need the newlines
for (int i = 0; i < children.size(); i++)
if (children[i]->getDataRef()->type != type_def)
output += generate(children[i], enclosingObject) + "\n";
return output;
break;
case interpreter_directive:
//Do nothing
break;
case import:
return "#include \"" + data.symbol.getName() + "\"\n";
break;
return "/* would import \"" + data.symbol.getName() + "\" but....*/\n";
//return "#include <" + data.symbol.getName() + ">\n";
case identifier:
return data.symbol.getName();
break;
case function:
output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "(";
for (int i = 0; i < children.size()-1; i++) {
if (i > 0)
output += ", ";
output += ASTData::ValueTypeToString(children[i]->getData().valueType) + " " + generate(children[i]);
{
//If we're in an object method, and our enclosing scope is that object, we're a member of the object and should use the self reference.
std::string preName;
if (enclosingObject && enclosingObject->getDataRef()->scope.find(data.symbol.getName()) != enclosingObject->getDataRef()->scope.end())
preName += "self->";
if (false)
for (int j = 0; j < children.size()-1; j++)
preName += ValueTypeToCType(children[j]->getData().valueType) + "_";
return preName + CifyFunctionName(data.symbol.getName()); //Cifying does nothing if not an operator overload
}
case type_def:
if (children.size() == 0) {
return "typedef " + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + ";";
} else {
std::string objectString = "typedef struct __struct_dummy_" + data.symbol.getName() + "__ {\n";
std::string postString; //The functions have to be outside the struct definition
for (int i = 0; i < children.size(); i++) {
std::cout << children[i]->getName() << std::endl;
if (children[i]->getName() == "function") //If object method
postString += generateObjectMethod(from, children[i]) + "\n";
else
objectString += generate(children[i], enclosingObject) + "\n";
}
objectString += "} " + data.symbol.getName() + ";";
return objectString + postString; //Functions come after the declaration of the struct
}
output+= ")\n" + generate(children[children.size()-1]);
case function:
{
output += "\n" + ValueTypeToCType(data.valueType) + " ";
std::string nameDecoration, parameters;
for (int j = 0; j < children.size()-1; j++) {
if (j > 0)
parameters += ", ";
parameters += ValueTypeToCType(children[j]->getData().valueType) + " " + generate(children[j], enclosingObject);
nameDecoration += "_" + ValueTypeToCTypeDecoration(children[j]->getData().valueType);
}
output += CifyFunctionName(data.symbol.getName()) + nameDecoration + "(" + parameters + ")\n" + generate(children[children.size()-1], enclosingObject);
return output;
break;
}
case code_block:
output += tabs() + "{\n";
output += "{\n";
tabLevel++;
for (int i = 0; i < children.size(); i++)
output += generate(children[i]);
for (int i = 0; i < children.size(); i++) {
//std::cout << "Line " << i << std::endl;
std::string line = generate(children[i], enclosingObject);
//std::cout << line << std::endl;
output += line;
}
tabLevel--;
output += tabs() + "}";
return output;
break;
case expression:
output += " " + data.symbol.getName() + ", ";
break;
case boolean_expression:
output += " " + data.symbol.getName() + " ";
break;
case statement:
return tabs() + generate(children[0]) + ";\n";
break;
return tabs() + generate(children[0], enclosingObject) + ";\n";
case if_statement:
output += "if (" + generate(children[0]) + ") \n" + generate(children[1]);
output += "if (" + generate(children[0], enclosingObject) + ")\n\t" + generate(children[1], enclosingObject);
if (children.size() > 2)
output += " else " + generate(children[2]);
output += " else " + generate(children[2], enclosingObject);
return output;
case while_loop:
output += "while (" + generate(children[0], enclosingObject) + ")\n\t" + generate(children[1], enclosingObject);
return output;
case for_loop:
//The strSlice's are there to get ride of an unwanted return and an unwanted semicolon(s)
output += "for (" + strSlice(generate(children[0], enclosingObject),0,-3) + generate(children[1], enclosingObject) + ";" + strSlice(generate(children[2], enclosingObject),0,-3) + ")\n\t" + generate(children[3], enclosingObject);
return output;
break;
case return_statement:
return "return " + generate(children[0]);
if (children.size())
return "return " + generate(children[0], enclosingObject);
else
return "return";
case assignment_statement:
return generate(children[0]) + " = " + generate(children[1]);
return generate(children[0], enclosingObject) + " = " + generate(children[1], enclosingObject);
case declaration_statement:
return ASTData::ValueTypeToString(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]);
if (children.size() == 1)
return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0], enclosingObject) + ";";
else
return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0], enclosingObject) + " = " + generate(children[1], enclosingObject) + ";";
case if_comp:
if (generate(children[0], enclosingObject) == generatorString)
return generate(children[1], enclosingObject);
return "";
case simple_passthrough:
return strSlice(generate(children[0], enclosingObject), 3, -4);
case function_call:
{
//NOTE: The first (0th) child of a function call node is the declaration of the function
//Handle operators specially for now. Will later replace with
//Inlined functions in the standard library
std::string name = data.symbol.getName();
if (name == "+" || name == "-" || name == "*" || name == "/") {
return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))";
// std::string name = data.symbol.getName();
// std::cout << name << " == " << children[0]->getData().symbol.getName() << std::endl;
std::string name = children[0]->getDataRef()->symbol.getName();
ASTType funcType = children[0]->getDataRef()->type;
std::cout << "Doing function: " << name << std::endl;
//Test for specail functions only if what we're testing is, indeed, the definition, not a function call that returns a callable function pointer
if (funcType == function) {
if (name == "++" || name == "--")
return generate(children[1], enclosingObject) + name;
if (name == "*" && children.size() == 2) //Is dereference, not multiplication
return "*(" + generate(children[1], enclosingObject) + ")";
if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!="
|| name == "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=" || name == "||"
|| name == "&&" || name == "!" )
return "((" + generate(children[1], enclosingObject) + ")" + name + "(" + generate(children[2], enclosingObject) + "))";
else if (name == "." || name == "->") {
if (children.size() == 1)
return "/*dot operation with one child*/" + generate(children[0], enclosingObject) + "/*end one child*/";
//If this is accessing an actual function, find the function in scope and take the appropriate action. Probabally an object method
if (children[2]->getDataRef()->type == function) {
std::string functionName = children[2]->getDataRef()->symbol.getName();
NodeTree<ASTData>* possibleObjectType = children[1]->getDataRef()->valueType->typeDefinition;
//If is an object method, generate it like one. Needs extension/modification for inheritence
if (possibleObjectType && possibleObjectType->getDataRef()->scope.find(functionName) != possibleObjectType->getDataRef()->scope.end()) {
std::string nameDecoration;
std::vector<NodeTree<ASTData>*> functionDefChildren = children[2]->getChildren(); //The function def is the rhs of the access operation
std::cout << "Decorating (in access-should be object) " << name << " " << functionDefChildren.size() << std::endl;
for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++)
nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType);
/*HERE*/ return possibleObjectType->getDataRef()->symbol.getName() +"__" + CifyFunctionName(functionName) + nameDecoration + "(" + (name == "." ? "&" : "") + generate(children[1], enclosingObject) + ",";
//The comma lets the upper function call know we already started the param list
//Note that we got here from a function call. We just pass up this special case and let them finish with the perentheses
} else {
std::cout << "Is not in scope or not type" << std::endl;
return "((" + generate(children[1], enclosingObject) + ")" + name + functionName + ")";
}
} else {
//return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")";
return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2]) + ")";
}
} else {
//It's a normal function call, not a special one or a method or anything. Name decorate.
std::vector<NodeTree<ASTData>*> functionDefChildren = children[0]->getChildren();
std::cout << "Decorating (none-special)" << name << " " << functionDefChildren.size() << std::endl;
std::string nameDecoration;
for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++)
nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType);
//Check to see if we're inside of an object and this is a method call
bool isSelfObjectMethod = enclosingObject && contains(enclosingObject->getChildren(), children[0]);
if (isSelfObjectMethod)
output += enclosingObject->getDataRef()->symbol.getName() +"__";
/*HERE*/ output += CifyFunctionName(name) + nameDecoration + "(";
if (isSelfObjectMethod)
output += children.size() > 1 ? "self," : "self";
}
} else {
//This part handles cases where our definition isn't the function definition (that is, it is probabally the return from another function)
//It's probabally the result of an access function call (. or ->) to access an object method.
std::string functionCallSource = generate(children[0], enclosingObject);
if (functionCallSource[functionCallSource.size()-1] == ',') //If it's a member method, it's already started the parameter list.
output += children.size() > 1 ? functionCallSource : functionCallSource.substr(0, functionCallSource.size()-1);
else
output += functionCallSource + "(";
}
output += data.symbol.getName() + "(";
for (int i = 0; i < children.size(); i++)
for (int i = 1; i < children.size(); i++) //children[0] is the declaration
if (i < children.size()-1)
output += generate(children[i]) + ", ";
else output += generate(children[i]);
output += generate(children[i], enclosingObject) + ", ";
else
output += generate(children[i], enclosingObject);
output += ") ";
return output;
}
@@ -94,32 +268,141 @@ std::string CGenerator::generate(NodeTree<ASTData>* from) {
std::cout << "Nothing!" << std::endl;
}
for (int i = 0; i < children.size(); i++)
output += generate(children[i]);
output += generate(children[i], enclosingObject);
return output;
}
std::string CGenerator::ValueTypeToCType(ValueType type) {
switch (type) {
//Generates the C function for a method of an object.
//enclosingObject - the node of the object the method belongs to
//from            - the method's function node; its last child is the body, the ones before it are the parameters
//The emitted function is named <object name>__<cified method name><parameter decoration> and takes
//a pointer to the object, called self, as its first parameter.
std::string CGenerator::generateObjectMethod(NodeTree<ASTData>* enclosingObject, NodeTree<ASTData>* from) {
	ASTData methodData = from->getData();

	//Copy the object's type so that it can be turned into a pointer without touching the original
	Type selfType = *(enclosingObject->getDataRef()->valueType);
	selfType.indirection++;

	std::vector<NodeTree<ASTData>*> methodChildren = from->getChildren();

	//Every child except the last one is a parameter
	std::string decoration;
	std::string extraParameters;
	for (int paramIndex = 0; paramIndex < methodChildren.size()-1; paramIndex++) {
		NodeTree<ASTData>* parameter = methodChildren[paramIndex];
		extraParameters += ", " + ValueTypeToCType(parameter->getData().valueType) + " " + generate(parameter);
		decoration += "_" + ValueTypeToCTypeDecoration(parameter->getData().valueType);
	}

	std::string signature = "\n" + ValueTypeToCType(methodData.valueType) + " ";
	signature += enclosingObject->getDataRef()->symbol.getName() + "__";
	signature += CifyFunctionName(methodData.symbol.getName()) + decoration;
	signature += "(" + ValueTypeToCType(&selfType) + " self" + extraParameters + ")\n";

	//Pass in the object so we can properly handle access to member stuff
	return signature + generate(methodChildren[methodChildren.size()-1], enclosingObject);
}
//Returns the C spelling of a Type: the base type name followed by one '*' per level of indirection.
//A Type with baseType none is spelled with the name of its type definition when it has one, "none" otherwise.
//Base types without a case below are spelled "unknown_ValueType".
std::string CGenerator::ValueTypeToCType(Type *type) {
	std::string return_type;
	switch (type->baseType) {
		case none:
			if (type->typeDefinition)
				return_type = type->typeDefinition->getDataRef()->symbol.getName();
			else
				return_type = "none";
			break;
		case void_type:
			return_type = "void";
			break;
		case boolean:
			return_type = "bool";
			break;
		case integer:
			return_type = "int";
			break;
		case floating:
			return_type = "float";
			break;
		case double_percision:
			return_type = "double";
			break;
		case character:
			return_type = "char";
			break;
		default:
			return_type = "unknown_ValueType";
			break;
	}
	//No case may return early, otherwise the pointer stars below would be lost
	for (int i = 0; i < type->indirection; i++)
		return_type += "*";
	return return_type;
}
//Returns the text used for a Type inside a decorated function name.
//It is the base type name followed by "_P__" once per level of indirection, so the result
//contains no '*' characters.
std::string CGenerator::ValueTypeToCTypeDecoration(Type *type) {
	std::string decoration;
	switch (type->baseType) {
		case void_type:        decoration = "void";   break;
		case boolean:          decoration = "bool";   break;
		case integer:          decoration = "int";    break;
		case floating:         decoration = "float";  break;
		case double_percision: decoration = "double"; break;
		case character:        decoration = "char";   break;
		case none:
			//A type without a base type is named after its definition, if it has one
			if (!type->typeDefinition)
				decoration = "none";
			else
				decoration = type->typeDefinition->getDataRef()->symbol.getName();
			break;
		default:
			decoration = "unknown_ValueType";
			break;
	}
	//One marker per level of indirection
	for (int remaining = type->indirection; remaining > 0; remaining--)
		decoration += "_P__";
	return decoration;
}
//Turns a function name that may contain operator characters into a valid C identifier by
//replacing every operator with "_<word>_" (for example "+" becomes "_plus_").
//Names without operator characters are returned unchanged.
//The table is ordered longest operator first. If the single character entries were tried first,
//"++" would be consumed as two "+"s and no multi character entry could ever match.
std::string CGenerator::CifyFunctionName(std::string name) {
	struct OperatorWord {
		const char* op;
		const char* word;
	};
	static const OperatorWord operatorsToReplace[] = {
		//three characters
		{ "<<=", "doubleleftequals" },
		{ ">>=", "doublerightequals" },
		//two characters
		{ "++", "doubleplus" },
		{ "--", "doubleminus" },
		{ "<<", "doubleleft" },
		{ ">>", "doubleright" },
		{ "==", "doubleequals" },
		{ "!=", "notequals" },
		{ "<=", "lessthanequals" },
		{ ">=", "greaterthanequals" },
		{ "&&", "doubleamprsnd" },
		{ "||", "doublepipe" },
		{ "+=", "plusequals" },
		{ "-=", "minusequals" },
		{ "/=", "divequals" },
		{ "%=", "modequals" },
		{ "^=", "caratequals" },
		{ "&=", "amprsdequals" },
		{ "|=", "pipeequals" },
		{ "*=", "starequals" },
		{ "->", "arrow" },
		//one character
		{ "+", "plus" },
		{ "-", "minus" },
		{ "*", "star" },
		{ "/", "div" },
		{ "%", "mod" },
		{ "^", "carat" },
		{ "&", "amprsd" },
		{ "|", "pipe" },
		{ "~", "tilde" },
		{ "!", "exclamationpt" },
		{ ",", "comma" },
		{ "=", "equals" },
		{ "<", "lessthan" },
		{ ">", "greaterthan" }
	};
	const unsigned int length = sizeof(operatorsToReplace)/sizeof(operatorsToReplace[0]);
	for (unsigned int i = 0; i < length; i++) {
		const std::string op(operatorsToReplace[i].op);
		const std::string replacement = "_" + std::string(operatorsToReplace[i].word) + "_";
		size_t foundPos = name.find(op);
		while (foundPos != std::string::npos) {
			name.replace(foundPos, op.length(), replacement);
			//The replacement holds no operator characters, so continue searching after it
			foundPos = name.find(op, foundPos + replacement.length());
		}
	}
	return name;
}

View File

@@ -128,3 +128,8 @@ std::string GraphStructuredStack::toString() {
}
return tostring;
}
//Empties the stack by clearing both the gss container and the edges container.
//NOTE(review): if these containers hold owning raw pointers, clear() only drops the pointers and does not free what they point to - confirm ownership.
void GraphStructuredStack::clear() {
gss.clear();
edges.clear();
}

126
src/Importer.cpp Normal file
View File

@@ -0,0 +1,126 @@
#include "Importer.h"
//Stores the parser used for imports and fills the two symbol lists that import() applies to
//every parse tree: removeSymbols (fed to RemovalTransformation) and collapseSymbols
//(fed to CollapseTransformation). The order of both lists is the order they are applied in.
Importer::Importer(Parser* parserIn) {
	parser = parserIn;

	struct SymbolSpec {
		const char* name;
		bool isTerminal;
	};
	static const SymbolSpec symbolsToRemove[] = {
		{ "WS", false },
		{ "\\(", true },
		{ "\\)", true },
		{ "::", true },
		{ ";", true },
		{ "{", true },
		{ "}", true },
		{ "(", true },
		{ ")", true },
		{ "import", true }, //Don't need the actual text of the symbol
		{ "interpreter_directive", false },
		{ "if", true },
		{ "while", true },
		{ "__if_comp__", true },
		{ "comp_simple_passthrough", true },
		{ "typedef", true }
	};
	const unsigned int removeCount = sizeof(symbolsToRemove)/sizeof(symbolsToRemove[0]);
	for (unsigned int i = 0; i < removeCount; i++)
		removeSymbols.push_back(Symbol(symbolsToRemove[i].name, symbolsToRemove[i].isTerminal));

	//All of the collapsed symbols are non-terminals
	static const char* const symbolsToCollapse[] = {
		"opt_typed_parameter_list",
		"opt_parameter_list",
		"opt_import_list",
		"import_list",
		"statement_list",
		"parameter_list",
		"typed_parameter_list",
		"unorderd_list_part",
		"if_comp_pred",
		"declaration_block"
	};
	const unsigned int collapseCount = sizeof(symbolsToCollapse)/sizeof(symbolsToCollapse[0]);
	for (unsigned int i = 0; i < collapseCount; i++)
		collapseSymbols.push_back(Symbol(symbolsToCollapse[i], false));
}
//Destructor. The body is empty, so neither the parser nor the ASTs stored in imported are deleted here.
Importer::~Importer() {
//destructor
}
//Parses the given file and turns it into an AST, remembering the result so that each file is only processed once.
//Along the way three DOT graphs are written next to the input: "<fileName>out" (raw parse tree),
//"<fileName>out.transformed.dot" (after the removal and collapse transformations) and "<fileName>out.AST.dot".
//fileName - path of the source file to import
//Returns the AST, or NULL if a file could not be opened or the input could not be parsed.
NodeTree<ASTData>* Importer::import(std::string fileName) {
	//Check to see if we've already done it
	auto alreadyImported = imported.find(fileName);
	if (alreadyImported != imported.end())
		return alreadyImported->second;

	std::ifstream programInFile;
	std::ofstream outFile, outFileTransformed, outFileAST;
	std::string outputName = fileName + "out";

	programInFile.open(fileName);
	if (!programInFile.is_open()) {
		std::cout << "Problem opening programInFile " << fileName << "\n";
		return NULL;
	}
	outFile.open(outputName);
	if (!outFile.is_open()) {
		std::cout << "Probelm opening output file " << outputName << "\n";
		return NULL;
	}
	outFileTransformed.open((outputName + ".transformed.dot").c_str());
	if (!outFileTransformed.is_open()) {
		std::cout << "Probelm opening second output file " << outputName + ".transformed.dot" << "\n";
		return NULL;
	}
	outFileAST.open((outputName + ".AST.dot").c_str());
	if (!outFileAST.is_open()) {
		std::cout << "Probelm opening second output file " << outputName + ".AST.dot" << "\n";
		return NULL;
	}

	//Read the whole program into one string
	std::string programInputFileString, line;
	while(programInFile.good()) {
		getline(programInFile, line);
		programInputFileString.append(line+"\n");
	}
	programInFile.close();
	//std::cout << programInputFileString << std::endl;

	NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString);
	if (!parseTree) {
		//Nothing to transform. Stop here instead of handing a NULL tree to the transformations.
		//The failure is remembered just like a NULL AST would be, so the file is not parsed again.
		std::cout << "ParseTree returned from parser is NULL!" << std::endl;
		imported[fileName] = NULL;
		return NULL;
	}
	//std::cout << parseTree->DOTGraphString() << std::endl;
	outFile << parseTree->DOTGraphString() << std::endl;
	outFile.close();

	//Remove Transformations
	for (int i = 0; i < removeSymbols.size(); i++)
		parseTree = RemovalTransformation<Symbol>(removeSymbols[i]).transform(parseTree);
	//Collapse Transformations
	for (int i = 0; i < collapseSymbols.size(); i++)
		parseTree = CollapseTransformation<Symbol>(collapseSymbols[i]).transform(parseTree);

	if (parseTree) {
		outFileTransformed << parseTree->DOTGraphString() << std::endl;
	} else {
		std::cout << "Tree returned from transformation is NULL!" << std::endl;
	}
	outFileTransformed.close();

	//Call with ourself to allow the transformation to call us to import files that it needs
	NodeTree<ASTData>* AST = ASTTransformation(this).transform(parseTree);
	if (AST) {
		outFileAST << AST->DOTGraphString() << std::endl;
	} else {
		std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl;
	}
	outFileAST.close();

	imported[fileName] = AST;
	return AST;
}
//Returns a copy of the imported map, which import() fills with one entry per file name.
//An entry's AST is whatever import() stored for that file, so it may be NULL.
std::map<std::string, NodeTree<ASTData>*> Importer::getASTMap() {
return imported;
}

View File

@@ -114,3 +114,7 @@ void Lexer::test() {
std::cout << "Lexer tests passed\n";
}
//Sets currentPosition back to 0.
//NOTE(review): presumably this makes the lexer start from the beginning of its input again - confirm against the lexer's next-token code.
void Lexer::reset() {
currentPosition = 0;
}

View File

@@ -29,7 +29,12 @@ const bool ParseRule::operator!=(const ParseRule &other) {
}
//Returns a newly allocated copy of this rule.
//The lookahead vector is copied as well (when there is one), so the clone does not
//share its lookahead with the rule it was cloned from.
ParseRule* ParseRule::clone() {
	std::vector<Symbol>* newLookahead = NULL;
	if (lookahead)
		newLookahead = new std::vector<Symbol>(*lookahead);
	return new ParseRule(leftHandle, pointerIndex, rightSide, newLookahead);
}
void ParseRule::setLeftHandle(Symbol leftHandle) {

View File

@@ -7,6 +7,16 @@ Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalid
Parser::~Parser() {
}
//Writes the parse table to the given output file stream by forwarding to the table's own exportTable.
void Parser::exportTable(std::ofstream &file) {
//Do table
table.exportTable(file);
}
//Loads the parse table from the given raw data by forwarding to the table's own importTable.
void Parser::importTable(char* tableData) {
	table.importTable(tableData);
}
Symbol Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
Symbol symbol;
std::pair<std::string, bool> entry = std::make_pair(symbolString, isTerminal);
@@ -68,7 +78,7 @@ void Parser::loadGrammer(std::string grammerInputString) {
//Get next token
currToken = reader.word();
}
std::cout << "Parsed!\n";
//std::cout << "Parsed!\n";
// for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++)
// std::cout << loadedGrammer[i]->toString() << std::endl;
@@ -88,7 +98,7 @@ void Parser::createStateSet() {
std::queue<State*>* toDo = new std::queue<State*>();
toDo->push(zeroState);
//std::cout << "Begining for main set for loop" << std::endl;
while (toDo->front()) {
while (toDo->size()) {
//closure
closure(toDo->front());
//Add the new states
@@ -181,7 +191,7 @@ std::vector<Symbol>* Parser::incrementiveFollowSet(ParseRule* rule) {
}
}
followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
//delete symbolFirstSet;
delete symbolFirstSet;
rule->advancePointer();
}
if (rule->isAtEnd()) {
@@ -209,10 +219,13 @@ void Parser::closure(State* state) {
std::vector<ParseRule*>* stateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < stateTotal->size(); i++) {
ParseRule* currentStateRule = (*stateTotal)[i];
//If it's at it's end, move on. We can't advance it.
if(currentStateRule->isAtEnd())
continue;
for (std::vector<ParseRule*>::size_type j = 0; j < loadedGrammer.size(); j++) {
//If the current symbol in the rule is not null (rule completed) and it equals a grammer's left side
ParseRule* currentGramRule = loadedGrammer[j]->clone();
if ( !currentStateRule->isAtEnd() && currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) {
if (currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) {
//std::cout << (*stateTotal)[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl;
//Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
//std::cout << "Setting lookahead for " << currentGramRule->toString() << " in state " << state->toString() << std::endl;
@@ -225,6 +238,7 @@ void Parser::closure(State* state) {
//std::cout << (*stateTotal)[k]->toString() << std::endl;
(*stateTotal)[k]->addLookahead(currentGramRule->getLookahead());
isAlreadyInState = true;
delete currentGramRule;
break;
}
}
@@ -311,7 +325,7 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queu
std::string Parser::stateSetToString() {
std::string concat = "";
for (std::vector< State *>::size_type i = 0; i < stateSets.size(); i++) {
concat += stateSets[i]->toString();
concat += intToString(i) + " is " + stateSets[i]->toString();
}
return concat;
}

View File

@@ -9,6 +9,13 @@ RNGLRParser::~RNGLRParser() {
}
NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
//Throw away everything left over from a previous parse so the parser can be reused
input.clear();
gss.clear();
//Each queue has to be drained by popping that same queue
while(!toReduce.empty()) toReduce.pop();
while(!toShift.empty()) toShift.pop();
SPPFStepNodes.clear();
nullableParts.clear();
packedMap.clear();
//Check for no tokens
bool accepting = false;
@@ -27,6 +34,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
return new NodeTree<Symbol>();
}
lexer.reset();
lexer.setInput(inputString);
//Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
//It could be converted to on-line later.
@@ -42,7 +50,8 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
input.push_back(currentToken);
}
std::cout << "\nDone with Lexing\n" << std::endl;
// std::cout << "\nDone with Lexing, length:" << input.size() << std::endl;
// std::cout << input[0].toString() << std::endl;
// for (int i = 0; i < input.size(); i++)
@@ -50,13 +59,13 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
// std::cout << std::endl;
std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl;
//std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl;
//Frontier 0, new node with state 0
NodeTree<int>* v0 = gss.newNode(0);
gss.addToFrontier(0,v0);
std::cout << "Done setting up new frontier" << std::endl;
//std::cout << "Done setting up new frontier" << std::endl;
std::vector<ParseAction*> firstActions = *(table.get(0, input[0]));
for (std::vector<ParseAction*>::size_type i = 0; i < firstActions.size(); i++) {
@@ -71,17 +80,21 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
// std::cout << "GSS:\n" << gss.toString() << std::endl;
std::cout << "Starting parse loop" << std::endl;
//std::cout << "Starting parse loop" << std::endl;
for (int i = 0; i < input.size(); i++) {
// std::cout << "Checking if frontier " << i << " is empty" << std::endl;
if (gss.frontierIsEmpty(i)) {
std::cout << "Frontier " << i << " is empty." << std::endl;
std::cout << "Failed on " << input[i].toString() << std::endl;
//std::cout << "Frontier " << i << " is empty." << std::endl;
std::cout << "Parsing failed on " << input[i].toString() << std::endl;
std::cout << "Problem is on line: " << findLine(i) << std::endl;
std::cout << "Nearby is:" << std::endl;
int range = 5;
const int range = 10;
for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++)
std::cout << input[j].toString() << " ";
if (j == i)
std::cout << "||*||*||" << input[j].toString() << "||*||*|| ";
else
std::cout << input[j].toString() << " ";
std::cout << std::endl;
break;
}
@@ -98,7 +111,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
shifter(i);
//std::cout << "GSS:\n" << gss.toString() << std::endl;
}
std::cout << "Done with parsing loop, checking for acceptance" << std::endl;
//std::cout << "Done with parsing loop, checking for acceptance" << std::endl;
NodeTree<int>* accState = gss.frontierGetAccState(input.size()-1);
if (accState) {
std::cout << "Accepted!" << std::endl;
@@ -106,7 +119,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
}
std::cout << "Rejected!" << std::endl;
std::cout << "GSS:\n" << gss.toString() << std::endl;
// std::cout << "GSS:\n" << gss.toString() << std::endl;
return NULL;
}
@@ -131,7 +144,7 @@ void RNGLRParser::reducer(int i) {
//The end of the current path
NodeTree<int>* currentReached = currentPath[currentPath.size()-1];
std::cout << "Getting the shfit state for state " << currentReached->getData() << " and symbol " << reduction.symbol.toString() << std::endl;
//std::cout << "Getting the shift state for state " << currentReached->getData() << " and symbol " << reduction.symbol.toString() << std::endl;
int toState = table.getShift(currentReached->getData(), reduction.symbol)->shiftState;
//If reduction length is 0, then we make the new label the appropriate nullable parts
@@ -177,7 +190,7 @@ void RNGLRParser::reducer(int i) {
//std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl;
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
std::cout << "Action is " << actions[k]->toString() << std::endl;
//std::cout << "Action is " << actions[k]->toString() << std::endl;
if (actions[k]->action == ParseAction::SHIFT) {
toShift.push(std::make_pair(toStateNode, actions[k]->shiftState));
} else if (actions[k]->action == ParseAction::REDUCE && fullyReducesToNull(actions[k]->reduceRule)) {
@@ -201,7 +214,7 @@ void RNGLRParser::shifter(int i) {
while (!toShift.empty()) {
std::pair<NodeTree<int>*, int> shift = toShift.front();
toShift.pop();
std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl;
//std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl;
NodeTree<int>* shiftTo = gss.inFrontier(i+1, shift.second);
if (shiftTo) {
//std::cout << "State already existed, just adding edge" << std::endl;
@@ -220,7 +233,7 @@ void RNGLRParser::shifter(int i) {
gss.addEdge(shiftTo, shift.first, newLabel);
std::vector<ParseAction*> actions = *(table.get(shift.second, input[i+1]));
for (std::vector<ParseAction*>::size_type j = 0; j < actions.size(); j++) {
std::cout << "Adding action " << actions[j]->toString() << " to either nextShifts or toReduce" << std::endl;
//std::cout << "Adding action " << actions[j]->toString() << " to either nextShifts or toReduce" << std::endl;
//Shift
if (actions[j]->action == ParseAction::SHIFT) {
nextShifts.push(std::make_pair(shiftTo, actions[j]->shiftState));
@@ -339,11 +352,13 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std:
//if (newStates[i]->basisEquals(*((*stateSets)[j]))) {
stateAlreadyInAllStates = true;
//If it does exist, we should add it as the shift/goto in the action table
//std::cout << "newStates[" << i << "] == stateSets[" << j << "]" << std::endl;
if (!((*stateSets)[j]->basisEquals(*(newStates[i]))))
toDo->push((*stateSets)[j]);
(*stateSets)[j]->combineStates(*(newStates[i]));
//std::cout << j << "\t Hay, doing an inside loop state reductions!" << std::endl;
addStateReductionsToTable((*stateSets)[j]);
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
@@ -363,13 +378,15 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std:
void RNGLRParser::addStateReductionsToTable(State* state) {
std::vector<ParseRule*>* currStateTotal = state->getTotal();
//std::cout << currStateTotal->size() << "::" << state->getNumber() << std::endl;
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal->size(); i++) {
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
std::vector<Symbol>* lookahead = (*currStateTotal)[i]->getLookahead();
if ((*currStateTotal)[i]->isAtEnd()) {
for (std::vector<Symbol>::size_type j = 0; j < lookahead->size(); j++)
for (std::vector<Symbol>::size_type j = 0; j < lookahead->size(); j++) {
table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
}
//If this has an appropriate ruduction to null, get the reduce trees out
} else if (reducesToNull((*currStateTotal)[i])) {
//std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl;
@@ -476,3 +493,14 @@ std::vector<NodeTree<Symbol>*> RNGLRParser::getPathEdges(std::vector<NodeTree<in
pathEdges.push_back(gss.getEdge(path[i], path[i+1]));
return pathEdges;
}
// Counts the newline characters found in the values of the first tokenNum
// input tokens (input[0] .. input[tokenNum-1]) and returns that count.
// The result is therefore zero-based: 0 means no '\n' was seen in those tokens.
int RNGLRParser::findLine(int tokenNum) {
	int newlinesSeen = 0;
	for (int tokenIdx = 0; tokenIdx < tokenNum; ++tokenIdx) {
		const std::string text = input[tokenIdx].getValue();
		//Hop from one '\n' to the next instead of inspecting every character
		std::string::size_type hit = text.find('\n');
		while (hit != std::string::npos) {
			++newlinesSeen;
			hit = text.find('\n', hit + 1);
		}
	}
	return newlinesSeen;
}

View File

@@ -3,46 +3,55 @@
RegEx::RegEx(std::string inPattern) {
pattern = inPattern;
construct();
deperenthesize();
std::vector<RegExState*> ending;
begin = construct(&ending, inPattern);
//last one is goal state, add it to the end of all of these last states
for (std::vector<RegExState*>::size_type i = 0; i < ending.size(); i++)
ending[i]->addNext(NULL);
}
void RegEx::construct() {
std::vector<RegExState*> previousStates;
std::vector<RegExState*> currentStates;
std::stack<std::pair<std::vector<RegExState*>, std::vector<RegExState*> > > perenStack;
RegExState* RegEx::construct(std::vector<RegExState*>* ending, std::string pattern) {
//In the RegEx re-write, instead of doing complicated unperenthesising, we keep track of both the "front" and the "end" of a state.
//(these could be different if the state is perenthesezed)
std::vector<RegExState*> previousStatesBegin;
std::vector<RegExState*> previousStatesEnd;
std::vector<RegExState*> currentStatesBegin;
std::vector<RegExState*> currentStatesEnd;
bool alternating = false;
begin = new RegExState();
currentStates.push_back(begin);
RegExState* begin = new RegExState();
currentStatesBegin.push_back(begin);
currentStatesEnd.push_back(begin);
for (int i = 0; i < pattern.length(); i++) {
switch (pattern[i]) {
case '*':
{
//std::cout << "Star at " << i << " in " << pattern << std::endl;
// for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
// for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++)
// currentStates[j]->addNext(currentStates[k]);
currentStates[currentStates.size()-1]->addNext(currentStates[currentStates.size()-1]);
//NOTE: Because of the re-write, this is necessary again
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < currentStatesBegin.size(); k++)
currentStatesEnd[j]->addNext(currentStatesBegin[k]); //Make the ends point to the beginnings
//add all previous states to current states to enable skipping over the starred item
currentStates.insert(currentStates.end(), previousStates.begin(), previousStates.end());
currentStatesBegin.insert(currentStatesBegin.end(), previousStatesBegin.begin(), previousStatesBegin.end());
currentStatesEnd.insert(currentStatesEnd.end(), previousStatesEnd.begin(), previousStatesEnd.end());
}
break;
case '+':
{
//std::cout << "Plus at " << i << " in " << pattern << std::endl;
//OtherThingy
//current->addNext(current);
// for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
// for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++)
// currentStates[j]->addNext(currentStates[k]);
currentStates[currentStates.size()-1]->addNext(currentStates[currentStates.size()-1]);
//NOTE: Because of the re-write, this is necessary again
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < currentStatesBegin.size(); k++)
currentStatesEnd[j]->addNext(currentStatesBegin[k]); //Make the ends point to the beginnings
}
break;
case '?':
{
//std::cout << "Question at " << i << " in " << pattern << std::endl;
//add all previous states to current states to enable skipping over the questioned item
currentStates.insert(currentStates.end(), previousStates.begin(), previousStates.end());
currentStatesBegin.insert(currentStatesBegin.end(), previousStatesBegin.begin(), previousStatesBegin.end());
currentStatesEnd.insert(currentStatesEnd.end(), previousStatesEnd.begin(), previousStatesEnd.end());
}
break;
case '|':
@@ -57,59 +66,31 @@ void RegEx::construct() {
{
//std::cout << "Begin peren at " << i << " in " << pattern << std::endl;
//perentheses
//Create a peren node with an inner empty node
RegExState* next = new RegExState(new RegExState());
std::vector<RegExState*> innerEnds;
int perenEnd = findPerenEnd(pattern, i);
RegExState* innerBegin = construct(&innerEnds, strSlice(pattern, i+1, perenEnd));
i = perenEnd;
std::vector<RegExState*> innerBegins = *(innerBegin->getNextStates());
if (alternating) {
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++)
previousStates[j]->addNext(next);
//Save both current states here as well as the current preren
std::vector<RegExState*> savePreviousStates = previousStates;
currentStates.push_back(next);
std::vector<RegExState*> saveCurrentStates = currentStates;
perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates));
previousStates.clear();
currentStates.clear();
currentStates.push_back(next->getInner());
alternating = false;
for (std::vector<RegExState*>::size_type j = 0; j < previousStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < innerBegins.size(); k++)
previousStatesEnd[j]->addNext(innerBegins[k]);
currentStatesBegin.insert(currentStatesBegin.end(), innerBegins.begin(), innerBegins.end());
currentStatesEnd.insert(currentStatesEnd.end(), innerEnds.begin(), innerEnds.end());
} else {
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
currentStates[j]->addNext(next);
//Save both current states here as well as the current preren
std::vector<RegExState*> savePreviousStates = currentStates;
currentStates.clear();
currentStates.push_back(next);
std::vector<RegExState*> saveCurrentStates = currentStates;
perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates));
previousStates.clear();
currentStates.clear();
currentStates.push_back(next->getInner());
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < innerBegins.size(); k++)
currentStatesEnd[j]->addNext(innerBegins[k]);
previousStatesBegin = currentStatesBegin;
previousStatesEnd = currentStatesEnd;
currentStatesBegin = innerBegins;
currentStatesEnd = innerEnds;
}
//std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl;
alternating = false;
}
break;
case ')':
{
//std::cout << "End peren at " << i << " in " << pattern << std::endl;
//perentheses
//Pop off the states that will now be the previous states and the peren node which will now be the current node
std::pair<std::vector<RegExState*>, std::vector<RegExState*> > savedPair = perenStack.top();
perenStack.pop();
//Make the it so
previousStates = savedPair.first;
//Make sure the end of the inner stuff points back to the peren node
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
currentStates[j]->addNext(savedPair.second[savedPair.second.size()-1]);
//currentStates[j]->addNext(*(savedPair.second.end()));
currentStates.clear();
currentStates = savedPair.second;
}
break;
// ) does not need a case as we skip over it after finding it in ('s case
case '\\':
{
@@ -124,109 +105,33 @@ void RegEx::construct() {
RegExState* next = new RegExState(pattern[i]);
//If we're alternating, add next as the next for each previous state, and add self to currentStates
if (alternating) {
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++) {
previousStates[j]->addNext(next);
//std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << previousStates[j] << std::endl;
}
currentStates.push_back(next);
for (std::vector<RegExState*>::size_type j = 0; j < previousStatesEnd.size(); j++)
previousStatesEnd[j]->addNext(next);
currentStatesBegin.push_back(next);
currentStatesEnd.push_back(next);
alternating = false;
} else {
//If we're not alternating, add next as next for all the current states, make the current states the new
//previous states, and add ourself as the new current state.
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
currentStates[j]->addNext(next);
//std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
}
previousStates.clear();
previousStates = currentStates;
currentStates.clear();
currentStates.push_back(next);
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
currentStatesEnd[j]->addNext(next);
previousStatesBegin.clear();
previousStatesEnd.clear();
previousStatesBegin = currentStatesBegin;
previousStatesEnd = currentStatesEnd;
currentStatesBegin.clear();
currentStatesEnd.clear();
currentStatesBegin.push_back(next);
currentStatesEnd.push_back(next);
}
}
}
}
//last one is goal state
for (std::vector<RegExState*>::size_type i = 0; i < currentStates.size(); i++)
currentStates[i]->addNext(NULL);
(*ending) = currentStatesEnd;
return(begin);
}
// Rewrites the state graph rooted at `begin` in place so that no edge points at a
// "peren node" (a state whose getInner() is non-NULL) any more:
//   1. every reference to a peren node is replaced by the next-states of its inner node,
//   2. self-references stored in the peren node's own next-state list get the same treatment,
//   3. inside the parenthesised sub-graph, references back to the peren node are replaced
//      by the peren node's next-states.
// States are visited breadth-first through statesToProcess, which grows while it is being
// iterated; a state is appended only if it is not already in the list.
// Takes no parameters and returns nothing; it only mutates the next-state vectors.
void RegEx::deperenthesize() {
//std::cout << "About to de-perenthesize " << begin->toString() << std::endl;
//Now go through and expand the peren nodes to regular nodes
//NOTE(review): processedStates is declared but never used in this function
std::vector<RegExState*> processedStates;
std::vector<RegExState*> statesToProcess;
statesToProcess.push_back(begin);
for (std::vector<RegExState*>::size_type i = 0; i < statesToProcess.size(); i++) {
//Don't process the NULL (success) state
if (statesToProcess[i] == NULL)
continue;
std::vector<RegExState*>* nextStates = statesToProcess[i]->getNextStates();
for (std::vector<RegExState*>::size_type j = 0; j < nextStates->size(); j++) {
if ((*nextStates)[j] != NULL && (*nextStates)[j]->getInner() != NULL) {
//Fix all the next references pointing to the peren node to point to the inner nodes. (if more than one, push back to add others)
std::vector<RegExState*>* insideNextStates = (*nextStates)[j]->getInner()->getNextStates();
//std::cout << "insideNextStates = " << insideNextStates << " [0] " << (*insideNextStates)[0] << std::endl;
RegExState* perenState = (*nextStates)[j];
//Slot j is overwritten with the first inner next-state; the remaining ones are appended
(*nextStates)[j] = (*insideNextStates)[0];
//std::cout << "So now nextstates[j] = " << (*nextStates)[j] << std::endl;
for (std::vector<RegExState*>::size_type k = 1; k < insideNextStates->size(); k++)
nextStates->push_back((*insideNextStates)[k]);
//std::cout << "Replaced beginning: " << begin->toString() << std::endl;
//Now, if the peren node is self-referential (has a repetition operator after it), fix its self-references in the same manner
std::vector<RegExState*>* perenNextNodes = perenState->getNextStates();
for (std::vector<RegExState*>::size_type k = 0; k < perenNextNodes->size(); k++) {
if ((*perenNextNodes)[k] == perenState) {
(*perenNextNodes)[k] = (*insideNextStates)[0];
for (std::vector<RegExState*>::size_type l = 1; l < insideNextStates->size(); l++)
perenNextNodes->push_back((*insideNextStates)[l]);
}
}
//std::cout << "Fixed self-references: " << begin->toString() << std::endl;
//Need to fix the end too
//traversalList is a work list over the inner sub-graph, starting at the peren node's inner node
std::vector<RegExState*> traversalList;
traversalList.push_back(perenState->getInner());
for (std::vector<RegExState*>::size_type k = 0; k < traversalList.size(); k++) {
std::vector<RegExState*>* nextTraversalStates = traversalList[k]->getNextStates();
//std::cout << "Traversing! nextTraversalStates from traversalList " << traversalList[k] << " char = " << traversalList[k]->getCharacter() << std::endl;
//std::cout << "with children:" << std::endl;
//for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++)
// std::cout << "\t\"" << (*nextTraversalStates)[l]->getCharacter() << "\"" << std::endl;
//std::cout << std::endl;
for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++) {
//If this node is equal to the peren node we came from, then that means we've reached the end of the inner part of the peren
//And we now replace this reference with the next nodes from the peren node
//std::cout << "Traversal Next is on " << (*nextTraversalStates)[l]->getCharacter() << std::endl;
if ((*nextTraversalStates)[l] == perenState) {
// std::cout << "nextTraversalStates[l] = to perenState!" << std::endl;
//Work on a copy of the peren node's next-states (note the dereference)
std::vector<RegExState*> endPerenNextStates = *(perenState->getNextStates());
(*nextTraversalStates)[l] = endPerenNextStates[0];
for (std::vector<RegExState*>::size_type n = 1; n < endPerenNextStates.size(); n++)
nextTraversalStates->push_back(endPerenNextStates[n]);
//Now make sure we don't now try to continue through and end up processing stuff we just replaced the peren reference with
break;
} else {
traversalList.push_back((*nextTraversalStates)[l]);
}
}
}
}
}
//Now add all these next states to process, only if they haven't already been processed
for (std::vector<RegExState*>::size_type j = 0; j < nextStates->size(); j++) {
bool inCurrStates = false;
for (std::vector<RegExState*>::size_type k = 0; k < statesToProcess.size(); k++) {
if ((*nextStates)[j] == statesToProcess[k])
inCurrStates = true;
}
if (!inCurrStates) {
statesToProcess.push_back((*nextStates)[j]);
//std::cout << (*nextStates)[j] << "Is not in states to process" << std::endl;
}
}
}
//std::cout << "Finished de-perenthesization " << begin->toString() << std::endl;
}
RegEx::~RegEx() {
//No cleanup necessary
@@ -310,5 +215,16 @@ void RegEx::test() {
assert(re.longMatch("ab") == 1);
}
{
RegEx re("((ab)|c)*");
assert(re.longMatch("ababc") == 5);
assert(re.longMatch("ad") == 0);
assert(re.longMatch("ababccd") == 6);
}
{
RegEx re("bbb((bba+)|(ba+))*a*((a+b)|(a+bb)|(a+))*bbb") ;
assert(re.longMatch("bbbababbbaaaaaaaaaaaaaaaaaaabbb") == 9);
}
std::cout << "RegEx tests pass\n";
}

View File

@@ -79,12 +79,9 @@ void State::combineStates(State &other) {
std::vector<ParseRule*>* State::getTotal() {
total.clear();
for (std::vector<ParseRule*>::size_type i = 0; i < basis.size(); i++) {
total.push_back(basis[i]);
}
for (std::vector<ParseRule*>::size_type i = 0; i < remaining.size(); i++) {
total.push_back(remaining[i]);
}
//std::cout << "Vector will be " << basis.size() << " + " << remaining.size() << std::endl;
total.insert(total.begin(), basis.begin(), basis.end());
total.insert(total.end(), remaining.begin(), remaining.end());
return(&total);
}
std::vector<ParseRule*>* State::getBasis() {
@@ -111,6 +108,7 @@ void State::addRuleCombineLookahead(ParseRule* rule) {
if (rule->equalsExceptLookahead(*(total[i]))) {
total[i]->addLookahead(rule->getLookahead());
alreadyIn = true;
break;
}
}
if (!alreadyIn)
@@ -160,4 +158,8 @@ std::vector<State*>* State::getDeepParents(int depth) {
recursiveParents->insert(recursiveParents->end(), recursiveParentsToAdd->begin(), recursiveParentsToAdd->end());
}
return recursiveParents;
}
int State::getNumber() {
return number;
}

View File

@@ -75,7 +75,7 @@ std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd)
{
//End of String
end_reached = true;
std::cout << "Reached end of file!\n";
//std::cout << "Reached end of file!\n";
return "";
} else {

View File

@@ -8,6 +8,203 @@ Table::~Table() {
//
}
void Table::exportTable(std::ofstream &file) {
//Save symbolIndexVec
int size = symbolIndexVec.size();
file.write((char*)&size, sizeof(int));
for (int i = 0; i < symbolIndexVec.size(); i++) {
//Save the name
std::string symbolName = symbolIndexVec[i].getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//Save the value
std::string symbolValue = symbolIndexVec[i].getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
bool isTerminal = symbolIndexVec[i].isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
}
//Save the actual table
size = table.size();
file.write((char*)&size, sizeof(int));
for (int i = 0; i < table.size(); i++) {
//each item is a middle vector
//std::vector< std::vector< std::vector<ParseAction*>* >* > table;
std::vector< std::vector<ParseAction*>* >* middleVector = table[i];
int middleVectorSize = middleVector->size();
file.write((char*)&middleVectorSize, sizeof(int));
for (int j = 0; j < middleVectorSize; j++) {
//each item is an inner vector
std::vector<ParseAction*>* innerVector = (*middleVector)[j];
int innerVectorSize = 0;
if (innerVector)
innerVectorSize = innerVector->size();
else
innerVectorSize = 0;
file.write((char*)&innerVectorSize, sizeof(int));
for (int k = 0; k < innerVectorSize; k++) {
//Save the type
ParseAction* toSave = (*innerVector)[k];
ParseAction::ActionType actionType = toSave->action;
file.write((char*)&actionType, sizeof(ParseAction::ActionType));
//Save the reduce rule if necessary
if (actionType == ParseAction::REDUCE) {
//Save the reduce rule
ParseRule* rule = toSave->reduceRule;
//int pointer index
int ptrIndx = rule->getIndex();
file.write((char*)&ptrIndx, sizeof(int));
//Symbol leftHandle
Symbol leftHandle = rule->getLeftSide();
//Save the name
std::string symbolName = leftHandle.getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//Save the value
std::string symbolValue = leftHandle.getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
bool isTerminal = leftHandle.isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
//std::vector<Symbol>* lookahead;
//Should not need
//std::vector<Symbol> rightSide;
std::vector<Symbol> rightSide = rule->getRightSide();
size = rightSide.size();
//std::cout << leftHandle.toString() << std::endl;
file.write((char*)&size, sizeof(int));
for (int l = 0; l < rightSide.size(); l++) {
//Save the name
symbolName = rightSide[l].getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//
//Save the value
symbolValue = rightSide[l].getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
//
isTerminal = rightSide[l].isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
}
}
int shiftState = toSave->shiftState;
file.write((char*)&shiftState, sizeof(int));
}
}
}
}
void Table::importTable(char* tableData) {
//Load symbolIndexVec
int size = *((int*)tableData);
tableData += sizeof(int);
for (int i = 0; i < size; i++) {
int stringLen = *((int*)tableData);
tableData += sizeof(int);
std::string symbolName = std::string(tableData);
tableData += stringLen*sizeof(char);
stringLen = *((int*)tableData);
tableData += sizeof(int);
std::string symbolValue = std::string(tableData);
tableData += stringLen*sizeof(char);
bool isTerminal = *((bool*)tableData);
tableData += sizeof(bool);
symbolIndexVec.push_back(Symbol(symbolName, isTerminal, symbolValue));
}
//Now for the actual table
int tableSize = *((int*)tableData);
tableData += sizeof(int);
for (int i = 0; i < tableSize; i++) {
//each item is a middle vector
std::vector< std::vector<ParseAction*>* >* middleVector = new std::vector< std::vector<ParseAction*>* >();
table.push_back(middleVector);
int middleVectorSize = *((int*)tableData);
tableData += sizeof(int);
for (int j = 0; j < middleVectorSize; j++) {
//each item is an inner vector
std::vector<ParseAction*>* innerVector = new std::vector<ParseAction*>();
middleVector->push_back(innerVector);
int innerVectorSize = *((int*)tableData);
tableData += sizeof(int);
for (int k = 0; k < innerVectorSize; k++) {
//each item is a ParseRule
ParseAction::ActionType action = *((ParseAction::ActionType*)tableData);
tableData += sizeof(ParseAction::ActionType);
//If reduce, import the reduce rule
ParseRule* reduceRule = NULL;
if (action == ParseAction::REDUCE) {
int ptrIndx = *((int*)tableData);
tableData += sizeof(int);
size = *((int*)tableData);
tableData += sizeof(int);
std::string leftHandleName = std::string(tableData);
tableData += size*sizeof(char);
size = *((int*)tableData);
tableData += sizeof(int);
std::string leftHandleValue = std::string(tableData);
tableData += size*sizeof(char);
bool isTerminal = *((bool*)tableData);
tableData += sizeof(bool);
//right side
std::vector<Symbol> rightSide;
size = *((int*)tableData);
tableData += sizeof(int);
for (int l = 0; l < size; l++) {
int inStringLen = *((int*)tableData);
tableData += sizeof(int);
std::string inSymbolName = std::string(tableData);
tableData += inStringLen*sizeof(char);
inStringLen = *((int*)tableData);
tableData += sizeof(int);
std::string inSymbolValue = std::string(tableData);
tableData += inStringLen*sizeof(char);
bool inIsTerminal = *((bool*)tableData);
tableData += sizeof(bool);
rightSide.push_back(Symbol(inSymbolName, inIsTerminal, inSymbolValue));
}
reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, NULL);
}
int shiftState = *((int*)tableData);
tableData += sizeof(int);
//And push the new action back
if (reduceRule)
innerVector->push_back(new ParseAction(action, reduceRule));
else
innerVector->push_back(new ParseAction(action, shiftState));
}
}
}
}
void Table::setSymbols(Symbol EOFSymbol, Symbol nullSymbol) {
this->EOFSymbol = EOFSymbol;
this->nullSymbol = nullSymbol;
@@ -106,7 +303,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
return NULL;
}
std::cout << "Get for state: " << state << ", and Symbol: " << token.toString() << std::endl;
//std::cout << "Get for state: " << state << ", and Symbol: " << token.toString() << std::endl;
if (state < 0 || state >= table.size()) {
std::cout << "State bad: " << state << std::endl;
return NULL;
@@ -115,7 +312,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
std::vector<ParseAction*>* action = NULL;
if (symbolIndex < 0 || symbolIndex >= table[state]->size()) {
std::cout << "Symbol bad for this state: " << token.toString() << ". This is a reject." << std::endl;
//std::cout << "Symbol bad for this state: " << token.toString() << ". This is a reject." << std::endl;
} else {
action = (*(table[state]))[symbolIndex];
}
@@ -128,7 +325,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
action->push_back(new ParseAction(ParseAction::ACCEPT));
}
//If ourside the symbol range of this state (same as NULL), reject
//If outside the symbol range of this state (same as NULL), reject
if ( symbolIndex >= table[state]->size() ) {
action = new std::vector<ParseAction*>();
action->push_back(new ParseAction(ParseAction::REJECT));
@@ -141,7 +338,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
}
//Otherwise, we have something, so return it
return (action);
return action;
}
ParseAction* Table::getShift(int state, Symbol token) {
@@ -163,8 +360,9 @@ std::string Table::toString() {
concat += "\n";
for (std::vector< std::vector< std::vector< ParseRule* >* >* >::size_type i = 0; i < table.size(); i++) {
concat += intToString(i) + "\t";
concat += intToString(i) + " is the state\t";
for (std::vector< std::vector< ParseRule* >* >::size_type j = 0; j < table[i]->size(); j++) {
concat += "for " + symbolIndexVec[j].toString() + " do ";
if ( (*(table[i]))[j] != NULL) {
for (std::vector< ParseRule* >::size_type k = 0; k < (*(table[i]))[j]->size(); k++) {
concat += (*((*(table[i]))[j]))[k]->toString() + "\t";

85
src/Type.cpp Normal file
View File

@@ -0,0 +1,85 @@
#include "Type.h"
// Default: base type `none`, no indirection, no type definition.
Type::Type()
	: indirection(0), baseType(none), typeDefinition(NULL) {
}

// Built-in base type, no indirection, no type definition.
Type::Type(ValueType typeIn)
	: indirection(0), baseType(typeIn), typeDefinition(NULL) {
}

// Built-in base type with the given indirection, no type definition.
Type::Type(ValueType typeIn, int indirectionIn)
	: indirection(indirectionIn), baseType(typeIn), typeDefinition(NULL) {
}

// Type described by a definition node: base type `none`, no indirection.
Type::Type(NodeTree<ASTData>* typeDefinitionIn)
	: indirection(0), baseType(none), typeDefinition(typeDefinitionIn) {
}

// Type described by a definition node with the given indirection; base type `none`.
Type::Type(NodeTree<ASTData>* typeDefinitionIn, int indirectionIn)
	: indirection(indirectionIn), baseType(none), typeDefinition(typeDefinitionIn) {
}

// Fully specified: every member is taken from the arguments.
Type::Type(ValueType typeIn, NodeTree<ASTData>* typeDefinitionIn, int indirectionIn)
	: indirection(indirectionIn), baseType(typeIn), typeDefinition(typeDefinitionIn) {
}
// Destructor. The body is empty: no cleanup is performed, and in particular
// the typeDefinition pointer is not deleted here.
Type::~Type() {
}
// Two types are equal when base type, indirection and the typeDefinition
// pointer all match. typeDefinition is compared by pointer, not by content.
const bool Type::operator==(const Type &other) const {
	if (baseType != other.baseType)
		return false;
	if (indirection != other.indirection)
		return false;
	return typeDefinition == other.typeDefinition;
}
// Exact negation of operator==.
const bool Type::operator!=(const Type &other) const {
	const bool same = (*this == other);
	return !same;
}
// Renders the type as text: the base name followed by one '*' per level of indirection.
// Built-in base types map to fixed names ("void", "bool", "int", "float", "double", "char").
// For `none` and for any base type not listed, the name of the typeDefinition's symbol is
// used when a definition is present; otherwise the text is "none" or "unknown_type".
std::string Type::toString() {
	const char* builtinName = NULL;           //set only for the built-in base types
	const char* fallbackName = "unknown_type"; //used when there is no typeDefinition
	switch (baseType) {
		case void_type:        builtinName = "void";   break;
		case boolean:          builtinName = "bool";   break;
		case integer:          builtinName = "int";    break;
		case floating:         builtinName = "float";  break;
		case double_percision: builtinName = "double"; break;
		case character:        builtinName = "char";   break;
		case none:             fallbackName = "none";  break;
		default:               break;
	}

	std::string typeString;
	if (builtinName)
		typeString = builtinName;
	else if (typeDefinition)
		typeString = typeDefinition->getDataRef()->symbol.getName();
	else
		typeString = fallbackName;

	//A non-positive indirection adds nothing
	if (indirection > 0)
		typeString.append(indirection, '*');
	return typeString;
}

View File

@@ -8,7 +8,7 @@ std::string intToString(int theInt) {
std::string replaceExEscape(std::string first, std::string search, std::string replace) {
size_t pos = 0;
while (pos < first.size()-search.size()) {
while (pos <= first.size()-search.size()) {
pos = first.find(search, pos);
if (pos == std::string::npos)
break;
@@ -31,3 +31,44 @@ std::string replaceExEscape(std::string first, std::string search, std::string r
}
return first;
}
//String slicing is crazy useful. substr isn't bad, but slicing with negative indicies is wonderful
// Returns the characters of str in the half-open range [begin, end).
// A negative index counts back from one past the end, so -1 means str.length():
// strSlice(s, 0, -1) is the whole string and strSlice(s, -3, -1) is its last two characters.
// After that adjustment both indices are clamped to [0, str.length()], and an empty or
// inverted range (end <= begin) yields "". Without the clamping, substr() would throw for
// begin > length and return the rest of the string for end < begin.
std::string strSlice(std::string str, int begin, int end) {
	const int length = static_cast<int>(str.length());
	if (begin < 0)
		begin += length+1;
	if (end < 0)
		end += length+1;

	//Clamp both ends into the valid range
	if (begin < 0)
		begin = 0;
	else if (begin > length)
		begin = length;
	if (end < 0)
		end = 0;
	else if (end > length)
		end = length;

	if (end <= begin)
		return std::string();
	return str.substr(begin, end-begin);
}
// Scans str from index i, counting '(' as +1 and ')' as -1, and returns the index at which
// the running count is zero after processing that character. When str[i] is '(' this is the
// index of its matching ')'. When str[i] is neither parenthesis, i itself is returned.
// If the count never reaches zero (unbalanced parentheses), or i is outside the string,
// str.length() is returned - previously control fell off the end of the function, which is
// undefined behaviour.
int findPerenEnd(std::string str, int i) {
	const int length = static_cast<int>(str.length());
	if (i < 0)
		return length;
	int numHangingOpen = 0;
	for (; i < length; i++) {
		if (str[i] == '(')
			numHangingOpen++;
		else if (str[i] == ')')
			numHangingOpen--;
		if (numHangingOpen == 0)
			return i;
	}
	//No matching close found
	return length;
}
// Splits str at every occurrence of delim and returns the pieces in order.
// Pieces are extracted with std::getline, so empty pieces between adjacent delimiters
// are kept, while an empty input or a trailing delimiter adds no final empty piece.
std::vector<std::string> split(const std::string &str, char delim) {
	std::vector<std::string> pieces;
	std::stringstream stream(str);
	for (std::string piece; std::getline(stream, piece, delim); )
		pieces.push_back(piece);
	return pieces;
}
// Concatenates the elements of strVec, placing joinStr between neighbouring elements.
// An empty vector gives "", and a single element is returned unchanged.
std::string join(const std::vector<std::string> &strVec, std::string joinStr) {
	std::string joinedStr;
	std::vector<std::string>::const_iterator piece = strVec.begin();
	if (piece == strVec.end())
		return joinedStr;

	//First element goes in bare; every later one is preceded by the separator
	joinedStr = *piece;
	for (++piece; piece != strVec.end(); ++piece) {
		joinedStr += joinStr;
		joinedStr += *piece;
	}
	return joinedStr;
}

View File

@@ -0,0 +1,9 @@
This is the true regex for triple-quoted strings, but it currently segfaults my regex code:
triple_quoted_string = "\"\"\"((\"\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+)|(\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+))*(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*(((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+\"\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|
|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+))*\"\"\"" ;