From a84e2ee6e1f270710de31cef7114c99508209a5f Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Sat, 29 Aug 2015 21:45:55 -0400 Subject: [PATCH] faster deserilitation, super basic enums --- include/ASTData.h | 2 +- include/CGenerator.h | 2 +- krakenGrammer.kgm | 11 ++++--- src/ASTTransformation.cpp | 27 +++++++++++++++-- src/CGenerator.cpp | 36 +++++++++++++++++++---- src/Importer.cpp | 4 +-- stdlib/grammer.krak | 51 ++++++++++++++++++++------------- stdlib/map.krak | 11 ++++--- stdlib/set.krak | 7 ++--- stdlib/stack.krak | 7 ++--- stdlib/string.krak | 6 ++-- stdlib/symbol.krak | 11 ++++--- stdlib/util.krak | 2 +- tests/test_adt.expected_results | 1 + tests/test_adt.krak | 16 +++++++++++ 15 files changed, 134 insertions(+), 60 deletions(-) create mode 100644 tests/test_adt.expected_results create mode 100644 tests/test_adt.krak diff --git a/include/ASTData.h b/include/ASTData.h index 2d80ffe..12bf354 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -14,7 +14,7 @@ class Type; #define NULL ((void*)0) #endif -enum ASTType {undef, translation_unit, interpreter_directive, import, identifier, type_def, +enum ASTType {undef, translation_unit, interpreter_directive, import, identifier, type_def, adt_def, function, code_block, typed_parameter, expression, boolean_expression, statement, if_statement, while_loop, for_loop, return_statement, break_statement, continue_statement, defer_statement, assignment_statement, declaration_statement, if_comp, simple_passthrough, passthrough_params, diff --git a/include/CGenerator.h b/include/CGenerator.h index 0086690..d342b12 100644 --- a/include/CGenerator.h +++ b/include/CGenerator.h @@ -27,7 +27,7 @@ class CGenerator { CGenerator(); ~CGenerator(); void generateCompSet(std::map*> ASTs, std::string outputName); - std::string generateClassStruct(NodeTree* from); + std::string generateTypeStruct(NodeTree* from); bool isUnderTranslationUnit(NodeTree* from, NodeTree* typeDefinition); NodeTree* highestScope(NodeTree* node); std::pair generateTranslationUnit(std::string name, std::map*> ASTs); diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index ff82dfe..d6bcc94 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -1,6 +1,6 @@ Goal = translation_unit ; translation_unit = WS unorderd_list_part WS ; -unorderd_list_part = import WS unorderd_list_part | function WS unorderd_list_part | type_def line_end WS unorderd_list_part | if_comp WS unorderd_list_part | simple_passthrough WS unorderd_list_part | declaration_statement line_end WS unorderd_list_part | import | function | type_def line_end | if_comp | simple_passthrough | declaration_statement line_end ; +unorderd_list_part = import WS unorderd_list_part | function WS unorderd_list_part | type_def line_end WS unorderd_list_part | adt_def line_end WS unorderd_list_part | if_comp WS unorderd_list_part | simple_passthrough WS unorderd_list_part | declaration_statement line_end WS unorderd_list_part | import | function | type_def line_end | adt_def line_end | if_comp | simple_passthrough | declaration_statement line_end ; type = "ref" WS pre_reffed | pre_reffed ; pre_reffed = "\*" WS pre_reffed | "void" | "int" | "float" | "double" | "char" | scoped_identifier | scoped_identifier WS template_inst | function_type ; @@ -15,8 +15,7 @@ template_dec = "<" WS template_param_list WS ">" ; template_param_list = template_param_list WS "," WS template_param | template_param ; template_param = identifier WS traits | identifier ; -import = "import" WS identifier line_end | "import" WS identifier WS ":" WS "\*" line_end | "import" WS identifier WS ":" WS import_list line_end ; -import_list = identifier | identifier WS "," WS import_list ; +import = "import" WS identifier line_end | "import" WS identifier WS ":" WS "\*" line_end | "import" WS identifier WS ":" WS identifier_list line_end ; # all for optional semicolons line_break = " @@ -58,6 +57,7 @@ triple_quoted_string = "\"\"\"((\"\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i #identifier = alpha_alphanumeric ; identifier = augmented_alpha_alphanumeric ; +identifier_list = identifier | identifier WS "," WS identifier_list ; scope_op = ":" ":" ; scoped_identifier = scoped_identifier WS scope_op WS identifier | identifier ; @@ -88,6 +88,9 @@ declaration_block = declaration_statement line_end WS declaration_block | functi traits = "\(" WS trait_list WS "\)" ; trait_list = trait_list WS "," WS scoped_identifier | scoped_identifier ; +adt_nonterm = "adt" ; +adt_def = adt_nonterm WS identifier WS "{" WS identifier_list WS "}" ; + if_statement = "if" WS "\(" WS boolean_expression WS "\)" WS statement | "if" WS "\(" WS boolean_expression WS "\)" WS statement WS "else" WS statement ; while_loop = "while" WS boolean_expression WS statement ; @@ -128,7 +131,7 @@ float_end = "(0|1|2|3|4|5|6|7|8|9)+" | "(0|1|2|3|4|5|6|7|8|9)+f" | "(0|1|2|3|4|5 bool = "true" | "false" ; character = "'(`|1|2|3|4|5|6|7|8|9|0|-|=|(\\t)|q|w|e|r|t|y|u|i|o|p|[|]|(\\\\)|a|s|d|f|g|h|j|k|l|;|'|(\\n)|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| |(\\0))'" ; -keywords_also_identifiers = "obj" | "def" | "fun" | "var" ; +keywords_also_identifiers = "obj" | "def" | "fun" | "var" | "adt" ; alpha_alphanumeric = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|_)(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|_|0|1|2|3|4|5|6|7|8|9)*" ; augmented_alpha_alphanumeric = alpha_alphanumeric augmented_alpha_alphanumeric | keywords_also_identifiers augmented_alpha_alphanumeric | alpha_alphanumeric | keywords_also_identifiers ; diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index f0cd0bf..ab4fe71 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -68,7 +68,7 @@ std::vector*> ASTTransformation::getNodes(std::string lookup, s return results; } -//First pass defines all type_defs (objects and ailises), and if_comp/simple_passthrough +//First pass defines all type_defs (objects and ailises), ADTs, and if_comp/simple_passthrough NodeTree* ASTTransformation::firstPass(std::string fileName, NodeTree* parseTree) { NodeTree* translationUnit = new NodeTree("translation_unit", ASTData(translation_unit, Symbol(fileName, false))); std::vector*> children = parseTree->getChildren(); @@ -97,6 +97,12 @@ NodeTree* ASTTransformation::firstPass(std::string fileName, NodeTreegetDataRef()->valueType = new Type(firstDec); } + } else if (i->getDataRef()->getName() == "adt_def") { + std::string name = concatSymbolTree(i->getChildren()[0]); + NodeTree* adt_dec = addToScope("~enclosing_scope", translationUnit, new NodeTree("adt_def", ASTData(adt_def, Symbol(name, true, name)))); + addToScope(name, adt_dec, translationUnit); + translationUnit->addChild(adt_dec); + adt_dec->getDataRef()->valueType = new Type(adt_dec); } else if (i->getDataRef()->getName() == "if_comp") { std::cout << "IF COMP" << std::endl; NodeTree* newNode = addToScope("~enclosing_scope", translationUnit, new NodeTree(i->getDataRef()->getName(), ASTData(if_comp))); @@ -158,7 +164,7 @@ std::set ASTTransformation::parseTraits(NodeTree* traitsNod return traits; } -//Second pass defines data inside objects, outside declaration statements, and function prototypes (since we have type_defs now) +//Second pass defines data inside objects + ADTs, outside declaration statements, and function prototypes (since we have type_defs+ADTs now) void ASTTransformation::secondPass(NodeTree* ast, NodeTree* parseTree) { topScope = ast; //Top scope is maintained for templates, which need to add themselves to the top scope from where ever they are instantiated std::vector*> children = parseTree->getChildren(); @@ -186,6 +192,19 @@ void ASTTransformation::secondPass(NodeTree* ast, NodeTree* par } //Do the inside of classes here secondPassDoClassInsides(typeDef, typedefChildren, std::map()); + } else if (i->getDataRef()->getName() == "adt_def") { + std::string name = concatSymbolTree(i->getChildren()[0]); + NodeTree* adtDef = ast->getDataRef()->scope[name][0]; //No overloaded types (besides uninstantiated templates, which can have multiple versions based on types or specilizations) + for (NodeTree* j : i->getChildren()) { + if (j->getDataRef()->getName() == "identifier") { + std::string ident_name = concatSymbolTree(j); + std::cout << "add ing " << ident_name << " to " << name << " for ADT" << std::endl; + NodeTree* enum_variant_identifier = new NodeTree("identifier", ASTData(identifier, Symbol(ident_name, true), adtDef->getDataRef()->valueType)); + adtDef->addChild(enum_variant_identifier); + addToScope(ident_name, enum_variant_identifier, adtDef); + addToScope("~enclosing_scope", adtDef, enum_variant_identifier); + } + } } else if (i->getDataRef()->getName() == "function") { //Do prototypes of functions ast->addChild(secondPassFunction(i, ast, std::map())); @@ -280,6 +299,8 @@ void ASTTransformation::thirdPass(NodeTree* ast, NodeTree* pars thirdPassFunction(j, searchScopeForFunctionDef(typeDef, j, std::map()), std::map()); //do member method } } + } else if (i->getDataRef()->getName() == "adt_def") { + // nothing to do here yet, but eventually we will set up our internal objs, etc } else if (i->getDataRef()->getName() == "function") { //Do prototypes of functions if (i->getChildren()[1]->getData().getName() == "template_dec") @@ -1009,7 +1030,7 @@ NodeTree* ASTTransformation::functionLookup(NodeTree* scope, s if (possibleMatches.size()) { for (auto i : possibleMatches) { //We're not looking for types - if (i->getDataRef()->type == type_def) + if (i->getDataRef()->type == type_def || i->getDataRef()->type == adt_def) continue; Type* functionType = i->getDataRef()->valueType; diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 8db8686..a8eb38c 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -51,15 +51,20 @@ std::string CGenerator::getID() { return intToString(id++); } -std::string CGenerator::generateClassStruct(NodeTree* from) { +std::string CGenerator::generateTypeStruct(NodeTree* from) { auto data = from->getData(); auto children = from->getChildren(); - std::string objectString = "struct __struct_dummy_" + scopePrefix(from) + CifyName(data.symbol.getName()) + "__ {\n"; + std::string objectString; + if (data.type == type_def) + objectString = "struct __struct_dummy_"; + else if (data.type == adt_def) + objectString = "enum __adt_dummy_"; + objectString += scopePrefix(from) + CifyName(data.symbol.getName()) + "__ {\n"; tabLevel++; - for (int i = 0; i < children.size(); i++) { + for (int i = (data.type == adt_def ? 1 : 0); i < children.size(); i++) { //std::cout << children[i]->getName() << std::endl; if (children[i]->getName() != "function") - objectString += tabs() + generate(children[i], nullptr).oneString() + "\n"; + objectString += tabs() + generate(children[i], nullptr).oneString() + (data.type == adt_def ? ",\n" : "\n"); } tabLevel--; objectString += "};"; @@ -161,12 +166,15 @@ std::pair CGenerator::generateTranslationUnit(std::str typedefPoset.addRelationship(children[i], decType->typeDefinition); // Add a dependency } } + } else if (children[i]->getDataRef()->type == adt_def) { + // + typedefPoset.addVertex(children[i]); // We add this definition by itself just in case there are no dependencies. } } } //Now generate the typedef's in the correct, topological order for (NodeTree* i : typedefPoset.getTopoSort()) - classStructs += generateClassStruct(i) + "\n"; + classStructs += generateTypeStruct(i) + "\n"; // Declare everything in translation unit scope here (now for ALL translation units). (allows stuff from other files, automatic forward declarations) // Also, everything in all of the import's scopes @@ -253,6 +261,24 @@ std::pair CGenerator::generateTranslationUnit(std::str functionDefinitions += objectFunctionDefinitions + "/* Done with " + declarationData.symbol.getName() + " */\n"; } break; + case adt_def: + { + //type + plainTypedefs += "/* adt " + declarationData.symbol.getName() + " */\n"; + //plainTypedefs += "typedef struct __adt_dummy_" + + plainTypedefs += "typedef enum __adt_dummy_" + + scopePrefix(declaration) + CifyName(declarationData.symbol.getName()) + "__ " + + scopePrefix(declaration) + CifyName(declarationData.symbol.getName()) + ";\n"; + // We use a seperate string for this because we only include it if this is the file we're defined in + std::string enumString = "/* Enum Definition for " + declarationData.symbol.getName() + " */\n"; + // skip the name of the thing + for (int j = 1; j < decChildren.size(); j++) { + std::cout << decChildren[j]->getName() << std::endl; + if (decChildren[j]->getName() == "identifier") //If object method and not template + enumString += "an_option \n"; + } + break; + } default: //std::cout << "Declaration? named " << declaration->getName() << " of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl; cOutput += "/*unknown declaration named " + declaration->getName() + "*/\n"; diff --git a/src/Importer.cpp b/src/Importer.cpp index 7b99f98..a721f85 100644 --- a/src/Importer.cpp +++ b/src/Importer.cpp @@ -38,6 +38,7 @@ Importer::Importer(Parser* parserIn, std::vector includePaths, std: removeSymbols.push_back(Symbol("comp_simple_passthrough", true)); removeSymbols.push_back(Symbol("def_nonterm", false)); removeSymbols.push_back(Symbol("obj_nonterm", false)); + removeSymbols.push_back(Symbol("adt_nonterm", false)); removeSymbols.push_back(Symbol("template", true)); removeSymbols.push_back(Symbol("\\|", true)); //collapseSymbols.push_back(Symbol("scoped_identifier", false)); @@ -45,8 +46,7 @@ Importer::Importer(Parser* parserIn, std::vector includePaths, std: collapseSymbols.push_back(Symbol("param_assign_list", false)); collapseSymbols.push_back(Symbol("opt_typed_parameter_list", false)); collapseSymbols.push_back(Symbol("opt_parameter_list", false)); - collapseSymbols.push_back(Symbol("opt_import_list", false)); - collapseSymbols.push_back(Symbol("import_list", false)); + collapseSymbols.push_back(Symbol("identifier_list", false)); collapseSymbols.push_back(Symbol("statement_list", false)); collapseSymbols.push_back(Symbol("parameter_list", false)); collapseSymbols.push_back(Symbol("typed_parameter_list", false)); diff --git a/stdlib/grammer.krak b/stdlib/grammer.krak index babc9d1..58ecdc3 100644 --- a/stdlib/grammer.krak +++ b/stdlib/grammer.krak @@ -139,12 +139,19 @@ obj grammer (Object, Serializable) { } fun unserialize(it: ref vector::vector, pos: int): int { // get everything constructed before the assignment - construct() - util::unpack(rules, pos) = serialize::unserialize>(it, pos) - util::unpack(non_terminals, pos) = serialize::unserialize>(it, pos) - util::unpack(terminals, pos) = serialize::unserialize>>(it, pos) - util::unpack(first_set_map, pos) = serialize::unserialize>>(it, pos) - util::unpack(parse_table, pos) = serialize::unserialize(it, pos) + /*construct()*/ + /*util::unpack(rules, pos) = serialize::unserialize>(it, pos)*/ + /*util::unpack(non_terminals, pos) = serialize::unserialize>(it, pos)*/ + /*util::unpack(terminals, pos) = serialize::unserialize>>(it, pos)*/ + /*util::unpack(first_set_map, pos) = serialize::unserialize>>(it, pos)*/ + /*util::unpack(parse_table, pos) = serialize::unserialize
(it, pos)*/ + + // do it in place. Actually looks nicer too + pos = rules.unserialize(it, pos) + pos = non_terminals.unserialize(it, pos) + pos = terminals.unserialize(it, pos) + pos = first_set_map.unserialize(it, pos) + pos = parse_table.unserialize(it, pos) return pos } @@ -346,18 +353,22 @@ obj rule (Object, Serializable) { return serialize::serialize(lhs) + serialize::serialize(rhs) + serialize::serialize(position) + serialize::serialize(lookahead) } fun unserialize(it: ref vector::vector, pos: int): int { - var tempLhs = symbol::invalid_symbol() - var tempRhs = vector::vector() - var tempLookahead = set::set() - util::unpack(tempLhs, pos) = serialize::unserialize(it, pos) - util::unpack(tempRhs, pos) = serialize::unserialize>(it, pos) - util::unpack(position, pos) = serialize::unserialize(it, pos) - util::unpack(tempLookahead, pos) = serialize::unserialize>(it, pos) + /*var tempLhs = symbol::invalid_symbol()*/ + /*var tempRhs = vector::vector()*/ + /*var tempLookahead = set::set()*/ + /*util::unpack(tempLhs, pos) = serialize::unserialize(it, pos)*/ + /*util::unpack(tempRhs, pos) = serialize::unserialize>(it, pos)*/ + /*util::unpack(position, pos) = serialize::unserialize(it, pos)*/ + /*util::unpack(tempLookahead, pos) = serialize::unserialize>(it, pos)*/ - lhs.copy_construct(&tempLhs) - rhs.copy_construct(&tempRhs) - lookahead.copy_construct(&tempLookahead) - return pos + /*lhs.copy_construct(&tempLhs)*/ + /*rhs.copy_construct(&tempRhs)*/ + /*lookahead.copy_construct(&tempLookahead)*/ + + pos = lhs.unserialize(it, pos) + pos = rhs.unserialize(it, pos) + util::unpack(position, pos) = serialize::unserialize(it, pos) + return lookahead.unserialize(it, pos) } fun construct(): *rule { @@ -534,9 +545,9 @@ obj table (Object, Serializable) { return serialize::serialize(items) } fun unserialize(it: ref vector::vector, pos: int): int { - var temp = vector::vector>>() - util::unpack(temp, pos) = serialize::unserialize>>>(it, pos) - items.copy_construct(&temp) + /*construct()*/ + /*util::unpack(items, pos) = serialize::unserialize>>>(it, pos)*/ + pos = items.unserialize(it, pos) return pos } fun expand_to(include_state: int) { diff --git a/stdlib/map.krak b/stdlib/map.krak index 8eee76e..491b591 100644 --- a/stdlib/map.krak +++ b/stdlib/map.krak @@ -38,12 +38,11 @@ obj map (Object, Serializable) { return serialize::serialize(keys) + serialize::serialize(values) } fun unserialize(it: ref vector::vector, pos: int): int { - var tempKeys = vector::vector() - var tempValues = vector::vector() - util::unpack(tempKeys, pos) = serialize::unserialize>(it, pos) - util::unpack(tempValues, pos) = serialize::unserialize>(it, pos) - keys.copy_construct(&tempKeys) - values.copy_construct(&tempValues) + /*construct()*/ + /*util::unpack(keys, pos) = serialize::unserialize>(it, pos)*/ + /*util::unpack(values, pos) = serialize::unserialize>(it, pos)*/ + pos = keys.unserialize(it, pos) + pos = values.unserialize(it, pos) return pos } fun operator[]=(key: T, value: U) { diff --git a/stdlib/set.krak b/stdlib/set.krak index 395bba8..b88ac4a 100644 --- a/stdlib/set.krak +++ b/stdlib/set.krak @@ -37,10 +37,9 @@ obj set (Object, Serializable) { return serialize::serialize(data) } fun unserialize(it: ref vector::vector, pos: int): int { - var temp = vector::vector() - util::unpack(temp, pos) = serialize::unserialize>(it, pos) - data.copy_construct(&temp) - return pos + /*construct()*/ + /*util::unpack(data, pos) = serialize::unserialize>(it, pos)*/ + return data.unserialize(it, pos) } fun operator==(rhs: set): bool { if (size() != rhs.size()) diff --git a/stdlib/stack.krak b/stdlib/stack.krak index d0b733b..7ae6992 100644 --- a/stdlib/stack.krak +++ b/stdlib/stack.krak @@ -32,10 +32,9 @@ obj stack (Object, Serializable) { return serialize::serialize(data) } fun unserialize(it: ref vector::vector, pos: int): int { - var temp = vector::vector() - util::unpack(temp, pos) = serialize::unserialize>(it, pos) - data.copy_construct(&temp) - return pos + /*construct()*/ + /*util::unpack(data, pos) = serialize::unserialize>(it, pos)*/ + return data.unserialize(it, pos) } fun push(it: ref T) { data.addEnd(it) diff --git a/stdlib/string.krak b/stdlib/string.krak index 459c203..1c59867 100644 --- a/stdlib/string.krak +++ b/stdlib/string.krak @@ -77,9 +77,9 @@ obj string (Object, Serializable) { return serialize::serialize(data) } fun unserialize(it: ref vector::vector, pos: int): int { - construct() - util::unpack(data, pos) = serialize::unserialize>(it, pos) - return pos + /*construct()*/ + /*util::unpack(data, pos) = serialize::unserialize>(it, pos)*/ + return data.unserialize(it, pos) } fun operator[](index: int): ref char { return data[index]; } diff --git a/stdlib/symbol.krak b/stdlib/symbol.krak index 32f6702..9dd5cd6 100644 --- a/stdlib/symbol.krak +++ b/stdlib/symbol.krak @@ -69,13 +69,12 @@ obj symbol (Object, Serializable) { return serialize::serialize(data) + serialize::serialize(name) + serialize::serialize(terminal) } fun unserialize(it: ref vector::vector, pos: int): int { - var tempData = string::string() - var tempName = string::string() - util::unpack(tempData, pos) = serialize::unserialize(it, pos) - util::unpack(tempName, pos) = serialize::unserialize(it, pos) + /*construct()*/ + /*util::unpack(data, pos) = serialize::unserialize(it, pos)*/ + /*util::unpack(name, pos) = serialize::unserialize(it, pos)*/ + pos = data.unserialize(it, pos) + pos = name.unserialize(it, pos) util::unpack(terminal, pos) = serialize::unserialize(it, pos) - data.copy_construct(&tempData) - name.copy_construct(&tempName) return pos } fun operator==(other: ref symbol): bool { diff --git a/stdlib/util.krak b/stdlib/util.krak index 30e84ce..e6b4962 100644 --- a/stdlib/util.krak +++ b/stdlib/util.krak @@ -40,7 +40,7 @@ obj pair (Object, Serializable) { var first: T var second: U - fun construct(firstIn: T, secondIn: U): *pair { + fun construct(firstIn: ref T, secondIn: ref U): *pair { mem::maybe_copy_construct(&first, &firstIn) mem::maybe_copy_construct(&second, &secondIn) return this diff --git a/tests/test_adt.expected_results b/tests/test_adt.expected_results new file mode 100644 index 0000000..64e7fa5 --- /dev/null +++ b/tests/test_adt.expected_results @@ -0,0 +1 @@ +option1 diff --git a/tests/test_adt.krak b/tests/test_adt.krak new file mode 100644 index 0000000..7993e70 --- /dev/null +++ b/tests/test_adt.krak @@ -0,0 +1,16 @@ +import io:* + +adt options { + option0, + option1 +} + +fun main():int { + var it: options = options::option1 + if (it == options::option0) + println("nope") + if (it == options::option1) + println("option1") + return 0 +} +