diff --git a/.gitignore b/.gitignore index 880a271..619c07f 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,7 @@ kraklist.txt .*.un~ papers callgrind* +*.comp_new +*.comp_bac +bintest.bin +*.dot diff --git a/stdlib/grammer.krak b/stdlib/grammer.krak index b6ac06b..babc9d1 100644 --- a/stdlib/grammer.krak +++ b/stdlib/grammer.krak @@ -7,6 +7,7 @@ import symbol import regex import io import util +import serialize fun split_into_words(gram_str: string::string): vector::vector { var out.construct(): vector::vector @@ -100,7 +101,7 @@ fun load_grammer(gram_str: string::string): grammer { return gram } -obj grammer (Object) { +obj grammer (Object, Serializable) { var rules: vector::vector var non_terminals: set::set var terminals: vector::vector> @@ -133,6 +134,20 @@ obj grammer (Object) { parse_table.destruct() } + fun serialize(): vector::vector { + return serialize::serialize(rules) + serialize::serialize(non_terminals) + serialize::serialize(terminals) + serialize::serialize(first_set_map) + serialize::serialize(parse_table) + } + fun unserialize(it: ref vector::vector, pos: int): int { + // get everything constructed before the assignment + construct() + util::unpack(rules, pos) = serialize::unserialize>(it, pos) + util::unpack(non_terminals, pos) = serialize::unserialize>(it, pos) + util::unpack(terminals, pos) = serialize::unserialize>>(it, pos) + util::unpack(first_set_map, pos) = serialize::unserialize>>(it, pos) + util::unpack(parse_table, pos) = serialize::unserialize(it, pos) + return pos + } + fun calculate_first_set() { // the first set of a terminal is itself terminals.for_each( fun(terminal: util::pair) @@ -321,12 +336,30 @@ fun rule(lhs: symbol::symbol, rhs: vector::vector): rule { return toRet } -obj rule (Object) { +obj rule (Object, Serializable) { var lhs: symbol::symbol var rhs: vector::vector var position: int var lookahead: set::set + fun serialize(): vector::vector { + return serialize::serialize(lhs) + serialize::serialize(rhs) + serialize::serialize(position) + serialize::serialize(lookahead) + } + fun unserialize(it: ref vector::vector, pos: int): int { + var tempLhs = symbol::invalid_symbol() + var tempRhs = vector::vector() + var tempLookahead = set::set() + util::unpack(tempLhs, pos) = serialize::unserialize(it, pos) + util::unpack(tempRhs, pos) = serialize::unserialize>(it, pos) + util::unpack(position, pos) = serialize::unserialize(it, pos) + util::unpack(tempLookahead, pos) = serialize::unserialize>(it, pos) + + lhs.copy_construct(&tempLhs) + rhs.copy_construct(&tempRhs) + lookahead.copy_construct(&tempLookahead) + return pos + } + fun construct(): *rule { lhs.construct() rhs.construct() @@ -480,7 +513,7 @@ obj action { } } -obj table (Object) { +obj table (Object, Serializable) { // a 2 dimensional table made of a vector and a map that maps from stateno & symbol to a vector of parse actions var items: vector::vector>> @@ -497,6 +530,15 @@ obj table (Object) { fun destruct() { items.destruct() } + fun serialize(): vector::vector { + return serialize::serialize(items) + } + fun unserialize(it: ref vector::vector, pos: int): int { + var temp = vector::vector>>() + util::unpack(temp, pos) = serialize::unserialize>>>(it, pos) + items.copy_construct(&temp) + return pos + } fun expand_to(include_state: int) { while (include_state >= items.size) items.addEnd(map::map>()) diff --git a/stdlib/io.krak b/stdlib/io.krak index f573404..f43645d 100644 --- a/stdlib/io.krak +++ b/stdlib/io.krak @@ -75,6 +75,22 @@ fun print(toPrint: double) : void{ } // Ok, just some DEAD simple file io for now +fun file_exists(path: string::string): bool { + var char_path = path.toCharArray() + defer delete(char_path) + var result = false + __if_comp__ __C__ { + simple_passthrough(char_path:result:) """ + bool result = false; + FILE *fp = fopen(char_path, "r"); + if (fp) { + result = true; + fclose(fp); + } + """ + } + return result +} fun read_file(path: string::string): string::string { var toRet.construct(read_file_binary(path)): string::string return toRet diff --git a/stdlib/map.krak b/stdlib/map.krak index 36adf6b..8eee76e 100644 --- a/stdlib/map.krak +++ b/stdlib/map.krak @@ -1,5 +1,7 @@ import vector import io +import serialize +import util fun map(): map { var toRet.construct(): map @@ -11,13 +13,14 @@ fun map(key:T, value:U): map { return toRet } -obj map (Object) { +obj map (Object, Serializable) { var keys: vector::vector var values: vector::vector - fun construct() { + fun construct(): *map { keys.construct() values.construct() + return this } fun copy_construct(old: *map) { keys.copy_construct(&old->keys) @@ -31,6 +34,18 @@ obj map (Object) { keys.destruct() values.destruct() } + fun serialize(): vector::vector { + return serialize::serialize(keys) + serialize::serialize(values) + } + fun unserialize(it: ref vector::vector, pos: int): int { + var tempKeys = vector::vector() + var tempValues = vector::vector() + util::unpack(tempKeys, pos) = serialize::unserialize>(it, pos) + util::unpack(tempValues, pos) = serialize::unserialize>(it, pos) + keys.copy_construct(&tempKeys) + values.copy_construct(&tempValues) + return pos + } fun operator[]=(key: T, value: U) { set(key,value) } diff --git a/stdlib/parser.krak b/stdlib/parser.krak index 9b5ef9e..4bfe4cd 100644 --- a/stdlib/parser.krak +++ b/stdlib/parser.krak @@ -454,6 +454,15 @@ obj reduction (Object) { var nullable_parts: *tree var label: *tree + fun construct(): *reduction { + from = null>() + sym = invalid_symbol() + length = -1 + nullable_parts = null>() + label = null>() + return this + } + fun construct(f: *tree, s: symbol, l: int, n: *tree, labelIn:*tree): *reduction { from = f sym.copy_construct(&s) diff --git a/stdlib/regex.krak b/stdlib/regex.krak index f205b61..e903ac6 100644 --- a/stdlib/regex.krak +++ b/stdlib/regex.krak @@ -5,6 +5,7 @@ import mem import set import util import conversions +import serialize fun regex(in: *char):regex { return regex(string::string(in)) @@ -40,7 +41,7 @@ obj regexState (Object) { } } -obj regex (Object) { +obj regex (Object, Serializable) { var regexString: string::string var begin: *regexState var referenceCounter: *int @@ -85,6 +86,15 @@ obj regex (Object) { mem::delete(referenceCounter) } } + fun serialize(): vector::vector { + return serialize::serialize(regexString) + } + fun unserialize(it: ref vector::vector, pos: int): int { + var temp = string::string() + util::unpack(temp, pos) = serialize::unserialize(it, pos) + construct(temp) + return pos + } fun operator==(other: regex):bool { return regexString == other.regexString diff --git a/stdlib/serialize.krak b/stdlib/serialize.krak index fcc92b6..3fbb6c0 100644 --- a/stdlib/serialize.krak +++ b/stdlib/serialize.krak @@ -16,13 +16,13 @@ fun serialize(it: T): vector::vector { } // dead simple wrapper for ease of use -fun unserialize(it: vector::vector): T { +fun unserialize(it: ref vector::vector): T { return unserialize(it, 0).first } -fun unserialize(it: vector::vector, pos: int): util::pair { +fun unserialize(it: ref vector::vector, pos: int): util::pair { return util::make_pair(*conversions::cast_ptr<*char,*T>(it.getBackingMemory()+pos), pos + mem::sizeof()) } -fun unserialize(it: vector::vector, pos: int): util::pair { +fun unserialize(it: ref vector::vector, pos: int): util::pair { var toRet: T pos = toRet.unserialize(it, pos) return util::make_pair(toRet, pos) diff --git a/stdlib/set.krak b/stdlib/set.krak index 4f07ee2..395bba8 100644 --- a/stdlib/set.krak +++ b/stdlib/set.krak @@ -1,5 +1,7 @@ import vector import io +import serialize +import util fun set(): set { var toRet.construct() : set @@ -18,10 +20,11 @@ fun from_vector(items: vector::vector): set { return toRet } -obj set (Object) { +obj set (Object, Serializable) { var data: vector::vector - fun construct() { + fun construct(): *set { data.construct() + return this } fun copy_construct(old: *set) { data.copy_construct(&old->data) @@ -30,6 +33,15 @@ obj set (Object) { destruct() copy_construct(&rhs) } + fun serialize(): vector::vector { + return serialize::serialize(data) + } + fun unserialize(it: ref vector::vector, pos: int): int { + var temp = vector::vector() + util::unpack(temp, pos) = serialize::unserialize>(it, pos) + data.copy_construct(&temp) + return pos + } fun operator==(rhs: set): bool { if (size() != rhs.size()) return false diff --git a/stdlib/stack.krak b/stdlib/stack.krak index c3b7aff..d0b733b 100644 --- a/stdlib/stack.krak +++ b/stdlib/stack.krak @@ -1,4 +1,6 @@ import vector +import serialize +import util fun stack():stack { @@ -11,7 +13,7 @@ fun stack(in:T):stack { return out } -obj stack (Object) { +obj stack (Object, Serializable) { var data: vector::vector fun construct(): *stack { data.construct() @@ -26,6 +28,15 @@ obj stack (Object) { fun operator=(other: ref stack) { data = other.data } + fun serialize(): vector::vector { + return serialize::serialize(data) + } + fun unserialize(it: ref vector::vector, pos: int): int { + var temp = vector::vector() + util::unpack(temp, pos) = serialize::unserialize>(it, pos) + data.copy_construct(&temp) + return pos + } fun push(it: ref T) { data.addEnd(it) } diff --git a/stdlib/string.krak b/stdlib/string.krak index ebbe0e0..459c203 100644 --- a/stdlib/string.krak +++ b/stdlib/string.krak @@ -1,6 +1,7 @@ import vector import util import mem +import serialize fun to_string(in: int): string { var dest = mem::new(mem::sizeof() * 8) @@ -23,8 +24,11 @@ fun string(in:char):string { out += in return out } +fun string():string { + return string("") +} -obj string (Object) { +obj string (Object, Serializable) { var data: vector::vector; fun construct(): *string { data.construct(); @@ -69,6 +73,15 @@ obj string (Object) { data.destruct() } + fun serialize(): vector::vector { + return serialize::serialize(data) + } + fun unserialize(it: ref vector::vector, pos: int): int { + construct() + util::unpack(data, pos) = serialize::unserialize>(it, pos) + return pos + } + fun operator[](index: int): ref char { return data[index]; } fun slice(first: int, second: int): string { var new.construct(data.slice(first,second)): string diff --git a/stdlib/symbol.krak b/stdlib/symbol.krak index 6769469..32f6702 100644 --- a/stdlib/symbol.krak +++ b/stdlib/symbol.krak @@ -1,4 +1,7 @@ import string +import serialize +import vector +import util fun null_symbol(): symbol { var toRet.construct(string::string("$NULL$"), false, string::string("$NULL$")): symbol @@ -33,7 +36,7 @@ fun symbol(nameIn: string::string, terminalIn: bool, dataIn: string::string): sy return toRet } -obj symbol (Object) { +obj symbol (Object, Serializable) { var data: string::string var name: string::string var terminal: bool @@ -62,6 +65,19 @@ obj symbol (Object) { destruct() copy_construct(&old) } + fun serialize(): vector::vector { + return serialize::serialize(data) + serialize::serialize(name) + serialize::serialize(terminal) + } + fun unserialize(it: ref vector::vector, pos: int): int { + var tempData = string::string() + var tempName = string::string() + util::unpack(tempData, pos) = serialize::unserialize(it, pos) + util::unpack(tempName, pos) = serialize::unserialize(it, pos) + util::unpack(terminal, pos) = serialize::unserialize(it, pos) + data.copy_construct(&tempData) + name.copy_construct(&tempName) + return pos + } fun operator==(other: ref symbol): bool { return data == other.data && name == other.name && terminal == other.terminal; } diff --git a/stdlib/util.krak b/stdlib/util.krak index d8a4342..30e84ce 100644 --- a/stdlib/util.krak +++ b/stdlib/util.krak @@ -1,5 +1,6 @@ import mem import vector +import serialize fun max(a: T, b: T): T { if (a > b) @@ -35,7 +36,7 @@ obj unpack_dummy { } -obj pair (Object) { +obj pair (Object, Serializable) { var first: T var second: U @@ -60,6 +61,17 @@ obj pair (Object) { mem::maybe_destruct(&first) mem::maybe_destruct(&second) } + fun serialize(): vector::vector { + return serialize::serialize(first) + serialize::serialize(second) + } + fun unserialize(it: ref vector::vector, pos: int): int { + // can't use unpack :( (b/c we can't make an already constructed empty one) + var first_pair = serialize::unserialize(it, pos) + var second_pair = serialize::unserialize(it, first_pair.second) + mem::maybe_copy_construct(&first, &first_pair.first) + mem::maybe_copy_construct(&second, &second_pair.first) + return second_pair.second + } // the old unnecessary template to prevent generation // if not used trick (in this case, changing out U with V) diff --git a/stdlib/vector.krak b/stdlib/vector.krak index e7f5e70..45dea66 100644 --- a/stdlib/vector.krak +++ b/stdlib/vector.krak @@ -46,7 +46,7 @@ obj vector (Object, Serializable) { toRet += serialize(data[i]) return toRet } - fun unserialize(it: vector, pos: int): int { + fun unserialize(it: ref vector, pos: int): int { unpack(size, pos) = unserialize(it, pos) data = new(size) available = size diff --git a/tests/test_grammer.krak b/tests/test_grammer.krak index d6cb4d0..8164b31 100644 --- a/tests/test_grammer.krak +++ b/tests/test_grammer.krak @@ -6,17 +6,81 @@ import string:* import util:* import symbol:* import tree:* +import serialize:* fun main():int { + + var a.construct(): grammer - var a = load_grammer(read_file(string("../krakenGrammer.kgm"))) - /*var a = load_grammer(read_file(string("grammer.kgm")))*/ - /*var a = load_grammer(read_file(string("grammer2.kgm")))*/ - /*var a = load_grammer(read_file(string("grammer3.kgm")))*/ - /*var a = load_grammer(read_file(string("grammer4.kgm")))*/ + var file_name = string("../krakenGrammer.kgm") + /*var file_name = string("grammer.kgm")*/ + /*var file_name = string("grammer2.kgm")*/ + /*var file_name = string("grammer3.kgm")*/ + /*var file_name = string("grammer4.kgm")*/ + + var compiled_name = file_name + string(".comp_new") + var file_contents = read_file(file_name) + var loaded_and_valid = false + +/* + println("gonna serialize") + var s = serialize(file_contents) + println("gonna write") + write_file_binary(compiled_name, s) + println("gonna read") + var bin = read_file_binary(compiled_name) + println("gonna setup") + var pos = 0 + var uns = string() + println("gonna unserialize") + unpack(uns, pos) = unserialize(bin, pos) + println("gonna done") + + return 0 + */ + + /*a = load_grammer(file_contents)*/ + /*println("grammer loaded, calculate_state_automaton")*/ + /*a.calculate_first_set()*/ + /*a.calculate_state_automaton()*/ + if (file_exists(compiled_name)) { + println("cached file exists") + var pos = 0 + var binary = read_file_binary(compiled_name) + println("read file!") + var cached_contents = string() + println("made tmp string!") + unpack(cached_contents, pos) = unserialize(binary, pos) + println("unserialized the string!") + if (cached_contents == file_contents) { + println("loaded_and_valid, using cached version!") + loaded_and_valid = true + unpack(a, pos) = unserialize(binary, pos) + println("finished unserializeing!!") + } else { + println("file contents do not match:") + println("CACHED:") + println(cached_contents) + println("REAL:") + println(file_contents) + println("END") + } + } else { + println("cached file does not exist") + } + if (!loaded_and_valid) { + println("Not loaded_and_valid, re-generating and writing out") + a = load_grammer(file_contents) + println("grammer loaded, calculate_first_set") + a.calculate_first_set() + println("grammer loaded, calculate_state_automaton") + a.calculate_state_automaton() + println("calculated, writing out") + write_file_binary(compiled_name, serialize(file_contents) + serialize(a)) + println("done writing") + } println(a.to_string()) var doFirstSet = fun() { - a.calculate_first_set() println("///////////////////START FIRST SET/////////////") println("//TERMINALS//") a.terminals.for_each( fun(terminal: util::pair) { @@ -39,7 +103,7 @@ fun main():int { }) println("///////////////////END FIRST SET/////////////") } - doFirstSet() + /*doFirstSet()*/ var lex = lexer(a.terminals) @@ -50,8 +114,9 @@ fun main():int { println("woo lexing:") /*range(8).for_each(fun(i: int) { println(lex.next().to_string()); } )*/ /*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/ - println(a.to_string()) - a.calculate_state_automaton() + /*println(a.to_string())*/ + + var parse.construct(a): parser /*var result = parse.parse_input(string("a"), string("fun name"))*/ var result = parse.parse_input(read_file(string("to_parse.krak")), string("fun name")) diff --git a/tests/test_serialization.expected_results b/tests/test_serialization.expected_results index 8117860..a1a3893 100644 --- a/tests/test_serialization.expected_results +++ b/tests/test_serialization.expected_results @@ -1,3 +1,6 @@ 7 = 7 9 , 11 = 9 , 11 1 2 3 4 5 +hello serialize +3 = 2.700000 +50 = 3.141590 diff --git a/tests/test_serialization.krak b/tests/test_serialization.krak index e9cdc20..3523335 100644 --- a/tests/test_serialization.krak +++ b/tests/test_serialization.krak @@ -4,6 +4,7 @@ import string:* import util:* import vector:* import vector_literals:* +import map:* fun main():int { var intA = 7 @@ -43,6 +44,28 @@ fun main():int { back.for_each(fun(i: int) { print(i); print(" "); }) println() + // ok, lets do a string + write_file_binary(string("bintest.bin"), serialize(string("hello serialize"))) + bin = read_file_binary(string("bintest.bin")) + var backStr = string() + pos = 0 + unpack(backStr, pos) = unserialize(bin, 0) + println(backStr) + + // ok, lets do a map + var m = map(3, 2.7) + m.set(50, 3.14159) + write_file_binary(string("bintest.bin"), serialize(m)) + bin = read_file_binary(string("bintest.bin")) + var backM = map() + pos = 0 + unpack(backM, pos) = unserialize>(bin, 0) + backM.for_each(fun(key: int, value: double) { + print(key) + print(" = ") + println(value) + }) + return 0 }