Serialization and caching the table works!

This commit is contained in:
Nathan Braswell
2015-08-26 03:45:34 -04:00
parent b67d5e85fe
commit d72cbdcedb
16 changed files with 276 additions and 25 deletions

4
.gitignore vendored
View File

@@ -12,3 +12,7 @@ kraklist.txt
.*.un~ .*.un~
papers papers
callgrind* callgrind*
*.comp_new
*.comp_bac
bintest.bin
*.dot

View File

@@ -7,6 +7,7 @@ import symbol
import regex import regex
import io import io
import util import util
import serialize
fun split_into_words(gram_str: string::string): vector::vector<string::string> { fun split_into_words(gram_str: string::string): vector::vector<string::string> {
var out.construct(): vector::vector<string> var out.construct(): vector::vector<string>
@@ -100,7 +101,7 @@ fun load_grammer(gram_str: string::string): grammer {
return gram return gram
} }
obj grammer (Object) { obj grammer (Object, Serializable) {
var rules: vector::vector<rule> var rules: vector::vector<rule>
var non_terminals: set::set<symbol::symbol> var non_terminals: set::set<symbol::symbol>
var terminals: vector::vector<util::pair<symbol::symbol, regex::regex>> var terminals: vector::vector<util::pair<symbol::symbol, regex::regex>>
@@ -133,6 +134,20 @@ obj grammer (Object) {
parse_table.destruct() parse_table.destruct()
} }
// Serializes this grammer by concatenating the serialized forms of its fields,
// in this order: rules, non_terminals, terminals, first_set_map, parse_table.
// unserialize() reads the fields back in the same order.
fun serialize(): vector::vector<char> {
return serialize::serialize(rules) + serialize::serialize(non_terminals) + serialize::serialize(terminals) + serialize::serialize(first_set_map) + serialize::serialize(parse_table)
}
// Rebuilds this grammer from the bytes in `it`, starting at offset `pos`.
// Fields are read in the order serialize() writes them. Each
// serialize::unserialize call returns a (value, next offset) pair, and the
// util::unpack(field, pos) = ... form stores the value into the field and the
// new offset into `pos`, so `pos` advances field by field.
// Returns the offset just past the last field read.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// get everything constructed before the assignment
construct()
util::unpack(rules, pos) = serialize::unserialize<vector::vector<rule>>(it, pos)
util::unpack(non_terminals, pos) = serialize::unserialize<set::set<symbol::symbol>>(it, pos)
util::unpack(terminals, pos) = serialize::unserialize<vector::vector<util::pair<symbol::symbol, regex::regex>>>(it, pos)
util::unpack(first_set_map, pos) = serialize::unserialize<map::map<symbol::symbol, set::set<symbol::symbol>>>(it, pos)
util::unpack(parse_table, pos) = serialize::unserialize<table>(it, pos)
return pos
}
fun calculate_first_set() { fun calculate_first_set() {
// the first set of a terminal is itself // the first set of a terminal is itself
terminals.for_each( fun(terminal: util::pair<symbol::symbol, regex::regex>) terminals.for_each( fun(terminal: util::pair<symbol::symbol, regex::regex>)
@@ -321,12 +336,30 @@ fun rule(lhs: symbol::symbol, rhs: vector::vector<symbol::symbol>): rule {
return toRet return toRet
} }
obj rule (Object) { obj rule (Object, Serializable) {
var lhs: symbol::symbol var lhs: symbol::symbol
var rhs: vector::vector<symbol::symbol> var rhs: vector::vector<symbol::symbol>
var position: int var position: int
var lookahead: set::set<symbol::symbol> var lookahead: set::set<symbol::symbol>
// Serializes this rule as the concatenation of lhs, rhs, position and
// lookahead, in that order (the order unserialize() expects).
fun serialize(): vector::vector<char> {
return serialize::serialize(lhs) + serialize::serialize(rhs) + serialize::serialize(position) + serialize::serialize(lookahead)
}
// Rebuilds this rule from the bytes in `it`, starting at offset `pos`, and
// returns the offset just past the data consumed.
// Object-typed fields (lhs, rhs, lookahead) are first read into constructed
// local temporaries and then copy_construct'ed into the members; the int
// `position` is unpacked directly.
// NOTE(review): presumably the temporaries are needed because this can be
// called on a rule whose members have not been constructed yet - confirm.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
var tempLhs = symbol::invalid_symbol()
var tempRhs = vector::vector<symbol::symbol>()
var tempLookahead = set::set<symbol::symbol>()
// read order must match serialize(): lhs, rhs, position, lookahead
util::unpack(tempLhs, pos) = serialize::unserialize<symbol::symbol>(it, pos)
util::unpack(tempRhs, pos) = serialize::unserialize<vector::vector<symbol::symbol>>(it, pos)
util::unpack(position, pos) = serialize::unserialize<int>(it, pos)
util::unpack(tempLookahead, pos) = serialize::unserialize<set::set<symbol::symbol>>(it, pos)
lhs.copy_construct(&tempLhs)
rhs.copy_construct(&tempRhs)
lookahead.copy_construct(&tempLookahead)
return pos
}
fun construct(): *rule { fun construct(): *rule {
lhs.construct() lhs.construct()
rhs.construct() rhs.construct()
@@ -480,7 +513,7 @@ obj action {
} }
} }
obj table (Object) { obj table (Object, Serializable) {
// a 2 dimensional table made of a vector and a map that maps from stateno & symbol to a vector of parse actions // a 2 dimensional table made of a vector and a map that maps from stateno & symbol to a vector of parse actions
var items: vector::vector<map::map<symbol::symbol, vector::vector<action>>> var items: vector::vector<map::map<symbol::symbol, vector::vector<action>>>
@@ -497,6 +530,15 @@ obj table (Object) {
fun destruct() { fun destruct() {
items.destruct() items.destruct()
} }
// Serializes the table; `items` is the only state written out.
fun serialize(): vector::vector<char> {
return serialize::serialize(items)
}
// Rebuilds the table from the bytes in `it`, starting at offset `pos`.
// The data is read into a local temporary which is then copy_construct'ed
// into `items`. Returns the offset just past the data consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
var temp = vector::vector<map::map<symbol::symbol, vector::vector<action>>>()
util::unpack(temp, pos) = serialize::unserialize<vector::vector<map::map<symbol::symbol, vector::vector<action>>>>(it, pos)
items.copy_construct(&temp)
return pos
}
fun expand_to(include_state: int) { fun expand_to(include_state: int) {
while (include_state >= items.size) while (include_state >= items.size)
items.addEnd(map::map<symbol::symbol, vector::vector<action>>()) items.addEnd(map::map<symbol::symbol, vector::vector<action>>())

View File

@@ -75,6 +75,22 @@ fun print(toPrint: double) : void{
} }
// Ok, just some DEAD simple file io for now // Ok, just some DEAD simple file io for now
// Reports whether `path` can be opened for reading.
// The check is done by embedded C code: if fopen(char_path, "r") succeeds the
// result is set to true and the handle is closed again immediately.
// Returns false if the C block is not compiled in or the open fails.
fun file_exists(path: string::string): bool {
var char_path = path.toCharArray()
// free the temporary char array via a deferred delete
defer delete(char_path)
var result = false
// C-only section (guarded by __if_comp__ __C__)
__if_comp__ __C__ {
// NOTE(review): the C snippet declares its own `bool result` while `result`
// is also listed in the passthrough's variable list - confirm this is the
// binding simple_passthrough expects and that the value reaches the Kraken
// variable.
simple_passthrough(char_path:result:) """
bool result = false;
FILE *fp = fopen(char_path, "r");
if (fp) {
result = true;
fclose(fp);
}
"""
}
return result
}
fun read_file(path: string::string): string::string { fun read_file(path: string::string): string::string {
var toRet.construct(read_file_binary(path)): string::string var toRet.construct(read_file_binary(path)): string::string
return toRet return toRet

View File

@@ -1,5 +1,7 @@
import vector import vector
import io import io
import serialize
import util
fun map<T,U>(): map<T,U> { fun map<T,U>(): map<T,U> {
var toRet.construct(): map<T,U> var toRet.construct(): map<T,U>
@@ -11,13 +13,14 @@ fun map<T,U>(key:T, value:U): map<T,U> {
return toRet return toRet
} }
obj map<T,U> (Object) { obj map<T,U> (Object, Serializable) {
var keys: vector::vector<T> var keys: vector::vector<T>
var values: vector::vector<U> var values: vector::vector<U>
fun construct() { fun construct(): *map<T,U> {
keys.construct() keys.construct()
values.construct() values.construct()
return this
} }
fun copy_construct(old: *map<T,U>) { fun copy_construct(old: *map<T,U>) {
keys.copy_construct(&old->keys) keys.copy_construct(&old->keys)
@@ -31,6 +34,18 @@ obj map<T,U> (Object) {
keys.destruct() keys.destruct()
values.destruct() values.destruct()
} }
// Serializes the map as its `keys` vector followed by its `values` vector.
fun serialize(): vector::vector<char> {
return serialize::serialize(keys) + serialize::serialize(values)
}
// Rebuilds the map from the bytes in `it`, starting at offset `pos`.
// Keys and then values are read into local temporaries (same order as
// serialize()) and copy_construct'ed into the members.
// Returns the offset just past the data consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
var tempKeys = vector::vector<T>()
var tempValues = vector::vector<U>()
util::unpack(tempKeys, pos) = serialize::unserialize<vector::vector<T>>(it, pos)
util::unpack(tempValues, pos) = serialize::unserialize<vector::vector<U>>(it, pos)
keys.copy_construct(&tempKeys)
values.copy_construct(&tempValues)
return pos
}
fun operator[]=(key: T, value: U) { fun operator[]=(key: T, value: U) {
set(key,value) set(key,value)
} }

View File

@@ -454,6 +454,15 @@ obj reduction (Object) {
var nullable_parts: *tree<symbol> var nullable_parts: *tree<symbol>
var label: *tree<symbol> var label: *tree<symbol>
// No-argument constructor: puts the reduction into a placeholder state with
// null tree pointers, an invalid symbol and a length of -1.
// Returns `this`.
fun construct(): *reduction {
from = null<tree<int>>()
sym = invalid_symbol()
length = -1
nullable_parts = null<tree<symbol>>()
label = null<tree<symbol>>()
return this
}
fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, labelIn:*tree<symbol>): *reduction { fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, labelIn:*tree<symbol>): *reduction {
from = f from = f
sym.copy_construct(&s) sym.copy_construct(&s)

View File

@@ -5,6 +5,7 @@ import mem
import set import set
import util import util
import conversions import conversions
import serialize
fun regex(in: *char):regex { fun regex(in: *char):regex {
return regex(string::string(in)) return regex(string::string(in))
@@ -40,7 +41,7 @@ obj regexState (Object) {
} }
} }
obj regex (Object) { obj regex (Object, Serializable) {
var regexString: string::string var regexString: string::string
var begin: *regexState var begin: *regexState
var referenceCounter: *int var referenceCounter: *int
@@ -85,6 +86,15 @@ obj regex (Object) {
mem::delete(referenceCounter) mem::delete(referenceCounter)
} }
} }
// Serializes the regex. Only `regexString` is written out; `begin` and
// `referenceCounter` are not stored.
fun serialize(): vector::vector<char> {
return serialize::serialize(regexString)
}
// Rebuilds the regex from the bytes in `it`, starting at offset `pos`.
// Reads back the stored regex string and passes it to construct(temp).
// NOTE(review): presumably construct(string) rebuilds the state machine and
// reference counter from the pattern text - confirm against that constructor.
// Returns the offset just past the data consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
var temp = string::string()
util::unpack(temp, pos) = serialize::unserialize<string::string>(it, pos)
construct(temp)
return pos
}
fun operator==(other: regex):bool { fun operator==(other: regex):bool {
return regexString == other.regexString return regexString == other.regexString

View File

@@ -16,13 +16,13 @@ fun serialize<T>(it: T): vector::vector<char> {
} }
// dead simple wrapper for ease of use // dead simple wrapper for ease of use
fun unserialize<T>(it: vector::vector<char>): T { fun unserialize<T>(it: ref vector::vector<char>): T {
return unserialize<T>(it, 0).first return unserialize<T>(it, 0).first
} }
fun unserialize<T>(it: vector::vector<char>, pos: int): util::pair<T,int> { fun unserialize<T>(it: ref vector::vector<char>, pos: int): util::pair<T,int> {
return util::make_pair(*conversions::cast_ptr<*char,*T>(it.getBackingMemory()+pos), pos + mem::sizeof<T>()) return util::make_pair(*conversions::cast_ptr<*char,*T>(it.getBackingMemory()+pos), pos + mem::sizeof<T>())
} }
fun unserialize<T(Serializable)>(it: vector::vector<char>, pos: int): util::pair<T,int> { fun unserialize<T(Serializable)>(it: ref vector::vector<char>, pos: int): util::pair<T,int> {
var toRet: T var toRet: T
pos = toRet.unserialize(it, pos) pos = toRet.unserialize(it, pos)
return util::make_pair(toRet, pos) return util::make_pair(toRet, pos)

View File

@@ -1,5 +1,7 @@
import vector import vector
import io import io
import serialize
import util
fun set<T>(): set<T> { fun set<T>(): set<T> {
var toRet.construct() : set<T> var toRet.construct() : set<T>
@@ -18,10 +20,11 @@ fun from_vector<T>(items: vector::vector<T>): set<T> {
return toRet return toRet
} }
obj set<T> (Object) { obj set<T> (Object, Serializable) {
var data: vector::vector<T> var data: vector::vector<T>
fun construct() { fun construct(): *set<T> {
data.construct() data.construct()
return this
} }
fun copy_construct(old: *set<T>) { fun copy_construct(old: *set<T>) {
data.copy_construct(&old->data) data.copy_construct(&old->data)
@@ -30,6 +33,15 @@ obj set<T> (Object) {
destruct() destruct()
copy_construct(&rhs) copy_construct(&rhs)
} }
// Serializes the set by serializing its backing `data` vector.
fun serialize(): vector::vector<char> {
return serialize::serialize(data)
}
// Rebuilds the set from the bytes in `it`, starting at offset `pos`.
// The vector is read into a local temporary and then copy_construct'ed into
// `data`. Returns the offset just past the data consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
var temp = vector::vector<T>()
util::unpack(temp, pos) = serialize::unserialize<vector::vector<T>>(it, pos)
data.copy_construct(&temp)
return pos
}
fun operator==(rhs: set<T>): bool { fun operator==(rhs: set<T>): bool {
if (size() != rhs.size()) if (size() != rhs.size())
return false return false

View File

@@ -1,4 +1,6 @@
import vector import vector
import serialize
import util
fun stack<T>():stack<T> { fun stack<T>():stack<T> {
@@ -11,7 +13,7 @@ fun stack<T>(in:T):stack<T> {
return out return out
} }
obj stack<T> (Object) { obj stack<T> (Object, Serializable) {
var data: vector::vector<T> var data: vector::vector<T>
fun construct(): *stack<T> { fun construct(): *stack<T> {
data.construct() data.construct()
@@ -26,6 +28,15 @@ obj stack<T> (Object) {
fun operator=(other: ref stack<T>) { fun operator=(other: ref stack<T>) {
data = other.data data = other.data
} }
// Serializes the stack by serializing its backing `data` vector.
fun serialize(): vector::vector<char> {
return serialize::serialize(data)
}
// Rebuilds the stack from the bytes in `it`, starting at offset `pos`.
// The vector is read into a local temporary and then copy_construct'ed into
// `data`. Returns the offset just past the data consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
var temp = vector::vector<T>()
util::unpack(temp, pos) = serialize::unserialize<vector::vector<T>>(it, pos)
data.copy_construct(&temp)
return pos
}
fun push(it: ref T) { fun push(it: ref T) {
data.addEnd(it) data.addEnd(it)
} }

View File

@@ -1,6 +1,7 @@
import vector import vector
import util import util
import mem import mem
import serialize
fun to_string(in: int): string { fun to_string(in: int): string {
var dest = mem::new<char>(mem::sizeof<int>() * 8) var dest = mem::new<char>(mem::sizeof<int>() * 8)
@@ -23,8 +24,11 @@ fun string(in:char):string {
out += in out += in
return out return out
} }
// Convenience factory for an empty string; delegates to string("").
fun string():string {
return string("")
}
obj string (Object) { obj string (Object, Serializable) {
var data: vector::vector<char>; var data: vector::vector<char>;
fun construct(): *string { fun construct(): *string {
data.construct(); data.construct();
@@ -69,6 +73,15 @@ obj string (Object) {
data.destruct() data.destruct()
} }
// Serializes the string by serializing its backing `data` char vector.
fun serialize(): vector::vector<char> {
return serialize::serialize(data)
}
// Rebuilds the string from the bytes in `it`, starting at offset `pos`.
// Unlike the temp-and-copy_construct pattern, this constructs the string
// first and then unpacks straight into `data`.
// Returns the offset just past the data consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
construct()
util::unpack(data, pos) = serialize::unserialize<vector::vector<char>>(it, pos)
return pos
}
fun operator[](index: int): ref char { return data[index]; } fun operator[](index: int): ref char { return data[index]; }
fun slice(first: int, second: int): string { fun slice(first: int, second: int): string {
var new.construct(data.slice(first,second)): string var new.construct(data.slice(first,second)): string

View File

@@ -1,4 +1,7 @@
import string import string
import serialize
import vector
import util
fun null_symbol(): symbol { fun null_symbol(): symbol {
var toRet.construct(string::string("$NULL$"), false, string::string("$NULL$")): symbol var toRet.construct(string::string("$NULL$"), false, string::string("$NULL$")): symbol
@@ -33,7 +36,7 @@ fun symbol(nameIn: string::string, terminalIn: bool, dataIn: string::string): sy
return toRet return toRet
} }
obj symbol (Object) { obj symbol (Object, Serializable) {
var data: string::string var data: string::string
var name: string::string var name: string::string
var terminal: bool var terminal: bool
@@ -62,6 +65,19 @@ obj symbol (Object) {
destruct() destruct()
copy_construct(&old) copy_construct(&old)
} }
// Serializes the symbol as data, name and terminal, in that order
// (the order unserialize() expects).
fun serialize(): vector::vector<char> {
return serialize::serialize(data) + serialize::serialize(name) + serialize::serialize(terminal)
}
// Rebuilds the symbol from the bytes in `it`, starting at offset `pos`.
// The two strings are read into local temporaries and copy_construct'ed into
// `data` and `name`; the bool `terminal` is unpacked directly.
// Returns the offset just past the data consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
var tempData = string::string()
var tempName = string::string()
// read order must match serialize(): data, name, terminal
util::unpack(tempData, pos) = serialize::unserialize<string::string>(it, pos)
util::unpack(tempName, pos) = serialize::unserialize<string::string>(it, pos)
util::unpack(terminal, pos) = serialize::unserialize<bool>(it, pos)
data.copy_construct(&tempData)
name.copy_construct(&tempName)
return pos
}
fun operator==(other: ref symbol): bool { fun operator==(other: ref symbol): bool {
return data == other.data && name == other.name && terminal == other.terminal; return data == other.data && name == other.name && terminal == other.terminal;
} }

View File

@@ -1,5 +1,6 @@
import mem import mem
import vector import vector
import serialize
fun max<T>(a: T, b: T): T { fun max<T>(a: T, b: T): T {
if (a > b) if (a > b)
@@ -35,7 +36,7 @@ obj unpack_dummy<T,U> {
} }
obj pair<T,U> (Object) { obj pair<T,U> (Object, Serializable) {
var first: T var first: T
var second: U var second: U
@@ -60,6 +61,17 @@ obj pair<T,U> (Object) {
mem::maybe_destruct(&first) mem::maybe_destruct(&first)
mem::maybe_destruct(&second) mem::maybe_destruct(&second)
} }
// Serializes the pair as `first` followed by `second`.
fun serialize(): vector::vector<char> {
return serialize::serialize(first) + serialize::serialize(second)
}
// Rebuilds the pair from the bytes in `it`, starting at offset `pos`.
// Each serialize::unserialize call returns a (value, next offset) pair, so
// the second element is read starting at first_pair.second, and the
// function returns second_pair.second as the offset past the data consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// can't use unpack :( (b/c we can't make an already constructed empty one)
var first_pair = serialize::unserialize<T>(it, pos)
var second_pair = serialize::unserialize<U>(it, first_pair.second)
// maybe_copy_construct copies the decoded values into the members
mem::maybe_copy_construct(&first, &first_pair.first)
mem::maybe_copy_construct(&second, &second_pair.first)
return second_pair.second
}
// the old unnecessary template to prevent generation // the old unnecessary template to prevent generation
// if not used trick (in this case, changing out U with V) // if not used trick (in this case, changing out U with V)

View File

@@ -46,7 +46,7 @@ obj vector<T> (Object, Serializable) {
toRet += serialize(data[i]) toRet += serialize(data[i])
return toRet return toRet
} }
fun unserialize(it: vector<char>, pos: int): int { fun unserialize(it: ref vector<char>, pos: int): int {
unpack(size, pos) = unserialize<int>(it, pos) unpack(size, pos) = unserialize<int>(it, pos)
data = new<T>(size) data = new<T>(size)
available = size available = size

View File

@@ -6,17 +6,81 @@ import string:*
import util:* import util:*
import symbol:* import symbol:*
import tree:* import tree:*
import serialize:*
fun main():int { fun main():int {
var a = load_grammer(read_file(string("../krakenGrammer.kgm"))) var a.construct(): grammer
/*var a = load_grammer(read_file(string("grammer.kgm")))*/
/*var a = load_grammer(read_file(string("grammer2.kgm")))*/ var file_name = string("../krakenGrammer.kgm")
/*var a = load_grammer(read_file(string("grammer3.kgm")))*/ /*var file_name = string("grammer.kgm")*/
/*var a = load_grammer(read_file(string("grammer4.kgm")))*/ /*var file_name = string("grammer2.kgm")*/
/*var file_name = string("grammer3.kgm")*/
/*var file_name = string("grammer4.kgm")*/
var compiled_name = file_name + string(".comp_new")
var file_contents = read_file(file_name)
var loaded_and_valid = false
/*
println("gonna serialize")
var s = serialize(file_contents)
println("gonna write")
write_file_binary(compiled_name, s)
println("gonna read")
var bin = read_file_binary(compiled_name)
println("gonna setup")
var pos = 0
var uns = string()
println("gonna unserialize")
unpack(uns, pos) = unserialize<string>(bin, pos)
println("gonna done")
return 0
*/
/*a = load_grammer(file_contents)*/
/*println("grammer loaded, calculate_state_automaton")*/
/*a.calculate_first_set()*/
/*a.calculate_state_automaton()*/
if (file_exists(compiled_name)) {
println("cached file exists")
var pos = 0
var binary = read_file_binary(compiled_name)
println("read file!")
var cached_contents = string()
println("made tmp string!")
unpack(cached_contents, pos) = unserialize<string>(binary, pos)
println("unserialized the string!")
if (cached_contents == file_contents) {
println("loaded_and_valid, using cached version!")
loaded_and_valid = true
unpack(a, pos) = unserialize<grammer>(binary, pos)
println("finished unserializeing!!")
} else {
println("file contents do not match:")
println("CACHED:")
println(cached_contents)
println("REAL:")
println(file_contents)
println("END")
}
} else {
println("cached file does not exist")
}
if (!loaded_and_valid) {
println("Not loaded_and_valid, re-generating and writing out")
a = load_grammer(file_contents)
println("grammer loaded, calculate_first_set")
a.calculate_first_set()
println("grammer loaded, calculate_state_automaton")
a.calculate_state_automaton()
println("calculated, writing out")
write_file_binary(compiled_name, serialize(file_contents) + serialize(a))
println("done writing")
}
println(a.to_string()) println(a.to_string())
var doFirstSet = fun() { var doFirstSet = fun() {
a.calculate_first_set()
println("///////////////////START FIRST SET/////////////") println("///////////////////START FIRST SET/////////////")
println("//TERMINALS//") println("//TERMINALS//")
a.terminals.for_each( fun(terminal: util::pair<symbol::symbol, regex::regex>) { a.terminals.for_each( fun(terminal: util::pair<symbol::symbol, regex::regex>) {
@@ -39,7 +103,7 @@ fun main():int {
}) })
println("///////////////////END FIRST SET/////////////") println("///////////////////END FIRST SET/////////////")
} }
doFirstSet() /*doFirstSet()*/
var lex = lexer(a.terminals) var lex = lexer(a.terminals)
@@ -50,8 +114,9 @@ fun main():int {
println("woo lexing:") println("woo lexing:")
/*range(8).for_each(fun(i: int) { println(lex.next().to_string()); } )*/ /*range(8).for_each(fun(i: int) { println(lex.next().to_string()); } )*/
/*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/ /*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/
println(a.to_string()) /*println(a.to_string())*/
a.calculate_state_automaton()
var parse.construct(a): parser var parse.construct(a): parser
/*var result = parse.parse_input(string("a"), string("fun name"))*/ /*var result = parse.parse_input(string("a"), string("fun name"))*/
var result = parse.parse_input(read_file(string("to_parse.krak")), string("fun name")) var result = parse.parse_input(read_file(string("to_parse.krak")), string("fun name"))

View File

@@ -1,3 +1,6 @@
7 = 7 7 = 7
9 , 11 = 9 , 11 9 , 11 = 9 , 11
1 2 3 4 5 1 2 3 4 5
hello serialize
3 = 2.700000
50 = 3.141590

View File

@@ -4,6 +4,7 @@ import string:*
import util:* import util:*
import vector:* import vector:*
import vector_literals:* import vector_literals:*
import map:*
fun main():int { fun main():int {
var intA = 7 var intA = 7
@@ -43,6 +44,28 @@ fun main():int {
back.for_each(fun(i: int) { print(i); print(" "); }) back.for_each(fun(i: int) { print(i); print(" "); })
println() println()
// ok, lets do a string
write_file_binary(string("bintest.bin"), serialize(string("hello serialize")))
bin = read_file_binary(string("bintest.bin"))
var backStr = string()
pos = 0
unpack(backStr, pos) = unserialize<string>(bin, 0)
println(backStr)
// ok, lets do a map
var m = map(3, 2.7)
m.set(50, 3.14159)
write_file_binary(string("bintest.bin"), serialize(m))
bin = read_file_binary(string("bintest.bin"))
var backM = map<int,double>()
pos = 0
unpack(backM, pos) = unserialize<map<int,double>>(bin, 0)
backM.for_each(fun(key: int, value: double) {
print(key)
print(" = ")
println(value)
})
return 0 return 0
} }