Serialization and caching the table works!

This commit is contained in:
Nathan Braswell
2015-08-26 03:45:34 -04:00
parent b67d5e85fe
commit d72cbdcedb
16 changed files with 276 additions and 25 deletions

4
.gitignore vendored
View File

@@ -12,3 +12,7 @@ kraklist.txt
.*.un~
papers
callgrind*
*.comp_new
*.comp_bac
bintest.bin
*.dot

View File

@@ -7,6 +7,7 @@ import symbol
import regex
import io
import util
import serialize
fun split_into_words(gram_str: string::string): vector::vector<string::string> {
var out.construct(): vector::vector<string>
@@ -100,7 +101,7 @@ fun load_grammer(gram_str: string::string): grammer {
return gram
}
obj grammer (Object) {
obj grammer (Object, Serializable) {
var rules: vector::vector<rule>
var non_terminals: set::set<symbol::symbol>
var terminals: vector::vector<util::pair<symbol::symbol, regex::regex>>
@@ -133,6 +134,20 @@ obj grammer (Object) {
parse_table.destruct()
}
// Flattens the grammer into one byte vector. The fields are written in the
// order rules, non_terminals, terminals, first_set_map, parse_table, which is
// the order unserialize() reads them back.
fun serialize(): vector::vector<char> {
    var bytes = serialize::serialize(rules)
    bytes += serialize::serialize(non_terminals)
    bytes += serialize::serialize(terminals)
    bytes += serialize::serialize(first_set_map)
    bytes += serialize::serialize(parse_table)
    return bytes
}
// Rebuilds this grammer from the bytes in `it`, starting at offset `pos`.
// Fields are read in the same order serialize() writes them: rules,
// non_terminals, terminals, first_set_map, parse_table.
// Returns the offset just past the last byte consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// get everything constructed before the assignment
construct()
// NOTE(review): util::unpack(x, pos) = pair presumably stores pair.first in x
// and pair.second (the advanced offset) in pos - confirm against util.
util::unpack(rules, pos) = serialize::unserialize<vector::vector<rule>>(it, pos)
util::unpack(non_terminals, pos) = serialize::unserialize<set::set<symbol::symbol>>(it, pos)
util::unpack(terminals, pos) = serialize::unserialize<vector::vector<util::pair<symbol::symbol, regex::regex>>>(it, pos)
util::unpack(first_set_map, pos) = serialize::unserialize<map::map<symbol::symbol, set::set<symbol::symbol>>>(it, pos)
util::unpack(parse_table, pos) = serialize::unserialize<table>(it, pos)
return pos
}
fun calculate_first_set() {
// the first set of a terminal is itself
terminals.for_each( fun(terminal: util::pair<symbol::symbol, regex::regex>)
@@ -321,12 +336,30 @@ fun rule(lhs: symbol::symbol, rhs: vector::vector<symbol::symbol>): rule {
return toRet
}
obj rule (Object) {
obj rule (Object, Serializable) {
var lhs: symbol::symbol
var rhs: vector::vector<symbol::symbol>
var position: int
var lookahead: set::set<symbol::symbol>
// Writes the rule as bytes: lhs, then rhs, then position, then lookahead.
// unserialize() consumes the fields in this same order.
fun serialize(): vector::vector<char> {
    var bytes = serialize::serialize(lhs)
    bytes += serialize::serialize(rhs)
    bytes += serialize::serialize(position)
    bytes += serialize::serialize(lookahead)
    return bytes
}
// Rebuilds this rule from the bytes in `it`, starting at offset `pos`, reading
// lhs, rhs, position and lookahead in the order serialize() wrote them.
// Returns the offset just past the last byte consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// Object-typed fields are first read into already-constructed temporaries.
var tempLhs = symbol::invalid_symbol()
var tempRhs = vector::vector<symbol::symbol>()
var tempLookahead = set::set<symbol::symbol>()
// NOTE(review): util::unpack(x, pos) = pair presumably stores pair.first in x
// and pair.second (the advanced offset) in pos - confirm against util.
util::unpack(tempLhs, pos) = serialize::unserialize<symbol::symbol>(it, pos)
util::unpack(tempRhs, pos) = serialize::unserialize<vector::vector<symbol::symbol>>(it, pos)
// position is a plain int, so it is read straight into the member
util::unpack(position, pos) = serialize::unserialize<int>(it, pos)
util::unpack(tempLookahead, pos) = serialize::unserialize<set::set<symbol::symbol>>(it, pos)
// The members are copy-constructed (not assigned) from the temporaries.
// NOTE(review): presumably because this object is still unconstructed here -
// the Serializable overload of serialize::unserialize declares `var toRet: T`
// and calls toRet.unserialize directly - confirm.
lhs.copy_construct(&tempLhs)
rhs.copy_construct(&tempRhs)
lookahead.copy_construct(&tempLookahead)
return pos
}
fun construct(): *rule {
lhs.construct()
rhs.construct()
@@ -480,7 +513,7 @@ obj action {
}
}
obj table (Object) {
obj table (Object, Serializable) {
// a 2 dimensional table made of a vector and a map that maps from stateno & symbol to a vector of parse actions
var items: vector::vector<map::map<symbol::symbol, vector::vector<action>>>
@@ -497,6 +530,15 @@ obj table (Object) {
fun destruct() {
items.destruct()
}
// The table's only serialized state is `items`; its bytes are returned as-is.
fun serialize(): vector::vector<char> {
    var bytes = serialize::serialize(items)
    return bytes
}
// Rebuilds the table from the bytes in `it`, starting at offset `pos`.
// Returns the offset just past the last byte consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// Read into a constructed temporary, then copy-construct `items` from it.
var temp = vector::vector<map::map<symbol::symbol, vector::vector<action>>>()
// NOTE(review): util::unpack(x, pos) = pair presumably stores pair.first in x
// and pair.second (the advanced offset) in pos - confirm against util.
util::unpack(temp, pos) = serialize::unserialize<vector::vector<map::map<symbol::symbol, vector::vector<action>>>>(it, pos)
items.copy_construct(&temp)
return pos
}
fun expand_to(include_state: int) {
while (include_state >= items.size)
items.addEnd(map::map<symbol::symbol, vector::vector<action>>())

View File

@@ -75,6 +75,22 @@ fun print(toPrint: double) : void{
}
// Ok, just some DEAD simple file io for now
// Reports whether `path` can be opened for reading.
// The check is done in the C backend by calling fopen(path, "r"): the result
// is true only if fopen returns a non-null handle, which is then closed.
// A path that exists but cannot be opened for reading therefore yields false.
fun file_exists(path: string::string): bool {
// C needs a char array; it is deleted again via the defer below.
var char_path = path.toCharArray()
defer delete(char_path)
var result = false
// Only emitted when compiling to C; otherwise result keeps its initial false.
__if_comp__ __C__ {
// NOTE(review): `char_path:result:` presumably names the variable passed into
// the C snippet and the variable read back out of it - confirm against the
// other simple_passthrough users.
simple_passthrough(char_path:result:) """
bool result = false;
FILE *fp = fopen(char_path, "r");
if (fp) {
result = true;
fclose(fp);
}
"""
}
return result
}
fun read_file(path: string::string): string::string {
var toRet.construct(read_file_binary(path)): string::string
return toRet

View File

@@ -1,5 +1,7 @@
import vector
import io
import serialize
import util
fun map<T,U>(): map<T,U> {
var toRet.construct(): map<T,U>
@@ -11,13 +13,14 @@ fun map<T,U>(key:T, value:U): map<T,U> {
return toRet
}
obj map<T,U> (Object) {
obj map<T,U> (Object, Serializable) {
var keys: vector::vector<T>
var values: vector::vector<U>
fun construct() {
// Default constructor: constructs the (empty) key and value vectors and
// returns `this`, so the call can be used as an expression.
fun construct(): *map<T,U> {
keys.construct()
values.construct()
return this
}
fun copy_construct(old: *map<T,U>) {
keys.copy_construct(&old->keys)
@@ -31,6 +34,18 @@ obj map<T,U> (Object) {
keys.destruct()
values.destruct()
}
// Writes the map as the serialized key vector followed by the serialized
// value vector; unserialize() reads them back in that order.
fun serialize(): vector::vector<char> {
    var bytes = serialize::serialize(keys)
    bytes += serialize::serialize(values)
    return bytes
}
// Rebuilds the map from the bytes in `it`, starting at offset `pos`: first the
// key vector, then the value vector, matching serialize().
// Returns the offset just past the last byte consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// Read both vectors into constructed temporaries first.
var tempKeys = vector::vector<T>()
var tempValues = vector::vector<U>()
// NOTE(review): util::unpack(x, pos) = pair presumably stores pair.first in x
// and pair.second (the advanced offset) in pos - confirm against util.
util::unpack(tempKeys, pos) = serialize::unserialize<vector::vector<T>>(it, pos)
util::unpack(tempValues, pos) = serialize::unserialize<vector::vector<U>>(it, pos)
// Members are copy-constructed (not assigned) from the temporaries.
keys.copy_construct(&tempKeys)
values.copy_construct(&tempValues)
return pos
}
// Indexed assignment (m[key] = value): forwards to set(key, value).
fun operator[]=(key: T, value: U) {
set(key,value)
}

View File

@@ -454,6 +454,15 @@ obj reduction (Object) {
var nullable_parts: *tree<symbol>
var label: *tree<symbol>
// Default constructor: leaves the reduction in an "empty" state - length -1,
// an invalid symbol, and all three tree pointers null - and returns `this`.
fun construct(): *reduction {
    // value fields
    length = -1
    sym = invalid_symbol()
    // tree pointers all start out null
    label = null<tree<symbol>>()
    nullable_parts = null<tree<symbol>>()
    from = null<tree<int>>()
    return this
}
fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, labelIn:*tree<symbol>): *reduction {
from = f
sym.copy_construct(&s)

View File

@@ -5,6 +5,7 @@ import mem
import set
import util
import conversions
import serialize
fun regex(in: *char):regex {
return regex(string::string(in))
@@ -40,7 +41,7 @@ obj regexState (Object) {
}
}
obj regex (Object) {
obj regex (Object, Serializable) {
var regexString: string::string
var begin: *regexState
var referenceCounter: *int
@@ -85,6 +86,15 @@ obj regex (Object) {
mem::delete(referenceCounter)
}
}
// Only the pattern text (regexString) is serialized; unserialize() rebuilds
// the rest of the regex by constructing it from that string.
fun serialize(): vector::vector<char> {
    var bytes = serialize::serialize(regexString)
    return bytes
}
// Rebuilds the regex from the bytes in `it`, starting at offset `pos`.
// Only the pattern string is stored, so it is read back and the regex is then
// built by calling construct with that string.
// Returns the offset just past the last byte consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
var temp = string::string()
// NOTE(review): util::unpack(x, pos) = pair presumably stores pair.first in x
// and pair.second (the advanced offset) in pos - confirm against util.
util::unpack(temp, pos) = serialize::unserialize<string::string>(it, pos)
construct(temp)
return pos
}
fun operator==(other: regex):bool {
return regexString == other.regexString

View File

@@ -16,13 +16,13 @@ fun serialize<T>(it: T): vector::vector<char> {
}
// dead simple wrapper for ease of use
fun unserialize<T>(it: vector::vector<char>): T {
fun unserialize<T>(it: ref vector::vector<char>): T {
// Reads a T starting at offset 0 and returns just the value (.first),
// discarding the end offset the two-argument overload also returns.
return unserialize<T>(it, 0).first
}
fun unserialize<T>(it: vector::vector<char>, pos: int): util::pair<T,int> {
fun unserialize<T>(it: ref vector::vector<char>, pos: int): util::pair<T,int> {
// Reinterprets the bytes at (backing memory + pos) as a T and copies it out,
// paired with the offset advanced by sizeof<T>. No bounds check is done here.
// NOTE(review): presumably the fallback for types without their own
// unserialize method (a separate overload handles T(Serializable)) - confirm.
return util::make_pair(*conversions::cast_ptr<*char,*T>(it.getBackingMemory()+pos), pos + mem::sizeof<T>())
}
fun unserialize<T(Serializable)>(it: vector::vector<char>, pos: int): util::pair<T,int> {
fun unserialize<T(Serializable)>(it: ref vector::vector<char>, pos: int): util::pair<T,int> {
var toRet: T
pos = toRet.unserialize(it, pos)
return util::make_pair(toRet, pos)

View File

@@ -1,5 +1,7 @@
import vector
import io
import serialize
import util
fun set<T>(): set<T> {
var toRet.construct() : set<T>
@@ -18,10 +20,11 @@ fun from_vector<T>(items: vector::vector<T>): set<T> {
return toRet
}
obj set<T> (Object) {
obj set<T> (Object, Serializable) {
var data: vector::vector<T>
fun construct() {
// Default constructor: constructs the (empty) backing vector and returns
// `this`, so the call can be used as an expression.
fun construct(): *set<T> {
data.construct()
return this
}
fun copy_construct(old: *set<T>) {
data.copy_construct(&old->data)
@@ -30,6 +33,15 @@ obj set<T> (Object) {
destruct()
copy_construct(&rhs)
}
// A set serializes as its backing vector `data`.
fun serialize(): vector::vector<char> {
    var bytes = serialize::serialize(data)
    return bytes
}
// Rebuilds the set from the bytes in `it`, starting at offset `pos`.
// Returns the offset just past the last byte consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// Read into a constructed temporary, then copy-construct `data` from it.
var temp = vector::vector<T>()
// NOTE(review): util::unpack(x, pos) = pair presumably stores pair.first in x
// and pair.second (the advanced offset) in pos - confirm against util.
util::unpack(temp, pos) = serialize::unserialize<vector::vector<T>>(it, pos)
data.copy_construct(&temp)
return pos
}
fun operator==(rhs: set<T>): bool {
if (size() != rhs.size())
return false

View File

@@ -1,4 +1,6 @@
import vector
import serialize
import util
fun stack<T>():stack<T> {
@@ -11,7 +13,7 @@ fun stack<T>(in:T):stack<T> {
return out
}
obj stack<T> (Object) {
obj stack<T> (Object, Serializable) {
var data: vector::vector<T>
fun construct(): *stack<T> {
data.construct()
@@ -26,6 +28,15 @@ obj stack<T> (Object) {
// Assignment: replaces this stack's contents by assigning other's backing
// vector to ours.
fun operator=(other: ref stack<T>) {
data = other.data
}
// A stack serializes as its backing vector `data`.
fun serialize(): vector::vector<char> {
    var bytes = serialize::serialize(data)
    return bytes
}
// Rebuilds the stack from the bytes in `it`, starting at offset `pos`.
// Returns the offset just past the last byte consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// Read into a constructed temporary, then copy-construct `data` from it.
var temp = vector::vector<T>()
// NOTE(review): util::unpack(x, pos) = pair presumably stores pair.first in x
// and pair.second (the advanced offset) in pos - confirm against util.
util::unpack(temp, pos) = serialize::unserialize<vector::vector<T>>(it, pos)
data.copy_construct(&temp)
return pos
}
// Pushes `it` onto the stack by appending it to the end of the backing vector.
fun push(it: ref T) {
data.addEnd(it)
}

View File

@@ -1,6 +1,7 @@
import vector
import util
import mem
import serialize
fun to_string(in: int): string {
var dest = mem::new<char>(mem::sizeof<int>() * 8)
@@ -23,8 +24,11 @@ fun string(in:char):string {
out += in
return out
}
// Zero-argument factory: yields the string built from the empty literal "".
fun string():string {
    var empty = string("")
    return empty
}
obj string (Object) {
obj string (Object, Serializable) {
var data: vector::vector<char>;
fun construct(): *string {
data.construct();
@@ -69,6 +73,15 @@ obj string (Object) {
data.destruct()
}
// A string serializes as its backing char vector `data`.
fun serialize(): vector::vector<char> {
    var bytes = serialize::serialize(data)
    return bytes
}
// Rebuilds the string from the bytes in `it`, starting at offset `pos`.
// Returns the offset just past the last byte consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// Construct first so `data` is a valid vector before it is assigned to.
construct()
// NOTE(review): util::unpack(x, pos) = pair presumably stores pair.first in x
// and pair.second (the advanced offset) in pos - confirm against util.
util::unpack(data, pos) = serialize::unserialize<vector::vector<char>>(it, pos)
return pos
}
// Indexing: returns a reference to the character at `index` in the backing vector.
fun operator[](index: int): ref char { return data[index]; }
fun slice(first: int, second: int): string {
var new.construct(data.slice(first,second)): string

View File

@@ -1,4 +1,7 @@
import string
import serialize
import vector
import util
fun null_symbol(): symbol {
var toRet.construct(string::string("$NULL$"), false, string::string("$NULL$")): symbol
@@ -33,7 +36,7 @@ fun symbol(nameIn: string::string, terminalIn: bool, dataIn: string::string): sy
return toRet
}
obj symbol (Object) {
obj symbol (Object, Serializable) {
var data: string::string
var name: string::string
var terminal: bool
@@ -62,6 +65,19 @@ obj symbol (Object) {
destruct()
copy_construct(&old)
}
// Writes the symbol as bytes: data, then name, then the terminal flag.
// unserialize() consumes the fields in this same order.
fun serialize(): vector::vector<char> {
    var bytes = serialize::serialize(data)
    bytes += serialize::serialize(name)
    bytes += serialize::serialize(terminal)
    return bytes
}
// Rebuilds the symbol from the bytes in `it`, starting at offset `pos`, reading
// data, name and terminal in the order serialize() wrote them.
// Returns the offset just past the last byte consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// The two strings are first read into constructed temporaries.
var tempData = string::string()
var tempName = string::string()
// NOTE(review): util::unpack(x, pos) = pair presumably stores pair.first in x
// and pair.second (the advanced offset) in pos - confirm against util.
util::unpack(tempData, pos) = serialize::unserialize<string::string>(it, pos)
util::unpack(tempName, pos) = serialize::unserialize<string::string>(it, pos)
// terminal is a plain bool, so it is read straight into the member
util::unpack(terminal, pos) = serialize::unserialize<bool>(it, pos)
// String members are copy-constructed (not assigned) from the temporaries.
data.copy_construct(&tempData)
name.copy_construct(&tempName)
return pos
}
// Two symbols are equal when data, name and the terminal flag all match.
// The fields are compared in that order, stopping at the first mismatch.
fun operator==(other: ref symbol): bool {
    if (!(data == other.data))
        return false
    if (!(name == other.name))
        return false
    return terminal == other.terminal
}

View File

@@ -1,5 +1,6 @@
import mem
import vector
import serialize
fun max<T>(a: T, b: T): T {
if (a > b)
@@ -35,7 +36,7 @@ obj unpack_dummy<T,U> {
}
obj pair<T,U> (Object) {
obj pair<T,U> (Object, Serializable) {
var first: T
var second: U
@@ -60,6 +61,17 @@ obj pair<T,U> (Object) {
mem::maybe_destruct(&first)
mem::maybe_destruct(&second)
}
// Writes the pair as the bytes of `first` followed by the bytes of `second`.
fun serialize(): vector::vector<char> {
    var bytes = serialize::serialize(first)
    bytes += serialize::serialize(second)
    return bytes
}
// Rebuilds the pair from the bytes in `it`, starting at offset `pos`: first a
// T, then a U read from where the T ended.
// Returns the offset just past the last byte consumed.
fun unserialize(it: ref vector::vector<char>, pos: int): int {
// can't use unpack :( (b/c we can't make an already constructed empty one)
// Each call returns (value, next offset); the second read starts at the
// offset the first one returned.
var first_pair = serialize::unserialize<T>(it, pos)
var second_pair = serialize::unserialize<U>(it, first_pair.second)
// NOTE(review): maybe_copy_construct presumably copy-constructs only when the
// type has a copy constructor and plain-copies otherwise - confirm in mem.
mem::maybe_copy_construct(&first, &first_pair.first)
mem::maybe_copy_construct(&second, &second_pair.first)
return second_pair.second
}
// the old unnecessary template to prevent generation
// if not used trick (in this case, changing out U with V)

View File

@@ -46,7 +46,7 @@ obj vector<T> (Object, Serializable) {
toRet += serialize(data[i])
return toRet
}
fun unserialize(it: vector<char>, pos: int): int {
fun unserialize(it: ref vector<char>, pos: int): int {
unpack(size, pos) = unserialize<int>(it, pos)
data = new<T>(size)
available = size

View File

@@ -6,17 +6,81 @@ import string:*
import util:*
import symbol:*
import tree:*
import serialize:*
fun main():int {
var a.construct(): grammer
var a = load_grammer(read_file(string("../krakenGrammer.kgm")))
/*var a = load_grammer(read_file(string("grammer.kgm")))*/
/*var a = load_grammer(read_file(string("grammer2.kgm")))*/
/*var a = load_grammer(read_file(string("grammer3.kgm")))*/
/*var a = load_grammer(read_file(string("grammer4.kgm")))*/
var file_name = string("../krakenGrammer.kgm")
/*var file_name = string("grammer.kgm")*/
/*var file_name = string("grammer2.kgm")*/
/*var file_name = string("grammer3.kgm")*/
/*var file_name = string("grammer4.kgm")*/
var compiled_name = file_name + string(".comp_new")
var file_contents = read_file(file_name)
var loaded_and_valid = false
/*
println("gonna serialize")
var s = serialize(file_contents)
println("gonna write")
write_file_binary(compiled_name, s)
println("gonna read")
var bin = read_file_binary(compiled_name)
println("gonna setup")
var pos = 0
var uns = string()
println("gonna unserialize")
unpack(uns, pos) = unserialize<string>(bin, pos)
println("gonna done")
return 0
*/
/*a = load_grammer(file_contents)*/
/*println("grammer loaded, calculate_state_automaton")*/
/*a.calculate_first_set()*/
/*a.calculate_state_automaton()*/
if (file_exists(compiled_name)) {
println("cached file exists")
var pos = 0
var binary = read_file_binary(compiled_name)
println("read file!")
var cached_contents = string()
println("made tmp string!")
unpack(cached_contents, pos) = unserialize<string>(binary, pos)
println("unserialized the string!")
if (cached_contents == file_contents) {
println("loaded_and_valid, using cached version!")
loaded_and_valid = true
unpack(a, pos) = unserialize<grammer>(binary, pos)
println("finished unserializeing!!")
} else {
println("file contents do not match:")
println("CACHED:")
println(cached_contents)
println("REAL:")
println(file_contents)
println("END")
}
} else {
println("cached file does not exist")
}
if (!loaded_and_valid) {
println("Not loaded_and_valid, re-generating and writing out")
a = load_grammer(file_contents)
println("grammer loaded, calculate_first_set")
a.calculate_first_set()
println("grammer loaded, calculate_state_automaton")
a.calculate_state_automaton()
println("calculated, writing out")
write_file_binary(compiled_name, serialize(file_contents) + serialize(a))
println("done writing")
}
println(a.to_string())
var doFirstSet = fun() {
a.calculate_first_set()
println("///////////////////START FIRST SET/////////////")
println("//TERMINALS//")
a.terminals.for_each( fun(terminal: util::pair<symbol::symbol, regex::regex>) {
@@ -39,7 +103,7 @@ fun main():int {
})
println("///////////////////END FIRST SET/////////////")
}
doFirstSet()
/*doFirstSet()*/
var lex = lexer(a.terminals)
@@ -50,8 +114,9 @@ fun main():int {
println("woo lexing:")
/*range(8).for_each(fun(i: int) { println(lex.next().to_string()); } )*/
/*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/
println(a.to_string())
a.calculate_state_automaton()
/*println(a.to_string())*/
var parse.construct(a): parser
/*var result = parse.parse_input(string("a"), string("fun name"))*/
var result = parse.parse_input(read_file(string("to_parse.krak")), string("fun name"))

View File

@@ -1,3 +1,6 @@
7 = 7
9 , 11 = 9 , 11
1 2 3 4 5
hello serialize
3 = 2.700000
50 = 3.141590

View File

@@ -4,6 +4,7 @@ import string:*
import util:*
import vector:*
import vector_literals:*
import map:*
fun main():int {
var intA = 7
@@ -43,6 +44,28 @@ fun main():int {
back.for_each(fun(i: int) { print(i); print(" "); })
println()
// ok, lets do a string
write_file_binary(string("bintest.bin"), serialize(string("hello serialize")))
bin = read_file_binary(string("bintest.bin"))
var backStr = string()
pos = 0
unpack(backStr, pos) = unserialize<string>(bin, 0)
println(backStr)
// ok, lets do a map
var m = map(3, 2.7)
m.set(50, 3.14159)
write_file_binary(string("bintest.bin"), serialize(m))
bin = read_file_binary(string("bintest.bin"))
var backM = map<int,double>()
pos = 0
unpack(backM, pos) = unserialize<map<int,double>>(bin, 0)
backM.for_each(fun(key: int, value: double) {
print(key)
print(" = ")
println(value)
})
return 0
}