kraken/stdlib/grammer.krak

import string
import vector
import set
import symbol
import regex
import io

// Splits grammar source text into words.
//
// Words are separated by runs of spaces or by newlines. A '#' starts a comment that runs
// through the end of its line; the skipped text (everything from the current word start
// through the end of the comment line) is echoed with io::print and dropped. A double-quoted
// section is scanned as a unit, so spaces inside the quotes do not split the word. A '"'
// preceded by an odd number of backslashes counts as escaped and does not end the section.
//
// gram_str: the grammar text to split.
// Returns the words in order of appearance. Empty words are never emitted, and a final word
// that is not followed by a space or newline is still returned.
fun split_into_words(gram_str: string::string): vector::vector<string::string> {
    var out.construct(): vector::vector<string::string>
    var len = gram_str.length()
    var begin = 0
    for (var i = 0; i < len; i++;) {
        // Loop rather than test once, so that back-to-back comment lines are all skipped
        // instead of the second '#' leaking into the output as a word.
        while (i < len && gram_str[i] == '#') {
            while (i < len && gram_str[i] != '\n') i++
            // step over the newline, unless the comment ran to the end of the input
            if (i < len)
                i++
            io::print("comment: "); io::print(gram_str.slice(begin, i))
            begin = i
        }
        if (i < len && gram_str[i] == '"') {
            i++
            // the i < len checks stop the scan at the end of input on an unterminated quote
            while (i < len && gram_str[i] != '"') {
                i++
                // if we hit a " we check to see if an odd number of backslashes precede it
                // (meaning that the " is escaped), and if so, we move on. Otherwise, we found
                // the end of the quoted string
                if (i < len && gram_str[i] == '"') {
                    var escaped = 0
                    // the opening quote is never a backslash, so this walk back stays in bounds
                    while (gram_str[i-(1+escaped)] == '\\') escaped++
                    if (escaped % 2)
                        i++
                }
            }
        }
        if (i < len && gram_str[i] == ' ') {
            // leading spaces (e.g. an indented line) must not produce an empty word
            if (i != begin)
                out.add(gram_str.slice(begin, i))
            // allow multiple spaces between words
            while (i < len && gram_str[i] == ' ') i++
            begin = i
            // the for loop's i++ puts us back on the first character after the spaces
            i--
        }
        if (i < len && gram_str[i] == '\n') {
            if (i != begin)
                out.add(gram_str.slice(begin, i))
            begin = i + 1
        }
    }
    // flush a final word that was not terminated by a space or newline
    if (begin < len)
        out.add(gram_str.slice(begin, len))
    return out
}

// Builds a grammer from grammar source text.
//
// The text is tokenized with split_into_words and read as rules of the form
//     name = sym sym | sym sym ;
// where "=" is ignored, "|" closes the current alternative and ";" closes the current
// alternative and the rule. The first ordinary word after a ";" (or at the start) becomes
// the left-hand symbol. A right-hand word starting with '"' is stored with its first and
// last characters stripped and is also registered in gram.regexs; any other right-hand
// word is stored as-is.
//
// gram_str: the grammar text.
// Returns the populated grammer. An alternative that is not closed by "|" or ";" is not added.
fun load_grammer(gram_str: string::string): grammer {
    var gram.construct(): grammer
    var leftSide = symbol::symbol("", false)
    var doLeftSide = true
    var rightSide = vector::vector<symbol::symbol>()
    split_into_words(gram_str).for_each(fun(word: string::string) {
        if (word.length() == 0) {
            // nothing to interpret; also keeps word[0] below from indexing an empty string
        } else if (word == "=") {
            // do nothing
        } else if (word == "|") {
            gram.rules.add(rule(leftSide, rightSide))
            rightSide = vector::vector<symbol::symbol>()
        } else if (word == ";") {
            gram.rules.add(rule(leftSide, rightSide))
            rightSide = vector::vector<symbol::symbol>()
            doLeftSide = true
        } else {
            if (doLeftSide) {
                // Same flag as a bare name on the right-hand side below, so a name is built
                // the same way wherever it appears. NOTE(review): the second argument is
                // presumably "is terminal" (quoted words pass true) - confirm against symbol.
                leftSide = symbol::symbol(word, false)
            } else if (word[0] == '"') {
                // slice(1,-2) drops the surrounding quote characters
                rightSide.add(symbol::symbol(word.slice(1,-2), true))
                // NOTE: no de-duplication; a literal that appears twice is added twice
                gram.regexs.add(regex::regex(word.slice(1,-2)))
            } else {
                rightSide.add(symbol::symbol(word, false))
            }
            doLeftSide = false
        }
    })
    return gram
}

// A grammar: the list of production rules plus the regexes collected for its quoted symbols.
obj grammer (Object) {
    // every alternative of every production, in the order they were added
    var rules: vector::vector<rule>
    // one regex per quoted symbol that was added
    var regexs: vector::vector<regex::regex>

    // Initializes both vectors. Returns this, as the declared *grammer return type
    // requires (previously the function fell off the end without returning a value).
    fun construct(): *grammer {
        rules.construct()
        regexs.construct()
        return this
    }
    // Initializes this grammer as a copy of old.
    fun copy_construct(old: *grammer) {
        rules.copy_construct(&old->rules)
        regexs.copy_construct(&old->regexs)
    }
    // Replaces the current contents with a copy of other: the existing members are
    // destructed first, then copy-constructed from other.
    fun operator=(other: grammer) {
        destruct()
        copy_construct(&other)
    }
    // Destructs both member vectors.
    fun destruct() {
        rules.destruct()
        regexs.destruct()
    }

    // Renders the grammer as text: a "grammer rules:" header followed by one tab-indented
    // line per rule, then a "regexs:" header followed by one tab-indented line per regex
    // (its regexString).
    fun to_string(): string::string {
        var result = string::string("grammer rules:")
        rules.for_each( fun(i : rule) { result += string::string("\n\t") + i.to_string(); } )
        result += "\nregexs:"
        regexs.for_each( fun(i : regex::regex) { result += string::string("\n\t") + i.regexString; } )
        return result
    }
}

// Convenience factory: returns a freshly constructed rule whose left-hand symbol is lhs
// and whose right-hand symbol sequence is rhs. The remaining fields keep whatever
// rule's construct() set them to.
fun rule(lhs: symbol::symbol, rhs: vector::vector<symbol::symbol>): rule {
    var made.construct(): rule
    // the two assignments are independent of each other
    made.rhs = rhs
    made.lhs = lhs
    return made
}

// One production alternative: a left-hand symbol, the right-hand symbol sequence, and
// two extra fields (position, lookahead) that start at 0 and empty respectively.
obj rule (Object) {
    // symbol being defined
    var lhs: symbol::symbol
    // symbols the left-hand side expands to, in order
    var rhs: vector::vector<symbol::symbol>
    // NOTE(review): presumably an index into rhs used by a parser - confirm with callers
    var position: int
    // NOTE(review): presumably the lookahead symbols for this rule - confirm with callers
    var lookahead: set::set<symbol::symbol>

    // Initializes all members (position starts at 0). Returns this, as the declared
    // *rule return type requires (previously the function fell off the end without
    // returning a value).
    fun construct(): *rule {
        lhs.construct()
        rhs.construct()
        position = 0
        lookahead.construct()
        return this
    }
    // Initializes this rule as a copy of other.
    fun copy_construct(other: *rule) {
        lhs.copy_construct(&other->lhs)
        rhs.copy_construct(&other->rhs)
        position = other->position
        lookahead.copy_construct(&other->lookahead)
    }
    // Replaces the current contents with a copy of other: the existing members are
    // destructed first, then copy-constructed from other.
    fun operator=(other: rule) {
        destruct()
        copy_construct(&other)
    }
    // Destructs the non-primitive members; position is a plain int and needs no cleanup.
    fun destruct() {
        lhs.destruct()
        rhs.destruct()
        lookahead.destruct()
    }

    // Renders the rule as "<lhs name> -> " followed by each right-hand symbol's
    // to_string() and ", " (so a non-empty right-hand side ends with a trailing ", ").
    fun to_string(): string::string {
        var result = lhs.name + " -> "
        rhs.for_each( fun(i : symbol::symbol) { result += i.to_string() + ", "; } )
        return result
    }
}