Finally make a clean sweep and delete / organize old files. Add skeleton for LaTeX formal writeup in doc/ and change license (since this is all new code from the past few years) to BSD-2-Clause-Patent
This commit is contained in:
96
working_files/fungll.kp
Normal file
96
working_files/fungll.kp
Normal file
@@ -0,0 +1,96 @@
|
||||
|
||||
(with_import "./collections.kp"
|
||||
(with_import "./rb.kp"
|
||||
(let (
|
||||
|
||||
; Implementing "Purely Functional GLL Parsing"
|
||||
; by L. Thomas van Binsbergena, Elizabeth Scott, Adrian Johnstone
|
||||
; retrived from from http://ltvanbinsbergen.nl/publications/jcola-fgll.pdf
|
||||
|
||||
; discriptor is a triple of grammer-slot and 2 indicies of t-string
|
||||
; corresponding to process
|
||||
; <X::= a.b,l,k>
|
||||
; I previously had this as nonterminal, rule-idx, idx into rule, l,r
|
||||
|
||||
; U - discriptors added to (worklist?), makes sure no duplicates added to "list"
|
||||
; P - binary relation between pairs of commencments and right extants
|
||||
; makes sure that later discoveries that use a sub-non-terminal that has already
|
||||
; been processed can be completed since the sub-non-terminal won't be
|
||||
; re-descended at the same index <s::=.d,k,k>
|
||||
;
|
||||
; a commencement is a pair of a nonterminal and a left extent (the arguemnts to
|
||||
; descend, since that's what we're skipping) to a set of right extants
|
||||
; G - binary relation between commencments and continuations, modified to include
|
||||
; actional continuation.
|
||||
; The normal continuation is a pair of as slot and a left extent.
|
||||
; So <<X,k> -> <g,l>> in G, with a new are is combined to form
|
||||
; discriptor <g,l,r> and BSR <g,l,k,r> whenever k,r are discovered for X
|
||||
; Note we haven't finished things with the above P, since some subs of the form
|
||||
; <s::=.d,k,k> or descriptors that follow them may not have been processed
|
||||
; yet. When new Right extants are discovered, we must add descriptors
|
||||
; <Y::=a's.b',l',r_j> and <X::as.b,l,rj> to R (if not in U) and add
|
||||
; BSR elements <Y::=a's.b',l',k,r_j> and <X::=as.b,l,k,r_j> to Y
|
||||
; Y - Our result BSR set!
|
||||
|
||||
; I've decided, a slot is [X [stff] int-for-dot]
|
||||
|
||||
id (lambda (sigma) sigma)
|
||||
altStart (lambda (t s k c) id)
|
||||
altOp (lambda (p q) (lambda (t s k c) (lcompose (p t s k c) (q t s [] k c))))
|
||||
term_parser (lambda (t [X b i] l k c) (lambda (sigma)
|
||||
(let (this_term (idx b (- i 1))
|
||||
_ (println "term parser looking for " this_term " at position " k " in " t)
|
||||
)
|
||||
(if (and (<= (+ k (len this_term)) (len t)) (= this_term (slice t k (+ k (len this_term))))) ((c [[X b i] l (+ (len this_term) k)]) sigma)
|
||||
sigma))))
|
||||
; the extra lambda layer of indirection is so that
|
||||
; recursive nonterminals can be made with rec-let, which
|
||||
; only works on functions. So both term types get wrapped in
|
||||
; an extra function which is evaluated in seqOp and parse
|
||||
term (lambda (s) (lambda () [ s term_parser ]))
|
||||
|
||||
continue (lambda (BSR_element c) (lambda ([U G P Y])
|
||||
(let (
|
||||
[slot l k r] BSR_element
|
||||
descriptor [slot l r]
|
||||
(X b i) slot
|
||||
Yp (if (or (!= 0 i) (= (len rhs) i)) (set-insert Y BSR_element)
|
||||
Y)
|
||||
Up (set-insert U descriptor)
|
||||
) (if (set-contains? U descriptor) [U G P Yp]
|
||||
((c descriptor) [Up G P Yp])))))
|
||||
seqStart (lambda (t X b l c0) (continue [[X b 0] l l l] c0))
|
||||
seqOp (lambda (p s_q) (lambda (t X b l c0) (let (
|
||||
; see term discussion about extra lambda wrap
|
||||
[s q] (s_q)
|
||||
c1 (lambda ([[X b i] l k]) (let (
|
||||
c2 (lambda ([slot l r]) (continue [slot l k r] c0))
|
||||
) (q t [X b (+ 1 i)] l k c2)))
|
||||
) (p t X (cons s b) l c1))))
|
||||
|
||||
cont_for (lambda (s p) (lambda ([[s d i] k r]) (lambda ([U G P Y]) (let (
|
||||
composed (set-foldl (lambda (cp [g l c]) (lcompose cp (c [g l r]))) id (multimap-get G [s k]))
|
||||
) (composed [U G (multimap-insert P [s k] r) Y])))))
|
||||
nterm_parser (lambda (p) (lambda (t gram_slot l k c) (lambda ([U G P Y])
|
||||
(let (
|
||||
[X b i] gram_slot
|
||||
s (idx b (- i 1))
|
||||
R (multimap-get P [s k])
|
||||
sigmap [U (multimap-insert G [s k] [gram_slot l c]) P Y]
|
||||
) (if (= 0 (size R)) ((p t s k (cont_for s p)) sigmap)
|
||||
(set-foldl (lambda (cp r) (lcompose cp (c [gram_slot l r]))) id R)
|
||||
)))))
|
||||
; see term discussion about extra lambda wrap
|
||||
nterm (lambda (s p) (lambda () [ s (nterm_parser p) ]))
|
||||
parse (lambda (s_f) (lambda (t)
|
||||
(let (
|
||||
; see term discussion about extra lambda wrap
|
||||
[s f] (s_f)
|
||||
X '__FUNGLL_UNIQUE_START_SYMBOL__
|
||||
sigma [ set-empty multimap-empty multimap-empty set-empty ]
|
||||
c (lambda (descriptor) (lambda (sigma) sigma))
|
||||
[U G P Y] ((f t ['X [s] 1] 0 0 c) sigma)
|
||||
) (set-foldl cons [] Y))))
|
||||
)
|
||||
(provide altStart altOp term seqStart seqOp nterm parse)
|
||||
)))
|
||||
Reference in New Issue
Block a user