2021-08-03 00:56:07 -04:00
|
|
|
|
|
|
|
|
(with_import "./collections.kp"
|
|
|
|
|
(with_import "./rb.kp"
|
|
|
|
|
(let (
|
2021-08-03 01:10:10 -04:00
|
|
|
|
|
|
|
|
; Implementing "Purely Functional GLL Parsing"
|
|
|
|
|
; by L. Thomas van Binsbergena, Elizabeth Scott, Adrian Johnstone
|
|
|
|
|
; retrived from from http://ltvanbinsbergen.nl/publications/jcola-fgll.pdf
|
2021-08-03 00:56:07 -04:00
|
|
|
|
|
|
|
|
; discriptor is a triple of grammer-slot and 2 indicies of t-string
|
|
|
|
|
; corresponding to process
|
|
|
|
|
; <X::= a.b,l,k>
|
|
|
|
|
; I previously had this as nonterminal, rule-idx, idx into rule, l,r
|
|
|
|
|
|
|
|
|
|
; U - discriptors added to (worklist?), makes sure no duplicates added to "list"
|
|
|
|
|
; P - binary relation between pairs of commencments and right extants
|
|
|
|
|
; makes sure that later discoveries that use a sub-non-terminal that has already
|
|
|
|
|
; been processed can be completed since the sub-non-terminal won't be
|
|
|
|
|
; re-descended at the same index <s::=.d,k,k>
|
|
|
|
|
;
|
|
|
|
|
; a commencement is a pair of a nonterminal and a left extent (the arguemnts to
|
|
|
|
|
; descend, since that's what we're skipping) to a set of right extants
|
|
|
|
|
; G - binary relation between commencments and continuations, modified to include
|
|
|
|
|
; actional continuation.
|
|
|
|
|
; The normal continuation is a pair of as slot and a left extent.
|
|
|
|
|
; So <<X,k> -> <g,l>> in G, with a new are is combined to form
|
|
|
|
|
; discriptor <g,l,r> and BSR <g,l,k,r> whenever k,r are discovered for X
|
|
|
|
|
; Note we haven't finished things with the above P, since some subs of the form
|
|
|
|
|
; <s::=.d,k,k> or descriptors that follow them may not have been processed
|
|
|
|
|
; yet. When new Right extants are discovered, we must add descriptors
|
|
|
|
|
; <Y::=a's.b',l',r_j> and <X::as.b,l,rj> to R (if not in U) and add
|
|
|
|
|
; BSR elements <Y::=a's.b',l',k,r_j> and <X::=as.b,l,k,r_j> to Y
|
|
|
|
|
; Y - Our result BSR set!
|
|
|
|
|
|
|
|
|
|
; I've decided, a slot is [X [stff] int-for-dot]
|
|
|
|
|
|
|
|
|
|
id (lambda (sigma) sigma)
|
|
|
|
|
altStart (lambda (t s k c) id)
|
2021-08-04 00:56:04 -04:00
|
|
|
altOp (lambda (p q) (lambda (t s k c) (lcompose (p t s k c) (q t s [] k c))))
|
|
|
|
|
term_parser (lambda (t [X b i] l k c) (lambda (sigma)
|
2021-08-05 00:07:44 -04:00
|
|
|
(let (this_term (idx b (- i 1)))
|
|
|
|
|
(if (= this_term (slice t k (+ k (len this_term)))) ((c [[X b i] l (+ (len this_term) k)]) sigma)
|
|
|
|
|
sigma))))
|
2021-08-04 00:56:04 -04:00
|
|
|
term (lambda (s) [ s term_parser ])
|
|
|
|
|
|
|
|
|
|
continue (lambda (BSR_element c) (lambda ([U G P Y])
|
2021-08-03 00:56:07 -04:00
|
|
|
(let (
|
2021-08-04 00:56:04 -04:00
|
|
|
[slot l k r] BSR_element
|
|
|
|
|
descriptor [slot l r]
|
|
|
|
|
(X b i) slot
|
|
|
|
|
Yp (if (or (!= 0 i) (= (len rhs) i)) (set-insert Y BSR_element)
|
|
|
|
|
Y)
|
2021-08-03 00:56:07 -04:00
|
|
|
Up (set-insert U descriptor)
|
2021-08-04 00:56:04 -04:00
|
|
|
) (if (set-contains? U descriptor) [U G P Yp]
|
|
|
|
|
((c descriptor) [Up G P Yp])))))
|
|
|
|
|
seqStart (lambda (t X b l c0) (continue [[X b 0] l l l] c0))
|
|
|
|
|
seqOp (lambda (p [s q]) (lambda (t X b l c0) (let (
|
|
|
|
|
c1 (lambda ([slot l k]) (let (
|
|
|
|
|
c2 (lambda ([slot l r]) (continue [slot l k r] c0))
|
|
|
|
|
) (q t slot l k c2)))
|
|
|
|
|
) (p t X (cons s b) l c1))))
|
|
|
|
|
|
|
|
|
|
cont_for (lambda (s p) (lambda ([[s d i] k r]) (lambda ([U G P Y]) (let (
|
|
|
|
|
composed (set-foldl (lambda (cp [g l c]) (lcompose cp (c [g l r]))) id (multimap-get G [s k]))
|
|
|
|
|
) (composed [U G (multimap-insert P [s k] r) Y])))))
|
|
|
|
|
nterm_parser (lambda (p) (lambda (t gram_slot l k c) (lambda ([U G P Y])
|
2021-08-03 00:56:07 -04:00
|
|
|
(let (
|
2021-08-04 00:56:04 -04:00
|
|
|
[X b i] gram_slot
|
|
|
|
|
s (idx b (- i 1))
|
|
|
|
|
R (multimap-get P [s k])
|
|
|
|
|
sigmap [U (multimap-insert G [s k] [gram_slot l c]) P Y]
|
2021-08-03 00:56:07 -04:00
|
|
|
) (if (= 0 (size R)) ((p t s k (cont_for s p)) sigmap)
|
2021-08-04 00:56:04 -04:00
|
|
|
(set-foldl (lambda (cp r) (lcompose cp (c [gram_slot l r]))) id R)
|
|
|
|
|
)))))
|
|
|
|
|
nterm (lambda (s p) [ s (nterm_parser p) ])
|
2021-08-03 00:56:07 -04:00
|
|
|
parse (lambda ([s f]) (lambda (t)
|
|
|
|
|
(let (
|
2021-08-04 00:56:04 -04:00
|
|
|
X '__FUNGLL_UNIQUE_START_SYMBOL__
|
|
|
|
|
sigma [ set-empty multimap-empty multimap-empty set-empty ]
|
|
|
|
|
c (lambda (descriptor) (lambda (sigma) sigma))
|
|
|
|
|
[U G P Y] ((f t ['X [s] 1] 0 0 c) sigma)
|
|
|
|
|
) (set-foldl cons [] Y))))
|
2021-08-03 00:56:07 -04:00
|
|
|
)
|
2021-08-04 00:56:04 -04:00
|
|
|
(provide altStart altOp term seqStart seqOp nterm parse)
|
2021-08-03 00:56:07 -04:00
|
|
|
)))
|