-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathparseclj-parser.el
285 lines (233 loc) · 12 KB
/
parseclj-parser.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
;;; parseclj-parser.el --- Clojure/EDN parser -*- lexical-binding: t; -*-
;; Copyright (C) 2017-2021 Arne Brasseur
;; Author: Arne Brasseur <arne@arnebrasseur.net>
;; This file is not part of GNU Emacs.
;; This file is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;; This file is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
;;; Commentary:
;; A shift/reduce parser for Clojure source.
;;; Code:
(require 'cl-lib)
(require 'subr-x)
(require 'parseclj-lex)
(require 'parseclj-alist)
(require 'map)
(define-error 'parseclj-parser-error "parseclj: Syntax error")
(defun parseclj--error (format &rest args)
"Signal a parse error.
Takes a FORMAT string and optional ARGS to be passed to
`format-message'. Signals a 'parseclj-parser-error signal, which
can be handled with `condition-case'."
(signal 'parseclj-parser-error (list (apply #'format-message format args))))
(defun parseclj--find-opening-token (stack closing-token)
"Scan STACK for an opening-token matching CLOSING-TOKEN."
(let ((token-type (parseclj-lex-token-type closing-token)))
(cond
((eq :rparen token-type) (parseclj-lex-token-type
(seq-find (lambda (token)
(member (parseclj-lex-token-type token)
'(:lparen :lambda)))
stack)))
((eq :rbracket token-type) :lbracket)
((eq :rbrace token-type) (parseclj-lex-token-type
(seq-find (lambda (token)
(member (parseclj-lex-token-type token)
'(:lbrace :set)))
stack))))))
(defun parseclj--reduce-coll (stack closing-token reduce-branch options)
"Reduce collection based on the top of the STACK and a CLOSING-TOKEN.
REDUCE-BRANCH is a function to be applied to the collection of tokens found
from the top of the stack until an opening token that matches
CLOSING-TOKEN. This function should return an AST token representing such
collection.
OPTIONS is an association list. This list is also passed down to the
REDUCE-BRANCH function. See `parseclj-parser' for more information on
available options."
(let ((opening-token-type (parseclj--find-opening-token stack closing-token))
(fail-fast (map-elt options :fail-fast t))
(collection nil))
(if (not opening-token-type)
(if fail-fast
(parseclj--error "At position %s, unmatched %S"
(map-elt closing-token :pos)
(parseclj-lex-token-type closing-token))
stack)
(progn
;; unwind the stack until opening-token-type is found, adding to collection
(while (and stack (not (eq (parseclj-lex-token-type (car stack)) opening-token-type)))
(push (pop stack) collection))
;; did we find the right token?
(if (eq (parseclj-lex-token-type (car stack)) opening-token-type)
(progn
(when fail-fast
;; any unreduced tokens left: bail early
(when-let ((token (seq-find #'parseclj-lex-token-p collection)))
(parseclj--error "At position %s, unmatched %S"
(map-elt token :pos)
(parseclj-lex-token-type token))))
;; all good, call the reducer so it can return an updated stack with a
;; new node at the top.
(let ((opening-token (pop stack)))
(funcall reduce-branch stack opening-token collection options)))
;; Unwound the stack without finding a matching paren: either bail early
;; or return the original stack and continue parsing
(if fail-fast
(parseclj--error "At position %s, unmatched %S"
(map-elt closing-token :pos)
(parseclj-lex-token-type closing-token))
(reverse collection)))))))
(defun parseclj--take-value (stack value-p)
"Scan STACK until a value is found.
Return everything up to the value in reversed order (meaning the value
comes first in the result).
STACK is the current parse stack to scan.
VALUE-P a predicate to distinguish reduced values from non-values (tokens
and whitespace)."
(let ((result nil))
(cl-block nil
(while stack
(cond
((parseclj-lex-token-p (car stack))
(cl-return nil))
((funcall value-p (car stack))
(cl-return (cons (car stack) result)))
(t
(push (pop stack) result)))))))
(defun parseclj--take-token (stack value-p token-types)
"Scan STACK until a token of a certain type is found.
Returns nil if a value is encountered before a matching token is found.
Return everything up to the token in reversed order (meaning the token
comes first in the result).
STACK is the current parse stack to scan.
VALUE-P a predicate to distinguish reduced values from non-values (tokens
and whitespace).
TOKEN-TYPES are the token types to look for."
(let ((result nil))
(cl-block nil
(while stack
(cond
((member (parseclj-lex-token-type (car stack)) token-types)
(cl-return (cons (car stack) result)))
((funcall value-p (car stack))
(cl-return nil))
((parseclj-lex-token-p (car stack))
(cl-return nil))
(t
(push (pop stack) result)))))))
(defun parseclj-single-value-p (stack value-p)
"Return t if STACK only has a single node for which VALUE-P is true.
This checks if the stack contains a single, fully reduced value, and no
dangling unmatched tokens. When parsing with `:read-one' this indicates a
form can be returned."
(and (not (cl-reduce (lambda (bool node)
(or bool (parseclj-lex-token-p node)))
stack
:initial-value nil))
(parseclj--take-value stack value-p)))
(defun parseclj-parser (reduce-leaf reduce-branch &optional options)
"Clojure/EDN stack-based shift-reduce parser.
REDUCE-LEAF does reductions for leaf nodes. It is a function that takes
the current value of the stack and a token, and either returns an updated
stack, with a new leaf node at the top (front), or returns the stack
unmodified.
REDUCE-BRANCH does reductions for branch nodes. It is a function that
takes the current value of the stack, the type of branch node to create,
and a list of child nodes, and returns an updated stack, with the new node
at the top (front).
What \"node\" means in this case is up to the reducing functions, it could
be AST nodes (as in the case of `parseclj-parser-clojure'), or plain
values/sexps (as in the case of `parseedn-read'), or something else. The
only requirement is that they should not put raw tokens back on the stack,
as the parser relies on the presence or absence of these to detect parse
errors.
OPTIONS is an association list which is passed on to the reducing
functions. Additionally the following options are recognized
- `:fail-fast'
Raise an error when a parse error is encountered, rather than continuing
with a partial result.
- `:value-p'
A predicate function to differentiate values from tokens and
whitespace. This is needed when scanning the stack to see if any
reductions can be performed. By default anything that isn't a token is
considered a value. This can be problematic when parsing with
`:lexical-preservation', and which case you should provide an
implementation that also returns falsy for :whitespace, :comment, and
:discard AST nodes.
- `:tag-readers'
An association list that describes tag handler functions for any possible
tag. This options in only available in `parseedn-read', for more
information, please refer to its documentation.
- `:read-one'
Return as soon as a single complete value has been read."
(let ((fail-fast (map-elt options :fail-fast t))
(read-one (map-elt options :read-one))
(value-p (map-elt options :value-p (lambda (e) (not (parseclj-lex-token-p e)))))
(stack nil)
(token (parseclj-lex-next)))
(while (not (or (and read-one (parseclj-single-value-p stack value-p))
(eq (parseclj-lex-token-type token) :eof)))
;; (message "STACK: %S" stack)
;; (message "TOKEN: %S\n" token)
(when (and fail-fast (parseclj-lex-error-p token))
(parseclj--error "Invalid token at %s: %S"
(map-elt token :pos)
(parseclj-lex-token-form token)))
;; Reduce based on the top item on the stack (collections)
(cond
((parseclj-lex-leaf-token-p token)
(setf stack (funcall reduce-leaf stack token options)))
((parseclj-lex-closing-token-p token)
(setf stack (parseclj--reduce-coll stack token reduce-branch options)))
(t (push token stack)))
;; Reduce based on top two items on the stack (special prefixed elements)
(let* ((top-value (parseclj--take-value stack value-p))
(opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))
new-stack)
(while (and top-value opening-token)
;; (message "Reducing...")
;; (message " - STACK %S" stack)
;; (message " - OPENING-TOKEN %S" opening-token)
;; (message " - TOP-VALUE %S" top-value)
(setq new-stack (nthcdr (+ (length top-value) (length opening-token)) stack))
(setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value) options))
;; recur
(setq top-value (parseclj--take-value stack value-p))
(setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))))
;; Reduce based on top three items on the stack (metadata, namespaced maps)
(let* ((top-value-1 (parseclj--take-value stack value-p))
(top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
(opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
(length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))
new-stack)
(while (and top-value-1 top-value-2 opening-token)
(setq new-stack (nthcdr (apply #'+ (mapcar #'length (list top-value-1 top-value-2 opening-token))) stack))
(setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value-2 top-value-1) options))
;; recur
(setq top-value-1 (parseclj--take-value stack value-p))
(setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
(setq opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
(length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))))
(setq token (parseclj-lex-next)))
;; reduce root
(when fail-fast
(when-let ((token (seq-find #'parseclj-lex-token-p stack)))
(parseclj--error "At position %s, unmatched %S"
(map-elt token :pos)
(parseclj-lex-token-type token))))
(if read-one
(car (parseclj--take-value stack value-p))
(car (funcall reduce-branch nil (parseclj-lex-token :root "" 1)
(reverse stack)
options)))))
(provide 'parseclj-parser)
;;; parseclj-parser.el ends here