opt.rkt (20484B)
1 #lang racket/base 2 (require racket/syntax 3 racket/pretty 4 syntax/parse/private/residual-ct ;; keep abs. path 5 syntax/parse/private/minimatch 6 syntax/parse/private/rep-patterns 7 syntax/parse/private/kws) 8 (provide (struct-out pk1) 9 (rename-out [optimize-matrix0 optimize-matrix])) 10 11 ;; ---- 12 13 ;; A Matrix is a (listof PK) where each PK has same number of columns 14 ;; A PK is one of 15 ;; - (pk1 (listof pattern) expr) -- a simple row in a parsing matrix 16 ;; - (pk/same pattern Matrix) -- a submatrix with a common first column factored out 17 ;; - (pk/pair Matrix) -- a submatrix with pair patterns in the first column unfolded 18 ;; - (pk/and Matrix) -- a submatrix with and patterns in the first column unfolded 19 (struct pk1 (patterns k) #:prefab) 20 (struct pk/same (pattern inner) #:prefab) 21 (struct pk/pair (inner) #:prefab) 22 (struct pk/and (inner) #:prefab) 23 24 (define (pk-columns pk) 25 (match pk 26 [(pk1 patterns k) (length patterns)] 27 [(pk/same p inner) (add1 (pk-columns inner))] 28 [(pk/pair inner) (sub1 (pk-columns inner))] 29 [(pk/and inner) (sub1 (pk-columns inner))])) 30 31 ;; Can factor pattern P given clauses like 32 ;; [ P P1 ... | e1] [ | [P1 ... | e1] ] 33 ;; [ P : | :] => [P | [ : | :] ] 34 ;; [ P PN ... | eN] [ | [PN ... | eN] ] 35 ;; if P cannot cut and P succeeds at most once (otherwise may reorder backtracking) 36 37 ;; Can unfold pair patterns as follows: 38 ;; [ (P11 . P12) P1 ... | e1 ] [ P11 P12 P1 ... | e1 ] 39 ;; [ : : | : ] => check pair, [ : | : ] 40 ;; [ (PN1 . PN2) PN ... | eN ] [ PN1 PN2 PN ... | eN ] 41 42 ;; Can unfold ~and patterns similarly; ~and patterns can hide 43 ;; factoring opportunities. 44 45 ;; ---- 46 47 ;; FIXME: New (unimplemented) optimization ideas 48 49 ;; (1) When collecting pair patterns, can reorder rows with pair vs never-pair 50 ;; first columns: 51 ;; [ (P11 . P12) P1 ... | e1 ] [ (P11 . P12) P1 ... | e1 ] 52 ;; [ P21 P2 ... | e2 ] => [ (P31 . P32) P3 ... | e3 ] 53 ;; [ (P31 . P32) P3 ... | e3 ] [ P21 P2 ... | e2 ] 54 ;; provided P21 does not cut and cannot match a pair term. 55 ;; Likewise for literals and never-symbol patterns. 56 57 ;; (2) If a row has a non-rejecting pattern (ie, always matches) in its first 58 ;; column, then the rows above it do not need to produce failure information 59 ;; *for their first columns*. For example, in the following matrix 60 ;; [ P11 P1 ... | e1 ] 61 ;; [ P21 P2 ... | e2 ] 62 ;; [ P31 P3 ... | e3 ] 63 ;; Suppose that P21 always matches (eg _) and assume P{1,3}1 are cut-free. Then 64 ;; P{1,3}1 do not need to produce failure info (set es = #f, etc). Here's why. 65 ;; If control reaches row 2, then since P21 cannot fail, if it fails the 66 ;; progress must be greater than P11 or P31. FIXME: Must also check neither P11 67 ;; nor P31 use ~post (or call stxclass that uses ~post, etc)! 68 69 70 ;; ---- 71 72 (define (optimize-matrix0 rows) 73 (define now (current-inexact-milliseconds)) 74 (when (and (> (length rows) 1)) 75 (log-syntax-parse-debug "OPT matrix (~s rows)\n~a" (length rows) 76 (pretty-format (matrix->sexpr rows) #:mode 'print))) 77 (define result (optimize-matrix rows)) 78 (define then (current-inexact-milliseconds)) 79 (when (and (> (length rows) 1)) 80 (cond [(= (length result) (length rows)) 81 (log-syntax-parse-debug "OPT FAILED (~s ms)" (floor (- then now)))] 82 [else 83 (log-syntax-parse-debug "OPT ==> (~s ms)\n~a" (floor (- then now)) 84 (pretty-format (matrix->sexpr result) #:mode 'print))])) 85 result) 86 87 ;; optimize-matrix : (listof pk1) -> Matrix 88 (define (optimize-matrix rows) 89 (cond [(null? rows) null] 90 [(null? (cdr rows)) rows] ;; no opportunities for 1 row 91 [(null? (pk1-patterns (car rows))) rows] 92 [else 93 ;; first unfold and-patterns 94 (let-values ([(col1 col2) 95 (for/lists (col1 col2) ([row (in-list rows)]) 96 (unfold-and (car (pk1-patterns row)) null))]) 97 (cond [(ormap pair? col2) 98 (list 99 (pk/and 100 (optimize-matrix* 101 (for/list ([row (in-list rows)] 102 [col1 (in-list col1)] 103 [col2 (in-list col2)]) 104 (pk1 (list* col1 105 (make-and-pattern col2) 106 (cdr (pk1-patterns row))) 107 (pk1-k row))))))] 108 [else (optimize-matrix* rows)]))])) 109 110 ;; optimize-matrix* : (listof pk1) -> Matrix 111 ;; The matrix is nonempty, and first column has no unfoldable pat:and. 112 ;; Split into submatrixes (sequences of rows) starting with similar patterns, 113 ;; handle according to similarity, then recursively optimize submatrixes. 114 (define (optimize-matrix* rows) 115 (define row1 (car rows)) 116 (define pat1 (car (pk1-patterns row1))) 117 (define k1 (pk1-k row1)) 118 ;; Now accumulate rows starting with patterns like pat1 119 (define-values (like? combine) (pattern->partitioner pat1)) 120 (let loop ([rows (cdr rows)] [rrows (list row1)]) 121 (cond [(null? rows) 122 (cons (combine (reverse rrows)) null)] 123 [else 124 (define row1 (car rows)) 125 (define pat1 (car (pk1-patterns row1))) 126 (cond [(like? pat1) 127 (loop (cdr rows) (cons row1 rrows))] 128 [else 129 (cons (combine (reverse rrows)) 130 (optimize-matrix* rows))])]))) 131 132 ;; pattern->partitioner : pattern -> (values (pattern -> boolean) ((listof pk1) -> PK)) 133 (define (pattern->partitioner pat1) 134 (match pat1 135 [(pat:pair head tail) 136 (values (lambda (p) (pat:pair? p)) 137 (lambda (rows) 138 (log-syntax-parse-debug "-- got ~s pair rows like ~e" (length rows) (pattern->sexpr pat1)) 139 (cond [(> (length rows) 1) 140 (pk/pair (optimize-matrix 141 (for/list ([row (in-list rows)]) 142 (let* ([patterns (pk1-patterns row)] 143 [pat1 (car patterns)]) 144 (pk1 (list* (pat:pair-head pat1) 145 (pat:pair-tail pat1) 146 (cdr patterns)) 147 (pk1-k row))))))] 148 [else (car rows)])))] 149 [(? pattern-factorable?) 150 (values (lambda (pat2) (pattern-equal? pat1 pat2)) 151 (lambda (rows) 152 (log-syntax-parse-debug "-- got ~s factorable like ~e" (length rows) (pattern->sexpr pat1)) 153 (cond [(> (length rows) 1) 154 (pk/same pat1 155 (optimize-matrix 156 (for/list ([row (in-list rows)]) 157 (pk1 (cdr (pk1-patterns row)) (pk1-k row)))))] 158 [else (car rows)])))] 159 [_ 160 (values (lambda (pat2) #f) 161 (lambda (rows) 162 ;; (length rows) = 1 163 (car rows)))])) 164 165 ;; unfold-and : pattern (listof pattern) -> (values pattern (listof pattern)) 166 (define (unfold-and p onto) 167 (match p 168 [(pat:and subpatterns) 169 ;; pat:and is worth unfolding if first subpattern is not pat:action 170 ;; if first subpattern is also pat:and, keep unfolding 171 (let* ([first-sub (car subpatterns)] 172 [rest-subs (cdr subpatterns)]) 173 (cond [(not (pat:action? first-sub)) 174 (unfold-and first-sub (*append rest-subs onto))] 175 [else (values p onto)]))] 176 [_ (values p onto)])) 177 178 ;; pattern-factorable? : *Pattern -> Boolean 179 (define (pattern-factorable? p) (not (pattern-unfactorable? p))) 180 181 ;; pattern-unfactorable? : *Pattern -> Boolean 182 (define (pattern-unfactorable? p) 183 ;; Cannot factor out p if 184 ;; - if p can succeed multiple times (factoring changes success order) 185 ;; - if p can cut (factoring changes which choice points are discarded (too few)) 186 ;; Note: presence of sub-expressions handled by pattern-equal?. 187 (define (for-pattern p recur) 188 (match p 189 [(pat:var/p _ _ _ _ _ (scopts _ commit? _ _)) (not commit?)] 190 [(pat:action _act _pat) #t] 191 [(pat:dots heads tail) 192 ;; Conservative approximation for common case: one head pattern 193 ;; In general, check if heads don't overlap, don't overlap with tail. 194 (or (> (length heads) 1) 195 (not (equal? tail (pat:datum '()))) 196 (recur))] 197 [(pat:or _ patterns _) #t] 198 [(pat:not pattern) #t] 199 [(pat:commit pattern) #f] 200 [(? pat:reflect?) #t] 201 [(hpat:var/p _ _ _ _ _ (scopts _ commit? _ _)) (not commit?)] 202 [(hpat:commit inner) #f] 203 [(ehpat _ head repc _) 204 (or (not (equal? repc #f)) 205 (recur))] 206 [_ (recur)])) 207 (pattern-ormap p for-pattern)) 208 209 (define (subpatterns-equal? as bs) 210 (and (= (length as) (length bs)) 211 (for/and ([a (in-list as)] 212 [b (in-list bs)]) 213 (pattern-equal? a b)))) 214 215 (define (pattern-equal? a b) 216 (define result 217 (cond [(and (pat:any? a) (pat:any? b)) #t] 218 [(and (pat:svar? a) (pat:svar? b)) 219 (bound-identifier=? (pat:svar-name a) (pat:svar-name b))] 220 [(and (pat:var/p? a) (pat:var/p? b)) 221 (and (free-id/f-equal? (pat:var/p-parser a) (pat:var/p-parser b)) 222 (bound-id/f-equal? (pat:var/p-name a) (pat:var/p-name b)) 223 (equal-iattrs? (pat:var/p-nested-attrs a) (pat:var/p-nested-attrs b)) 224 (equal-argu? (pat:var/p-argu a) (pat:var/p-argu b)) 225 (expr-equal? (pat:var/p-role a) (pat:var/p-role b)))] 226 [(and (pat:integrated? a) (pat:integrated? b)) 227 (and (bound-id/f-equal? (pat:integrated-name a) (pat:integrated-name b)) 228 (free-identifier=? (pat:integrated-predicate a) 229 (pat:integrated-predicate b)) 230 (expr-equal? (pat:integrated-role a) (pat:integrated-role b)))] 231 [(and (pat:literal? a) (pat:literal? b)) 232 ;; literals are hard to compare, so compare gensyms attached to 233 ;; literal ids (see rep.rkt) instead 234 (let ([ka (syntax-property (pat:literal-id a) 'literal)] 235 [kb (syntax-property (pat:literal-id b) 'literal)]) 236 (and ka kb (eq? ka kb)))] 237 [(and (pat:datum? a) (pat:datum? b)) 238 (equal? (pat:datum-datum a) 239 (pat:datum-datum b))] 240 [(and (pat:head? a) (pat:head? b)) 241 (and (pattern-equal? (pat:head-head a) (pat:head-head b)) 242 (pattern-equal? (pat:head-tail a) (pat:head-tail b)))] 243 [(and (pat:dots? a) (pat:dots? b)) 244 (and (subpatterns-equal? (pat:dots-heads a) (pat:dots-heads b)) 245 (pattern-equal? (pat:dots-tail a) (pat:dots-tail b)))] 246 [(and (pat:and? a) (pat:and? b)) 247 (subpatterns-equal? (pat:and-patterns a) (pat:and-patterns b))] 248 [(and (pat:or? a) (pat:or? b)) 249 (subpatterns-equal? (pat:or-patterns a) (pat:or-patterns b))] 250 [(and (pat:not? a) (pat:not? b)) 251 (pattern-equal? (pat:not-pattern a) (pat:not-pattern b))] 252 [(and (pat:pair? a) (pat:pair? b)) 253 (and (pattern-equal? (pat:pair-head a) (pat:pair-head b)) 254 (pattern-equal? (pat:pair-tail a) (pat:pair-tail b)))] 255 [(and (pat:vector? a) (pat:vector? b)) 256 (pattern-equal? (pat:vector-pattern a) (pat:vector-pattern b))] 257 [(and (pat:box? a) (pat:box? b)) 258 (pattern-equal? (pat:box-pattern a) (pat:box-pattern b))] 259 [(and (pat:pstruct? a) (pat:pstruct? b)) 260 (and (equal? (pat:pstruct-key a) 261 (pat:pstruct-key b)) 262 (pattern-equal? (pat:pstruct-pattern a) 263 (pat:pstruct-pattern b)))] 264 [(and (pat:describe? a) (pat:describe? b)) #f] ;; can't compare desc exprs 265 [(and (pat:delimit? a) (pat:delimit? b)) 266 (pattern-equal? (pat:delimit-pattern a) (pat:delimit-pattern b))] 267 [(and (pat:commit? a) (pat:commit? b)) 268 (pattern-equal? (pat:commit-pattern a) (pat:commit-pattern b))] 269 [(and (pat:reflect? a) (pat:reflect? b)) #f] ;; FIXME: ? 270 [(and (pat:ord? a) (pat:ord? b)) 271 (and (pattern-equal? (pat:ord-pattern a) (pat:ord-pattern b)) 272 (equal? (pat:ord-group a) (pat:ord-group b)) 273 (equal? (pat:ord-index a) (pat:ord-index b)))] 274 [(and (pat:post? a) (pat:post? b)) 275 (pattern-equal? (pat:post-pattern a) (pat:post-pattern b))] 276 [(and (pat:seq-end? a) (pat:seq-end? b)) #t] 277 ;; --- 278 [(and (hpat:single? a) (hpat:single? b)) 279 (pattern-equal? (hpat:single-pattern a) (hpat:single-pattern b))] 280 [(and (hpat:var/p? a) (hpat:var/p? b)) 281 (and (free-id/f-equal? (hpat:var/p-parser a) (hpat:var/p-parser b)) 282 (bound-id/f-equal? (hpat:var/p-name a) (hpat:var/p-name b)) 283 (equal-iattrs? (hpat:var/p-nested-attrs a) (hpat:var/p-nested-attrs b)) 284 (equal-argu? (hpat:var/p-argu a) (hpat:var/p-argu b)) 285 (expr-equal? (hpat:var/p-role a) (hpat:var/p-role b)))] 286 [(and (hpat:seq? a) (hpat:seq? b)) 287 (pattern-equal? (hpat:seq-inner a) (hpat:seq-inner b))] 288 ;; --- 289 [(and (ehpat? a) (ehpat? b)) 290 (and (equal? (ehpat-repc a) #f) 291 (equal? (ehpat-repc b) #f) 292 (pattern-equal? (ehpat-head a) (ehpat-head b)))] 293 ;; FIXME: more? 294 [else #f])) 295 (when (and (log-level? syntax-parse-logger 'debug) 296 (eq? result #f) 297 (equal? (syntax->datum #`#,a) (syntax->datum #`#,b))) 298 (log-syntax-parse-debug "** pattern-equal? failed on ~e" a)) 299 result) 300 301 (define (equal-iattrs? as bs) 302 (and (= (length as) (length bs)) 303 ;; assumes attrs in same order 304 (for/and ([aa (in-list as)] 305 [ba (in-list bs)]) 306 (and (bound-identifier=? (attr-name aa) (attr-name ba)) 307 (equal? (attr-depth aa) (attr-depth ba)) 308 (equal? (attr-syntax? aa) (attr-syntax? ba)))))) 309 310 (define (expr-equal? a b) 311 ;; Expression equality is undecidable in general. Especially difficult for unexpanded 312 ;; code, but it would be very difficult to set up correct env for local-expand because of 313 ;; attr binding rules. So, do *very* conservative approx: simple variables and literals. 314 ;; FIXME: any other common cases? 315 (cond [(not (and (syntax? a) (syntax? b))) 316 (equal? a b)] 317 [(and (identifier? a) (identifier? b)) 318 ;; note: "vars" might be identifier macros (unsafe to consider equal), 319 ;; so check var has no compile-time binding 320 (and (free-identifier=? a b) 321 (let/ec k (syntax-local-value a (lambda () (k #t))) #f))] 322 [(syntax-case (list a b) (quote) 323 [((quote ad) (quote bd)) 324 (cons (syntax->datum #'ad) (syntax->datum #'bd))] 325 [_ #f]) 326 => (lambda (ad+bd) 327 (equal? (car ad+bd) (cdr ad+bd)))] 328 [else 329 ;; approx: equal? only if both simple data (bool, string, etc), no inner stx 330 (let ([ad (syntax-e a)] 331 [bd (syntax-e b)]) 332 (and (equal? ad bd) 333 (free-identifier=? (datum->syntax a '#%datum) #'#%datum) 334 (free-identifier=? (datum->syntax b '#%datum) #'#%datum)))])) 335 336 (define (equal-argu? a b) 337 (define (unwrap-arguments x) 338 (match x 339 [(arguments pargs kws kwargs) 340 (values pargs kws kwargs)])) 341 (define (list-equal? as bs inner-equal?) 342 (and (= (length as) (length bs)) 343 (andmap inner-equal? as bs))) 344 (let-values ([(apargs akws akwargs) (unwrap-arguments a)] 345 [(bpargs bkws bkwargs) (unwrap-arguments b)]) 346 (and (list-equal? apargs bpargs expr-equal?) 347 (equal? akws bkws) 348 (list-equal? akwargs bkwargs expr-equal?)))) 349 350 (define (free-id/f-equal? a b) 351 (or (and (eq? a #f) 352 (eq? b #f)) 353 (and (identifier? a) 354 (identifier? b) 355 (free-identifier=? a b)))) 356 357 (define (bound-id/f-equal? a b) 358 (or (and (eq? a #f) 359 (eq? b #f)) 360 (and (identifier? a) 361 (identifier? b) 362 (bound-identifier=? a b)))) 363 364 (define (make-and-pattern subs) 365 (cond [(null? subs) (pat:any)] ;; shouldn't happen 366 [(null? (cdr subs)) (car subs)] 367 [else (pat:and subs)])) 368 369 (define (*append a b) (if (null? b) a (append a b))) 370 371 (define (stx-e x) (if (syntax? x) (syntax-e x) x)) 372 373 ;; ---- 374 375 (define (matrix->sexpr rows) 376 (cond [(null? rows) ;; shouldn't happen 377 '(FAIL)] 378 [(null? (cdr rows)) 379 (pk->sexpr (car rows))] 380 [else 381 (cons 'TRY (map pk->sexpr rows))])) 382 (define (pk->sexpr pk) 383 (match pk 384 [(pk1 pats k) 385 (cons 'MATCH (map pattern->sexpr pats))] 386 [(pk/same pat inner) 387 (list 'SAME (pattern->sexpr pat) (matrix->sexpr inner))] 388 [(pk/pair inner) 389 (list 'PAIR (matrix->sexpr inner))] 390 [(pk/and inner) 391 (list 'AND (matrix->sexpr inner))])) 392 (define (pattern->sexpr p) 393 (match p 394 [(pat:any) '_] 395 [(pat:integrated name pred desc _) 396 (format-symbol "~a:~a" (or name '_) desc)] 397 [(pat:svar name) 398 (syntax-e name)] 399 [(pat:var/p name parser _ _ _ _) 400 (cond [(and parser (regexp-match #rx"^parse-(.*)$" (symbol->string (syntax-e parser)))) 401 => (lambda (m) 402 (format-symbol "~a:~a" (or name '_) (cadr m)))] 403 [else 404 (if name (syntax-e name) '_)])] 405 [(? pat:literal?) `(syntax ,(syntax->datum (pat:literal-id p)))] 406 [(pat:datum datum) 407 (cond [(or (symbol? datum) (pair? datum)) 408 `(quote ,datum)] 409 [else datum])] 410 [(pat:action action (pat:any)) (pattern->sexpr action)] 411 [(pat:action action inner) (list '~AAND (pattern->sexpr action) (pattern->sexpr inner))] 412 [(pat:and patterns) (cons '~and (map pattern->sexpr patterns))] 413 [(pat:or _ patterns _) (cons '~or (map pattern->sexpr patterns))] 414 [(pat:not pattern) (list '~not (pattern->sexpr pattern))] 415 [(pat:pair head tail) 416 (cons (pattern->sexpr head) (pattern->sexpr tail))] 417 [(pat:head head tail) 418 (cons (pattern->sexpr head) (pattern->sexpr tail))] 419 [(pat:dots (list eh) tail) 420 (list* (pattern->sexpr eh) '... (pattern->sexpr tail))] 421 [(pat:dots ehs tail) 422 (list* (cons '~alt (map pattern->sexpr ehs)) '... (pattern->sexpr tail))] 423 [(pat:describe sp _ _ _) (list '~describe (pattern->sexpr sp))] 424 [(pat:delimit sp) (list '~delimit-cut (pattern->sexpr sp))] 425 [(pat:commit sp) (list '~commit (pattern->sexpr sp))] 426 [(pat:ord pattern _ _) (list '~ord (pattern->sexpr pattern))] 427 [(pat:post sp) (list '~post (pattern->sexpr sp))] 428 [(pat:seq-end) '()] 429 [(action:cut) '~!] 430 [(action:fail cnd msg) (list '~fail)] 431 [(action:bind attr expr) (list '~bind)] 432 [(action:and as) (cons '~and (map pattern->sexpr as))] 433 [(action:parse sp expr) (list '~parse (pattern->sexpr sp))] 434 [(action:do stmts) (list '~do)] 435 [(action:undo stmts) (list '~undo)] 436 [(action:ord ap _ _) (list '~ord (pattern->sexpr ap))] 437 [(action:post ap) (list '~post (pattern->sexpr ap))] 438 [(hpat:single sp) (pattern->sexpr sp)] 439 [(hpat:var/p name parser _ _ _ _) 440 (cond [(and parser (regexp-match #rx"^parser-(.*)$" (symbol->string (syntax-e parser)))) 441 => (lambda (m) (format-symbol "~a:~a" (or name '_) (cadr m)))] 442 [else (if name (syntax-e name) '_)])] 443 [(hpat:seq lp) (cons '~seq (pattern->sexpr lp))] 444 [(hpat:action ap hp) (list '~AAND (pattern->sexpr ap) (pattern->sexpr hp))] 445 [(hpat:and hp sp) (list '~and (pattern->sexpr hp) (pattern->sexpr sp))] 446 [(hpat:or _ hps _) (cons '~or (map pattern->sexpr hps))] 447 [(hpat:describe hp _ _ _) (list '~describe (pattern->sexpr hp))] 448 [(hpat:delimit hp) (list '~delimit-cut (pattern->sexpr hp))] 449 [(hpat:commit hp) (list '~commit (pattern->sexpr hp))] 450 [(hpat:ord hp _ _) (list '~ord (pattern->sexpr hp))] 451 [(hpat:post hp) (list '~post (pattern->sexpr hp))] 452 [(hpat:peek hp) (list '~peek (pattern->sexpr hp))] 453 [(hpat:peek-not hp) (list '~peek-not (pattern->sexpr hp))] 454 [(ehpat _as hpat repc _cn) 455 (if (eq? repc #f) (pattern->sexpr hpat) (list '~REPC (pattern->sexpr hpat)))] 456 [_ '<Pattern>]))