# sqlglot.parser
from __future__ import annotations

import logging
import re
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from alternating key/value arguments.

    A single star argument (e.g. ``VAR_MAP(*)``) produces a ``StarMap``;
    otherwise the even-indexed args become keys and the odd-indexed args
    become the corresponding values of a ``VarMap``.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE expression; argument order is (pattern, value[, escape]).

    If a third argument is present it is treated as the ESCAPE character and
    the LIKE node is wrapped in an ``Escape`` node.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback that parses ``this <op> <bitwise-expr>`` into
    an ``expr_type`` node, optionally swapping the operands, and then parses a
    trailing ESCAPE clause if present.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build LOG(...) respecting the dialect's argument order and defaults.

    Default argument order is (base, expression); dialects with
    ``LOG_BASE_FIRST`` unset expect the reversed order. A single-argument
    LOG maps to ``Ln`` when the dialect's parser defaults LOG to LN.
    """
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Build HEX(...), using LowerHex for dialects whose HEX output is lowercase."""
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    """Build LOWER(...); LOWER(HEX(..)) is simplified to LowerHex to ease transpilation."""
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    """Build UPPER(...); UPPER(HEX(..)) is simplified to Hex to ease transpilation."""
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for JSON-extraction functions whose second argument is
    a JSON path, converting that argument via the dialect's ``to_json_path``.

    Extra arguments are preserved on ``expressions`` for ``exp.JSONExtract``.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    """Build MOD(a, b), parenthesizing binary operands so that precedence is
    preserved when rendered as ``%``, e.g. MOD(a + 1, 7) -> (a + 1) % 7.
    """
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    """Build an LPAD/RPAD expression; ``is_left`` selects the padding side."""
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an ARRAY/LIST constructor, recording whether bracket notation was
    used for dialects that distinguish the two constructor syntaxes.
    """
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE; with two args the source timezone falls back to
    ``default_source_tz`` (or is omitted if none is given).
    """
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    """Build LTRIM/RTRIM as a TRIM with LEADING/TRAILING position."""
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    """Build COALESCE/IFNULL/NVL, flagging the original function for round-tripping."""
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    """Build LOCATE/CHARINDEX as StrPosition; argument order is (substr, string[, position])."""
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    """Metaclass that precomputes the SHOW/SET keyword tries for each Parser subclass."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        # Tries over the space-separated keyword sequences of the parser tables,
        # used for longest-prefix matching of multi-word SHOW/SET statements.
        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
bases, attrs) 174 175 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 176 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 177 178 return klass 179 180 181class Parser(metaclass=_Parser): 182 """ 183 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 184 185 Args: 186 error_level: The desired error level. 187 Default: ErrorLevel.IMMEDIATE 188 error_message_context: The amount of context to capture from a query string when displaying 189 the error message (in number of characters). 190 Default: 100 191 max_errors: Maximum number of error messages to include in a raised ParseError. 192 This is only relevant if error_level is ErrorLevel.RAISE. 193 Default: 3 194 """ 195 196 FUNCTIONS: t.Dict[str, t.Callable] = { 197 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 198 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 199 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 200 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 204 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 205 ), 206 "CHAR": lambda args: exp.Chr(expressions=args), 207 "CHR": lambda args: exp.Chr(expressions=args), 208 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 209 "CONCAT": lambda args, dialect: exp.Concat( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 215 expressions=args, 216 safe=not dialect.STRICT_STRING_CONCAT, 217 coalesce=dialect.CONCAT_COALESCE, 218 ), 219 "CONVERT_TIMEZONE": build_convert_timezone, 220 "DATE_TO_DATE_STR": lambda args: exp.Cast( 221 this=seq_get(args, 0), 222 
to=exp.DataType(this=exp.DataType.Type.TEXT), 223 ), 224 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 225 start=seq_get(args, 0), 226 end=seq_get(args, 1), 227 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 228 ), 229 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 230 "HEX": build_hex, 231 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 232 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 233 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 234 "LIKE": build_like, 235 "LOG": build_logarithm, 236 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 237 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 238 "LOWER": build_lower, 239 "LPAD": lambda args: build_pad(args), 240 "LEFTPAD": lambda args: build_pad(args), 241 "LTRIM": lambda args: build_trim(args), 242 "MOD": build_mod, 243 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 244 "RPAD": lambda args: build_pad(args, is_left=False), 245 "RTRIM": lambda args: build_trim(args, is_left=False), 246 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 247 if len(args) != 2 248 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 249 "STRPOS": exp.StrPosition.from_arg_list, 250 "CHARINDEX": lambda args: build_locate_strposition(args), 251 "INSTR": exp.StrPosition.from_arg_list, 252 "LOCATE": lambda args: build_locate_strposition(args), 253 "TIME_TO_TIME_STR": lambda args: exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 "TO_HEX": build_hex, 258 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 259 this=exp.Cast( 260 this=seq_get(args, 0), 261 to=exp.DataType(this=exp.DataType.Type.TEXT), 262 ), 263 start=exp.Literal.number(1), 264 length=exp.Literal.number(10), 265 ), 266 
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 267 "UPPER": build_upper, 268 "VAR_MAP": build_var_map, 269 } 270 271 NO_PAREN_FUNCTIONS = { 272 TokenType.CURRENT_DATE: exp.CurrentDate, 273 TokenType.CURRENT_DATETIME: exp.CurrentDate, 274 TokenType.CURRENT_TIME: exp.CurrentTime, 275 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 276 TokenType.CURRENT_USER: exp.CurrentUser, 277 } 278 279 STRUCT_TYPE_TOKENS = { 280 TokenType.NESTED, 281 TokenType.OBJECT, 282 TokenType.STRUCT, 283 TokenType.UNION, 284 } 285 286 NESTED_TYPE_TOKENS = { 287 TokenType.ARRAY, 288 TokenType.LIST, 289 TokenType.LOWCARDINALITY, 290 TokenType.MAP, 291 TokenType.NULLABLE, 292 TokenType.RANGE, 293 *STRUCT_TYPE_TOKENS, 294 } 295 296 ENUM_TYPE_TOKENS = { 297 TokenType.DYNAMIC, 298 TokenType.ENUM, 299 TokenType.ENUM8, 300 TokenType.ENUM16, 301 } 302 303 AGGREGATE_TYPE_TOKENS = { 304 TokenType.AGGREGATEFUNCTION, 305 TokenType.SIMPLEAGGREGATEFUNCTION, 306 } 307 308 TYPE_TOKENS = { 309 TokenType.BIT, 310 TokenType.BOOLEAN, 311 TokenType.TINYINT, 312 TokenType.UTINYINT, 313 TokenType.SMALLINT, 314 TokenType.USMALLINT, 315 TokenType.INT, 316 TokenType.UINT, 317 TokenType.BIGINT, 318 TokenType.UBIGINT, 319 TokenType.INT128, 320 TokenType.UINT128, 321 TokenType.INT256, 322 TokenType.UINT256, 323 TokenType.MEDIUMINT, 324 TokenType.UMEDIUMINT, 325 TokenType.FIXEDSTRING, 326 TokenType.FLOAT, 327 TokenType.DOUBLE, 328 TokenType.UDOUBLE, 329 TokenType.CHAR, 330 TokenType.NCHAR, 331 TokenType.VARCHAR, 332 TokenType.NVARCHAR, 333 TokenType.BPCHAR, 334 TokenType.TEXT, 335 TokenType.MEDIUMTEXT, 336 TokenType.LONGTEXT, 337 TokenType.BLOB, 338 TokenType.MEDIUMBLOB, 339 TokenType.LONGBLOB, 340 TokenType.BINARY, 341 TokenType.VARBINARY, 342 TokenType.JSON, 343 TokenType.JSONB, 344 TokenType.INTERVAL, 345 TokenType.TINYBLOB, 346 TokenType.TINYTEXT, 347 TokenType.TIME, 348 TokenType.TIMETZ, 349 TokenType.TIMESTAMP, 350 TokenType.TIMESTAMP_S, 351 TokenType.TIMESTAMP_MS, 352 
TokenType.TIMESTAMP_NS, 353 TokenType.TIMESTAMPTZ, 354 TokenType.TIMESTAMPLTZ, 355 TokenType.TIMESTAMPNTZ, 356 TokenType.DATETIME, 357 TokenType.DATETIME2, 358 TokenType.DATETIME64, 359 TokenType.SMALLDATETIME, 360 TokenType.DATE, 361 TokenType.DATE32, 362 TokenType.INT4RANGE, 363 TokenType.INT4MULTIRANGE, 364 TokenType.INT8RANGE, 365 TokenType.INT8MULTIRANGE, 366 TokenType.NUMRANGE, 367 TokenType.NUMMULTIRANGE, 368 TokenType.TSRANGE, 369 TokenType.TSMULTIRANGE, 370 TokenType.TSTZRANGE, 371 TokenType.TSTZMULTIRANGE, 372 TokenType.DATERANGE, 373 TokenType.DATEMULTIRANGE, 374 TokenType.DECIMAL, 375 TokenType.DECIMAL32, 376 TokenType.DECIMAL64, 377 TokenType.DECIMAL128, 378 TokenType.DECIMAL256, 379 TokenType.UDECIMAL, 380 TokenType.BIGDECIMAL, 381 TokenType.UUID, 382 TokenType.GEOGRAPHY, 383 TokenType.GEOGRAPHYPOINT, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 
TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEMANTIC_VIEW, 456 TokenType.SEQUENCE, 457 TokenType.SINK, 458 TokenType.SOURCE, 459 TokenType.STAGE, 460 TokenType.STORAGE_INTEGRATION, 461 TokenType.STREAMLIT, 462 TokenType.TABLE, 463 TokenType.TAG, 464 TokenType.VIEW, 465 TokenType.WAREHOUSE, 466 } 467 468 CREATABLES = { 469 TokenType.COLUMN, 470 TokenType.CONSTRAINT, 471 TokenType.FOREIGN_KEY, 472 TokenType.FUNCTION, 473 TokenType.INDEX, 474 TokenType.PROCEDURE, 475 *DB_CREATABLES, 476 } 477 478 ALTERABLES = { 479 TokenType.INDEX, 480 TokenType.TABLE, 481 TokenType.VIEW, 482 } 483 484 # Tokens that can represent identifiers 485 ID_VAR_TOKENS = { 486 TokenType.ALL, 487 TokenType.ATTACH, 488 TokenType.VAR, 489 TokenType.ANTI, 490 TokenType.APPLY, 491 TokenType.ASC, 492 TokenType.ASOF, 493 TokenType.AUTO_INCREMENT, 494 TokenType.BEGIN, 495 TokenType.BPCHAR, 496 TokenType.CACHE, 497 TokenType.CASE, 498 TokenType.COLLATE, 499 TokenType.COMMAND, 500 TokenType.COMMENT, 501 TokenType.COMMIT, 502 TokenType.CONSTRAINT, 503 TokenType.COPY, 504 TokenType.CUBE, 505 TokenType.CURRENT_SCHEMA, 506 TokenType.DEFAULT, 507 TokenType.DELETE, 508 TokenType.DESC, 509 TokenType.DESCRIBE, 510 TokenType.DETACH, 511 TokenType.DICTIONARY, 512 TokenType.DIV, 513 TokenType.END, 514 TokenType.EXECUTE, 515 TokenType.EXPORT, 516 TokenType.ESCAPE, 517 TokenType.FALSE, 518 TokenType.FIRST, 519 TokenType.FILTER, 520 TokenType.FINAL, 521 TokenType.FORMAT, 522 
TokenType.FULL, 523 TokenType.GET, 524 TokenType.IDENTIFIER, 525 TokenType.IS, 526 TokenType.ISNULL, 527 TokenType.INTERVAL, 528 TokenType.KEEP, 529 TokenType.KILL, 530 TokenType.LEFT, 531 TokenType.LIMIT, 532 TokenType.LOAD, 533 TokenType.MERGE, 534 TokenType.NATURAL, 535 TokenType.NEXT, 536 TokenType.OFFSET, 537 TokenType.OPERATOR, 538 TokenType.ORDINALITY, 539 TokenType.OVERLAPS, 540 TokenType.OVERWRITE, 541 TokenType.PARTITION, 542 TokenType.PERCENT, 543 TokenType.PIVOT, 544 TokenType.PRAGMA, 545 TokenType.PUT, 546 TokenType.RANGE, 547 TokenType.RECURSIVE, 548 TokenType.REFERENCES, 549 TokenType.REFRESH, 550 TokenType.RENAME, 551 TokenType.REPLACE, 552 TokenType.RIGHT, 553 TokenType.ROLLUP, 554 TokenType.ROW, 555 TokenType.ROWS, 556 TokenType.SEMI, 557 TokenType.SET, 558 TokenType.SETTINGS, 559 TokenType.SHOW, 560 TokenType.TEMPORARY, 561 TokenType.TOP, 562 TokenType.TRUE, 563 TokenType.TRUNCATE, 564 TokenType.UNIQUE, 565 TokenType.UNNEST, 566 TokenType.UNPIVOT, 567 TokenType.UPDATE, 568 TokenType.USE, 569 TokenType.VOLATILE, 570 TokenType.WINDOW, 571 *CREATABLES, 572 *SUBQUERY_PREDICATES, 573 *TYPE_TOKENS, 574 *NO_PAREN_FUNCTIONS, 575 } 576 ID_VAR_TOKENS.remove(TokenType.UNION) 577 578 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 579 TokenType.ANTI, 580 TokenType.APPLY, 581 TokenType.ASOF, 582 TokenType.FULL, 583 TokenType.LEFT, 584 TokenType.LOCK, 585 TokenType.NATURAL, 586 TokenType.RIGHT, 587 TokenType.SEMI, 588 TokenType.WINDOW, 589 } 590 591 ALIAS_TOKENS = ID_VAR_TOKENS 592 593 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 594 595 ARRAY_CONSTRUCTORS = { 596 "ARRAY": exp.Array, 597 "LIST": exp.List, 598 } 599 600 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 601 602 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 603 604 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 605 606 FUNC_TOKENS = { 607 TokenType.COLLATE, 608 TokenType.COMMAND, 609 TokenType.CURRENT_DATE, 610 TokenType.CURRENT_DATETIME, 611 TokenType.CURRENT_SCHEMA, 612 
TokenType.CURRENT_TIMESTAMP, 613 TokenType.CURRENT_TIME, 614 TokenType.CURRENT_USER, 615 TokenType.FILTER, 616 TokenType.FIRST, 617 TokenType.FORMAT, 618 TokenType.GET, 619 TokenType.GLOB, 620 TokenType.IDENTIFIER, 621 TokenType.INDEX, 622 TokenType.ISNULL, 623 TokenType.ILIKE, 624 TokenType.INSERT, 625 TokenType.LIKE, 626 TokenType.MERGE, 627 TokenType.NEXT, 628 TokenType.OFFSET, 629 TokenType.PRIMARY_KEY, 630 TokenType.RANGE, 631 TokenType.REPLACE, 632 TokenType.RLIKE, 633 TokenType.ROW, 634 TokenType.UNNEST, 635 TokenType.VAR, 636 TokenType.LEFT, 637 TokenType.RIGHT, 638 TokenType.SEQUENCE, 639 TokenType.DATE, 640 TokenType.DATETIME, 641 TokenType.TABLE, 642 TokenType.TIMESTAMP, 643 TokenType.TIMESTAMPTZ, 644 TokenType.TRUNCATE, 645 TokenType.WINDOW, 646 TokenType.XOR, 647 *TYPE_TOKENS, 648 *SUBQUERY_PREDICATES, 649 } 650 651 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 652 TokenType.AND: exp.And, 653 } 654 655 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 656 TokenType.COLON_EQ: exp.PropertyEQ, 657 } 658 659 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 660 TokenType.OR: exp.Or, 661 } 662 663 EQUALITY = { 664 TokenType.EQ: exp.EQ, 665 TokenType.NEQ: exp.NEQ, 666 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 667 } 668 669 COMPARISON = { 670 TokenType.GT: exp.GT, 671 TokenType.GTE: exp.GTE, 672 TokenType.LT: exp.LT, 673 TokenType.LTE: exp.LTE, 674 } 675 676 BITWISE = { 677 TokenType.AMP: exp.BitwiseAnd, 678 TokenType.CARET: exp.BitwiseXor, 679 TokenType.PIPE: exp.BitwiseOr, 680 } 681 682 TERM = { 683 TokenType.DASH: exp.Sub, 684 TokenType.PLUS: exp.Add, 685 TokenType.MOD: exp.Mod, 686 TokenType.COLLATE: exp.Collate, 687 } 688 689 FACTOR = { 690 TokenType.DIV: exp.IntDiv, 691 TokenType.LR_ARROW: exp.Distance, 692 TokenType.SLASH: exp.Div, 693 TokenType.STAR: exp.Mul, 694 } 695 696 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 697 698 TIMES = { 699 TokenType.TIME, 700 TokenType.TIMETZ, 701 } 702 703 TIMESTAMPS = { 704 
TokenType.TIMESTAMP, 705 TokenType.TIMESTAMPNTZ, 706 TokenType.TIMESTAMPTZ, 707 TokenType.TIMESTAMPLTZ, 708 *TIMES, 709 } 710 711 SET_OPERATIONS = { 712 TokenType.UNION, 713 TokenType.INTERSECT, 714 TokenType.EXCEPT, 715 } 716 717 JOIN_METHODS = { 718 TokenType.ASOF, 719 TokenType.NATURAL, 720 TokenType.POSITIONAL, 721 } 722 723 JOIN_SIDES = { 724 TokenType.LEFT, 725 TokenType.RIGHT, 726 TokenType.FULL, 727 } 728 729 JOIN_KINDS = { 730 TokenType.ANTI, 731 TokenType.CROSS, 732 TokenType.INNER, 733 TokenType.OUTER, 734 TokenType.SEMI, 735 TokenType.STRAIGHT_JOIN, 736 } 737 738 JOIN_HINTS: t.Set[str] = set() 739 740 LAMBDAS = { 741 TokenType.ARROW: lambda self, expressions: self.expression( 742 exp.Lambda, 743 this=self._replace_lambda( 744 self._parse_assignment(), 745 expressions, 746 ), 747 expressions=expressions, 748 ), 749 TokenType.FARROW: lambda self, expressions: self.expression( 750 exp.Kwarg, 751 this=exp.var(expressions[0].name), 752 expression=self._parse_assignment(), 753 ), 754 } 755 756 COLUMN_OPERATORS = { 757 TokenType.DOT: None, 758 TokenType.DOTCOLON: lambda self, this, to: self.expression( 759 exp.JSONCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.DCOLON: lambda self, this, to: self.build_cast( 764 strict=self.STRICT_CAST, this=this, to=to 765 ), 766 TokenType.ARROW: lambda self, this, path: self.expression( 767 exp.JSONExtract, 768 this=this, 769 expression=self.dialect.to_json_path(path), 770 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 771 ), 772 TokenType.DARROW: lambda self, this, path: self.expression( 773 exp.JSONExtractScalar, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 779 exp.JSONBExtract, 780 this=this, 781 expression=path, 782 ), 783 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtractScalar, 785 this=this, 786 expression=path, 787 ), 788 
TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 789 exp.JSONBContains, 790 this=this, 791 expression=key, 792 ), 793 } 794 795 CAST_COLUMN_OPERATORS = { 796 TokenType.DOTCOLON, 797 TokenType.DCOLON, 798 } 799 800 EXPRESSION_PARSERS = { 801 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 802 exp.Column: lambda self: self._parse_column(), 803 exp.Condition: lambda self: self._parse_assignment(), 804 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 805 exp.Expression: lambda self: self._parse_expression(), 806 exp.From: lambda self: self._parse_from(joins=True), 807 exp.Group: lambda self: self._parse_group(), 808 exp.Having: lambda self: self._parse_having(), 809 exp.Hint: lambda self: self._parse_hint_body(), 810 exp.Identifier: lambda self: self._parse_id_var(), 811 exp.Join: lambda self: self._parse_join(), 812 exp.Lambda: lambda self: self._parse_lambda(), 813 exp.Lateral: lambda self: self._parse_lateral(), 814 exp.Limit: lambda self: self._parse_limit(), 815 exp.Offset: lambda self: self._parse_offset(), 816 exp.Order: lambda self: self._parse_order(), 817 exp.Ordered: lambda self: self._parse_ordered(), 818 exp.Properties: lambda self: self._parse_properties(), 819 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 820 exp.Qualify: lambda self: self._parse_qualify(), 821 exp.Returning: lambda self: self._parse_returning(), 822 exp.Select: lambda self: self._parse_select(), 823 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 824 exp.Table: lambda self: self._parse_table_parts(), 825 exp.TableAlias: lambda self: self._parse_table_alias(), 826 exp.Tuple: lambda self: self._parse_value(values=False), 827 exp.Whens: lambda self: self._parse_when_matched(), 828 exp.Where: lambda self: self._parse_where(), 829 exp.Window: lambda self: self._parse_named_window(), 830 exp.With: lambda self: self._parse_with(), 831 "JOIN_TYPE": lambda self: 
self._parse_join_parts(), 832 } 833 834 STATEMENT_PARSERS = { 835 TokenType.ALTER: lambda self: self._parse_alter(), 836 TokenType.ANALYZE: lambda self: self._parse_analyze(), 837 TokenType.BEGIN: lambda self: self._parse_transaction(), 838 TokenType.CACHE: lambda self: self._parse_cache(), 839 TokenType.COMMENT: lambda self: self._parse_comment(), 840 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 841 TokenType.COPY: lambda self: self._parse_copy(), 842 TokenType.CREATE: lambda self: self._parse_create(), 843 TokenType.DELETE: lambda self: self._parse_delete(), 844 TokenType.DESC: lambda self: self._parse_describe(), 845 TokenType.DESCRIBE: lambda self: self._parse_describe(), 846 TokenType.DROP: lambda self: self._parse_drop(), 847 TokenType.GRANT: lambda self: self._parse_grant(), 848 TokenType.INSERT: lambda self: self._parse_insert(), 849 TokenType.KILL: lambda self: self._parse_kill(), 850 TokenType.LOAD: lambda self: self._parse_load(), 851 TokenType.MERGE: lambda self: self._parse_merge(), 852 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 853 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 854 TokenType.REFRESH: lambda self: self._parse_refresh(), 855 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 856 TokenType.SET: lambda self: self._parse_set(), 857 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 858 TokenType.UNCACHE: lambda self: self._parse_uncache(), 859 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 860 TokenType.UPDATE: lambda self: self._parse_update(), 861 TokenType.USE: lambda self: self._parse_use(), 862 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 863 } 864 865 UNARY_PARSERS = { 866 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 867 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 868 TokenType.TILDA: lambda self: 
self.expression(exp.BitwiseNot, this=self._parse_unary()), 869 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 870 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 871 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 872 } 873 874 STRING_PARSERS = { 875 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 876 exp.RawString, this=token.text 877 ), 878 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 879 exp.National, this=token.text 880 ), 881 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 882 TokenType.STRING: lambda self, token: self.expression( 883 exp.Literal, this=token.text, is_string=True 884 ), 885 TokenType.UNICODE_STRING: lambda self, token: self.expression( 886 exp.UnicodeString, 887 this=token.text, 888 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 889 ), 890 } 891 892 NUMERIC_PARSERS = { 893 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 894 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 895 TokenType.HEX_STRING: lambda self, token: self.expression( 896 exp.HexString, 897 this=token.text, 898 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 899 ), 900 TokenType.NUMBER: lambda self, token: self.expression( 901 exp.Literal, this=token.text, is_string=False 902 ), 903 } 904 905 PRIMARY_PARSERS = { 906 **STRING_PARSERS, 907 **NUMERIC_PARSERS, 908 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 909 TokenType.NULL: lambda self, _: self.expression(exp.Null), 910 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 911 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 912 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 913 TokenType.STAR: lambda self, _: 
self._parse_star_ops(), 914 } 915 916 PLACEHOLDER_PARSERS = { 917 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 918 TokenType.PARAMETER: lambda self: self._parse_parameter(), 919 TokenType.COLON: lambda self: ( 920 self.expression(exp.Placeholder, this=self._prev.text) 921 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 922 else None 923 ), 924 } 925 926 RANGE_PARSERS = { 927 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 928 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 929 TokenType.GLOB: binary_range_parser(exp.Glob), 930 TokenType.ILIKE: binary_range_parser(exp.ILike), 931 TokenType.IN: lambda self, this: self._parse_in(this), 932 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 933 TokenType.IS: lambda self, this: self._parse_is(this), 934 TokenType.LIKE: binary_range_parser(exp.Like), 935 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 936 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 937 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 938 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 939 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 940 } 941 942 PIPE_SYNTAX_TRANSFORM_PARSERS = { 943 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 944 "AS": lambda self, query: self._build_pipe_cte( 945 query, [exp.Star()], self._parse_table_alias() 946 ), 947 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 948 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 949 "ORDER BY": lambda self, query: query.order_by( 950 self._parse_order(), append=False, copy=False 951 ), 952 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 953 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 954 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 955 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 956 
"WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 957 } 958 959 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 960 "ALLOWED_VALUES": lambda self: self.expression( 961 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 962 ), 963 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 964 "AUTO": lambda self: self._parse_auto_property(), 965 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 966 "BACKUP": lambda self: self.expression( 967 exp.BackupProperty, this=self._parse_var(any_token=True) 968 ), 969 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 970 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 971 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 972 "CHECKSUM": lambda self: self._parse_checksum(), 973 "CLUSTER BY": lambda self: self._parse_cluster(), 974 "CLUSTERED": lambda self: self._parse_clustered_by(), 975 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 976 exp.CollateProperty, **kwargs 977 ), 978 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 979 "CONTAINS": lambda self: self._parse_contains_property(), 980 "COPY": lambda self: self._parse_copy_property(), 981 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 982 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 983 "DEFINER": lambda self: self._parse_definer(), 984 "DETERMINISTIC": lambda self: self.expression( 985 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 986 ), 987 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 988 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 989 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 990 "DISTKEY": lambda self: self._parse_distkey(), 991 "DISTSTYLE": lambda self: 
self._parse_property_assignment(exp.DistStyleProperty), 992 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 993 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 994 "ENVIRONMENT": lambda self: self.expression( 995 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 996 ), 997 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 998 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 999 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1000 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1001 "FREESPACE": lambda self: self._parse_freespace(), 1002 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1003 "HEAP": lambda self: self.expression(exp.HeapProperty), 1004 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1005 "IMMUTABLE": lambda self: self.expression( 1006 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1007 ), 1008 "INHERITS": lambda self: self.expression( 1009 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1010 ), 1011 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1012 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1013 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1014 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1015 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1016 "LIKE": lambda self: self._parse_create_like(), 1017 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1018 "LOCK": lambda self: self._parse_locking(), 1019 "LOCKING": lambda self: self._parse_locking(), 1020 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1021 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1022 "MERGEBLOCKRATIO": lambda self, **kwargs: 
self._parse_mergeblockratio(**kwargs), 1023 "MODIFIES": lambda self: self._parse_modifies_property(), 1024 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1025 "NO": lambda self: self._parse_no_property(), 1026 "ON": lambda self: self._parse_on_property(), 1027 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1028 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1029 "PARTITION": lambda self: self._parse_partitioned_of(), 1030 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1031 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1032 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1033 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1034 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1035 "READS": lambda self: self._parse_reads_property(), 1036 "REMOTE": lambda self: self._parse_remote_with_connection(), 1037 "RETURNS": lambda self: self._parse_returns(), 1038 "STRICT": lambda self: self.expression(exp.StrictProperty), 1039 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1040 "ROW": lambda self: self._parse_row(), 1041 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1042 "SAMPLE": lambda self: self.expression( 1043 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1044 ), 1045 "SECURE": lambda self: self.expression(exp.SecureProperty), 1046 "SECURITY": lambda self: self._parse_security(), 1047 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1048 "SETTINGS": lambda self: self._parse_settings_property(), 1049 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1050 "SORTKEY": lambda self: self._parse_sortkey(), 1051 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1052 "STABLE": lambda self: self.expression( 1053 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1054 
), 1055 "STORED": lambda self: self._parse_stored(), 1056 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1057 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1058 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1059 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1060 "TO": lambda self: self._parse_to_table(), 1061 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1062 "TRANSFORM": lambda self: self.expression( 1063 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1064 ), 1065 "TTL": lambda self: self._parse_ttl(), 1066 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1067 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1068 "VOLATILE": lambda self: self._parse_volatile_property(), 1069 "WITH": lambda self: self._parse_with_property(), 1070 } 1071 1072 CONSTRAINT_PARSERS = { 1073 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1074 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1075 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1076 "CHARACTER SET": lambda self: self.expression( 1077 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1078 ), 1079 "CHECK": lambda self: self.expression( 1080 exp.CheckColumnConstraint, 1081 this=self._parse_wrapped(self._parse_assignment), 1082 enforced=self._match_text_seq("ENFORCED"), 1083 ), 1084 "COLLATE": lambda self: self.expression( 1085 exp.CollateColumnConstraint, 1086 this=self._parse_identifier() or self._parse_column(), 1087 ), 1088 "COMMENT": lambda self: self.expression( 1089 exp.CommentColumnConstraint, this=self._parse_string() 1090 ), 1091 "COMPRESS": lambda self: self._parse_compress(), 1092 "CLUSTERED": lambda self: self.expression( 1093 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1094 ), 1095 "NONCLUSTERED": lambda 
self: self.expression( 1096 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1097 ), 1098 "DEFAULT": lambda self: self.expression( 1099 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1100 ), 1101 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1102 "EPHEMERAL": lambda self: self.expression( 1103 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1104 ), 1105 "EXCLUDE": lambda self: self.expression( 1106 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1107 ), 1108 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1109 "FORMAT": lambda self: self.expression( 1110 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1111 ), 1112 "GENERATED": lambda self: self._parse_generated_as_identity(), 1113 "IDENTITY": lambda self: self._parse_auto_increment(), 1114 "INLINE": lambda self: self._parse_inline(), 1115 "LIKE": lambda self: self._parse_create_like(), 1116 "NOT": lambda self: self._parse_not_constraint(), 1117 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1118 "ON": lambda self: ( 1119 self._match(TokenType.UPDATE) 1120 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1121 ) 1122 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1123 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1124 "PERIOD": lambda self: self._parse_period_for_system_time(), 1125 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1126 "REFERENCES": lambda self: self._parse_references(match=False), 1127 "TITLE": lambda self: self.expression( 1128 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1129 ), 1130 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1131 "UNIQUE": lambda self: self._parse_unique(), 1132 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1133 
"WATERMARK": lambda self: self.expression( 1134 exp.WatermarkColumnConstraint, 1135 this=self._match(TokenType.FOR) and self._parse_column(), 1136 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1137 ), 1138 "WITH": lambda self: self.expression( 1139 exp.Properties, expressions=self._parse_wrapped_properties() 1140 ), 1141 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1142 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1143 } 1144 1145 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1146 if not self._match(TokenType.L_PAREN, advance=False): 1147 # Partitioning by bucket or truncate follows the syntax: 1148 # PARTITION BY (BUCKET(..) | TRUNCATE(..)) 1149 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1150 self._retreat(self._index - 1) 1151 return None 1152 1153 klass = ( 1154 exp.PartitionedByBucket 1155 if self._prev.text.upper() == "BUCKET" 1156 else exp.PartitionByTruncate 1157 ) 1158 1159 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1160 this, expression = seq_get(args, 0), seq_get(args, 1) 1161 1162 if isinstance(this, exp.Literal): 1163 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1164 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1165 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1166 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1167 # 1168 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1169 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1170 this, expression = expression, this 1171 1172 return self.expression(klass, 
this=this, expression=expression) 1173 1174 ALTER_PARSERS = { 1175 "ADD": lambda self: self._parse_alter_table_add(), 1176 "AS": lambda self: self._parse_select(), 1177 "ALTER": lambda self: self._parse_alter_table_alter(), 1178 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1179 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1180 "DROP": lambda self: self._parse_alter_table_drop(), 1181 "RENAME": lambda self: self._parse_alter_table_rename(), 1182 "SET": lambda self: self._parse_alter_table_set(), 1183 "SWAP": lambda self: self.expression( 1184 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1185 ), 1186 } 1187 1188 ALTER_ALTER_PARSERS = { 1189 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1190 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1191 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1192 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1193 } 1194 1195 SCHEMA_UNNAMED_CONSTRAINTS = { 1196 "CHECK", 1197 "EXCLUDE", 1198 "FOREIGN KEY", 1199 "LIKE", 1200 "PERIOD", 1201 "PRIMARY KEY", 1202 "UNIQUE", 1203 "WATERMARK", 1204 "BUCKET", 1205 "TRUNCATE", 1206 } 1207 1208 NO_PAREN_FUNCTION_PARSERS = { 1209 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1210 "CASE": lambda self: self._parse_case(), 1211 "CONNECT_BY_ROOT": lambda self: self.expression( 1212 exp.ConnectByRoot, this=self._parse_column() 1213 ), 1214 "IF": lambda self: self._parse_if(), 1215 } 1216 1217 INVALID_FUNC_NAME_TOKENS = { 1218 TokenType.IDENTIFIER, 1219 TokenType.STRING, 1220 } 1221 1222 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1223 1224 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1225 1226 FUNCTION_PARSERS = { 1227 **{ 1228 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1229 }, 1230 **{ 1231 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    # Maps a modifier's leading token to a callback producing the
    # (modifier key, parsed node) pair used to fill a query's "modifiers".
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    # Parsers for the scope keyword following SET (e.g. SET GLOBAL x = y).
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialect-specific SHOW statement parsers; empty in the base parser.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Converts typed literals (e.g. JSON 'doc') into dedicated expression nodes.
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Dialect hooks to rewrite parsed DataType nodes; empty in the base parser.
    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            # NOTE(review): "UNCOMITTED" looks like a misspelling of the SQL keyword
            # "UNCOMMITTED" — confirm whether anything relies on this spelling before fixing.
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    # Valid keyword sequences after ON CONFLICT / OR <action>.
    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    # Valid option keywords in CREATE SEQUENCE statements.
    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    # Object kinds that can follow a USE statement.
    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    # Valid option keyword sequences on key constraints (e.g. MATCH FULL).
    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    # Valid keyword sequences for a window frame's EXCLUDE clause.
    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    # Tokens that may introduce a constraint in ALTER TABLE ... ADD.
    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC escape prefixes ({d '...'}, {t '...'}, {ts '...'}) and their nodes.
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # Maps keywords following ANALYZE to dedicated sub-parsers.
    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    # Whether CAST (as opposed to TRY_CAST) should error on failure.
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this
    # is true for Snowflake but not for BigQuery which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Per-instance parser state; __slots__ avoids a per-instance __dict__.
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported locally to avoid a circular import at module load time
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all parsing state so the instance can parse a new token stream."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
1636 """ 1637 errors = [] 1638 for expression_type in ensure_list(expression_types): 1639 parser = self.EXPRESSION_PARSERS.get(expression_type) 1640 if not parser: 1641 raise TypeError(f"No parser registered for {expression_type}") 1642 1643 try: 1644 return self._parse(parser, raw_tokens, sql) 1645 except ParseError as e: 1646 e.errors[0]["into_expression"] = expression_type 1647 errors.append(e) 1648 1649 raise ParseError( 1650 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1651 errors=merge_errors(errors), 1652 ) from errors[-1] 1653 1654 def _parse( 1655 self, 1656 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1657 raw_tokens: t.List[Token], 1658 sql: t.Optional[str] = None, 1659 ) -> t.List[t.Optional[exp.Expression]]: 1660 self.reset() 1661 self.sql = sql or "" 1662 1663 total = len(raw_tokens) 1664 chunks: t.List[t.List[Token]] = [[]] 1665 1666 for i, token in enumerate(raw_tokens): 1667 if token.token_type == TokenType.SEMICOLON: 1668 if token.comments: 1669 chunks.append([token]) 1670 1671 if i < total - 1: 1672 chunks.append([]) 1673 else: 1674 chunks[-1].append(token) 1675 1676 expressions = [] 1677 1678 for tokens in chunks: 1679 self._index = -1 1680 self._tokens = tokens 1681 self._advance() 1682 1683 expressions.append(parse_method(self)) 1684 1685 if self._index < len(self._tokens): 1686 self.raise_error("Invalid expression / Unexpected token") 1687 1688 self.check_errors() 1689 1690 return expressions 1691 1692 def check_errors(self) -> None: 1693 """Logs or raises any found errors, depending on the chosen error level setting.""" 1694 if self.error_level == ErrorLevel.WARN: 1695 for error in self.errors: 1696 logger.error(str(error)) 1697 elif self.error_level == ErrorLevel.RAISE and self.errors: 1698 raise ParseError( 1699 concat_messages(self.errors, self.max_errors), 1700 errors=merge_errors(self.errors), 1701 ) 1702 1703 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1704 """ 
1705 Appends an error in the list of recorded errors or raises it, depending on the chosen 1706 error level setting. 1707 """ 1708 token = token or self._curr or self._prev or Token.string("") 1709 start = token.start 1710 end = token.end + 1 1711 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1712 highlight = self.sql[start:end] 1713 end_context = self.sql[end : end + self.error_message_context] 1714 1715 error = ParseError.new( 1716 f"{message}. Line {token.line}, Col: {token.col}.\n" 1717 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1718 description=message, 1719 line=token.line, 1720 col=token.col, 1721 start_context=start_context, 1722 highlight=highlight, 1723 end_context=end_context, 1724 ) 1725 1726 if self.error_level == ErrorLevel.IMMEDIATE: 1727 raise error 1728 1729 self.errors.append(error) 1730 1731 def expression( 1732 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1733 ) -> E: 1734 """ 1735 Creates a new, validated Expression. 1736 1737 Args: 1738 exp_class: The expression class to instantiate. 1739 comments: An optional list of comments to attach to the expression. 1740 kwargs: The arguments to set for the expression along with their respective values. 1741 1742 Returns: 1743 The target expression. 1744 """ 1745 instance = exp_class(**kwargs) 1746 instance.add_comments(comments) if comments else self._add_comments(instance) 1747 return self.validate_expression(instance) 1748 1749 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1750 if expression and self._prev_comments: 1751 expression.add_comments(self._prev_comments) 1752 self._prev_comments = None 1753 1754 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1755 """ 1756 Validates an Expression, making sure that all its mandatory arguments are set. 1757 1758 Args: 1759 expression: The expression to validate. 
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanned by the two tokens (inclusive)."""
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        """Whether the previous and current tokens are adjacent in the source (no whitespace)."""
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor `times` tokens forward, refreshing _curr/_next/_prev state."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back (or forward) to `index`; a no-op when already there."""
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        """Warns that the current chunk uses unsupported syntax (unless it's trivially short)."""
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Falls back to wrapping the remaining statement in an opaque Command node."""
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any failure surfaces as an exception we can catch here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <object> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown object kind: fall back to an opaque Command
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a TO <table> property (e.g. in ClickHouse materialized views)."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause, including per-item actions."""
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL item is an expression optionally followed by an action keyword
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement: a registered statement, a command, or an expression."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to a Command for unknown object kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        # ON <cluster> (e.g. ClickHouse-style clusters); None when no ON follows.
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; short-circuiting keeps the cursor untouched
        # past the first non-matching token.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement; falls back to exp.Command when unparseable."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        # T-SQL columnstore index flavors: CLUSTERED vs (NON)CLUSTERED COLUMNSTORE.
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        # Accumulates property lists parsed at the various syntactic positions
        # into the single `properties` node.
        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    # Merge all parsed SequenceProperties nodes into a single one,
                    # concatenating their "options" lists; other args overwrite.
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fallback to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        # Leftover tokens (other than a closing paren/comma from a wrapping context)
        # mean we couldn't fully parse the statement: fall back to a raw command.
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse CREATE SEQUENCE options; returns None if no tokens were consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # No token consumed at all -> this wasn't a sequence property list.
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Leading modifier keywords; each is a bool (or string for "local")
        # forwarded to the matched property parser below.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass truthy modifiers; a parser that doesn't accept one raises TypeError.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated property list."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single DDL property, trying keyword parsers before key=value form."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        # Fall back to generic `key = value`; retreat fully if there's no `=`.
        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        """Parse Hive STORED BY <handler> / STORED AS <format> clauses."""
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers are demoted to plain vars; quoted ones are kept.
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse `[= | AS] <value>` into the given property expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into exp.Properties; None if none found."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguate Teradata's VOLATILE table modifier from the stability
        # attribute by inspecting the token two positions back.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = ON/OFF (with optional option list)."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = ON/OFF (with optional option list)."""
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY HASH(...)/RANDOM [BUCKETS n|AUTO] [ORDER BY ...]."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        """Parse `KEY (a, b, ...)` into the given composite-key expression type."""
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Dispatch the many WITH <...> property forms to their specific parsers."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse MySQL's DEFINER = user@host; None when either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # on: True for ON, False for OFF, None when neither keyword appears.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse CLUSTER BY expressions, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive's CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Only COPY GRANTS is a property; otherwise back out of the COPY token.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        # [NO] [CONCURRENT] ISOLATED LOADING; retreat entirely if the
        # mandatory keywords don't follow the optional prefixes.
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse Teradata LOCKING modifiers (kind, target, FOR/IN, lock type)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a target; ROW locks don't.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS n, REMAINDER m)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # statistics: True/False when AND [NO] STATISTICS is present, else None.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse CREATE TABLE ... LIKE <table> [INCLUDING/EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a UDF RETURNS clause: TABLE<...>, TABLE(...), NULL ON NULL INPUT, or a type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement (optionally styled, e.g. FORMATTED)."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" token was actually the first part of a dotted table
            # name; back up and re-parse it as such.
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse a multi-table INSERT (``INSERT FIRST|ALL ... [WHEN ...] INTO ... SELECT ...``).

        `self._prev` holds the FIRST/ALL token on entry, which becomes the `kind`.
        """
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # One "[WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...]" branch;
            # returns None when no INTO follows, which terminates the branch loop.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement; may dispatch to multi-table INSERT (FIRST/ALL).

        NOTE: the keyword arguments of the final ``self.expression(...)`` call are
        evaluated left-to-right and each consumes tokens, so their order encodes the
        clause order — do not reorder them.
        """
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                # e.g. sqlite's INSERT OR REPLACE / OR IGNORE / ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear either before or after the source expression
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse ``KILL [CONNECTION | QUERY] <id>`` (MySQL-style)."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ``ON CONFLICT ...`` (Postgres) or ``ON DUPLICATE KEY ...`` (MySQL)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # Optional conflict target: ON CONSTRAINT <name> or a (col, ...) key list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE [SET] a = 1, b = 2 ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse ``RETURNING <exprs> [INTO <target>]``; None if RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # "ROW" was already consumed by the caller; require FORMAT next.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse ``[WITH] SERDEPROPERTIES (...)``; rewinds and returns None on no match."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause, either SERDE or DELIMITED.

        When `match_row` is set, the leading ``ROW FORMAT`` tokens are required.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            # ESCAPED BY is only valid directly after FIELDS TERMINATED BY
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive ``LOAD DATA [LOCAL] INPATH ... INTO TABLE ...``.

        Anything other than LOAD DATA is kept verbatim as a Command.
        """
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            # RETURNING may appear either before or after the WHERE clause
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (the UPDATE token was consumed by the caller)."""
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                # RETURNING may appear either before or after the WHERE clause
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        """Parse ``USE [<kind>] <name>``, e.g. ``USE DATABASE db``."""
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse ``UNCACHE TABLE [IF EXISTS] <table>`` (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse ``CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS <select>]`` (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # NOTE(review): only a single 'key' = 'value' pair is consumed here.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse ``PARTITION (...)`` / ``SUBPARTITION (...)``; None if absent."""
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        """Parse one row of a VALUES clause into a Tuple, or None on no match."""

        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list; exists as a hook for dialect overrides."""
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        """Parse the contents of a parenthesized query: pivot, FROM-first, or select."""
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
            # in case a modifier (e.g. join) is following
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query, optionally folding a trailing |> pipe chain."""
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Core SELECT parser: handles CTEs, FROM-first syntax, VALUES, SUMMARIZE,
        DESCRIBE, STREAM, wrapped selects and trailing set operations.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                # A dot after the next token means ALL/DISTINCT is actually a
                # qualified column reference, not a quantifier.
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # Bare "FROM x" (duckdb) is sugar for "SELECT * FROM x"
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM was an identifier after all, back it out
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        """Parse ``SEARCH DEPTH|BREADTH FIRST BY ... SET ... [USING ...]``."""
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs; None if WITH is absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma separator
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse one CTE: ``<alias> [NOT MATERIALIZED | MATERIALIZED] AS (<stmt>)``."""
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            # A bare VALUES CTE body is normalized into SELECT * FROM (VALUES ...)
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Rewind if the parens did not contain a column list
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node with optional pivots, alias and sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to prior sources as explicit UNNEST calls
        (e.g. BigQuery's ``FROM t, t.arr`` becomes ``FROM t, UNNEST(t.arr)``).
        """
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE/GROUP/ORDER/LIMIT, ...)
        to a modifiable query node; non-modifiable nodes pass through unchanged.
        """
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT that carried an embedded OFFSET (and LIMIT BY
                            # expressions) is split into a separate Offset node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume all remaining tokens and keep the raw SQL text as a single hint."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        # Hook for dialect overrides; default hints are plain function calls.
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse hint contents as function calls / vars, falling back to the raw
        string when parsing fails or leaves tokens unconsumed.
        """
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        # Hints arrive as comments attached to the HINT token (e.g. /*+ ... */).
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse ``INTO [TEMPORARY | UNLOGGED] [TABLE] <table>``; None if absent."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )
    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; None when FROM is absent (unless `skip_from_token`)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry, with an optional FINAL/RUNNING frame keyword."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause (row pattern matching)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is regex-like, so scan tokens by balancing parens
            # instead of parsing it as SQL.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY; None if none of them follows."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # cross_apply: True = CROSS APPLY, False = OUTER APPLY, None = LATERAL
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: accept UNNEST, a function call, or a dotted name
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Match the optional (method, side, kind) token triple preceding JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the USING (...) column list, unwrapping Columns to identifiers."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single join clause (including comma cross-joins and APPLY)."""
        if self._match(TokenType.COMMA):
            # Comma join; _try_parse rewinds on failure so a trailing comma is harmless
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # Handle "a JOIN b JOIN c ON ..." where the ON/USING binds to the
            # outermost join: nest the inner joins under the joined table.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an index expression with an optional Postgres operator class."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter tail of CREATE INDEX (USING, columns, INCLUDE, ...)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index`/`anonymous` signal the name was already consumed."""
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints following a table."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table reference (function, identifier, string, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly qualified) table reference into an exp.Table.

        Handles catalog.db.table chains, the tsql `a..b` form, optional wildcard
        suffixes, and Snowflake CHANGES / AT|BEFORE clauses plus pivots.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # Only a database (and optional catalog) was expected, so shift parts up
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)
        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a FROM-clause table factor: lateral, unnest, values, subquery or plain table,
        plus its trailing modifiers (partition, version, sample, alias, hints, pivots, joins).
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place the TABLESAMPLE clause before the alias
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause (FOR TIMESTAMP/VERSION AS OF, BETWEEN, etc.)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
4174 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4175 index = self._index 4176 historical_data = None 4177 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4178 this = self._prev.text.upper() 4179 kind = ( 4180 self._match(TokenType.L_PAREN) 4181 and self._match_texts(self.HISTORICAL_DATA_KIND) 4182 and self._prev.text.upper() 4183 ) 4184 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4185 4186 if expression: 4187 self._match_r_paren() 4188 historical_data = self.expression( 4189 exp.HistoricalData, this=this, kind=kind, expression=expression 4190 ) 4191 else: 4192 self._retreat(index) 4193 4194 return historical_data 4195 4196 def _parse_changes(self) -> t.Optional[exp.Changes]: 4197 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4198 return None 4199 4200 information = self._parse_var(any_token=True) 4201 self._match_r_paren() 4202 4203 return self.expression( 4204 exp.Changes, 4205 information=information, 4206 at_before=self._parse_historical_data(), 4207 end=self._parse_historical_data(), 4208 ) 4209 4210 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4211 if not self._match(TokenType.UNNEST): 4212 return None 4213 4214 expressions = self._parse_wrapped_csv(self._parse_equality) 4215 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4216 4217 alias = self._parse_table_alias() if with_alias else None 4218 4219 if alias: 4220 if self.dialect.UNNEST_COLUMN_ONLY: 4221 if alias.args.get("columns"): 4222 self.raise_error("Unexpected extra column alias in unnest.") 4223 4224 alias.set("columns", [alias.this]) 4225 alias.set("this", None) 4226 4227 columns = alias.args.get("columns") or [] 4228 if offset and len(expressions) < len(columns): 4229 offset = columns.pop() 4230 4231 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4232 self._match(TokenType.ALIAS) 4233 offset = self._parse_id_var( 4234 any_token=False, 
tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4235 ) or exp.to_identifier("offset") 4236 4237 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4238 4239 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4240 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4241 if not is_derived and not ( 4242 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4243 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4244 ): 4245 return None 4246 4247 expressions = self._parse_csv(self._parse_value) 4248 alias = self._parse_table_alias() 4249 4250 if is_derived: 4251 self._match_r_paren() 4252 4253 return self.expression( 4254 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4255 ) 4256 4257 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4258 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4259 as_modifier and self._match_text_seq("USING", "SAMPLE") 4260 ): 4261 return None 4262 4263 bucket_numerator = None 4264 bucket_denominator = None 4265 bucket_field = None 4266 percent = None 4267 size = None 4268 seed = None 4269 4270 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4271 matched_l_paren = self._match(TokenType.L_PAREN) 4272 4273 if self.TABLESAMPLE_CSV: 4274 num = None 4275 expressions = self._parse_csv(self._parse_primary) 4276 else: 4277 expressions = None 4278 num = ( 4279 self._parse_factor() 4280 if self._match(TokenType.NUMBER, advance=False) 4281 else self._parse_primary() or self._parse_placeholder() 4282 ) 4283 4284 if self._match_text_seq("BUCKET"): 4285 bucket_numerator = self._parse_number() 4286 self._match_text_seq("OUT", "OF") 4287 bucket_denominator = bucket_denominator = self._parse_number() 4288 self._match(TokenType.ON) 4289 bucket_field = self._parse_field() 4290 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4291 percent = num 4292 elif self._match(TokenType.ROWS) 
or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4293 size = num 4294 else: 4295 percent = num 4296 4297 if matched_l_paren: 4298 self._match_r_paren() 4299 4300 if self._match(TokenType.L_PAREN): 4301 method = self._parse_var(upper=True) 4302 seed = self._match(TokenType.COMMA) and self._parse_number() 4303 self._match_r_paren() 4304 elif self._match_texts(("SEED", "REPEATABLE")): 4305 seed = self._parse_wrapped(self._parse_number) 4306 4307 if not method and self.DEFAULT_SAMPLING_METHOD: 4308 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4309 4310 return self.expression( 4311 exp.TableSample, 4312 expressions=expressions, 4313 method=method, 4314 bucket_numerator=bucket_numerator, 4315 bucket_denominator=bucket_denominator, 4316 bucket_field=bucket_field, 4317 percent=percent, 4318 size=size, 4319 seed=seed, 4320 ) 4321 4322 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4323 return list(iter(self._parse_pivot, None)) or None 4324 4325 def _parse_joins(self) -> t.Iterator[exp.Join]: 4326 return iter(self._parse_join, None) 4327 4328 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4329 if not self._match(TokenType.INTO): 4330 return None 4331 4332 return self.expression( 4333 exp.UnpivotColumns, 4334 this=self._match_text_seq("NAME") and self._parse_column(), 4335 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4336 ) 4337 4338 # https://duckdb.org/docs/sql/statements/pivot 4339 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4340 def _parse_on() -> t.Optional[exp.Expression]: 4341 this = self._parse_bitwise() 4342 4343 if self._match(TokenType.IN): 4344 # PIVOT ... ON col IN (row_val1, row_val2) 4345 return self._parse_in(this) 4346 if self._match(TokenType.ALIAS, advance=False): 4347 # UNPIVOT ... 
                # ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `<col> IN (<values>)` part of a PIVOT, with optional per-value aliases."""

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    # A bare column alias is really just an identifier
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            # Snowflake: IN (ANY [ORDER BY ...])
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        """Parse one (aliased) aggregation function inside PIVOT(...); errors if absent."""
        func = self._parse_function()
        if not func:
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a full PIVOT/UNPIVOT clause, synthesizing output column names when possible."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the aliases of the pivot aggregations, skipping unaliased ones."""
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL/DISTINCT, ROLLUP, CUBE, GROUPING SETS and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            # GROUP BY ALL/DISTINCT immediately followed by another modifier
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                # Only WITH (or nothing) was consumed: rewind and stop
                self._retreat(before_with_index)
                break

            if index == self._index:
                # No progress made: stop to avoid an infinite loop
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        """Build a Cube/Rollup node; WITH-prefixed forms carry no expression list."""
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple of columns or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        """Parse a CONNECT BY condition, temporarily registering PRIOR as a no-paren function."""
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        # START WITH may also follow the CONNECT BY clause
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        """Parse `<name> [AS <expr>]`, returning an Alias when AS is present."""
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a ClickHouse INTERPOLATE (...) list after WITH FILL."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY) into an exp.Order attached to `this`."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term: expression, ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering is implicit, derive it from the dialect's null-ordering semantics
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            # ClickHouse ORDER BY ... WITH FILL [FROM ...] [TO ...] [STEP ...]
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        """Parse the [PERCENT] [ROW|ROWS] [ONLY|WITH TIES] suffix of TOP/FETCH."""
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP (including MySQL `LIMIT offset, count`) or a FETCH clause."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT <offset>, <count>
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause, attaching it to `this`."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        """Lookahead: report whether a LIMIT/OFFSET follows, without consuming any tokens."""
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the ClickHouse LIMIT ... BY <exprs> tail."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse row-locking clauses (FOR UPDATE/SHARE [OF ...] [NOWAIT|WAIT n|SKIP LOCKED])."""
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        # Optional join-style qualifiers (e.g. LEFT/OUTER) that some dialects allow
        # in front of a set operation.
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            # Not a set operation after all — undo the join-part consumption.
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            # Fall back to the dialect's default; None means the dialect requires
            # an explicit DISTINCT/ALL keyword.
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Greedily chain set operations onto `this`, left-associatively."""
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY / LIMIT) from the right-most
                # operand up onto the set operation itself.
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression, including an optional trailing alias."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse right-associative assignment operators (e.g. `:=`)."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                # Unwrap a bare column so the assignment target is the identifier itself.
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        """Parse OR-level boolean operators."""
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND-level boolean operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (<, <=, >, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS [NOT] NULL, ...)."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this,
            expression=exp.Null())  # (continuation of _parse_range from the previous chunk)

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap a range predicate in NOT; overridable hook for dialect-specific negation."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of an IS predicate (IS [NOT] DISTINCT FROM / JSON / NULL / ...).

        Returns None (after retreating) when what follows IS isn't a valid operand.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS [NOT] DISTINCT FROM is null-safe (in)equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # IS [NOT] JSON [<kind>] [WITH | WITHOUT UNIQUE [KEYS]]
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the operand of an IN predicate: UNNEST(...), a (sub)query/list, or a column."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In,
                this=this, unnest=unnest)  # (continuation of _parse_in from the previous chunk)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # IN (<subquery>) — store it under "query" rather than "expressions".
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            # Unparenthesized operand, e.g. Hive's IN <field>.
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse [SYMMETRIC | ASYMMETRIC] <low> AND <high> for a BETWEEN predicate."""
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE clause if one follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing it to the form INTERVAL '<value>' <unit>.

        When the input chains several interval parts (INTERVAL '1' day '2' hour ...),
        they're combined into a sum (exp.Add) of intervals.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare `interval IS ...` — "interval" was a column name, not the keyword.
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, string concat (||), ?? coalescing, and shifts."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` is two-argument COALESCE.
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (+, -, COLLATE, ...)."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Collate):
            expr = this.expression

            # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
            # fallback to Identifier / Var
            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                ident = expr.this
                if isinstance(ident,
                exp.Identifier):  # (continuation of _parse_term from the previous chunk)
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (*, /, DIV, ...)."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                # A word-form operator (e.g. DIV) with no right operand was actually
                # an identifier — back off and return what we had.
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            # Record the dialect's division semantics so transpilation can adjust.
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse prefix unary operators, then a typed/primary expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a type: INTERVAL, a typed literal
        (e.g. DATE '2020-01-01'), a type constructor, or fall back to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    # The literal carries a time zone, so widen the target type.
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

        self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single type parameter (e.g. the 38 in DECIMAL(38, 0))."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            # A bare name inside the parens (e.g. VARCHAR(MAX)) is a Var, not a column.
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        """Build a (possibly dotted) user-defined type from a leading identifier."""
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/STRUCT/MAP), parameterized, and
        dialect-specific forms. Returns None (after retreating) when no type is present."""
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier: it may actually spell a type keyword.
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type
5314 if type_token == TokenType.PSEUDO_TYPE: 5315 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5316 5317 if type_token == TokenType.OBJECT_IDENTIFIER: 5318 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5319 5320 # https://materialize.com/docs/sql/types/map/ 5321 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5322 key_type = self._parse_types( 5323 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5324 ) 5325 if not self._match(TokenType.FARROW): 5326 self._retreat(index) 5327 return None 5328 5329 value_type = self._parse_types( 5330 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5331 ) 5332 if not self._match(TokenType.R_BRACKET): 5333 self._retreat(index) 5334 return None 5335 5336 return exp.DataType( 5337 this=exp.DataType.Type.MAP, 5338 expressions=[key_type, value_type], 5339 nested=True, 5340 prefix=prefix, 5341 ) 5342 5343 nested = type_token in self.NESTED_TYPE_TOKENS 5344 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5345 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5346 expressions = None 5347 maybe_func = False 5348 5349 if self._match(TokenType.L_PAREN): 5350 if is_struct: 5351 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5352 elif nested: 5353 expressions = self._parse_csv( 5354 lambda: self._parse_types( 5355 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5356 ) 5357 ) 5358 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5359 this = expressions[0] 5360 this.set("nullable", True) 5361 self._match_r_paren() 5362 return this 5363 elif type_token in self.ENUM_TYPE_TOKENS: 5364 expressions = self._parse_csv(self._parse_equality) 5365 elif is_aggregate: 5366 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5367 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5368 ) 5369 if not func_or_ident: 5370 return 
None 5371 expressions = [func_or_ident] 5372 if self._match(TokenType.COMMA): 5373 expressions.extend( 5374 self._parse_csv( 5375 lambda: self._parse_types( 5376 check_func=check_func, 5377 schema=schema, 5378 allow_identifiers=allow_identifiers, 5379 ) 5380 ) 5381 ) 5382 else: 5383 expressions = self._parse_csv(self._parse_type_size) 5384 5385 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5386 if type_token == TokenType.VECTOR and len(expressions) == 2: 5387 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5388 5389 if not expressions or not self._match(TokenType.R_PAREN): 5390 self._retreat(index) 5391 return None 5392 5393 maybe_func = True 5394 5395 values: t.Optional[t.List[exp.Expression]] = None 5396 5397 if nested and self._match(TokenType.LT): 5398 if is_struct: 5399 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5400 else: 5401 expressions = self._parse_csv( 5402 lambda: self._parse_types( 5403 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5404 ) 5405 ) 5406 5407 if not self._match(TokenType.GT): 5408 self.raise_error("Expecting >") 5409 5410 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5411 values = self._parse_csv(self._parse_assignment) 5412 if not values and is_struct: 5413 values = None 5414 self._retreat(self._index - 1) 5415 else: 5416 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5417 5418 if type_token in self.TIMESTAMPS: 5419 if self._match_text_seq("WITH", "TIME", "ZONE"): 5420 maybe_func = False 5421 tz_type = ( 5422 exp.DataType.Type.TIMETZ 5423 if type_token in self.TIMES 5424 else exp.DataType.Type.TIMESTAMPTZ 5425 ) 5426 this = exp.DataType(this=tz_type, expressions=expressions) 5427 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5428 maybe_func = False 5429 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5430 elif self._match_text_seq("WITHOUT", "TIME", 
"ZONE"): 5431 maybe_func = False 5432 elif type_token == TokenType.INTERVAL: 5433 unit = self._parse_var(upper=True) 5434 if unit: 5435 if self._match_text_seq("TO"): 5436 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5437 5438 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5439 else: 5440 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5441 elif type_token == TokenType.VOID: 5442 this = exp.DataType(this=exp.DataType.Type.NULL) 5443 5444 if maybe_func and check_func: 5445 index2 = self._index 5446 peek = self._parse_string() 5447 5448 if not peek: 5449 self._retreat(index) 5450 return None 5451 5452 self._retreat(index2) 5453 5454 if not this: 5455 if self._match_text_seq("UNSIGNED"): 5456 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5457 if not unsigned_type_token: 5458 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5459 5460 type_token = unsigned_type_token or type_token 5461 5462 this = exp.DataType( 5463 this=exp.DataType.Type[type_token.value], 5464 expressions=expressions, 5465 nested=nested, 5466 prefix=prefix, 5467 ) 5468 5469 # Empty arrays/structs are allowed 5470 if values is not None: 5471 cls = exp.Struct if is_struct else exp.Array 5472 this = exp.cast(cls(expressions=values), this, copy=False) 5473 5474 elif expressions: 5475 this.set("expressions", expressions) 5476 5477 # https://materialize.com/docs/sql/types/list/#type-name 5478 while self._match(TokenType.LIST): 5479 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5480 5481 index = self._index 5482 5483 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5484 matched_array = self._match(TokenType.ARRAY) 5485 5486 while self._curr: 5487 datatype_token = self._prev.token_type 5488 matched_l_bracket = self._match(TokenType.L_BRACKET) 5489 5490 if (not matched_l_bracket and not matched_array) or ( 5491 
datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5492 ): 5493 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5494 # not to be confused with the fixed size array parsing 5495 break 5496 5497 matched_array = False 5498 values = self._parse_csv(self._parse_assignment) or None 5499 if ( 5500 values 5501 and not schema 5502 and ( 5503 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5504 ) 5505 ): 5506 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5507 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5508 self._retreat(index) 5509 break 5510 5511 this = exp.DataType( 5512 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5513 ) 5514 self._match(TokenType.R_BRACKET) 5515 5516 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5517 converter = self.TYPE_CONVERTERS.get(this.this) 5518 if converter: 5519 this = converter(t.cast(exp.DataType, this)) 5520 5521 return this 5522 5523 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5524 index = self._index 5525 5526 if ( 5527 self._curr 5528 and self._next 5529 and self._curr.token_type in self.TYPE_TOKENS 5530 and self._next.token_type in self.TYPE_TOKENS 5531 ): 5532 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5533 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5534 this = self._parse_id_var() 5535 else: 5536 this = ( 5537 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5538 or self._parse_id_var() 5539 ) 5540 5541 self._match(TokenType.COLON) 5542 5543 if ( 5544 type_required 5545 and not isinstance(this, exp.DataType) 5546 and not self._match_set(self.TYPE_TOKENS, advance=False) 5547 ): 5548 self._retreat(index) 5549 return self._parse_types() 5550 5551 return self._parse_column_def(this) 5552 5553 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5554 if not self._match_text_seq("AT", "TIME", "ZONE"): 5555 return this 5556 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5557 5558 def _parse_column(self) -> t.Optional[exp.Expression]: 5559 this = self._parse_column_reference() 5560 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5561 5562 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5563 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5564 5565 return column 5566 5567 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5568 this = self._parse_field() 5569 if ( 5570 not this 5571 and self._match(TokenType.VALUES, advance=False) 5572 and self.VALUES_FOLLOWED_BY_PAREN 5573 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5574 ): 5575 this = self._parse_id_var() 5576 5577 if isinstance(this, exp.Identifier): 5578 # We bubble up comments from the Identifier to the Column 5579 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5580 5581 return this 5582 5583 def _parse_colon_as_variant_extract( 5584 self, this: t.Optional[exp.Expression] 5585 ) -> t.Optional[exp.Expression]: 5586 casts = [] 5587 json_path = [] 5588 escape = None 5589 5590 while self._match(TokenType.COLON): 5591 start_index = self._index 5592 5593 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5594 path = self._parse_column_ops( 5595 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5596 ) 5597 5598 # The cast :: operator has a lower precedence than the extraction operator :, so 5599 # we rearrange the AST appropriately to avoid casting the JSON path 5600 while isinstance(path, exp.Cast): 5601 casts.append(path.to) 5602 path = path.this 5603 5604 if casts: 5605 dcolon_offset = next( 5606 i 5607 for i, t in enumerate(self._tokens[start_index:]) 5608 if t.token_type == TokenType.DCOLON 5609 ) 5610 end_token = self._tokens[start_index + dcolon_offset - 1] 5611 else: 5612 end_token = self._prev 5613 5614 if path: 5615 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5616 # it'll roundtrip to a string literal in GET_PATH 5617 if isinstance(path, exp.Identifier) and path.quoted: 5618 escape = True 5619 5620 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5621 5622 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5623 # Databricks transforms it back to the colon/dot notation 5624 if json_path: 5625 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5626 5627 if json_path_expr: 5628 json_path_expr.set("escape", escape) 5629 5630 this = self.expression( 5631 exp.JSONExtract, 5632 this=this, 5633 expression=json_path_expr, 5634 variant_extract=True, 5635 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5636 ) 5637 5638 while casts: 5639 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5640 5641 return this 5642 5643 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5644 return self._parse_types() 5645 5646 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5647 this = self._parse_bracket(this) 5648 5649 while self._match_set(self.COLUMN_OPERATORS): 5650 
op_token = self._prev.token_type 5651 op = self.COLUMN_OPERATORS.get(op_token) 5652 5653 if op_token in self.CAST_COLUMN_OPERATORS: 5654 field = self._parse_dcolon() 5655 if not field: 5656 self.raise_error("Expected type") 5657 elif op and self._curr: 5658 field = self._parse_column_reference() or self._parse_bracket() 5659 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5660 field = self._parse_column_ops(field) 5661 else: 5662 field = self._parse_field(any_token=True, anonymous_func=True) 5663 5664 # Function calls can be qualified, e.g., x.y.FOO() 5665 # This converts the final AST to a series of Dots leading to the function call 5666 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5667 if isinstance(field, (exp.Func, exp.Window)) and this: 5668 this = this.transform( 5669 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5670 ) 5671 5672 if op: 5673 this = op(self, this, field) 5674 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5675 this = self.expression( 5676 exp.Column, 5677 comments=this.comments, 5678 this=field, 5679 table=this.this, 5680 db=this.args.get("table"), 5681 catalog=this.args.get("db"), 5682 ) 5683 elif isinstance(field, exp.Window): 5684 # Move the exp.Dot's to the window's function 5685 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5686 field.set("this", window_func) 5687 this = field 5688 else: 5689 this = self.expression(exp.Dot, this=this, expression=field) 5690 5691 if field and field.comments: 5692 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5693 5694 this = self._parse_bracket(this) 5695 5696 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5697 5698 def _parse_paren(self) -> t.Optional[exp.Expression]: 5699 if not self._match(TokenType.L_PAREN): 5700 return None 5701 5702 comments = self._prev_comments 5703 
query = self._parse_select() 5704 5705 if query: 5706 expressions = [query] 5707 else: 5708 expressions = self._parse_expressions() 5709 5710 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5711 5712 if not this and self._match(TokenType.R_PAREN, advance=False): 5713 this = self.expression(exp.Tuple) 5714 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5715 this = self._parse_subquery(this=this, parse_alias=False) 5716 elif isinstance(this, exp.Subquery): 5717 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5718 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5719 this = self.expression(exp.Tuple, expressions=expressions) 5720 else: 5721 this = self.expression(exp.Paren, this=this) 5722 5723 if this: 5724 this.add_comments(comments) 5725 5726 self._match_r_paren(expression=this) 5727 return this 5728 5729 def _parse_primary(self) -> t.Optional[exp.Expression]: 5730 if self._match_set(self.PRIMARY_PARSERS): 5731 token_type = self._prev.token_type 5732 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5733 5734 if token_type == TokenType.STRING: 5735 expressions = [primary] 5736 while self._match(TokenType.STRING): 5737 expressions.append(exp.Literal.string(self._prev.text)) 5738 5739 if len(expressions) > 1: 5740 return self.expression(exp.Concat, expressions=expressions) 5741 5742 return primary 5743 5744 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5745 return exp.Literal.number(f"0.{self._prev.text}") 5746 5747 return self._parse_paren() 5748 5749 def _parse_field( 5750 self, 5751 any_token: bool = False, 5752 tokens: t.Optional[t.Collection[TokenType]] = None, 5753 anonymous_func: bool = False, 5754 ) -> t.Optional[exp.Expression]: 5755 if anonymous_func: 5756 field = ( 5757 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5758 or self._parse_primary() 5759 ) 5760 else: 5761 field = self._parse_primary() or self._parse_function( 5762 
anonymous=anonymous_func, any_token=any_token 5763 ) 5764 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5765 5766 def _parse_function( 5767 self, 5768 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5769 anonymous: bool = False, 5770 optional_parens: bool = True, 5771 any_token: bool = False, 5772 ) -> t.Optional[exp.Expression]: 5773 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5774 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5775 fn_syntax = False 5776 if ( 5777 self._match(TokenType.L_BRACE, advance=False) 5778 and self._next 5779 and self._next.text.upper() == "FN" 5780 ): 5781 self._advance(2) 5782 fn_syntax = True 5783 5784 func = self._parse_function_call( 5785 functions=functions, 5786 anonymous=anonymous, 5787 optional_parens=optional_parens, 5788 any_token=any_token, 5789 ) 5790 5791 if fn_syntax: 5792 self._match(TokenType.R_BRACE) 5793 5794 return func 5795 5796 def _parse_function_call( 5797 self, 5798 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5799 anonymous: bool = False, 5800 optional_parens: bool = True, 5801 any_token: bool = False, 5802 ) -> t.Optional[exp.Expression]: 5803 if not self._curr: 5804 return None 5805 5806 comments = self._curr.comments 5807 prev = self._prev 5808 token = self._curr 5809 token_type = self._curr.token_type 5810 this = self._curr.text 5811 upper = this.upper() 5812 5813 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5814 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5815 self._advance() 5816 return self._parse_window(parser(self)) 5817 5818 if not self._next or self._next.token_type != TokenType.L_PAREN: 5819 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5820 self._advance() 5821 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5822 5823 return None 5824 5825 if any_token: 5826 if token_type in self.RESERVED_TOKENS: 5827 return None 5828 elif 
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse the core function-call grammar: name, arguments, trailing window.

        Tries, in order: no-paren function parsers, no-paren builtin functions,
        dialect-specific FUNCTION_PARSERS, subquery predicates (e.g. EXISTS),
        and finally known/anonymous function construction from a CSV of args.
        """
        if not self._curr:
            return None

        # Snapshot cursor state before advancing, since several branches below
        # need the original token (for positions/comments) after consuming it.
        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Not followed by "(": only a paren-less builtin can match here
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume both the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            # Known functions normally get their kwargs canonicalized; aliased-arg
            # functions keep the alias syntax so it can be mapped to PropertyEQ
            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # Some builders are dialect-aware; inspect the signature to decide
                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        """Canonicalize key-value style arguments into exp.PropertyEQ nodes.

        Aliases (`k AS v`) and other KEY_VALUE_DEFINITIONS are rewritten to
        `k := v` form; non key-value items are passed through _to_prop_eq,
        which subclasses may override (the base implementation is identity).

        Args:
            expressions: the parsed argument list to transform in order.
            parse_map: if True, keep the key expression as-is instead of
                coercing it to an identifier (used for MAP-literal keys).
        """
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    # `value AS key` -> PropertyEQ(key, value)
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap a bare column key down to its identifier
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `(x, y) -> x + y`), falling back to a
        regular function argument (DISTINCT list, select, or expression) when no
        lambda arrow follows.

        Args:
            alias: forwarded to _parse_select_or_expression to allow aliased args.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda arg list after all; rewind
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            # Dispatch on the matched lambda arrow token (e.g. -> or =>)
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda found: rewind fully and parse as an ordinary argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        # Aggregate-argument modifiers: IGNORE/RESPECT NULLS, HAVING MAX,
        # ORDER BY and LIMIT can all decorate a function argument
        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type, computed-column clause and
        a run of column constraints.

        Args:
            this: the already-parsed column name expression.
            computed_column: when False, an AS token before the type is consumed
                (e.g. function parameter syntax) rather than treated as a
                computed-column marker.

        Returns:
            An exp.ColumnDef, or `this` unchanged when neither a type nor any
            constraint was found.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # ClickHouse-style computed columns: `<col> ALIAS <expr>` / `MATERIALIZED <expr>`
        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            # `<col> <type> AS (<expr>) [STORED | VIRTUAL]` computed columns
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        # Greedily consume any remaining column constraints
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT / AUTOINCREMENT, optionally with start/increment.

        Supports both `(<start>, <increment>)` and
        `START <n> INCREMENT <n> [ORDER | NOORDER]` forms; when start and
        increment are both present the result is modeled as a generated
        identity constraint, otherwise as a plain auto-increment constraint.
        """
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            # Snowflake-style ordering of generated identity values
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED [ALWAYS | BY DEFAULT] AS {IDENTITY | ROW | <expr>}.

        Handles the identity-sequence options (START WITH, INCREMENT BY,
        MINVALUE, MAXVALUE, CYCLE), the AS ROW START/END form, and the
        generated-expression form.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # T-SQL temporal tables: GENERATED ALWAYS AS ROW START/END [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): a computed expression, not a sequence
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Shorthand: IDENTITY(<start>, <increment>)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
self.expression(exp.NotNullColumnConstraint) 6177 if self._match_text_seq("CASESPECIFIC"): 6178 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6179 if self._match_text_seq("FOR", "REPLICATION"): 6180 return self.expression(exp.NotForReplicationColumnConstraint) 6181 6182 # Unconsume the `NOT` token 6183 self._retreat(self._index - 1) 6184 return None 6185 6186 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6187 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6188 6189 procedure_option_follows = ( 6190 self._match(TokenType.WITH, advance=False) 6191 and self._next 6192 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6193 ) 6194 6195 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6196 return self.expression( 6197 exp.ColumnConstraint, 6198 this=this, 6199 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6200 ) 6201 6202 return this 6203 6204 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6205 if not self._match(TokenType.CONSTRAINT): 6206 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6207 6208 return self.expression( 6209 exp.Constraint, 6210 this=self._parse_id_var(), 6211 expressions=self._parse_unnamed_constraints(), 6212 ) 6213 6214 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6215 constraints = [] 6216 while True: 6217 constraint = self._parse_unnamed_constraint() or self._parse_function() 6218 if not constraint: 6219 break 6220 constraints.append(constraint) 6221 6222 return constraints 6223 6224 def _parse_unnamed_constraint( 6225 self, constraints: t.Optional[t.Collection[str]] = None 6226 ) -> t.Optional[exp.Expression]: 6227 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6228 constraints or self.CONSTRAINT_PARSERS 6229 ): 6230 return None 6231 6232 constraint = self._prev.text.upper() 6233 if constraint not in self.CONSTRAINT_PARSERS: 6234 
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options into a list of strings.

        Accepts any number of `ON <event> <action>` clauses (e.g.
        `ON DELETE CASCADE`) plus dialect-specific keyword options from
        KEY_CONSTRAINT_OPTIONS; stops at the first unrecognized token.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event name (DELETE/UPDATE/...) is whatever token follows ON
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options
    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint body.

        Grammar: `[(<col>, ...)] REFERENCES <table> [ON {DELETE | UPDATE} <action>]...`
        The per-event actions are collected into keyword args ("delete"/"update").
        """
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word actions such as CASCADE or RESTRICT
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )
    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        # Consume the escape marker (d, t or ts) and look up its expression type
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression
self._parse_odbc_datetime_literal() 6407 6408 expressions = self._parse_csv( 6409 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6410 ) 6411 6412 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6413 self.raise_error("Expected ]") 6414 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6415 self.raise_error("Expected }") 6416 6417 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6418 if bracket_kind == TokenType.L_BRACE: 6419 this = self.expression( 6420 exp.Struct, 6421 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6422 ) 6423 elif not this: 6424 this = build_array_constructor( 6425 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6426 ) 6427 else: 6428 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6429 if constructor_type: 6430 return build_array_constructor( 6431 constructor_type, 6432 args=expressions, 6433 bracket_kind=bracket_kind, 6434 dialect=self.dialect, 6435 ) 6436 6437 expressions = apply_index_offset( 6438 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6439 ) 6440 this = self.expression( 6441 exp.Bracket, 6442 this=this, 6443 expressions=expressions, 6444 comments=this.pop_comments(), 6445 ) 6446 6447 self._add_comments(this) 6448 return self._parse_bracket(this) 6449 6450 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6451 if self._match(TokenType.COLON): 6452 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6453 return this 6454 6455 def _parse_case(self) -> t.Optional[exp.Expression]: 6456 ifs = [] 6457 default = None 6458 6459 comments = self._prev_comments 6460 expression = self._parse_assignment() 6461 6462 while self._match(TokenType.WHEN): 6463 this = self._parse_assignment() 6464 self._match(TokenType.THEN) 6465 then = self._parse_assignment() 6466 
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse an IF expression in either functional or statement form.

        Handles `IF(cond, true[, false])` as well as the keyword form
        `IF cond THEN true [ELSE false] END`; may fall back to parsing the
        whole input as a command for dialects with no-paren IF statements.
        """
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            # The IF keyword itself was already consumed by the caller
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-initial IF is a procedural command in these dialects
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this
self._match(TokenType.COMMA): 6526 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6527 6528 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6529 6530 def _parse_gap_fill(self) -> exp.GapFill: 6531 self._match(TokenType.TABLE) 6532 this = self._parse_table() 6533 6534 self._match(TokenType.COMMA) 6535 args = [this, *self._parse_csv(self._parse_lambda)] 6536 6537 gap_fill = exp.GapFill.from_arg_list(args) 6538 return self.validate_expression(gap_fill, args) 6539 6540 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6541 this = self._parse_assignment() 6542 6543 if not self._match(TokenType.ALIAS): 6544 if self._match(TokenType.COMMA): 6545 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6546 6547 self.raise_error("Expected AS after CAST") 6548 6549 fmt = None 6550 to = self._parse_types() 6551 6552 default = self._match(TokenType.DEFAULT) 6553 if default: 6554 default = self._parse_bitwise() 6555 self._match_text_seq("ON", "CONVERSION", "ERROR") 6556 6557 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6558 fmt_string = self._parse_string() 6559 fmt = self._parse_at_time_zone(fmt_string) 6560 6561 if not to: 6562 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6563 if to.this in exp.DataType.TEMPORAL_TYPES: 6564 this = self.expression( 6565 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6566 this=this, 6567 format=exp.Literal.string( 6568 format_time( 6569 fmt_string.this if fmt_string else "", 6570 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6571 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6572 ) 6573 ), 6574 safe=safe, 6575 ) 6576 6577 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6578 this.set("zone", fmt.args["zone"]) 6579 return this 6580 elif not to: 6581 self.raise_error("Expected TYPE after CAST") 6582 elif isinstance(to, exp.Identifier): 6583 to = 
    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>] ...).

        Args:
            strict: whether to build a strict cast (CAST vs TRY_CAST semantics).
            safe: when set, marks the cast/str-to-time conversion as error-safe.

        Returns:
            The cast expression; FORMAT casts to temporal types are rewritten
            into StrToDate/StrToTime with a canonicalized format string.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-arg form casts to a type described by a string literal
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        # Oracle: CAST(... DEFAULT <value> ON CONVERSION ERROR)
        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # FORMAT casts to temporal types become explicit string-to-time
                # conversions with the format mapped to the canonical notation
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # An identifier in type position is a user-defined type
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )
]] [LIMIT n]) 6629 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6630 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6631 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6632 6633 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6634 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6635 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6636 if not self._match_text_seq("WITHIN", "GROUP"): 6637 self._retreat(index) 6638 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6639 6640 # The corresponding match_r_paren will be called in parse_function (caller) 6641 self._match_l_paren() 6642 6643 return self.expression( 6644 exp.GroupConcat, 6645 this=self._parse_order(this=seq_get(args, 0)), 6646 separator=seq_get(args, 1), 6647 on_overflow=on_overflow, 6648 ) 6649 6650 def _parse_convert( 6651 self, strict: bool, safe: t.Optional[bool] = None 6652 ) -> t.Optional[exp.Expression]: 6653 this = self._parse_bitwise() 6654 6655 if self._match(TokenType.USING): 6656 to: t.Optional[exp.Expression] = self.expression( 6657 exp.CharacterSet, this=self._parse_var() 6658 ) 6659 elif self._match(TokenType.COMMA): 6660 to = self._parse_types() 6661 else: 6662 to = None 6663 6664 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6665 6666 def _parse_xml_table(self) -> exp.XMLTable: 6667 namespaces = None 6668 passing = None 6669 columns = None 6670 6671 if self._match_text_seq("XMLNAMESPACES", "("): 6672 namespaces = self._parse_xml_namespace() 6673 self._match_text_seq(")", ",") 6674 6675 this = self._parse_string() 6676 6677 if self._match_text_seq("PASSING"): 6678 # The BY VALUE keywords are optional and are provided for semantic clarity 6679 self._match_text_seq("BY", "VALUE") 6680 passing = 
self._parse_csv(self._parse_column) 6681 6682 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6683 6684 if self._match_text_seq("COLUMNS"): 6685 columns = self._parse_csv(self._parse_field_def) 6686 6687 return self.expression( 6688 exp.XMLTable, 6689 this=this, 6690 namespaces=namespaces, 6691 passing=passing, 6692 columns=columns, 6693 by_ref=by_ref, 6694 ) 6695 6696 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6697 namespaces = [] 6698 6699 while True: 6700 if self._match(TokenType.DEFAULT): 6701 uri = self._parse_string() 6702 else: 6703 uri = self._parse_alias(self._parse_string()) 6704 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6705 if not self._match(TokenType.COMMA): 6706 break 6707 6708 return namespaces 6709 6710 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6711 args = self._parse_csv(self._parse_assignment) 6712 6713 if len(args) < 3: 6714 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6715 6716 return self.expression(exp.DecodeCase, expressions=args) 6717 6718 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6719 self._match_text_seq("KEY") 6720 key = self._parse_column() 6721 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6722 self._match_text_seq("VALUE") 6723 value = self._parse_bitwise() 6724 6725 if not key and not value: 6726 return None 6727 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6728 6729 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6730 if not this or not self._match_text_seq("FORMAT", "JSON"): 6731 return this 6732 6733 return self.expression(exp.FormatJson, this=this) 6734 6735 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6736 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        #
        # Returns the matched handling as a canonical string (e.g. "NULL ON NULL"),
        # or the parsed default-value expression for the DEFAULT form, or None if
        # no handling clause is present (the cursor is restored in that case).
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            # DEFAULT was not followed by "ON <on>": rewind the whole attempt
            self._retreat(index)

        return None

    def _parse_json_object(self, agg=False):
        """Parse the argument list of JSON_OBJECT (or JSON_OBJECTAGG when ``agg``).

        Accepts either ``*`` or a CSV of ``KEY ... VALUE ...`` pairs, then the
        optional NULL handling, WITH/WITHOUT UNIQUE [KEYS], RETURNING <type> and
        ENCODING <var> clauses.
        """
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # tri-state: True = WITH UNIQUE, False = WITHOUT UNIQUE, None = not specified
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS(...) clause.

        ``NESTED [PATH '...'] COLUMNS(...)`` produces a nested definition; otherwise
        an identifier with an optional type and PATH string is parsed.
        """
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a ``COLUMNS(...)`` schema; the column list itself is optional."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the body of a JSON_TABLE(...) call: source expression, optional
        path string, ERROR/EMPTY handling and the COLUMNS schema."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL-style ``MATCH (cols) AGAINST (expr [modifier])``.

        The opening ``MATCH (`` has already been consumed by the caller; this
        method consumes the column list, the ``) AGAINST (`` bridge and the
        optional search-mode modifier.
        """
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL ``OPENJSON(expr [, path]) [WITH (col defs)]``."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) clause: name, type, optional path, AS JSON flag.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments.

        Handles both ``POSITION(needle IN haystack)`` and the plain CSV form;
        ``haystack_first`` controls the argument order of the CSV form.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ``PREDICT(MODEL <table>, TABLE <table> [, <params>])`` arguments."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list; the hint name is upper-cased."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING arguments, including the keyword form.

        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        """

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM implies a start position of 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM arguments: optional position keyword, target, optional
        pattern (FROM/comma separated) and optional COLLATE.

        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        """

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or TRIM_PATTERN_FIRST dialects) the pattern precedes the target
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a trailing ``WINDOW name AS (...), ...`` clause, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one ``name AS (window spec)`` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap ``this`` in IgnoreNulls/RespectNulls if either keyword pair follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse ``HAVING MAX <col>`` / ``HAVING MIN <col>`` after an aggregate."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related suffixes after a function call: WITHIN GROUP,
        FILTER, IGNORE/RESPECT NULLS and the OVER (...) specification. With
        ``alias=True`` a named-window definition (``name AS (...)``) is parsed
        instead of an OVER clause.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            # Hoist an inner IGNORE/RESPECT NULLS so it wraps the aggregate itself
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER keyword: this is not a window expression
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <name> references a named window rather than an inline spec
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr, plus side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional (or, with ``explicit``, AS-required) alias for ``this``."""
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or accept any non-reserved token as one."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string token as a quoted identifier, or None if absent."""
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/extra tokens when requested) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal first, then fall back to any-token var."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression first, then fall back to any-token var."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse ``*``, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter's name (identifier, primary or var)."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rewinds if the sub-parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse ``KEYWORD (a, b, ...)`` or ``KEYWORD expr`` for star modifiers
        such as EXCEPT/REPLACE; returns None when no keyword matches."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a separator-delimited list using ``parse_method`` per item;
        None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments found at the separator to the preceding item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold operator tokens mapped in ``expressions``."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized CSV of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a (optionally) parenthesized CSV using ``parse_method``."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run ``parse_method`` inside parentheses; raise if '(' is required
        but missing."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a CSV of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or otherwise an expression with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CTAS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with optional kind and mode list."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            # Each mode is a run of VAR tokens joined by spaces
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT / ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name-or-string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self) -> t.Optional[exp.ColumnDef]:
        """Parse ``[COLUMN] [IF NOT EXISTS] <column def>``; rewinds and returns
        None when the parsed field is not a column definition."""
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        """Parse the column definition of ALTER TABLE ... ADD, including the
        optional FIRST/AFTER positioning clause."""
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action inside ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of ALTER TABLE ... DROP PARTITION."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD: constraints, columns or
        partitions, possibly as a CSV of alterations."""

        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            # One ADD item: constraint(s), a column def, or a partition
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ALTER [COLUMN] actions, dispatching to
        ALTER_ALTER_PARSERS when a known keyword follows."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
collate=self._match(TokenType.COLLATE) and self._parse_term(), 7506 using=self._match(TokenType.USING) and self._parse_assignment(), 7507 ) 7508 7509 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7510 if self._match_texts(("ALL", "EVEN", "AUTO")): 7511 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7512 7513 self._match_text_seq("KEY", "DISTKEY") 7514 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7515 7516 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7517 if compound: 7518 self._match_text_seq("SORTKEY") 7519 7520 if self._match(TokenType.L_PAREN, advance=False): 7521 return self.expression( 7522 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7523 ) 7524 7525 self._match_texts(("AUTO", "NONE")) 7526 return self.expression( 7527 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7528 ) 7529 7530 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7531 index = self._index - 1 7532 7533 partition_exists = self._parse_exists() 7534 if self._match(TokenType.PARTITION, advance=False): 7535 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7536 7537 self._retreat(index) 7538 return self._parse_csv(self._parse_drop_column) 7539 7540 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7541 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7542 exists = self._parse_exists() 7543 old_column = self._parse_column() 7544 to = self._match_text_seq("TO") 7545 new_column = self._parse_column() 7546 7547 if old_column is None or to is None or new_column is None: 7548 return None 7549 7550 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7551 7552 self._match_text_seq("TO") 7553 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7554 7555 def 
_parse_alter_table_set(self) -> exp.AlterSet: 7556 alter_set = self.expression(exp.AlterSet) 7557 7558 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7559 "TABLE", "PROPERTIES" 7560 ): 7561 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7562 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7563 alter_set.set("expressions", [self._parse_assignment()]) 7564 elif self._match_texts(("LOGGED", "UNLOGGED")): 7565 alter_set.set("option", exp.var(self._prev.text.upper())) 7566 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7567 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7568 elif self._match_text_seq("LOCATION"): 7569 alter_set.set("location", self._parse_field()) 7570 elif self._match_text_seq("ACCESS", "METHOD"): 7571 alter_set.set("access_method", self._parse_field()) 7572 elif self._match_text_seq("TABLESPACE"): 7573 alter_set.set("tablespace", self._parse_field()) 7574 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7575 alter_set.set("file_format", [self._parse_field()]) 7576 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7577 alter_set.set("file_format", self._parse_wrapped_options()) 7578 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7579 alter_set.set("copy_options", self._parse_wrapped_options()) 7580 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7581 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7582 else: 7583 if self._match_text_seq("SERDE"): 7584 alter_set.set("serde", self._parse_field()) 7585 7586 properties = self._parse_wrapped(self._parse_properties, optional=True) 7587 alter_set.set("expressions", [properties]) 7588 7589 return alter_set 7590 7591 def _parse_alter(self) -> exp.Alter | exp.Command: 7592 start = self._prev 7593 7594 alter_token = self._match_set(self.ALTERABLES) and self._prev 7595 if not alter_token: 7596 return 
self._parse_as_command(start) 7597 7598 exists = self._parse_exists() 7599 only = self._match_text_seq("ONLY") 7600 this = self._parse_table(schema=True) 7601 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7602 7603 if self._next: 7604 self._advance() 7605 7606 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7607 if parser: 7608 actions = ensure_list(parser(self)) 7609 not_valid = self._match_text_seq("NOT", "VALID") 7610 options = self._parse_csv(self._parse_property) 7611 7612 if not self._curr and actions: 7613 return self.expression( 7614 exp.Alter, 7615 this=this, 7616 kind=alter_token.text.upper(), 7617 exists=exists, 7618 actions=actions, 7619 only=only, 7620 options=options, 7621 cluster=cluster, 7622 not_valid=not_valid, 7623 ) 7624 7625 return self._parse_as_command(start) 7626 7627 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7628 start = self._prev 7629 # https://duckdb.org/docs/sql/statements/analyze 7630 if not self._curr: 7631 return self.expression(exp.Analyze) 7632 7633 options = [] 7634 while self._match_texts(self.ANALYZE_STYLES): 7635 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7636 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7637 else: 7638 options.append(self._prev.text.upper()) 7639 7640 this: t.Optional[exp.Expression] = None 7641 inner_expression: t.Optional[exp.Expression] = None 7642 7643 kind = self._curr and self._curr.text.upper() 7644 7645 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7646 this = self._parse_table_parts() 7647 elif self._match_text_seq("TABLES"): 7648 if self._match_set((TokenType.FROM, TokenType.IN)): 7649 kind = f"{kind} {self._prev.text.upper()}" 7650 this = self._parse_table(schema=True, is_db_reference=True) 7651 elif self._match_text_seq("DATABASE"): 7652 this = self._parse_table(schema=True, is_db_reference=True) 7653 elif self._match_text_seq("CLUSTER"): 7654 this = self._parse_table() 7655 # Try 
matching inner expr keywords before fallback to parse table. 7656 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7657 kind = None 7658 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7659 else: 7660 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7661 kind = None 7662 this = self._parse_table_parts() 7663 7664 partition = self._try_parse(self._parse_partition) 7665 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7666 return self._parse_as_command(start) 7667 7668 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7669 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7670 "WITH", "ASYNC", "MODE" 7671 ): 7672 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7673 else: 7674 mode = None 7675 7676 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7677 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7678 7679 properties = self._parse_properties() 7680 return self.expression( 7681 exp.Analyze, 7682 kind=kind, 7683 this=this, 7684 mode=mode, 7685 partition=partition, 7686 properties=properties, 7687 expression=inner_expression, 7688 options=options, 7689 ) 7690 7691 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7692 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7693 this = None 7694 kind = self._prev.text.upper() 7695 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7696 expressions = [] 7697 7698 if not self._match_text_seq("STATISTICS"): 7699 self.raise_error("Expecting token STATISTICS") 7700 7701 if self._match_text_seq("NOSCAN"): 7702 this = "NOSCAN" 7703 elif self._match(TokenType.FOR): 7704 if self._match_text_seq("ALL", "COLUMNS"): 7705 this = "FOR ALL COLUMNS" 7706 if self._match_texts("COLUMNS"): 7707 this = "FOR COLUMNS" 7708 expressions = self._parse_csv(self._parse_column_reference) 7709 elif 
self._match_text_seq("SAMPLE"): 7710 sample = self._parse_number() 7711 expressions = [ 7712 self.expression( 7713 exp.AnalyzeSample, 7714 sample=sample, 7715 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7716 ) 7717 ] 7718 7719 return self.expression( 7720 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7721 ) 7722 7723 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7724 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7725 kind = None 7726 this = None 7727 expression: t.Optional[exp.Expression] = None 7728 if self._match_text_seq("REF", "UPDATE"): 7729 kind = "REF" 7730 this = "UPDATE" 7731 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7732 this = "UPDATE SET DANGLING TO NULL" 7733 elif self._match_text_seq("STRUCTURE"): 7734 kind = "STRUCTURE" 7735 if self._match_text_seq("CASCADE", "FAST"): 7736 this = "CASCADE FAST" 7737 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7738 ("ONLINE", "OFFLINE") 7739 ): 7740 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7741 expression = self._parse_into() 7742 7743 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7744 7745 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7746 this = self._prev.text.upper() 7747 if self._match_text_seq("COLUMNS"): 7748 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7749 return None 7750 7751 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7752 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7753 if self._match_text_seq("STATISTICS"): 7754 return self.expression(exp.AnalyzeDelete, kind=kind) 7755 return None 7756 7757 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7758 if self._match_text_seq("CHAINED", "ROWS"): 7759 return self.expression(exp.AnalyzeListChainedRows, 
expression=self._parse_into()) 7760 return None 7761 7762 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7763 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7764 this = self._prev.text.upper() 7765 expression: t.Optional[exp.Expression] = None 7766 expressions = [] 7767 update_options = None 7768 7769 if self._match_text_seq("HISTOGRAM", "ON"): 7770 expressions = self._parse_csv(self._parse_column_reference) 7771 with_expressions = [] 7772 while self._match(TokenType.WITH): 7773 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7774 if self._match_texts(("SYNC", "ASYNC")): 7775 if self._match_text_seq("MODE", advance=False): 7776 with_expressions.append(f"{self._prev.text.upper()} MODE") 7777 self._advance() 7778 else: 7779 buckets = self._parse_number() 7780 if self._match_text_seq("BUCKETS"): 7781 with_expressions.append(f"{buckets} BUCKETS") 7782 if with_expressions: 7783 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7784 7785 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7786 TokenType.UPDATE, advance=False 7787 ): 7788 update_options = self._prev.text.upper() 7789 self._advance() 7790 elif self._match_text_seq("USING", "DATA"): 7791 expression = self.expression(exp.UsingData, this=self._parse_string()) 7792 7793 return self.expression( 7794 exp.AnalyzeHistogram, 7795 this=this, 7796 expressions=expressions, 7797 expression=expression, 7798 update_options=update_options, 7799 ) 7800 7801 def _parse_merge(self) -> exp.Merge: 7802 self._match(TokenType.INTO) 7803 target = self._parse_table() 7804 7805 if target and self._match(TokenType.ALIAS, advance=False): 7806 target.set("alias", self._parse_table_alias()) 7807 7808 self._match(TokenType.USING) 7809 using = self._parse_table() 7810 7811 self._match(TokenType.ON) 7812 on = self._parse_assignment() 7813 7814 return self.expression( 7815 exp.Merge, 7816 this=target, 7817 using=using, 7818 on=on, 7819 
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN <action> clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # _match_text_seq returns True or None, so `source` is:
            # False for BY TARGET, True for BY SOURCE, None when neither is present.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * | INSERT ROW | INSERT (cols) [VALUES (...)]
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * | UPDATE SET a = b, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                # Fall back to the dialect's conflict-action keywords
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's SHOW parsers, else degrade to a Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index =
self._index 7886 7887 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7888 return self._parse_set_transaction(global_=kind == "GLOBAL") 7889 7890 left = self._parse_primary() or self._parse_column() 7891 assignment_delimiter = self._match_texts(("=", "TO")) 7892 7893 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7894 self._retreat(index) 7895 return None 7896 7897 right = self._parse_statement() or self._parse_id_var() 7898 if isinstance(right, (exp.Column, exp.Identifier)): 7899 right = exp.var(right.name) 7900 7901 this = self.expression(exp.EQ, this=left, expression=right) 7902 return self.expression(exp.SetItem, this=this, kind=kind) 7903 7904 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7905 self._match_text_seq("TRANSACTION") 7906 characteristics = self._parse_csv( 7907 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7908 ) 7909 return self.expression( 7910 exp.SetItem, 7911 expressions=characteristics, 7912 kind="TRANSACTION", 7913 **{"global": global_}, # type: ignore 7914 ) 7915 7916 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7917 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7918 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7919 7920 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7921 index = self._index 7922 set_ = self.expression( 7923 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7924 ) 7925 7926 if self._curr: 7927 self._retreat(index) 7928 return self._parse_as_command(self._prev) 7929 7930 return set_ 7931 7932 def _parse_var_from_options( 7933 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7934 ) -> t.Optional[exp.Var]: 7935 start = self._curr 7936 if not start: 7937 return None 7938 7939 option = start.text.upper() 7940 continuations = options.get(option) 7941 7942 index = self._index 
        self._advance()
        # Each continuation is a (possibly single-keyword) sequence that may follow
        # `option`; the first full match extends the option text.
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched: unknown option unless the table maps it
            # to an empty sequence (meaning the bare keyword is valid on its own).
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL from `start` in a Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword (e.g. "ALTER") from the rest of the statement
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: <this>(<kind>(key value, ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            # Collect key/value sub-properties until neither parses
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range: <this>([MIN <min>] MAX <max>); MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            # No MIN keyword: the single value is the MAX, MIN defaults to 0
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Not a comprehension: back up past the token consumed before `index` too
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc / dollar-quoted string such as $tag$...$tag$ or $$...$$."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The tag (or closing $) must be adjacent to the opening $ with no whitespace
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # Named tag: expect the closing $ immediately after it
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the full closing tag sequence is found
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens and return the matching parser, if any.

        The cursor is left after the matched keywords on success, or restored on failure.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True if the current token is `token_type`, else None.

        With advance=True the cursor moves past the token and any pending
        comments are attached to `expression`; advance=False is a pure peek.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Like _match, but accept any token type in the collection `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match two consecutive token types; advances past both on success."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token, raising a ParseError otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token, raising a ParseError otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current token's upper-cased text against `texts`.

        String literals never match, so quoted values can't be mistaken for keywords.
        """
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of keyword texts; restores the cursor on any mismatch.

        With advance=False the cursor is restored even on success (lookahead only).
        """
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if
not node: 8161 return node 8162 8163 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8164 8165 for column in node.find_all(exp.Column): 8166 typ = lambda_types.get(column.parts[0].name) 8167 if typ is not None: 8168 dot_or_id = column.to_dot() if column.table else column.this 8169 8170 if typ: 8171 dot_or_id = self.expression( 8172 exp.Cast, 8173 this=dot_or_id, 8174 to=typ, 8175 ) 8176 8177 parent = column.parent 8178 8179 while isinstance(parent, exp.Dot): 8180 if not isinstance(parent.parent, exp.Dot): 8181 parent.replace(dot_or_id) 8182 break 8183 parent = parent.parent 8184 else: 8185 if column is node: 8186 node = dot_or_id 8187 else: 8188 column.replace(dot_or_id) 8189 return node 8190 8191 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8192 start = self._prev 8193 8194 # Not to be confused with TRUNCATE(number, decimals) function call 8195 if self._match(TokenType.L_PAREN): 8196 self._retreat(self._index - 2) 8197 return self._parse_function() 8198 8199 # Clickhouse supports TRUNCATE DATABASE as well 8200 is_database = self._match(TokenType.DATABASE) 8201 8202 self._match(TokenType.TABLE) 8203 8204 exists = self._parse_exists(not_=False) 8205 8206 expressions = self._parse_csv( 8207 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8208 ) 8209 8210 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8211 8212 if self._match_text_seq("RESTART", "IDENTITY"): 8213 identity = "RESTART" 8214 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8215 identity = "CONTINUE" 8216 else: 8217 identity = None 8218 8219 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8220 option = self._prev.text 8221 else: 8222 option = None 8223 8224 partition = self._parse_partition() 8225 8226 # Fallback case 8227 if self._curr: 8228 return self._parse_as_command(start) 8229 8230 return self.expression( 8231 exp.TruncateTable, 8232 expressions=expressions, 8233 
is_database=is_database, 8234 exists=exists, 8235 cluster=cluster, 8236 identity=identity, 8237 option=option, 8238 partition=partition, 8239 ) 8240 8241 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8242 this = self._parse_ordered(self._parse_opclass) 8243 8244 if not self._match(TokenType.WITH): 8245 return this 8246 8247 op = self._parse_var(any_token=True) 8248 8249 return self.expression(exp.WithOperator, this=this, op=op) 8250 8251 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8252 self._match(TokenType.EQ) 8253 self._match(TokenType.L_PAREN) 8254 8255 opts: t.List[t.Optional[exp.Expression]] = [] 8256 option: exp.Expression | None 8257 while self._curr and not self._match(TokenType.R_PAREN): 8258 if self._match_text_seq("FORMAT_NAME", "="): 8259 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8260 option = self._parse_format_name() 8261 else: 8262 option = self._parse_property() 8263 8264 if option is None: 8265 self.raise_error("Unable to parse option") 8266 break 8267 8268 opts.append(option) 8269 8270 return opts 8271 8272 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8273 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8274 8275 options = [] 8276 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8277 option = self._parse_var(any_token=True) 8278 prev = self._prev.text.upper() 8279 8280 # Different dialects might separate options and values by white space, "=" and "AS" 8281 self._match(TokenType.EQ) 8282 self._match(TokenType.ALIAS) 8283 8284 param = self.expression(exp.CopyParameter, this=option) 8285 8286 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8287 TokenType.L_PAREN, advance=False 8288 ): 8289 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8290 param.set("expressions", self._parse_wrapped_options()) 8291 elif prev == "FILE_FORMAT": 8292 # T-SQL's external file format case 8293 
param.set("expression", self._parse_field()) 8294 else: 8295 param.set("expression", self._parse_unquoted_field()) 8296 8297 options.append(param) 8298 self._match(sep) 8299 8300 return options 8301 8302 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8303 expr = self.expression(exp.Credentials) 8304 8305 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8306 expr.set("storage", self._parse_field()) 8307 if self._match_text_seq("CREDENTIALS"): 8308 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8309 creds = ( 8310 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8311 ) 8312 expr.set("credentials", creds) 8313 if self._match_text_seq("ENCRYPTION"): 8314 expr.set("encryption", self._parse_wrapped_options()) 8315 if self._match_text_seq("IAM_ROLE"): 8316 expr.set("iam_role", self._parse_field()) 8317 if self._match_text_seq("REGION"): 8318 expr.set("region", self._parse_field()) 8319 8320 return expr 8321 8322 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8323 return self._parse_field() 8324 8325 def _parse_copy(self) -> exp.Copy | exp.Command: 8326 start = self._prev 8327 8328 self._match(TokenType.INTO) 8329 8330 this = ( 8331 self._parse_select(nested=True, parse_subquery_alias=False) 8332 if self._match(TokenType.L_PAREN, advance=False) 8333 else self._parse_table(schema=True) 8334 ) 8335 8336 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8337 8338 files = self._parse_csv(self._parse_file_location) 8339 credentials = self._parse_credentials() 8340 8341 self._match_text_seq("WITH") 8342 8343 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8344 8345 # Fallback case 8346 if self._curr: 8347 return self._parse_as_command(start) 8348 8349 return self.expression( 8350 exp.Copy, 8351 this=this, 8352 kind=kind, 8353 credentials=credentials, 8354 files=files, 8355 params=params, 8356 ) 8357 8358 def _parse_normalize(self) -> 
exp.Normalize: 8359 return self.expression( 8360 exp.Normalize, 8361 this=self._parse_bitwise(), 8362 form=self._match(TokenType.COMMA) and self._parse_var(), 8363 ) 8364 8365 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8366 args = self._parse_csv(lambda: self._parse_lambda()) 8367 8368 this = seq_get(args, 0) 8369 decimals = seq_get(args, 1) 8370 8371 return expr_type( 8372 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8373 ) 8374 8375 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8376 star_token = self._prev 8377 8378 if self._match_text_seq("COLUMNS", "(", advance=False): 8379 this = self._parse_function() 8380 if isinstance(this, exp.Columns): 8381 this.set("unpack", True) 8382 return this 8383 8384 return self.expression( 8385 exp.Star, 8386 **{ # type: ignore 8387 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8388 "replace": self._parse_star_op("REPLACE"), 8389 "rename": self._parse_star_op("RENAME"), 8390 }, 8391 ).update_positions(star_token) 8392 8393 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8394 privilege_parts = [] 8395 8396 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8397 # (end of privilege list) or L_PAREN (start of column list) are met 8398 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8399 privilege_parts.append(self._curr.text.upper()) 8400 self._advance() 8401 8402 this = exp.var(" ".join(privilege_parts)) 8403 expressions = ( 8404 self._parse_wrapped_csv(self._parse_column) 8405 if self._match(TokenType.L_PAREN, advance=False) 8406 else None 8407 ) 8408 8409 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8410 8411 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8412 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8413 principal = self._parse_id_var() 8414 8415 if not principal: 8416 
return None 8417 8418 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8419 8420 def _parse_grant(self) -> exp.Grant | exp.Command: 8421 start = self._prev 8422 8423 privileges = self._parse_csv(self._parse_grant_privilege) 8424 8425 self._match(TokenType.ON) 8426 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8427 8428 # Attempt to parse the securable e.g. MySQL allows names 8429 # such as "foo.*", "*.*" which are not easily parseable yet 8430 securable = self._try_parse(self._parse_table_parts) 8431 8432 if not securable or not self._match_text_seq("TO"): 8433 return self._parse_as_command(start) 8434 8435 principals = self._parse_csv(self._parse_grant_principal) 8436 8437 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8438 8439 if self._curr: 8440 return self._parse_as_command(start) 8441 8442 return self.expression( 8443 exp.Grant, 8444 privileges=privileges, 8445 kind=kind, 8446 securable=securable, 8447 principals=principals, 8448 grant_option=grant_option, 8449 ) 8450 8451 def _parse_overlay(self) -> exp.Overlay: 8452 return self.expression( 8453 exp.Overlay, 8454 **{ # type: ignore 8455 "this": self._parse_bitwise(), 8456 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8457 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8458 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8459 }, 8460 ) 8461 8462 def _parse_format_name(self) -> exp.Property: 8463 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8464 # for FILE_FORMAT = <format_name> 8465 return self.expression( 8466 exp.Property, 8467 this=exp.var("FORMAT_NAME"), 8468 value=self._parse_string() or self._parse_table_parts(), 8469 ) 8470 8471 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8472 args: t.List[exp.Expression] = [] 8473 8474 if self._match(TokenType.DISTINCT): 8475 args.append(self.expression(exp.Distinct, 
expressions=[self._parse_assignment()])) 8476 self._match(TokenType.COMMA) 8477 8478 args.extend(self._parse_csv(self._parse_assignment)) 8479 8480 return self.expression( 8481 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8482 ) 8483 8484 def _identifier_expression( 8485 self, token: t.Optional[Token] = None, **kwargs: t.Any 8486 ) -> exp.Identifier: 8487 token = token or self._prev 8488 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8489 expression.update_positions(token) 8490 return expression 8491 8492 def _build_pipe_cte( 8493 self, 8494 query: exp.Query, 8495 expressions: t.List[exp.Expression], 8496 alias_cte: t.Optional[exp.TableAlias] = None, 8497 ) -> exp.Select: 8498 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8499 if alias_cte: 8500 new_cte = alias_cte 8501 else: 8502 self._pipe_cte_counter += 1 8503 new_cte = f"__tmp{self._pipe_cte_counter}" 8504 8505 with_ = query.args.get("with") 8506 ctes = with_.pop() if with_ else None 8507 8508 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8509 if ctes: 8510 new_select.set("with", ctes) 8511 8512 return new_select.with_(new_cte, as_=query, copy=False) 8513 8514 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8515 select = self._parse_select(consume_pipe=False) 8516 if not select: 8517 return query 8518 8519 return self._build_pipe_cte( 8520 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8521 ) 8522 8523 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8524 limit = self._parse_limit() 8525 offset = self._parse_offset() 8526 if limit: 8527 curr_limit = query.args.get("limit", limit) 8528 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8529 query.limit(limit, copy=False) 8530 if offset: 8531 curr_offset = query.args.get("offset") 8532 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8533 
query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8534 8535 return query 8536 8537 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8538 this = self._parse_assignment() 8539 if self._match_text_seq("GROUP", "AND", advance=False): 8540 return this 8541 8542 this = self._parse_alias(this) 8543 8544 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8545 return self._parse_ordered(lambda: this) 8546 8547 return this 8548 8549 def _parse_pipe_syntax_aggregate_group_order_by( 8550 self, query: exp.Select, group_by_exists: bool = True 8551 ) -> exp.Select: 8552 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8553 aggregates_or_groups, orders = [], [] 8554 for element in expr: 8555 if isinstance(element, exp.Ordered): 8556 this = element.this 8557 if isinstance(this, exp.Alias): 8558 element.set("this", this.args["alias"]) 8559 orders.append(element) 8560 else: 8561 this = element 8562 aggregates_or_groups.append(this) 8563 8564 if group_by_exists: 8565 query.select(*aggregates_or_groups, copy=False).group_by( 8566 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8567 copy=False, 8568 ) 8569 else: 8570 query.select(*aggregates_or_groups, append=False, copy=False) 8571 8572 if orders: 8573 return query.order_by(*orders, append=False, copy=False) 8574 8575 return query 8576 8577 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8578 self._match_text_seq("AGGREGATE") 8579 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8580 8581 if self._match(TokenType.GROUP_BY) or ( 8582 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8583 ): 8584 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8585 8586 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8587 8588 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> 
t.Optional[exp.Query]: 8589 first_setop = self.parse_set_operation(this=query) 8590 if not first_setop: 8591 return None 8592 8593 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8594 expr = self._parse_paren() 8595 return expr.assert_is(exp.Subquery).unnest() if expr else None 8596 8597 first_setop.this.pop() 8598 8599 setops = [ 8600 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8601 *self._parse_csv(_parse_and_unwrap_query), 8602 ] 8603 8604 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8605 with_ = query.args.get("with") 8606 ctes = with_.pop() if with_ else None 8607 8608 if isinstance(first_setop, exp.Union): 8609 query = query.union(*setops, copy=False, **first_setop.args) 8610 elif isinstance(first_setop, exp.Except): 8611 query = query.except_(*setops, copy=False, **first_setop.args) 8612 else: 8613 query = query.intersect(*setops, copy=False, **first_setop.args) 8614 8615 query.set("with", ctes) 8616 8617 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8618 8619 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8620 join = self._parse_join() 8621 if not join: 8622 return None 8623 8624 if isinstance(query, exp.Select): 8625 return query.join(join, copy=False) 8626 8627 return query 8628 8629 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8630 pivots = self._parse_pivots() 8631 if not pivots: 8632 return query 8633 8634 from_ = query.args.get("from") 8635 if from_: 8636 from_.this.set("pivots", pivots) 8637 8638 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8639 8640 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8641 self._match_text_seq("EXTEND") 8642 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8643 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8644 8645 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8646 
sample = self._parse_table_sample() 8647 8648 with_ = query.args.get("with") 8649 if with_: 8650 with_.expressions[-1].this.set("sample", sample) 8651 else: 8652 query.set("sample", sample) 8653 8654 return query 8655 8656 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8657 if isinstance(query, exp.Subquery): 8658 query = exp.select("*").from_(query, copy=False) 8659 8660 if not query.args.get("from"): 8661 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8662 8663 while self._match(TokenType.PIPE_GT): 8664 start = self._curr 8665 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8666 if not parser: 8667 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8668 # keywords, making it tricky to disambiguate them without lookahead. The approach 8669 # here is to try and parse a set operation and if that fails, then try to parse a 8670 # join operator. If that fails as well, then the operator is not supported. 
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    # NOTE(review): `start` is a Token (captured from self._curr above) —
                    # confirm _retreat accepts a Token rather than an integer index.
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        """Parse one DECLARE item: <name>[, ...] [<type>] [DEFAULT <expr>]."""
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        """Parse DECLARE; falls back to a raw Command if items fail or tokens remain."""
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        """Build a Cast (strict) or TryCast node from the given expression args.

        For TryCast, the dialect's TRY_CAST_REQUIRES_STRING flag is recorded on
        the node so generators can validate/transpile it correctly.
        """
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP node from alternating key/value arguments.

    A single star argument produces a StarMap; otherwise even-indexed
    arguments become keys and the following odd-indexed ones their values.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up (key, value), preserving the original failure mode for an odd
    # number of arguments (IndexError on the missing value).
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]
    keys = [key for key, _ in pairs]
    values = [value for _, value in pairs]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callable for a binary range operator (LIKE, GLOB, ...).

    The produced callable parses the right-hand operand, optionally swaps the
    operand order, and wraps the result in an optional ESCAPE clause.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        rhs = self._parse_bitwise()
        left, right = (rhs, this) if reverse_args else (this, rhs)
        return self._parse_escape(self.expression(expr_type, this=left, expression=right))

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a logarithm node, honoring the dialect's argument order.

    With two arguments, the default order is (base, expression); dialects
    where LOG_BASE_FIRST is false supply them reversed. With one argument,
    the dialect decides whether LOG means natural log (Ln) or plain Log.
    """
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        # Single-argument form: some dialects default LOG(x) to LN(x).
        log_type = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return log_type(this=this)

    if not dialect.LOG_BASE_FIRST:
        this, expression = expression, this
    return exp.Log(this=this, expression=expression)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for a JSON-extraction function of type `expr_type`.

    The builder converts the second argument into a dialect-specific JSON
    path; for JSONExtract, any additional arguments are kept as extra
    expressions on the node.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=seq_get(args, 0), expression=json_path)

        extra = args[2:]
        if extra and expr_type is exp.JSONExtract:
            node.set("expressions", extra)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a MOD node, parenthesizing binary operands.

    Wrapping preserves precedence when rendered with the % operator,
    e.g. MOD(a + 1, 7) -> (a + 1) % 7.
    """

    def _wrap(node):
        # Only binary nodes need parentheses to survive infix rendering.
        return exp.Paren(this=node) if isinstance(node, exp.Binary) else node

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-constructor node of type `exp_class` from `args`.

    For dialects that distinguish ARRAY[...] from ARRAY(...), record which
    bracket notation was used on the Array node.
    """
    node = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        node.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build a CONVERT_TIMEZONE node.

    The two-argument form omits the source timezone, so `default_source_tz`
    (when given) is substituted as a string literal; any other arity is
    delegated to the expression's standard arg-list constructor.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: 
exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 
361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 
TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 } 484 485 # Tokens that can represent identifiers 486 ID_VAR_TOKENS = { 487 TokenType.ALL, 488 TokenType.ATTACH, 489 TokenType.VAR, 490 TokenType.ANTI, 491 TokenType.APPLY, 492 TokenType.ASC, 493 TokenType.ASOF, 494 TokenType.AUTO_INCREMENT, 495 TokenType.BEGIN, 496 TokenType.BPCHAR, 497 TokenType.CACHE, 498 TokenType.CASE, 499 TokenType.COLLATE, 500 TokenType.COMMAND, 501 TokenType.COMMENT, 502 TokenType.COMMIT, 503 TokenType.CONSTRAINT, 504 TokenType.COPY, 505 TokenType.CUBE, 506 TokenType.CURRENT_SCHEMA, 507 TokenType.DEFAULT, 508 TokenType.DELETE, 509 TokenType.DESC, 510 TokenType.DESCRIBE, 511 TokenType.DETACH, 512 TokenType.DICTIONARY, 513 TokenType.DIV, 514 TokenType.END, 515 TokenType.EXECUTE, 516 TokenType.EXPORT, 517 TokenType.ESCAPE, 518 TokenType.FALSE, 519 TokenType.FIRST, 520 TokenType.FILTER, 521 TokenType.FINAL, 522 TokenType.FORMAT, 523 TokenType.FULL, 524 TokenType.GET, 525 TokenType.IDENTIFIER, 526 TokenType.IS, 527 TokenType.ISNULL, 528 TokenType.INTERVAL, 529 TokenType.KEEP, 530 TokenType.KILL, 531 TokenType.LEFT, 532 TokenType.LIMIT, 533 TokenType.LOAD, 
534 TokenType.MERGE, 535 TokenType.NATURAL, 536 TokenType.NEXT, 537 TokenType.OFFSET, 538 TokenType.OPERATOR, 539 TokenType.ORDINALITY, 540 TokenType.OVERLAPS, 541 TokenType.OVERWRITE, 542 TokenType.PARTITION, 543 TokenType.PERCENT, 544 TokenType.PIVOT, 545 TokenType.PRAGMA, 546 TokenType.PUT, 547 TokenType.RANGE, 548 TokenType.RECURSIVE, 549 TokenType.REFERENCES, 550 TokenType.REFRESH, 551 TokenType.RENAME, 552 TokenType.REPLACE, 553 TokenType.RIGHT, 554 TokenType.ROLLUP, 555 TokenType.ROW, 556 TokenType.ROWS, 557 TokenType.SEMI, 558 TokenType.SET, 559 TokenType.SETTINGS, 560 TokenType.SHOW, 561 TokenType.TEMPORARY, 562 TokenType.TOP, 563 TokenType.TRUE, 564 TokenType.TRUNCATE, 565 TokenType.UNIQUE, 566 TokenType.UNNEST, 567 TokenType.UNPIVOT, 568 TokenType.UPDATE, 569 TokenType.USE, 570 TokenType.VOLATILE, 571 TokenType.WINDOW, 572 *CREATABLES, 573 *SUBQUERY_PREDICATES, 574 *TYPE_TOKENS, 575 *NO_PAREN_FUNCTIONS, 576 } 577 ID_VAR_TOKENS.remove(TokenType.UNION) 578 579 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 580 TokenType.ANTI, 581 TokenType.APPLY, 582 TokenType.ASOF, 583 TokenType.FULL, 584 TokenType.LEFT, 585 TokenType.LOCK, 586 TokenType.NATURAL, 587 TokenType.RIGHT, 588 TokenType.SEMI, 589 TokenType.WINDOW, 590 } 591 592 ALIAS_TOKENS = ID_VAR_TOKENS 593 594 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 595 596 ARRAY_CONSTRUCTORS = { 597 "ARRAY": exp.Array, 598 "LIST": exp.List, 599 } 600 601 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 602 603 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 604 605 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 606 607 FUNC_TOKENS = { 608 TokenType.COLLATE, 609 TokenType.COMMAND, 610 TokenType.CURRENT_DATE, 611 TokenType.CURRENT_DATETIME, 612 TokenType.CURRENT_SCHEMA, 613 TokenType.CURRENT_TIMESTAMP, 614 TokenType.CURRENT_TIME, 615 TokenType.CURRENT_USER, 616 TokenType.FILTER, 617 TokenType.FIRST, 618 TokenType.FORMAT, 619 TokenType.GET, 620 TokenType.GLOB, 621 TokenType.IDENTIFIER, 622 TokenType.INDEX, 
623 TokenType.ISNULL, 624 TokenType.ILIKE, 625 TokenType.INSERT, 626 TokenType.LIKE, 627 TokenType.MERGE, 628 TokenType.NEXT, 629 TokenType.OFFSET, 630 TokenType.PRIMARY_KEY, 631 TokenType.RANGE, 632 TokenType.REPLACE, 633 TokenType.RLIKE, 634 TokenType.ROW, 635 TokenType.UNNEST, 636 TokenType.VAR, 637 TokenType.LEFT, 638 TokenType.RIGHT, 639 TokenType.SEQUENCE, 640 TokenType.DATE, 641 TokenType.DATETIME, 642 TokenType.TABLE, 643 TokenType.TIMESTAMP, 644 TokenType.TIMESTAMPTZ, 645 TokenType.TRUNCATE, 646 TokenType.WINDOW, 647 TokenType.XOR, 648 *TYPE_TOKENS, 649 *SUBQUERY_PREDICATES, 650 } 651 652 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 653 TokenType.AND: exp.And, 654 } 655 656 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 657 TokenType.COLON_EQ: exp.PropertyEQ, 658 } 659 660 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 661 TokenType.OR: exp.Or, 662 } 663 664 EQUALITY = { 665 TokenType.EQ: exp.EQ, 666 TokenType.NEQ: exp.NEQ, 667 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 668 } 669 670 COMPARISON = { 671 TokenType.GT: exp.GT, 672 TokenType.GTE: exp.GTE, 673 TokenType.LT: exp.LT, 674 TokenType.LTE: exp.LTE, 675 } 676 677 BITWISE = { 678 TokenType.AMP: exp.BitwiseAnd, 679 TokenType.CARET: exp.BitwiseXor, 680 TokenType.PIPE: exp.BitwiseOr, 681 } 682 683 TERM = { 684 TokenType.DASH: exp.Sub, 685 TokenType.PLUS: exp.Add, 686 TokenType.MOD: exp.Mod, 687 TokenType.COLLATE: exp.Collate, 688 } 689 690 FACTOR = { 691 TokenType.DIV: exp.IntDiv, 692 TokenType.LR_ARROW: exp.Distance, 693 TokenType.SLASH: exp.Div, 694 TokenType.STAR: exp.Mul, 695 } 696 697 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 698 699 TIMES = { 700 TokenType.TIME, 701 TokenType.TIMETZ, 702 } 703 704 TIMESTAMPS = { 705 TokenType.TIMESTAMP, 706 TokenType.TIMESTAMPNTZ, 707 TokenType.TIMESTAMPTZ, 708 TokenType.TIMESTAMPLTZ, 709 *TIMES, 710 } 711 712 SET_OPERATIONS = { 713 TokenType.UNION, 714 TokenType.INTERSECT, 715 TokenType.EXCEPT, 716 } 717 718 
JOIN_METHODS = { 719 TokenType.ASOF, 720 TokenType.NATURAL, 721 TokenType.POSITIONAL, 722 } 723 724 JOIN_SIDES = { 725 TokenType.LEFT, 726 TokenType.RIGHT, 727 TokenType.FULL, 728 } 729 730 JOIN_KINDS = { 731 TokenType.ANTI, 732 TokenType.CROSS, 733 TokenType.INNER, 734 TokenType.OUTER, 735 TokenType.SEMI, 736 TokenType.STRAIGHT_JOIN, 737 } 738 739 JOIN_HINTS: t.Set[str] = set() 740 741 LAMBDAS = { 742 TokenType.ARROW: lambda self, expressions: self.expression( 743 exp.Lambda, 744 this=self._replace_lambda( 745 self._parse_assignment(), 746 expressions, 747 ), 748 expressions=expressions, 749 ), 750 TokenType.FARROW: lambda self, expressions: self.expression( 751 exp.Kwarg, 752 this=exp.var(expressions[0].name), 753 expression=self._parse_assignment(), 754 ), 755 } 756 757 COLUMN_OPERATORS = { 758 TokenType.DOT: None, 759 TokenType.DOTCOLON: lambda self, this, to: self.expression( 760 exp.JSONCast, 761 this=this, 762 to=to, 763 ), 764 TokenType.DCOLON: lambda self, this, to: self.build_cast( 765 strict=self.STRICT_CAST, this=this, to=to 766 ), 767 TokenType.ARROW: lambda self, this, path: self.expression( 768 exp.JSONExtract, 769 this=this, 770 expression=self.dialect.to_json_path(path), 771 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 772 ), 773 TokenType.DARROW: lambda self, this, path: self.expression( 774 exp.JSONExtractScalar, 775 this=this, 776 expression=self.dialect.to_json_path(path), 777 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 778 ), 779 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtract, 781 this=this, 782 expression=path, 783 ), 784 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtractScalar, 786 this=this, 787 expression=path, 788 ), 789 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 790 exp.JSONBContains, 791 this=this, 792 expression=key, 793 ), 794 } 795 796 CAST_COLUMN_OPERATORS = { 797 TokenType.DOTCOLON, 798 TokenType.DCOLON, 799 } 800 801 
EXPRESSION_PARSERS = { 802 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 803 exp.Column: lambda self: self._parse_column(), 804 exp.Condition: lambda self: self._parse_assignment(), 805 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 806 exp.Expression: lambda self: self._parse_expression(), 807 exp.From: lambda self: self._parse_from(joins=True), 808 exp.Group: lambda self: self._parse_group(), 809 exp.Having: lambda self: self._parse_having(), 810 exp.Hint: lambda self: self._parse_hint_body(), 811 exp.Identifier: lambda self: self._parse_id_var(), 812 exp.Join: lambda self: self._parse_join(), 813 exp.Lambda: lambda self: self._parse_lambda(), 814 exp.Lateral: lambda self: self._parse_lateral(), 815 exp.Limit: lambda self: self._parse_limit(), 816 exp.Offset: lambda self: self._parse_offset(), 817 exp.Order: lambda self: self._parse_order(), 818 exp.Ordered: lambda self: self._parse_ordered(), 819 exp.Properties: lambda self: self._parse_properties(), 820 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 821 exp.Qualify: lambda self: self._parse_qualify(), 822 exp.Returning: lambda self: self._parse_returning(), 823 exp.Select: lambda self: self._parse_select(), 824 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 825 exp.Table: lambda self: self._parse_table_parts(), 826 exp.TableAlias: lambda self: self._parse_table_alias(), 827 exp.Tuple: lambda self: self._parse_value(values=False), 828 exp.Whens: lambda self: self._parse_when_matched(), 829 exp.Where: lambda self: self._parse_where(), 830 exp.Window: lambda self: self._parse_named_window(), 831 exp.With: lambda self: self._parse_with(), 832 "JOIN_TYPE": lambda self: self._parse_join_parts(), 833 } 834 835 STATEMENT_PARSERS = { 836 TokenType.ALTER: lambda self: self._parse_alter(), 837 TokenType.ANALYZE: lambda self: self._parse_analyze(), 838 TokenType.BEGIN: lambda self: self._parse_transaction(), 
839 TokenType.CACHE: lambda self: self._parse_cache(), 840 TokenType.COMMENT: lambda self: self._parse_comment(), 841 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 842 TokenType.COPY: lambda self: self._parse_copy(), 843 TokenType.CREATE: lambda self: self._parse_create(), 844 TokenType.DELETE: lambda self: self._parse_delete(), 845 TokenType.DESC: lambda self: self._parse_describe(), 846 TokenType.DESCRIBE: lambda self: self._parse_describe(), 847 TokenType.DROP: lambda self: self._parse_drop(), 848 TokenType.GRANT: lambda self: self._parse_grant(), 849 TokenType.INSERT: lambda self: self._parse_insert(), 850 TokenType.KILL: lambda self: self._parse_kill(), 851 TokenType.LOAD: lambda self: self._parse_load(), 852 TokenType.MERGE: lambda self: self._parse_merge(), 853 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 854 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 855 TokenType.REFRESH: lambda self: self._parse_refresh(), 856 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 857 TokenType.SET: lambda self: self._parse_set(), 858 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 859 TokenType.UNCACHE: lambda self: self._parse_uncache(), 860 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 861 TokenType.UPDATE: lambda self: self._parse_update(), 862 TokenType.USE: lambda self: self._parse_use(), 863 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 864 } 865 866 UNARY_PARSERS = { 867 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 868 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 869 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 870 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 871 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 872 
TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 873 } 874 875 STRING_PARSERS = { 876 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 877 exp.RawString, this=token.text 878 ), 879 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 880 exp.National, this=token.text 881 ), 882 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 883 TokenType.STRING: lambda self, token: self.expression( 884 exp.Literal, this=token.text, is_string=True 885 ), 886 TokenType.UNICODE_STRING: lambda self, token: self.expression( 887 exp.UnicodeString, 888 this=token.text, 889 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 890 ), 891 } 892 893 NUMERIC_PARSERS = { 894 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 895 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 896 TokenType.HEX_STRING: lambda self, token: self.expression( 897 exp.HexString, 898 this=token.text, 899 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 900 ), 901 TokenType.NUMBER: lambda self, token: self.expression( 902 exp.Literal, this=token.text, is_string=False 903 ), 904 } 905 906 PRIMARY_PARSERS = { 907 **STRING_PARSERS, 908 **NUMERIC_PARSERS, 909 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 910 TokenType.NULL: lambda self, _: self.expression(exp.Null), 911 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 912 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 913 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 914 TokenType.STAR: lambda self, _: self._parse_star_ops(), 915 } 916 917 PLACEHOLDER_PARSERS = { 918 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 919 TokenType.PARAMETER: lambda self: self._parse_parameter(), 920 TokenType.COLON: lambda self: ( 921 
self.expression(exp.Placeholder, this=self._prev.text) 922 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 923 else None 924 ), 925 } 926 927 RANGE_PARSERS = { 928 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 929 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 930 TokenType.GLOB: binary_range_parser(exp.Glob), 931 TokenType.ILIKE: binary_range_parser(exp.ILike), 932 TokenType.IN: lambda self, this: self._parse_in(this), 933 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 934 TokenType.IS: lambda self, this: self._parse_is(this), 935 TokenType.LIKE: binary_range_parser(exp.Like), 936 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 937 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 938 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 939 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 940 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 941 } 942 943 PIPE_SYNTAX_TRANSFORM_PARSERS = { 944 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 945 "AS": lambda self, query: self._build_pipe_cte( 946 query, [exp.Star()], self._parse_table_alias() 947 ), 948 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 949 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 950 "ORDER BY": lambda self, query: query.order_by( 951 self._parse_order(), append=False, copy=False 952 ), 953 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 954 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 955 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 956 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 957 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 958 } 959 960 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 961 "ALLOWED_VALUES": lambda self: self.expression( 962 exp.AllowedValuesProperty, 
expressions=self._parse_csv(self._parse_primary) 963 ), 964 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 965 "AUTO": lambda self: self._parse_auto_property(), 966 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 967 "BACKUP": lambda self: self.expression( 968 exp.BackupProperty, this=self._parse_var(any_token=True) 969 ), 970 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 971 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 972 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 973 "CHECKSUM": lambda self: self._parse_checksum(), 974 "CLUSTER BY": lambda self: self._parse_cluster(), 975 "CLUSTERED": lambda self: self._parse_clustered_by(), 976 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 977 exp.CollateProperty, **kwargs 978 ), 979 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 980 "CONTAINS": lambda self: self._parse_contains_property(), 981 "COPY": lambda self: self._parse_copy_property(), 982 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 983 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 984 "DEFINER": lambda self: self._parse_definer(), 985 "DETERMINISTIC": lambda self: self.expression( 986 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 987 ), 988 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 989 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 990 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 991 "DISTKEY": lambda self: self._parse_distkey(), 992 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 993 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 994 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 995 "ENVIRONMENT": lambda self: self.expression( 996 
exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 997 ), 998 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 999 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1000 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1001 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1002 "FREESPACE": lambda self: self._parse_freespace(), 1003 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1004 "HEAP": lambda self: self.expression(exp.HeapProperty), 1005 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1006 "IMMUTABLE": lambda self: self.expression( 1007 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1008 ), 1009 "INHERITS": lambda self: self.expression( 1010 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1011 ), 1012 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1013 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1014 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1015 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1016 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1017 "LIKE": lambda self: self._parse_create_like(), 1018 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1019 "LOCK": lambda self: self._parse_locking(), 1020 "LOCKING": lambda self: self._parse_locking(), 1021 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1022 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1023 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1024 "MODIFIES": lambda self: self._parse_modifies_property(), 1025 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1026 "NO": lambda self: self._parse_no_property(), 1027 "ON": lambda self: 
self._parse_on_property(), 1028 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1029 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1030 "PARTITION": lambda self: self._parse_partitioned_of(), 1031 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1032 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1033 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1034 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1035 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1036 "READS": lambda self: self._parse_reads_property(), 1037 "REMOTE": lambda self: self._parse_remote_with_connection(), 1038 "RETURNS": lambda self: self._parse_returns(), 1039 "STRICT": lambda self: self.expression(exp.StrictProperty), 1040 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1041 "ROW": lambda self: self._parse_row(), 1042 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1043 "SAMPLE": lambda self: self.expression( 1044 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1045 ), 1046 "SECURE": lambda self: self.expression(exp.SecureProperty), 1047 "SECURITY": lambda self: self._parse_security(), 1048 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1049 "SETTINGS": lambda self: self._parse_settings_property(), 1050 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1051 "SORTKEY": lambda self: self._parse_sortkey(), 1052 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1053 "STABLE": lambda self: self.expression( 1054 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1055 ), 1056 "STORED": lambda self: self._parse_stored(), 1057 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1058 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1059 "TEMP": lambda self: 
    # Maps a constraint keyword (already consumed) to a callback that parses the
    # remainder of the constraint and returns the corresponding expression node.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> is a column constraint; a bare ON <id> is a property.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        # Parses Iceberg-style BUCKET(...) / TRUNCATE(...) partition transforms.
        # Returns None (after backtracking) when the keyword is not followed by "(",
        # in which case the caller should treat the keyword as a plain identifier.
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
    # ALTER TABLE actions, keyed by the keyword that follows the table name.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    # Nested ALTER TABLE ... ALTER <keyword> actions (e.g. Redshift DISTKEY/SORTKEY).
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraints that may appear in a schema definition without a CONSTRAINT <name> prefix.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    # Function-like constructs parsed without a parenthesized argument list (e.g. CASE ... END).
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    # Token types that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases, e.g. STRUCT(x AS a).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Expression types that represent key/value style definitions.
    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
    # Functions that require bespoke parsing of their argument list (non-standard
    # syntax such as CAST(x AS t), EXTRACT(part FROM expr), TRIM(... FROM ...), etc.).
    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        # SAFE_CAST / TRY_CAST return NULL instead of erroring, hence strict=False.
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    # Maps the token that starts a query modifier to a callback producing the
    # (modifier_arg_name, parsed_value) pair consumed by _parse_query_modifiers.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        # FETCH is normalized into the "limit" arg as well.
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    # SET statement scopes / kinds.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Converts a literal of a given type into a dedicated expression (e.g. JSON 'x').
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
1315 "ISOLATION": ( 1316 ("LEVEL", "REPEATABLE", "READ"), 1317 ("LEVEL", "READ", "COMMITTED"), 1318 ("LEVEL", "READ", "UNCOMITTED"), 1319 ("LEVEL", "SERIALIZABLE"), 1320 ), 1321 "READ": ("WRITE", "ONLY"), 1322 } 1323 1324 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1325 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1326 ) 1327 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1328 1329 CREATE_SEQUENCE: OPTIONS_TYPE = { 1330 "SCALE": ("EXTEND", "NOEXTEND"), 1331 "SHARD": ("EXTEND", "NOEXTEND"), 1332 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1333 **dict.fromkeys( 1334 ( 1335 "SESSION", 1336 "GLOBAL", 1337 "KEEP", 1338 "NOKEEP", 1339 "ORDER", 1340 "NOORDER", 1341 "NOCACHE", 1342 "CYCLE", 1343 "NOCYCLE", 1344 "NOMINVALUE", 1345 "NOMAXVALUE", 1346 "NOSCALE", 1347 "NOSHARD", 1348 ), 1349 tuple(), 1350 ), 1351 } 1352 1353 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1354 1355 USABLES: OPTIONS_TYPE = dict.fromkeys( 1356 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1357 ) 1358 1359 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1360 1361 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1362 "TYPE": ("EVOLUTION",), 1363 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1364 } 1365 1366 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1367 1368 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1369 1370 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1371 "NOT": ("ENFORCED",), 1372 "MATCH": ( 1373 "FULL", 1374 "PARTIAL", 1375 "SIMPLE", 1376 ), 1377 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1378 "USING": ( 1379 "BTREE", 1380 "HASH", 1381 ), 1382 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1383 } 1384 1385 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1386 "NO": ("OTHERS",), 1387 "CURRENT": ("ROW",), 1388 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1389 } 1390 1391 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", 
"ROLLBACK"} 1392 1393 CLONE_KEYWORDS = {"CLONE", "COPY"} 1394 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1395 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1396 1397 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1398 1399 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1400 1401 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1402 1403 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1404 1405 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1406 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1407 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1408 1409 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1410 1411 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1412 1413 ADD_CONSTRAINT_TOKENS = { 1414 TokenType.CONSTRAINT, 1415 TokenType.FOREIGN_KEY, 1416 TokenType.INDEX, 1417 TokenType.KEY, 1418 TokenType.PRIMARY_KEY, 1419 TokenType.UNIQUE, 1420 } 1421 1422 DISTINCT_TOKENS = {TokenType.DISTINCT} 1423 1424 NULL_TOKENS = {TokenType.NULL} 1425 1426 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1427 1428 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1429 1430 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1431 1432 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1433 1434 ODBC_DATETIME_LITERALS = { 1435 "d": exp.Date, 1436 "t": exp.Time, 1437 "ts": exp.Timestamp, 1438 } 1439 1440 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1441 1442 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1443 1444 # The style options for the DESCRIBE statement 1445 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1446 1447 # The style options for the ANALYZE statement 1448 ANALYZE_STYLES = { 1449 "BUFFER_USAGE_LIMIT", 1450 "FULL", 1451 "LOCAL", 
1452 "NO_WRITE_TO_BINLOG", 1453 "SAMPLE", 1454 "SKIP_LOCKED", 1455 "VERBOSE", 1456 } 1457 1458 ANALYZE_EXPRESSION_PARSERS = { 1459 "ALL": lambda self: self._parse_analyze_columns(), 1460 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1461 "DELETE": lambda self: self._parse_analyze_delete(), 1462 "DROP": lambda self: self._parse_analyze_histogram(), 1463 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1464 "LIST": lambda self: self._parse_analyze_list(), 1465 "PREDICATE": lambda self: self._parse_analyze_columns(), 1466 "UPDATE": lambda self: self._parse_analyze_histogram(), 1467 "VALIDATE": lambda self: self._parse_analyze_validate(), 1468 } 1469 1470 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1471 1472 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1473 1474 OPERATION_MODIFIERS: t.Set[str] = set() 1475 1476 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1477 1478 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1479 1480 STRICT_CAST = True 1481 1482 PREFIXED_PIVOT_COLUMNS = False 1483 IDENTIFY_PIVOT_STRINGS = False 1484 1485 LOG_DEFAULTS_TO_LN = False 1486 1487 # Whether the table sample clause expects CSV syntax 1488 TABLESAMPLE_CSV = False 1489 1490 # The default method used for table sampling 1491 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1492 1493 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1494 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1495 1496 # Whether the TRIM function expects the characters to trim as its first argument 1497 TRIM_PATTERN_FIRST = False 1498 1499 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1500 STRING_ALIASES = False 1501 1502 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1503 MODIFIERS_ATTACHED_TO_SET_OP = True 1504 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1505 1506 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1507 NO_PAREN_IF_COMMANDS = True 1508 1509 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1510 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1511 1512 # Whether the `:` operator is used to extract a value from a VARIANT column 1513 COLON_IS_VARIANT_EXTRACT = False 1514 1515 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1516 # If this is True and '(' is not found, the keyword will be treated as an identifier 1517 VALUES_FOLLOWED_BY_PAREN = True 1518 1519 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1520 SUPPORTS_IMPLICIT_UNNEST = False 1521 1522 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1523 INTERVAL_SPANS = True 1524 1525 # Whether a PARTITION clause can follow a table reference 1526 SUPPORTS_PARTITION_SELECTION = False 1527 1528 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1529 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1530 1531 # Whether the 'AS' keyword is optional in the CTE definition syntax 1532 OPTIONAL_ALIAS_TOKEN_CTE = True 1533 1534 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1535 ALTER_RENAME_REQUIRES_COLUMN = True 1536 1537 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 
1538 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1539 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1540 # as BigQuery, where all joins have the same precedence. 1541 JOINS_HAVE_EQUAL_PRECEDENCE = False 1542 1543 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1544 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1545 1546 # Whether map literals support arbitrary expressions as keys. 1547 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1548 # When False, keys are typically restricted to identifiers. 1549 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1550 1551 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1552 # is true for Snowflake but not for BigQuery which can also process strings 1553 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1554 1555 __slots__ = ( 1556 "error_level", 1557 "error_message_context", 1558 "max_errors", 1559 "dialect", 1560 "sql", 1561 "errors", 1562 "_tokens", 1563 "_index", 1564 "_curr", 1565 "_next", 1566 "_prev", 1567 "_prev_comments", 1568 "_pipe_cte_counter", 1569 ) 1570 1571 # Autofilled 1572 SHOW_TRIE: t.Dict = {} 1573 SET_TRIE: t.Dict = {} 1574 1575 def __init__( 1576 self, 1577 error_level: t.Optional[ErrorLevel] = None, 1578 error_message_context: int = 100, 1579 max_errors: int = 3, 1580 dialect: DialectType = None, 1581 ): 1582 from sqlglot.dialects import Dialect 1583 1584 self.error_level = error_level or ErrorLevel.IMMEDIATE 1585 self.error_message_context = error_message_context 1586 self.max_errors = max_errors 1587 self.dialect = Dialect.get_or_raise(dialect) 1588 self.reset() 1589 1590 def reset(self): 1591 self.sql = "" 1592 self.errors = [] 1593 self._tokens = [] 1594 self._index = 0 1595 self._curr = None 1596 self._next = None 1597 self._prev = None 1598 self._prev_comments = None 1599 self._pipe_cte_counter = 0 1600 1601 def 
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If the tokens can't be parsed into any of the given types.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this attempt was for, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream into semicolon-delimited chunks and applies
        # `parse_method` to each one, producing one tree per statement.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    # A semicolon that carries comments becomes its own chunk so
                    # those comments aren't dropped.
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed - report it.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined with ANSI escapes in the message.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach the explicit comments if given; otherwise consume any comments
        # buffered from the previous token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves buffered comments from the previous token onto `expression`.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanned by the two tokens (inclusive).
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent (no whitespace between).
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor by `times` (may be negative) and refreshes the
        # _curr/_next/_prev token views plus the buffered comments.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Backtracks the cursor to `index`; no-op when already there.
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        # Emits a warning when falling back to Command parsing for unsupported syntax.
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback parser: wraps the remainder of the statement in an opaque Command node.
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any failure surfaces as a ParseError we can catch here.
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON [MATERIALIZED] <kind> <target> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind - fall back to an opaque Command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # Parses the TO <table> property (e.g. ClickHouse materialized views).
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: <expr> [DELETE | RECOMPRESS <expr> | TO DISK <str> | TO VOLUME <str>]
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement; returns None for an empty chunk.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        # Not a recognized statement keyword: parse as a bare expression or SELECT,
        # then attach any trailing query modifiers.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Match an optional IF [NOT] EXISTS sequence; truthy only when fully matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement; unparseable shapes fall back to a raw exp.Command."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION drives the parse
            self._advance()

        properties = None

        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    # Merge all SequenceProperties fragments into a single node
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fallback to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            # Leftover tokens we can't attribute to the CREATE: keep the raw statement
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse CREATE SEQUENCE options; returns None when no tokens were consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that appears before the schema (pre-name position)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier kwargs that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated property list."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/DDL property, trying registered parsers before key=value."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        """Parse Hive STORED BY <handler> / STORED AS <format> clauses."""
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parse a field, normalizing unquoted identifiers into exp.Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse an optional '='/'AS' followed by a value into the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Greedily parse consecutive properties into exp.Properties, or None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property when preceded by a PRE_VOLATILE token,
        otherwise a function stability specifier."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = ON/OFF (with optional options in parens)."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = ON/OFF (with optional options in parens)."""
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY HASH(...)/RANDOM [BUCKETS n|AUTO] [ORDER BY ...]."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        """Parse KEY (<id>, ...) into the given composite-key property class."""
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH, dispatching on what comes next."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host into exp.DefinerProperty."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON/OFF/DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse CLUSTER BY expressions, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; retreats past COPY when GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING; retreats fully on no match."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING clause (kind, target, FOR/IN, lock type, OVERRIDE)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS ..., REMAINDER ...)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <table> {DEFAULT | FOR VALUES <bound spec>}."""
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING/EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a UDF RETURNS clause: TABLE<...>, TABLE schema, NULL ON NULL INPUT, or a type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement into exp.Describe."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # What looked like a style was actually the first part of a dotted table name
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse Oracle-style INSERT FIRST/ALL ... INTO ... SELECT into exp.MultitableInserts."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # Each branch: [WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...]
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (including INSERT DIRECTORY and multitable forms)."""
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT / ON DUPLICATE KEY conflict-handling clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (...); retreats fully when not present."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive ROW FORMAT SERDE/DELIMITED clauses (continues beyond this view)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties =
    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's ``LOAD DATA [LOCAL] INPATH ... INTO TABLE ...`` statement.

        Anything other than LOAD DATA is preserved verbatim as an opaque Command.
        """
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                # The `and` short-circuits: each optional clause is only parsed
                # when its introducing keyword is present
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target table, SET list, and trailing clauses."""
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                # RETURNING is attempted both before FROM/WHERE and after, so
                # either placement is accepted
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
exp.Cache, 3161 this=table, 3162 lazy=lazy, 3163 options=options, 3164 expression=self._parse_select(nested=True), 3165 ) 3166 3167 def _parse_partition(self) -> t.Optional[exp.Partition]: 3168 if not self._match_texts(self.PARTITION_KEYWORDS): 3169 return None 3170 3171 return self.expression( 3172 exp.Partition, 3173 subpartition=self._prev.text.upper() == "SUBPARTITION", 3174 expressions=self._parse_wrapped_csv(self._parse_assignment), 3175 ) 3176 3177 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3178 def _parse_value_expression() -> t.Optional[exp.Expression]: 3179 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3180 return exp.var(self._prev.text.upper()) 3181 return self._parse_expression() 3182 3183 if self._match(TokenType.L_PAREN): 3184 expressions = self._parse_csv(_parse_value_expression) 3185 self._match_r_paren() 3186 return self.expression(exp.Tuple, expressions=expressions) 3187 3188 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list; a hook that dialects may override."""
        return self._parse_expressions()
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT query, optionally consuming trailing ``|>`` pipe operators.

        Args:
            nested: whether this query is nested inside another expression.
            table: whether the result should be usable in table position.
            parse_subquery_alias: whether to parse an alias after a subquery.
            parse_set_operation: whether to attach trailing UNION/EXCEPT/etc.
            consume_pipe: whether to consume BigQuery-style ``|>`` pipe syntax.
        """
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            # In table context the piped query must act as a relation, hence the wrap
            query = query.subquery(copy=False) if query and table else query

        return query
    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse the core of a SELECT-like query.

        Dispatches on the leading token(s): WITH (CTEs), leading FROM (duckdb),
        SELECT, a parenthesized query, VALUES, SUMMARIZE, DESCRIBE, or STREAM.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # Reachable only when the configured error level doesn't raise
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A dot after the next token means ALL/DISTINCT is actually part of a
            # qualified name, so don't consume it as a quantifier
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM was an identifier, not a keyword — rewind one token
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its list of CTEs.

        Args:
            skip_with_token: when True, assume the WITH keyword was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            # CTEs are normally comma-separated, but a repeated WITH is tolerated
            # (optionally followed by yet another WITH after the comma)
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )
    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse a single CTE: ``<alias> [AS] [[NOT] MATERIALIZED] (<statement>)``.

        Returns None (after rewinding) when the tokens turn out not to be a CTE.
        """
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            # Not actually a CTE; rewind so the caller can re-parse these tokens
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            # Normalize a VALUES CTE into SELECT * FROM <values> so downstream
            # consumers only deal with selectable expressions
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte
3482 table_alias.add_comments(alias.pop_comments()) 3483 3484 return table_alias 3485 3486 def _parse_subquery( 3487 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3488 ) -> t.Optional[exp.Subquery]: 3489 if not this: 3490 return None 3491 3492 return self.expression( 3493 exp.Subquery, 3494 this=this, 3495 pivots=self._parse_pivots(), 3496 alias=self._parse_table_alias() if parse_alias else None, 3497 sample=self._parse_table_sample(), 3498 ) 3499 3500 def _implicit_unnests_to_explicit(self, this: E) -> E: 3501 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3502 3503 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3504 for i, join in enumerate(this.args.get("joins") or []): 3505 table = join.this 3506 normalized_table = table.copy() 3507 normalized_table.meta["maybe_column"] = True 3508 normalized_table = _norm(normalized_table, dialect=self.dialect) 3509 3510 if isinstance(table, exp.Table) and not join.args.get("on"): 3511 if normalized_table.parts[0].name in refs: 3512 table_as_column = table.to_column() 3513 unnest = exp.Unnest(expressions=[table_as_column]) 3514 3515 # Table.to_column creates a parent Alias node that we want to convert to 3516 # a TableAlias and attach to the Unnest, so it matches the parser's output 3517 if isinstance(table.args.get("alias"), exp.TableAlias): 3518 table_as_column.replace(table_as_column.this) 3519 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3520 3521 table.replace(unnest) 3522 3523 refs.add(normalized_table.alias_or_name) 3524 3525 return this 3526 3527 def _parse_query_modifiers( 3528 self, this: t.Optional[exp.Expression] 3529 ) -> t.Optional[exp.Expression]: 3530 if isinstance(this, self.MODIFIABLES): 3531 for join in self._parse_joins(): 3532 this.append("joins", join) 3533 for lateral in iter(self._parse_lateral, None): 3534 this.append("laterals", lateral) 3535 3536 while True: 3537 if 
    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse the contents of a hint comment into an exp.Hint.

        Falls back to capturing the raw hint text when the tokens cannot be
        parsed as function calls / variables, or when unconsumed tokens remain.
        """
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            # iter() keeps consuming CSV groups until an empty list is returned
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            # Either parsing failed or tokens are left over: keep the hint verbatim
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)
self._match(TokenType.INTO): 3602 return None 3603 3604 temp = self._match(TokenType.TEMPORARY) 3605 unlogged = self._match_text_seq("UNLOGGED") 3606 self._match(TokenType.TABLE) 3607 3608 return self.expression( 3609 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3610 ) 3611 3612 def _parse_from( 3613 self, 3614 joins: bool = False, 3615 skip_from_token: bool = False, 3616 consume_pipe: bool = False, 3617 ) -> t.Optional[exp.From]: 3618 if not skip_from_token and not self._match(TokenType.FROM): 3619 return None 3620 3621 return self.expression( 3622 exp.From, 3623 comments=self._prev_comments, 3624 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3625 ) 3626 3627 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3628 return self.expression( 3629 exp.MatchRecognizeMeasure, 3630 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3631 this=self._parse_expression(), 3632 ) 3633 3634 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3635 if not self._match(TokenType.MATCH_RECOGNIZE): 3636 return None 3637 3638 self._match_l_paren() 3639 3640 partition = self._parse_partition_by() 3641 order = self._parse_order() 3642 3643 measures = ( 3644 self._parse_csv(self._parse_match_recognize_measure) 3645 if self._match_text_seq("MEASURES") 3646 else None 3647 ) 3648 3649 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3650 rows = exp.var("ONE ROW PER MATCH") 3651 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3652 text = "ALL ROWS PER MATCH" 3653 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3654 text += " SHOW EMPTY MATCHES" 3655 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3656 text += " OMIT EMPTY MATCHES" 3657 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3658 text += " WITH UNMATCHED ROWS" 3659 rows = exp.var(text) 3660 else: 3661 rows = None 3662 3663 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3664 
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) row-pattern-recognition clause.

        Consumes PARTITION BY, ORDER BY, MEASURES, the rows-per-match and
        after-match-skip options, the PATTERN (kept as raw text), and DEFINE.
        """
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # Rows-per-match options are preserved as a single variable string
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is not tokenized into an AST: balance parentheses
            # manually and keep the raw SQL between them
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
else: 3736 return None 3737 3738 if not this: 3739 this = ( 3740 self._parse_unnest() 3741 or self._parse_function() 3742 or self._parse_id_var(any_token=False) 3743 ) 3744 3745 while self._match(TokenType.DOT): 3746 this = exp.Dot( 3747 this=this, 3748 expression=self._parse_function() or self._parse_id_var(any_token=False), 3749 ) 3750 3751 ordinality: t.Optional[bool] = None 3752 3753 if view: 3754 table = self._parse_id_var(any_token=False) 3755 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3756 table_alias: t.Optional[exp.TableAlias] = self.expression( 3757 exp.TableAlias, this=table, columns=columns 3758 ) 3759 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3760 # We move the alias from the lateral's child node to the lateral itself 3761 table_alias = this.args["alias"].pop() 3762 else: 3763 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3764 table_alias = self._parse_table_alias() 3765 3766 return self.expression( 3767 exp.Lateral, 3768 this=this, 3769 view=view, 3770 outer=outer, 3771 alias=table_alias, 3772 cross_apply=cross_apply, 3773 ordinality=ordinality, 3774 ) 3775 3776 def _parse_join_parts( 3777 self, 3778 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3779 return ( 3780 self._match_set(self.JOIN_METHODS) and self._prev, 3781 self._match_set(self.JOIN_SIDES) and self._prev, 3782 self._match_set(self.JOIN_KINDS) and self._prev, 3783 ) 3784 3785 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3786 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3787 this = self._parse_column() 3788 if isinstance(this, exp.Column): 3789 return this.this 3790 return this 3791 3792 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3793 3794 def _parse_join( 3795 self, skip_join_token: bool = False, parse_bracket: bool = False 3796 ) -> t.Optional[exp.Join]: 3797 if self._match(TokenType.COMMA): 3798 table = 
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN (including comma joins, APPLY, and method/side/kind modifiers).

        Args:
            skip_join_token: when True, the JOIN keyword was already consumed.
            parse_bracket: forwarded to table parsing for bracketed tables.
        """
        if self._match(TokenType.COMMA):
            # Comma join: `FROM a, b`
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            # No JOIN keyword after the modifiers: rewind and reset
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # Try to absorb nested joins whose ON/USING follows them; roll back
            # if no such condition is found
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        # Collect comments attached to the join modifier tokens
        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments
        return self.expression(exp.Join, comments=comments, **kwargs)
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_assignment()

        # If the next token is a keyword that legitimately follows the expression
        # itself, there is no opclass to parse
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
tablespace=tablespace, 3911 on=on, 3912 ) 3913 3914 def _parse_index( 3915 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3916 ) -> t.Optional[exp.Index]: 3917 if index or anonymous: 3918 unique = None 3919 primary = None 3920 amp = None 3921 3922 self._match(TokenType.ON) 3923 self._match(TokenType.TABLE) # hive 3924 table = self._parse_table_parts(schema=True) 3925 else: 3926 unique = self._match(TokenType.UNIQUE) 3927 primary = self._match_text_seq("PRIMARY") 3928 amp = self._match_text_seq("AMP") 3929 3930 if not self._match(TokenType.INDEX): 3931 return None 3932 3933 index = self._parse_id_var() 3934 table = None 3935 3936 params = self._parse_index_params() 3937 3938 return self.expression( 3939 exp.Index, 3940 this=index, 3941 table=table, 3942 unique=unique, 3943 primary=primary, 3944 amp=amp, 3945 params=params, 3946 ) 3947 3948 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3949 hints: t.List[exp.Expression] = [] 3950 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3951 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3952 hints.append( 3953 self.expression( 3954 exp.WithTableHint, 3955 expressions=self._parse_csv( 3956 lambda: self._parse_function() or self._parse_var(any_token=True) 3957 ), 3958 ) 3959 ) 3960 self._match_r_paren() 3961 else: 3962 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3963 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3964 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3965 3966 self._match_set((TokenType.INDEX, TokenType.KEY)) 3967 if self._match(TokenType.FOR): 3968 hint.set("target", self._advance_any() and self._prev.text.upper()) 3969 3970 hint.set("expressions", self._parse_wrapped_id_vars()) 3971 hints.append(hint) 3972 3973 return hints or None 3974 3975 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3976 return ( 3977 (not schema and 
self._parse_function(optional_parens=False)) 3978 or self._parse_id_var(any_token=False) 3979 or self._parse_string_as_identifier() 3980 or self._parse_placeholder() 3981 ) 3982 3983 def _parse_table_parts( 3984 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3985 ) -> exp.Table: 3986 catalog = None 3987 db = None 3988 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3989 3990 while self._match(TokenType.DOT): 3991 if catalog: 3992 # This allows nesting the table in arbitrarily many dot expressions if needed 3993 table = self.expression( 3994 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3995 ) 3996 else: 3997 catalog = db 3998 db = table 3999 # "" used for tsql FROM a..b case 4000 table = self._parse_table_part(schema=schema) or "" 4001 4002 if ( 4003 wildcard 4004 and self._is_connected() 4005 and (isinstance(table, exp.Identifier) or not table) 4006 and self._match(TokenType.STAR) 4007 ): 4008 if isinstance(table, exp.Identifier): 4009 table.args["this"] += "*" 4010 else: 4011 table = exp.Identifier(this="*") 4012 4013 # We bubble up comments from the Identifier to the Table 4014 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4015 4016 if is_db_reference: 4017 catalog = db 4018 db = table 4019 table = None 4020 4021 if not table and not is_db_reference: 4022 self.raise_error(f"Expected table name but got {self._curr}") 4023 if not db and is_db_reference: 4024 self.raise_error(f"Expected database name but got {self._curr}") 4025 4026 table = self.expression( 4027 exp.Table, 4028 comments=comments, 4029 this=table, 4030 db=db, 4031 catalog=catalog, 4032 ) 4033 4034 changes = self._parse_changes() 4035 if changes: 4036 table.set("changes", changes) 4037 4038 at_before = self._parse_historical_data() 4039 if at_before: 4040 table.set("when", at_before) 4041 4042 pivots = self._parse_pivots() 4043 if pivots: 4044 table.set("pivots", pivots) 4045 
4046 return table 4047 4048 def _parse_table( 4049 self, 4050 schema: bool = False, 4051 joins: bool = False, 4052 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4053 parse_bracket: bool = False, 4054 is_db_reference: bool = False, 4055 parse_partition: bool = False, 4056 consume_pipe: bool = False, 4057 ) -> t.Optional[exp.Expression]: 4058 lateral = self._parse_lateral() 4059 if lateral: 4060 return lateral 4061 4062 unnest = self._parse_unnest() 4063 if unnest: 4064 return unnest 4065 4066 values = self._parse_derived_table_values() 4067 if values: 4068 return values 4069 4070 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4071 if subquery: 4072 if not subquery.args.get("pivots"): 4073 subquery.set("pivots", self._parse_pivots()) 4074 return subquery 4075 4076 bracket = parse_bracket and self._parse_bracket(None) 4077 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4078 4079 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4080 self._parse_table 4081 ) 4082 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4083 4084 only = self._match(TokenType.ONLY) 4085 4086 this = t.cast( 4087 exp.Expression, 4088 bracket 4089 or rows_from 4090 or self._parse_bracket( 4091 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4092 ), 4093 ) 4094 4095 if only: 4096 this.set("only", only) 4097 4098 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4099 self._match_text_seq("*") 4100 4101 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4102 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4103 this.set("partition", self._parse_partition()) 4104 4105 if schema: 4106 return self._parse_schema(this=this) 4107 4108 version = self._parse_version() 4109 4110 if version: 4111 this.set("version", version) 4112 4113 if self.dialect.ALIAS_POST_TABLESAMPLE: 4114 
this.set("sample", self._parse_table_sample()) 4115 4116 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4117 if alias: 4118 this.set("alias", alias) 4119 4120 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4121 return self.expression( 4122 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4123 ) 4124 4125 this.set("hints", self._parse_table_hints()) 4126 4127 if not this.args.get("pivots"): 4128 this.set("pivots", self._parse_pivots()) 4129 4130 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4131 this.set("sample", self._parse_table_sample()) 4132 4133 if joins: 4134 for join in self._parse_joins(): 4135 this.append("joins", join) 4136 4137 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4138 this.set("ordinality", True) 4139 this.set("alias", self._parse_table_alias()) 4140 4141 return this 4142 4143 def _parse_version(self) -> t.Optional[exp.Version]: 4144 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4145 this = "TIMESTAMP" 4146 elif self._match(TokenType.VERSION_SNAPSHOT): 4147 this = "VERSION" 4148 else: 4149 return None 4150 4151 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4152 kind = self._prev.text.upper() 4153 start = self._parse_bitwise() 4154 self._match_texts(("TO", "AND")) 4155 end = self._parse_bitwise() 4156 expression: t.Optional[exp.Expression] = self.expression( 4157 exp.Tuple, expressions=[start, end] 4158 ) 4159 elif self._match_text_seq("CONTAINED", "IN"): 4160 kind = "CONTAINED IN" 4161 expression = self.expression( 4162 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4163 ) 4164 elif self._match(TokenType.ALL): 4165 kind = "ALL" 4166 expression = None 4167 else: 4168 self._match_text_seq("AS", "OF") 4169 kind = "AS OF" 4170 expression = self._parse_type() 4171 4172 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4173 4174 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 
4175 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4176 index = self._index 4177 historical_data = None 4178 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4179 this = self._prev.text.upper() 4180 kind = ( 4181 self._match(TokenType.L_PAREN) 4182 and self._match_texts(self.HISTORICAL_DATA_KIND) 4183 and self._prev.text.upper() 4184 ) 4185 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4186 4187 if expression: 4188 self._match_r_paren() 4189 historical_data = self.expression( 4190 exp.HistoricalData, this=this, kind=kind, expression=expression 4191 ) 4192 else: 4193 self._retreat(index) 4194 4195 return historical_data 4196 4197 def _parse_changes(self) -> t.Optional[exp.Changes]: 4198 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4199 return None 4200 4201 information = self._parse_var(any_token=True) 4202 self._match_r_paren() 4203 4204 return self.expression( 4205 exp.Changes, 4206 information=information, 4207 at_before=self._parse_historical_data(), 4208 end=self._parse_historical_data(), 4209 ) 4210 4211 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4212 if not self._match(TokenType.UNNEST): 4213 return None 4214 4215 expressions = self._parse_wrapped_csv(self._parse_equality) 4216 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4217 4218 alias = self._parse_table_alias() if with_alias else None 4219 4220 if alias: 4221 if self.dialect.UNNEST_COLUMN_ONLY: 4222 if alias.args.get("columns"): 4223 self.raise_error("Unexpected extra column alias in unnest.") 4224 4225 alias.set("columns", [alias.this]) 4226 alias.set("this", None) 4227 4228 columns = alias.args.get("columns") or [] 4229 if offset and len(expressions) < len(columns): 4230 offset = columns.pop() 4231 4232 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4233 self._match(TokenType.ALIAS) 4234 offset = self._parse_id_var( 4235 any_token=False, 
tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4236 ) or exp.to_identifier("offset") 4237 4238 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4239 4240 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4241 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4242 if not is_derived and not ( 4243 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4244 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4245 ): 4246 return None 4247 4248 expressions = self._parse_csv(self._parse_value) 4249 alias = self._parse_table_alias() 4250 4251 if is_derived: 4252 self._match_r_paren() 4253 4254 return self.expression( 4255 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4256 ) 4257 4258 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4259 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4260 as_modifier and self._match_text_seq("USING", "SAMPLE") 4261 ): 4262 return None 4263 4264 bucket_numerator = None 4265 bucket_denominator = None 4266 bucket_field = None 4267 percent = None 4268 size = None 4269 seed = None 4270 4271 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4272 matched_l_paren = self._match(TokenType.L_PAREN) 4273 4274 if self.TABLESAMPLE_CSV: 4275 num = None 4276 expressions = self._parse_csv(self._parse_primary) 4277 else: 4278 expressions = None 4279 num = ( 4280 self._parse_factor() 4281 if self._match(TokenType.NUMBER, advance=False) 4282 else self._parse_primary() or self._parse_placeholder() 4283 ) 4284 4285 if self._match_text_seq("BUCKET"): 4286 bucket_numerator = self._parse_number() 4287 self._match_text_seq("OUT", "OF") 4288 bucket_denominator = bucket_denominator = self._parse_number() 4289 self._match(TokenType.ON) 4290 bucket_field = self._parse_field() 4291 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4292 percent = num 4293 elif self._match(TokenType.ROWS) 
or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4294 size = num 4295 else: 4296 percent = num 4297 4298 if matched_l_paren: 4299 self._match_r_paren() 4300 4301 if self._match(TokenType.L_PAREN): 4302 method = self._parse_var(upper=True) 4303 seed = self._match(TokenType.COMMA) and self._parse_number() 4304 self._match_r_paren() 4305 elif self._match_texts(("SEED", "REPEATABLE")): 4306 seed = self._parse_wrapped(self._parse_number) 4307 4308 if not method and self.DEFAULT_SAMPLING_METHOD: 4309 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4310 4311 return self.expression( 4312 exp.TableSample, 4313 expressions=expressions, 4314 method=method, 4315 bucket_numerator=bucket_numerator, 4316 bucket_denominator=bucket_denominator, 4317 bucket_field=bucket_field, 4318 percent=percent, 4319 size=size, 4320 seed=seed, 4321 ) 4322 4323 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4324 return list(iter(self._parse_pivot, None)) or None 4325 4326 def _parse_joins(self) -> t.Iterator[exp.Join]: 4327 return iter(self._parse_join, None) 4328 4329 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4330 if not self._match(TokenType.INTO): 4331 return None 4332 4333 return self.expression( 4334 exp.UnpivotColumns, 4335 this=self._match_text_seq("NAME") and self._parse_column(), 4336 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4337 ) 4338 4339 # https://duckdb.org/docs/sql/statements/pivot 4340 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4341 def _parse_on() -> t.Optional[exp.Expression]: 4342 this = self._parse_bitwise() 4343 4344 if self._match(TokenType.IN): 4345 # PIVOT ... ON col IN (row_val1, row_val2) 4346 return self._parse_in(this) 4347 if self._match(TokenType.ALIAS, advance=False): 4348 # UNPIVOT ... 
ON (col1, col2, col3) AS row_val 4349 return self._parse_alias(this) 4350 4351 return this 4352 4353 this = self._parse_table() 4354 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4355 into = self._parse_unpivot_columns() 4356 using = self._match(TokenType.USING) and self._parse_csv( 4357 lambda: self._parse_alias(self._parse_function()) 4358 ) 4359 group = self._parse_group() 4360 4361 return self.expression( 4362 exp.Pivot, 4363 this=this, 4364 expressions=expressions, 4365 using=using, 4366 group=group, 4367 unpivot=is_unpivot, 4368 into=into, 4369 ) 4370 4371 def _parse_pivot_in(self) -> exp.In: 4372 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4373 this = self._parse_select_or_expression() 4374 4375 self._match(TokenType.ALIAS) 4376 alias = self._parse_bitwise() 4377 if alias: 4378 if isinstance(alias, exp.Column) and not alias.db: 4379 alias = alias.this 4380 return self.expression(exp.PivotAlias, this=this, alias=alias) 4381 4382 return this 4383 4384 value = self._parse_column() 4385 4386 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4387 self.raise_error("Expecting IN (") 4388 4389 if self._match(TokenType.ANY): 4390 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4391 else: 4392 exprs = self._parse_csv(_parse_aliased_expression) 4393 4394 self._match_r_paren() 4395 return self.expression(exp.In, this=value, expressions=exprs) 4396 4397 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4398 func = self._parse_function() 4399 if not func: 4400 self.raise_error("Expecting an aggregation function in PIVOT") 4401 4402 return self._parse_alias(func) 4403 4404 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4405 index = self._index 4406 include_nulls = None 4407 4408 if self._match(TokenType.PIVOT): 4409 unpivot = False 4410 elif self._match(TokenType.UNPIVOT): 4411 unpivot = True 4412 4413 # 
https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4414 if self._match_text_seq("INCLUDE", "NULLS"): 4415 include_nulls = True 4416 elif self._match_text_seq("EXCLUDE", "NULLS"): 4417 include_nulls = False 4418 else: 4419 return None 4420 4421 expressions = [] 4422 4423 if not self._match(TokenType.L_PAREN): 4424 self._retreat(index) 4425 return None 4426 4427 if unpivot: 4428 expressions = self._parse_csv(self._parse_column) 4429 else: 4430 expressions = self._parse_csv(self._parse_pivot_aggregation) 4431 4432 if not expressions: 4433 self.raise_error("Failed to parse PIVOT's aggregation list") 4434 4435 if not self._match(TokenType.FOR): 4436 self.raise_error("Expecting FOR") 4437 4438 fields = [] 4439 while True: 4440 field = self._try_parse(self._parse_pivot_in) 4441 if not field: 4442 break 4443 fields.append(field) 4444 4445 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4446 self._parse_bitwise 4447 ) 4448 4449 group = self._parse_group() 4450 4451 self._match_r_paren() 4452 4453 pivot = self.expression( 4454 exp.Pivot, 4455 expressions=expressions, 4456 fields=fields, 4457 unpivot=unpivot, 4458 include_nulls=include_nulls, 4459 default_on_null=default_on_null, 4460 group=group, 4461 ) 4462 4463 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4464 pivot.set("alias", self._parse_table_alias()) 4465 4466 if not unpivot: 4467 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4468 4469 columns: t.List[exp.Expression] = [] 4470 all_fields = [] 4471 for pivot_field in pivot.fields: 4472 pivot_field_expressions = pivot_field.expressions 4473 4474 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Collect the aliases of the PIVOT aggregations (unaliased ones are skipped)
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse ClickHouse's PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL/DISTINCT, ROLLUP, CUBE, GROUPING SETS and
        WITH TOTALS variants."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # A lone WITH (possibly followed by one token) means nothing was parsed
            # after it; rewind so WITH can be consumed by a later clause.
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        # WITH ROLLUP / WITH CUBE take no column list of their own
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        # Temporarily register PRIOR as a prefix parser so it is only recognized
        # inside the CONNECT BY condition.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        """Parse `<name> AS <expr>` (used by INTERPOLATE), or just the name."""
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's INTERPOLATE (...) modifier of WITH FILL."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY / DISTRIBUTE BY)."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term with ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # `(asc and False)` keeps desc a bool (False) when ASC was matched explicitly
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Apply the dialect's default null ordering when none was written out
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        """Parse FETCH/TOP trailing options: [PERCENT] [ROW|ROWS] [ONLY | WITH TIES]."""
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP (including MySQL's `LIMIT offset, count`) or FETCH."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT <offset>, <count>
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        """Lookahead: report whether a LIMIT or OFFSET clause starts here, without
        consuming any tokens."""
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's LIMIT ... BY <exprs> suffix."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse row-locking clauses (FOR UPDATE/SHARE ... [OF ...] [NOWAIT|WAIT|SKIP LOCKED])."""
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse one UNION/EXCEPT/INTERSECT branch attached to `this`, or None."""
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold consecutive set operations onto `this`, left-associatively."""
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Move trailing modifiers (e.g. ORDER BY/LIMIT) from the right-hand
                # query up to the set operation itself
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) projection expression."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse assignment operators (e.g. `:=`); the lowest-precedence expression level."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        # OR level
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        # AND level
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        # = / <> level
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        # < / <= / > / >= level
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN/IN/LIKE via RANGE_PARSERS), ISNULL,
        NOTNULL and IS, with optional NOT negation."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap a range predicate in NOT (overridable hook for dialects)."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of IS: [NOT] DISTINCT FROM, JSON predicates,
        or a literal/NULL; retreats if nothing valid follows."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In,
this=this, unnest=unnest) 5017 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5018 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5019 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5020 5021 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5022 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5023 else: 5024 this = self.expression(exp.In, this=this, expressions=expressions) 5025 5026 if matched_l_paren: 5027 self._match_r_paren(this) 5028 elif not self._match(TokenType.R_BRACKET, expression=this): 5029 self.raise_error("Expecting ]") 5030 else: 5031 this = self.expression(exp.In, this=this, field=self._parse_column()) 5032 5033 return this 5034 5035 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5036 symmetric = None 5037 if self._match_text_seq("SYMMETRIC"): 5038 symmetric = True 5039 elif self._match_text_seq("ASYMMETRIC"): 5040 symmetric = False 5041 5042 low = self._parse_bitwise() 5043 self._match(TokenType.AND) 5044 high = self._parse_bitwise() 5045 5046 return self.expression( 5047 exp.Between, 5048 this=this, 5049 low=low, 5050 high=high, 5051 symmetric=symmetric, 5052 ) 5053 5054 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5055 if not self._match(TokenType.ESCAPE): 5056 return this 5057 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5058 5059 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5060 index = self._index 5061 5062 if not self._match(TokenType.INTERVAL) and match_interval: 5063 return None 5064 5065 if self._match(TokenType.STRING, advance=False): 5066 this = self._parse_primary() 5067 else: 5068 this = self._parse_term() 5069 5070 if not this or ( 5071 isinstance(this, exp.Column) 5072 and not this.table 5073 and not this.this.quoted 5074 and this.name.upper() == "IS" 5075 ): 
5076 self._retreat(index) 5077 return None 5078 5079 unit = self._parse_function() or ( 5080 not self._match(TokenType.ALIAS, advance=False) 5081 and self._parse_var(any_token=True, upper=True) 5082 ) 5083 5084 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5085 # each INTERVAL expression into this canonical form so it's easy to transpile 5086 if this and this.is_number: 5087 this = exp.Literal.string(this.to_py()) 5088 elif this and this.is_string: 5089 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5090 if parts and unit: 5091 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5092 unit = None 5093 self._retreat(self._index - 1) 5094 5095 if len(parts) == 1: 5096 this = exp.Literal.string(parts[0][0]) 5097 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5098 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5099 unit = self.expression( 5100 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5101 ) 5102 5103 interval = self.expression(exp.Interval, this=this, unit=unit) 5104 5105 index = self._index 5106 self._match(TokenType.PLUS) 5107 5108 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5109 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5110 return self.expression( 5111 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5112 ) 5113 5114 self._retreat(index) 5115 return interval 5116 5117 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5118 this = self._parse_term() 5119 5120 while True: 5121 if self._match_set(self.BITWISE): 5122 this = self.expression( 5123 self.BITWISE[self._prev.token_type], 5124 this=this, 5125 expression=self._parse_term(), 5126 ) 5127 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5128 this = self.expression( 5129 exp.DPipe, 5130 this=this, 5131 expression=self._parse_term(), 5132 safe=not self.dialect.STRICT_STRING_CONCAT, 5133 ) 5134 elif self._match(TokenType.DQMARK): 5135 this = self.expression( 5136 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5137 ) 5138 elif self._match_pair(TokenType.LT, TokenType.LT): 5139 this = self.expression( 5140 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5141 ) 5142 elif self._match_pair(TokenType.GT, TokenType.GT): 5143 this = self.expression( 5144 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5145 ) 5146 else: 5147 break 5148 5149 return this 5150 5151 def _parse_term(self) -> t.Optional[exp.Expression]: 5152 this = self._parse_factor() 5153 5154 while self._match_set(self.TERM): 5155 klass = self.TERM[self._prev.token_type] 5156 comments = self._prev_comments 5157 expression = self._parse_factor() 5158 5159 this = self.expression(klass, this=this, comments=comments, expression=expression) 5160 5161 if isinstance(this, exp.Collate): 5162 expr = this.expression 5163 5164 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5165 # fallback to Identifier / Var 5166 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5167 ident = expr.this 5168 if isinstance(ident, 
exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (FACTOR), tagging Div with dialect semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-form integer-division operator (e.g. DIV) with no RHS was really
            # an identifier, so back off and let the caller reinterpret it
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal (e.g. DATE '2020-01-01'), or fall back
        to a column expression. Uses index retreats to disambiguate."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single data type parameter (e.g. the 38 in DECIMAL(38, 0))."""
        this = self._parse_type()
        if not this:
            return None

        # A bare table-less column here is really a keyword-like size token (e.g. MAX)
        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted user-defined type name into a UDT DataType."""
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/STRUCT/MAP), parameterized,
        timezone-qualified, and dialect-specific forms. Returns None (after
        retreating) when the tokens don't form a valid type."""
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type
        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # BigQuery-style inline constructor values, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        # A parenthesized type followed by a string is a function call, not a type
        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single STRUCT field: an optional name followed by its type."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token.
# Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an AT TIME ZONE clause when present."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column, applying column operators and the
        Oracle-style (+) join marker when the dialect supports it."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare column reference, wrapping Identifiers in exp.Column."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path` VARIANT extraction into a
        JSONExtract, re-attaching any trailing :: casts outside the path."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
                requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
            )

        while casts:
            this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the type on the right-hand side of a :: cast."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, casts, brackets, ...), rebuilding
        qualified columns and dotted function calls along the way."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in self.CAST_COLUMN_OPERATORS:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers up one level: table -> db, db -> catalog
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        """Parse a parenthesized expression: tuple, subquery, or exp.Paren."""
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literals (merging adjacent strings into a
        Concat), leading-dot numbers like .5, or a parenthesized expression."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/variable.
        Function-vs-primary precedence flips when anonymous_func is set."""
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, additionally supporting the ODBC {fn <function>} wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation: no-paren functions, dialect-specific
        FUNCTION_PARSERS, subquery predicates, known builders, or exp.Anonymous."""
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # Builders may optionally accept the target dialect
                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for dialects to post-process positional args; identity by default."""
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        """Normalize key-value style function arguments (aliases, equalities) into
        exp.PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        """Parse the body of a user-defined function definition."""
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single UDF parameter as a column definition."""
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: qualified name plus optional parameter list."""
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'), else fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified by a kind (e.g. @@GLOBAL.x)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda parameter."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> x + y`); if no lambda arrow follows,
        retreat and parse DISTINCT / a select-or-expression with modifiers instead."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/type list) attached to `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g.
in INSERT INTO table (<expr>), 6008 # expr can be of both types 6009 if self._match_set(self.SELECT_START_TOKENS): 6010 self._retreat(index) 6011 return this 6012 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6013 self._match_r_paren() 6014 return self.expression(exp.Schema, this=this, expressions=args) 6015 6016 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6017 return self._parse_column_def(self._parse_field(any_token=True)) 6018 6019 def _parse_column_def( 6020 self, this: t.Optional[exp.Expression], computed_column: bool = True 6021 ) -> t.Optional[exp.Expression]: 6022 # column defs are not really columns, they're identifiers 6023 if isinstance(this, exp.Column): 6024 this = this.this 6025 6026 if not computed_column: 6027 self._match(TokenType.ALIAS) 6028 6029 kind = self._parse_types(schema=True) 6030 6031 if self._match_text_seq("FOR", "ORDINALITY"): 6032 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6033 6034 constraints: t.List[exp.Expression] = [] 6035 6036 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6037 ("ALIAS", "MATERIALIZED") 6038 ): 6039 persisted = self._prev.text.upper() == "MATERIALIZED" 6040 constraint_kind = exp.ComputedColumnConstraint( 6041 this=self._parse_assignment(), 6042 persisted=persisted or self._match_text_seq("PERSISTED"), 6043 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6044 ) 6045 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6046 elif ( 6047 kind 6048 and self._match(TokenType.ALIAS, advance=False) 6049 and ( 6050 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6051 or (self._next and self._next.token_type == TokenType.L_PAREN) 6052 ) 6053 ): 6054 self._advance() 6055 constraints.append( 6056 self.expression( 6057 exp.ColumnConstraint, 6058 kind=exp.ComputedColumnConstraint( 6059 this=self._parse_disjunction(), 6060 persisted=self._match_texts(("STORED", "VIRTUAL")) 6061 and 
self._prev.text.upper() == "STORED", 6062 ), 6063 ) 6064 ) 6065 6066 while True: 6067 constraint = self._parse_column_constraint() 6068 if not constraint: 6069 break 6070 constraints.append(constraint) 6071 6072 if not kind and not constraints: 6073 return this 6074 6075 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6076 6077 def _parse_auto_increment( 6078 self, 6079 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6080 start = None 6081 increment = None 6082 order = None 6083 6084 if self._match(TokenType.L_PAREN, advance=False): 6085 args = self._parse_wrapped_csv(self._parse_bitwise) 6086 start = seq_get(args, 0) 6087 increment = seq_get(args, 1) 6088 elif self._match_text_seq("START"): 6089 start = self._parse_bitwise() 6090 self._match_text_seq("INCREMENT") 6091 increment = self._parse_bitwise() 6092 if self._match_text_seq("ORDER"): 6093 order = True 6094 elif self._match_text_seq("NOORDER"): 6095 order = False 6096 6097 if start and increment: 6098 return exp.GeneratedAsIdentityColumnConstraint( 6099 start=start, increment=increment, this=False, order=order 6100 ) 6101 6102 return exp.AutoIncrementColumnConstraint() 6103 6104 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6105 if not self._match_text_seq("REFRESH"): 6106 self._retreat(self._index - 1) 6107 return None 6108 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6109 6110 def _parse_compress(self) -> exp.CompressColumnConstraint: 6111 if self._match(TokenType.L_PAREN, advance=False): 6112 return self.expression( 6113 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6114 ) 6115 6116 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6117 6118 def _parse_generated_as_identity( 6119 self, 6120 ) -> ( 6121 exp.GeneratedAsIdentityColumnConstraint 6122 | exp.ComputedColumnConstraint 6123 | 
exp.GeneratedAsRowColumnConstraint
    ):
        # GENERATED BY DEFAULT [ON NULL] is modeled with this=False;
        # GENERATED [ALWAYS] is modeled with this=True.
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        # GENERATED ... AS ROW START|END [HIDDEN] — NOTE(review): presumably the
        # temporal-table row-versioning syntax; confirm against the dialect docs.
        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        # Optional parenthesized options: sequence options for IDENTITY,
        # otherwise an arbitrary generation expression.
        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numbers inside the parens are shorthand for (start [, increment])
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        # Optional LENGTH keyword, then the length expression itself.
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # Called with the NOT token already consumed; dispatches on what follows.
        if self._match_text_seq("NULL"):
            return
self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an optional `CONSTRAINT <name>` prefix plus one column constraint.

        Returns an exp.ColumnConstraint when a known constraint keyword follows,
        otherwise just the parsed name (or whatever falsy value _match produced).
        """
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # Lookahead guard: a WITH followed by a procedure option must not be
        # swallowed as a column constraint.
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # Without a CONSTRAINT keyword, fall back to the unnamed schema-level
        # constraints; otherwise parse `CONSTRAINT <name> <constraints...>`.
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        # Greedily collect consecutive constraints (or function-shaped ones).
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        # Quoted identifiers are never constraint keywords; bail out early.
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        # UNIQUE [KEY | INDEX] [NULLS NOT DISTINCT] [(<schema>)] [USING <type>] ...
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings.

        Handles `ON <event> <action>` clauses plus any keywords listed in
        KEY_CONSTRAINT_OPTIONS; stops at the first token that is neither.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON names the event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        # REFERENCES <table> [<options>]; `match=False` assumes the keyword was
        # already consumed by the caller.
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference,
this=this, expressions=expressions, options=options) 6295 6296 def _parse_foreign_key(self) -> exp.ForeignKey: 6297 expressions = ( 6298 self._parse_wrapped_id_vars() 6299 if not self._match(TokenType.REFERENCES, advance=False) 6300 else None 6301 ) 6302 reference = self._parse_references() 6303 on_options = {} 6304 6305 while self._match(TokenType.ON): 6306 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6307 self.raise_error("Expected DELETE or UPDATE") 6308 6309 kind = self._prev.text.lower() 6310 6311 if self._match_text_seq("NO", "ACTION"): 6312 action = "NO ACTION" 6313 elif self._match(TokenType.SET): 6314 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6315 action = "SET " + self._prev.text.upper() 6316 else: 6317 self._advance() 6318 action = self._prev.text.upper() 6319 6320 on_options[kind] = action 6321 6322 return self.expression( 6323 exp.ForeignKey, 6324 expressions=expressions, 6325 reference=reference, 6326 options=self._parse_key_constraint_options(), 6327 **on_options, # type: ignore 6328 ) 6329 6330 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6331 return self._parse_ordered() or self._parse_field() 6332 6333 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6334 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6335 self._retreat(self._index - 1) 6336 return None 6337 6338 id_vars = self._parse_wrapped_id_vars() 6339 return self.expression( 6340 exp.PeriodForSystemTimeConstraint, 6341 this=seq_get(id_vars, 0), 6342 expression=seq_get(id_vars, 1), 6343 ) 6344 6345 def _parse_primary_key( 6346 self, wrapped_optional: bool = False, in_props: bool = False 6347 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6348 desc = ( 6349 self._match_set((TokenType.ASC, TokenType.DESC)) 6350 and self._prev.token_type == TokenType.DESC 6351 ) 6352 6353 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6354 return self.expression( 6355 
exp.PrimaryKeyColumnConstraint, 6356 desc=desc, 6357 options=self._parse_key_constraint_options(), 6358 ) 6359 6360 expressions = self._parse_wrapped_csv( 6361 self._parse_primary_key_part, optional=wrapped_optional 6362 ) 6363 6364 return self.expression( 6365 exp.PrimaryKey, 6366 expressions=expressions, 6367 include=self._parse_index_params(), 6368 options=self._parse_key_constraint_options(), 6369 ) 6370 6371 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6372 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6373 6374 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6375 """ 6376 Parses a datetime column in ODBC format. We parse the column into the corresponding 6377 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6378 same as we did for `DATE('yyyy-mm-dd')`. 6379 6380 Reference: 6381 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6382 """ 6383 self._match(TokenType.VAR) 6384 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6385 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6386 if not self._match(TokenType.R_BRACE): 6387 self.raise_error("Expected }") 6388 return expression 6389 6390 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6391 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6392 return this 6393 6394 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6395 map_token = seq_get(self._tokens, self._index - 2) 6396 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6397 else: 6398 parse_map = False 6399 6400 bracket_kind = self._prev.token_type 6401 if ( 6402 bracket_kind == TokenType.L_BRACE 6403 and self._curr 6404 and self._curr.token_type == TokenType.VAR 6405 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6406 ): 6407 return 
self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        # Require the matching closer for whichever bracket opened the construct.
        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            # `{...}` builds a struct literal; kv pairs become PropertyEQ nodes.
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            # A bare `[...]` with nothing to index is an array literal.
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            # `NAME[...]` may be a typed array constructor (e.g. ARRAY[...]),
            # otherwise it is a subscript on `this`.
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Normalize subscripts to the dialect's index offset.
            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        # Recurse to support chained subscripts like x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # `<this> : <expr>` slice syntax; returns `this` unchanged when no colon.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE <expr> WHEN ... form.
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # Special case: `... ELSE interval END` where END was consumed as part
            # of an interval expression — reinterpret it as the column "interval".
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # Function form: IF(cond, true [, false])
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            # Statement form: IF cond THEN true [ELSE false] END
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a command in some dialects.
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (<order>)]; NEXT was already consumed.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        # EXTRACT(<part> FROM <expr>) — also accepts a comma as the separator.
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not
self._match(TokenType.COMMA): 6527 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6528 6529 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6530 6531 def _parse_gap_fill(self) -> exp.GapFill: 6532 self._match(TokenType.TABLE) 6533 this = self._parse_table() 6534 6535 self._match(TokenType.COMMA) 6536 args = [this, *self._parse_csv(self._parse_lambda)] 6537 6538 gap_fill = exp.GapFill.from_arg_list(args) 6539 return self.validate_expression(gap_fill, args) 6540 6541 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6542 this = self._parse_assignment() 6543 6544 if not self._match(TokenType.ALIAS): 6545 if self._match(TokenType.COMMA): 6546 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6547 6548 self.raise_error("Expected AS after CAST") 6549 6550 fmt = None 6551 to = self._parse_types() 6552 6553 default = self._match(TokenType.DEFAULT) 6554 if default: 6555 default = self._parse_bitwise() 6556 self._match_text_seq("ON", "CONVERSION", "ERROR") 6557 6558 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6559 fmt_string = self._parse_string() 6560 fmt = self._parse_at_time_zone(fmt_string) 6561 6562 if not to: 6563 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6564 if to.this in exp.DataType.TEMPORAL_TYPES: 6565 this = self.expression( 6566 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6567 this=this, 6568 format=exp.Literal.string( 6569 format_time( 6570 fmt_string.this if fmt_string else "", 6571 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6572 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6573 ) 6574 ), 6575 safe=safe, 6576 ) 6577 6578 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6579 this.set("zone", fmt.args["zone"]) 6580 return this 6581 elif not to: 6582 self.raise_error("Expected TYPE after CAST") 6583 elif isinstance(to, exp.Identifier): 6584 to = 
exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6585 elif to.this == exp.DataType.Type.CHAR: 6586 if self._match(TokenType.CHARACTER_SET): 6587 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6588 6589 return self.build_cast( 6590 strict=strict, 6591 this=this, 6592 to=to, 6593 format=fmt, 6594 safe=safe, 6595 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6596 default=default, 6597 ) 6598 6599 def _parse_string_agg(self) -> exp.GroupConcat: 6600 if self._match(TokenType.DISTINCT): 6601 args: t.List[t.Optional[exp.Expression]] = [ 6602 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6603 ] 6604 if self._match(TokenType.COMMA): 6605 args.extend(self._parse_csv(self._parse_assignment)) 6606 else: 6607 args = self._parse_csv(self._parse_assignment) # type: ignore 6608 6609 if self._match_text_seq("ON", "OVERFLOW"): 6610 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6611 if self._match_text_seq("ERROR"): 6612 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6613 else: 6614 self._match_text_seq("TRUNCATE") 6615 on_overflow = self.expression( 6616 exp.OverflowTruncateBehavior, 6617 this=self._parse_string(), 6618 with_count=( 6619 self._match_text_seq("WITH", "COUNT") 6620 or not self._match_text_seq("WITHOUT", "COUNT") 6621 ), 6622 ) 6623 else: 6624 on_overflow = None 6625 6626 index = self._index 6627 if not self._match(TokenType.R_PAREN) and args: 6628 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6629 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 6630 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6631 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6632 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6633 6634 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6635 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6636 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6637 if not self._match_text_seq("WITHIN", "GROUP"): 6638 self._retreat(index) 6639 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6640 6641 # The corresponding match_r_paren will be called in parse_function (caller) 6642 self._match_l_paren() 6643 6644 return self.expression( 6645 exp.GroupConcat, 6646 this=self._parse_order(this=seq_get(args, 0)), 6647 separator=seq_get(args, 1), 6648 on_overflow=on_overflow, 6649 ) 6650 6651 def _parse_convert( 6652 self, strict: bool, safe: t.Optional[bool] = None 6653 ) -> t.Optional[exp.Expression]: 6654 this = self._parse_bitwise() 6655 6656 if self._match(TokenType.USING): 6657 to: t.Optional[exp.Expression] = self.expression( 6658 exp.CharacterSet, this=self._parse_var() 6659 ) 6660 elif self._match(TokenType.COMMA): 6661 to = self._parse_types() 6662 else: 6663 to = None 6664 6665 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6666 6667 def _parse_xml_table(self) -> exp.XMLTable: 6668 namespaces = None 6669 passing = None 6670 columns = None 6671 6672 if self._match_text_seq("XMLNAMESPACES", "("): 6673 namespaces = self._parse_xml_namespace() 6674 self._match_text_seq(")", ",") 6675 6676 this = self._parse_string() 6677 6678 if self._match_text_seq("PASSING"): 6679 # The BY VALUE keywords are optional and are provided for semantic clarity 6680 self._match_text_seq("BY", "VALUE") 6681 passing = 
self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        # Comma-separated list of namespaces; DEFAULT entries have no alias.
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        """Parse DECODE arguments.

        Fewer than 3 args is the charset-decoding form (exp.Decode); 3+ args is
        the Oracle-style conditional form (exp.DecodeCase).
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] <key> <sep> [VALUE] <value> — KEY/VALUE keywords are optional.
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` in exp.FormatJson when a trailing FORMAT JSON is present.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g.
JSON_EXISTS) 6738 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6739 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6740 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6741 else: 6742 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6743 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6744 6745 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6746 6747 if not empty and not error and not null: 6748 return None 6749 6750 return self.expression( 6751 exp.OnCondition, 6752 empty=empty, 6753 error=error, 6754 null=null, 6755 ) 6756 6757 def _parse_on_handling( 6758 self, on: str, *values: str 6759 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6760 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6761 for value in values: 6762 if self._match_text_seq(value, "ON", on): 6763 return f"{value} ON {on}" 6764 6765 index = self._index 6766 if self._match(TokenType.DEFAULT): 6767 default_value = self._parse_bitwise() 6768 if self._match_text_seq("ON", on): 6769 return default_value 6770 6771 self._retreat(index) 6772 6773 return None 6774 6775 @t.overload 6776 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6777 6778 @t.overload 6779 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 

    def _parse_json_object(self, agg=False):
        # JSON_OBJECT(...) / JSON_OBJECTAGG(...): either a single star argument
        # or a list of KEY..VALUE pairs, each optionally tagged FORMAT JSON.
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        # [NULL | ABSENT] ON NULL
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # [WITH | WITHOUT] UNIQUE [KEYS]
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        # Optional RETURNING <type> [FORMAT JSON] and ENCODING <var> clauses.
        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        # NESTED columns carry no name/type of their own, only a nested schema.
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<column defs>) — the parens are optional here.
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        # JSON_TABLE(<doc> [, <path>] [... ON ERROR] [... ON EMPTY] COLUMNS ...)
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and
self._parse_string() 6844 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6845 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6846 schema = self._parse_json_schema() 6847 6848 return exp.JSONTable( 6849 this=this, 6850 schema=schema, 6851 path=path, 6852 error_handling=error_handling, 6853 empty_handling=empty_handling, 6854 ) 6855 6856 def _parse_match_against(self) -> exp.MatchAgainst: 6857 expressions = self._parse_csv(self._parse_column) 6858 6859 self._match_text_seq(")", "AGAINST", "(") 6860 6861 this = self._parse_string() 6862 6863 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6864 modifier = "IN NATURAL LANGUAGE MODE" 6865 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6866 modifier = f"{modifier} WITH QUERY EXPANSION" 6867 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6868 modifier = "IN BOOLEAN MODE" 6869 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6870 modifier = "WITH QUERY EXPANSION" 6871 else: 6872 modifier = None 6873 6874 return self.expression( 6875 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6876 ) 6877 6878 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6879 def _parse_open_json(self) -> exp.OpenJSON: 6880 this = self._parse_bitwise() 6881 path = self._match(TokenType.COMMA) and self._parse_string() 6882 6883 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6884 this = self._parse_field(any_token=True) 6885 kind = self._parse_types() 6886 path = self._parse_string() 6887 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6888 6889 return self.expression( 6890 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6891 ) 6892 6893 expressions = None 6894 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6895 self._match_l_paren() 6896 expressions = self._parse_csv(_parse_open_json_column_def) 6897 6898 return self.expression(exp.OpenJSON, 
this=this, path=path, expressions=expressions) 6899 6900 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6901 args = self._parse_csv(self._parse_bitwise) 6902 6903 if self._match(TokenType.IN): 6904 return self.expression( 6905 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6906 ) 6907 6908 if haystack_first: 6909 haystack = seq_get(args, 0) 6910 needle = seq_get(args, 1) 6911 else: 6912 haystack = seq_get(args, 1) 6913 needle = seq_get(args, 0) 6914 6915 return self.expression( 6916 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6917 ) 6918 6919 def _parse_predict(self) -> exp.Predict: 6920 self._match_text_seq("MODEL") 6921 this = self._parse_table() 6922 6923 self._match(TokenType.COMMA) 6924 self._match_text_seq("TABLE") 6925 6926 return self.expression( 6927 exp.Predict, 6928 this=this, 6929 expression=self._parse_table(), 6930 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6931 ) 6932 6933 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6934 args = self._parse_csv(self._parse_table) 6935 return exp.JoinHint(this=func_name.upper(), expressions=args) 6936 6937 def _parse_substring(self) -> exp.Substring: 6938 # Postgres supports the form: substring(string [from int] [for int]) 6939 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6940 6941 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6942 6943 if self._match(TokenType.FROM): 6944 args.append(self._parse_bitwise()) 6945 if self._match(TokenType.FOR): 6946 if len(args) == 1: 6947 args.append(exp.Literal.number(1)) 6948 args.append(self._parse_bitwise()) 6949 6950 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6951 6952 def _parse_trim(self) -> exp.Trim: 6953 # https://www.w3resource.com/sql/character-functions/trim.php 6954 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6955 6956 position = None 
        collation = None
        expression = None

        # Optional LEADING/TRAILING/BOTH-style prefix
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <str>) lists the pattern first; swap so `this`
            # is always the trimmed string
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause into a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        # <name> AS (<window spec>)
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls when the keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing HAVING MAX/MIN <column> qualifier."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # Anything other than MIN (including no keyword) is treated as MAX
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of `this`: WITHIN GROUP, FILTER,
        IGNORE/RESPECT NULLS and the OVER (...) specification. With alias=True,
        parses a named-window definition (WINDOW x AS (...)) instead."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper from inside the
                # aggregate's arguments to around the whole aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER keyword: this is not a window expression
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> (reference to a named window)
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE [BETWEEN] <start> [AND <end>] [EXCLUDE ...]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus an
        optional PRECEDING/FOLLOWING side keyword."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        # explicit=True requires the AS keyword to treat what follows as an alias
        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, ...)
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and
column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name; with any_token=True almost any
        non-reserved token is accepted as a name."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            # A string token used as a name is treated as a quoted identifier
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Consume a string literal and return it as a quoted Identifier."""
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token, any token (any_token=True), or one of
        the given token types; upper=True normalizes the text to uppercase."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Advance past the current token unless it is reserved; return the
        consumed token or None."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Parser declined to produce a node: give the token back
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse `<keyword> (<exprs>)` or `<keyword> <expr>` (e.g. EXCEPT/REPLACE
        modifiers of SELECT *)."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a sep-separated list via parse_method, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments found before the separator to the previous item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: token type -> expression class."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run parse_method inside parentheses; optional=True allows them to be absent."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [<kind>] [TRANSACTION|WORK] [<modes>]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Each mode is a run of VAR tokens; modes are comma-separated
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT ...] [AND [NO] CHAIN].
        Note: AND [NO] CHAIN is consumed but only attached to Commit nodes."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self):
        """Parse [COLUMN] [IF NOT EXISTS] <column def>; backtrack and return
        None if what follows is not a column definition."""
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        """Parse an ADD [COLUMN] clause; the previous token must be ADD."""
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Default the kind to COLUMN when _parse_drop didn't set one
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints, columns, or
        partitions), returning a list of action expressions."""

        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            # One ADD <constraint|column|partition> item
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            # ADD [COLUMNS] (<defs>) or a bare comma-separated column-def list
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        # Fallback: ALTER COLUMN ... [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse Redshift ALTER DISTSTYLE ALL|EVEN|AUTO|KEY DISTKEY <col>."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse Redshift ALTER [COMPOUND] SORTKEY (<cols>) | AUTO | NONE."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind so _parse_drop_column sees the DROP token
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        """Parse RENAME [COLUMN] <old> TO <new> or RENAME TO <table>."""
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific forms of ALTER TABLE ... SET ..."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Hive-style: SET [SERDE <name>] [SERDEPROPERTIES (...)] etc.
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement, falling back to a generic Command when
        the target or actions cannot be fully parsed."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only a fully consumed statement becomes a structured Alter node
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        """Parse ANALYZE across dialects (DuckDB, Presto, StarRocks, ...)."""
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try
        # (continuation of _parse_analyze) Try the registered ANALYZE inner-expression
        # keyword parsers before falling back to parsing a plain table name.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            # Partition keyword we could not parse into a Partition node:
            # preserve the whole statement verbatim as a Command.
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            # self._index - 2 points back at the SYNC/ASYNC token just consumed
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        """Parse an ANALYZE ... <kind> STATISTICS clause; `kind` is the keyword just consumed."""
        this = None
        kind = self._prev.text.upper()
        # Optional DELTA modifier; the DELTA keyword itself becomes the option text.
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    # PERCENT, when present, qualifies the sample size
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        """Parse Oracle's VALIDATE REF UPDATE / VALIDATE STRUCTURE variants of ANALYZE."""
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
            expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        """Parse e.g. `FOR COLUMNS`; `self._prev` holds the leading keyword."""
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        """Parse `DELETE [SYSTEM] STATISTICS`."""
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        """Parse `LIST CHAINED ROWS [INTO ...]`."""
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse MySQL/StarRocks `UPDATE|DROP HISTOGRAM ON <cols> [WITH ...]`."""
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        """Parse `MERGE [INTO] <target> USING <source> ON <cond> WHEN ...`."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
    def _parse_when_matched(self) -> exp.Whens:
        """Parse the sequence of WHEN [NOT] MATCHED ... THEN ... clauses of a MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, None when absent
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Dispatch to a dialect-specific SHOW parser, else keep the raw statement."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` item of a SET statement."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment after all — rewind so the caller can try something else
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare identifiers on the RHS are treated as plain variables, not columns
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse `SET [GLOBAL|SESSION] TRANSACTION <characteristics, ...>`."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item via dialect parsers, defaulting to an assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a verbatim Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Match a (possibly multi-word) option from `options` and return it as a Var.

        Each entry maps a leading keyword to its allowed keyword continuations; an
        empty continuation sequence means the leading keyword alone is a valid match.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; None means the keyword itself is unknown
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)
    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement and preserve it verbatim as a Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split off the leading keyword (e.g. "SHOW") from the remainder
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property `this (kind (key value, ...))`."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                # Stop once neither a key nor a value could be parsed
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range `this (MIN x MAX y)` or `this (y)` (MIN defaults to 0)."""
        # NOTE(review): the locals `min`/`max` shadow the builtins; harmless here
        # since the builtins are not used in this scope, but worth renaming.
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `<expr> FOR <x> IN <iter> [IF <cond>]`; rewinds when IN is missing."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # index - 1 also un-consumes the FOR token matched by the caller
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string, either a tokenized one or a $tag$ ... $tag$ span."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The tag tokens must be adjacent (no whitespace between them)
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk the keyword trie over upcoming tokens to find a registered sub-parser."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No parser matched — rewind to where we started
        self._retreat(index)
        return None
    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Return True when the current token is `token_type` (advancing by default)."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            # Attach any pending comments to `expression`, when given
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> t.Optional[bool]:
        """Return True when the current token's type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Return True when the next two tokens match the given pair of types."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token, raising a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token, raising a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> t.Optional[bool]:
        """Return True when the current (non-string) token's upper-cased text is in `texts`."""
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts: str, advance: bool = True) -> t.Optional[bool]:
        """Return True when the upcoming tokens spell out `texts`, else rewind fully."""
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite lambda-parameter column references inside `node`.

        Columns named like a lambda parameter are replaced by the bare identifier
        (or dotted path), wrapped in a Cast when the parameter carried a type.
        """
        if not node:
            return node

        # Maps parameter name -> declared type (False when untyped)
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    # Replace at the outermost Dot so the full chain is rewritten
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] ...; disambiguates from TRUNCATE(x, y)."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )
    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by `WITH <operator>`."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse `[=] ( option [, ...] )`, returning the parsed option properties."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option/value parameters of a COPY statement."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse COPY credential clauses (Snowflake / Redshift style)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a COPY file location; dialects may override this hook."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse `COPY [INTO] <target> FROM|TO <files> ...` with credentials/params."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY FROM (loading), False for COPY TO (unloading)
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse `NORMALIZE(string [, form])`."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )
    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        """Parse CEIL/FLOOR arguments, including the optional `TO <unit>` suffix."""
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse `*` together with its EXCEPT/EXCLUDE/REPLACE/RENAME modifiers."""
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one privilege of a GRANT, e.g. `SELECT (col1, col2)`."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a grantee, optionally prefixed by ROLE or GROUP."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse GRANT; falls back to a verbatim Command for unsupported shapes."""
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parse `OVERLAY(x PLACING y FROM a [FOR b])`."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        """Parse MAX_BY/MIN_BY style arguments, with an optional leading DISTINCT."""
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        """Build an Identifier from `token` (default: previous token), with positions."""
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        """Wrap `query` into a CTE and select `expressions` from it (pipe syntax helper)."""
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            # Generate a fresh synthetic CTE name
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        # Hoist the inner query's WITH clause onto the new outer select
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        """Handle the `|> SELECT ...` pipe operator."""
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        """Handle `|> LIMIT ... [OFFSET ...]`; keeps the tightest limit, sums offsets."""
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query
    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        """Parse one AGGREGATE field, optionally aliased and/or ordered."""
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            # `GROUP AND ORDER BY` follows — stop before consuming it
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        """Attach AGGREGATE fields to `query` as projections, GROUP BY and ORDER BY."""
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    # Order by the alias, not the full aliased expression
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        """Handle `|> AGGREGATE ... [GROUP [AND ORDER] BY ...]`."""
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handle `|> UNION/EXCEPT/INTERSECT (query), ...`."""
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        # Detach `query` from the parsed set op; it gets re-wrapped as a CTE below
        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handle `|> JOIN ...`."""
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        """Handle `|> PIVOT ...` / `|> UNPIVOT ...`."""
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        """Handle `|> EXTEND <exprs>`: appends projections after `*`."""
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        """Handle `|> TABLESAMPLE ...`, attaching the sample to the underlying table."""
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Apply consecutive `|>` pipe operators to `query`."""
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        """Parse one DECLARE item: names, optional type and optional DEFAULT."""
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        """Parse DECLARE; falls back to a verbatim Command on leftover tokens."""
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        """Build a Cast (strict) or TryCast node from the given args."""
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)
The Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """Initialize the parser's configuration and reset its token state.

        Args:
            error_level: The desired error level, defaulting to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of query context shown in errors.
            max_errors: Maximum number of errors included in a raised ParseError.
            dialect: The dialect (name, class or instance) to parse with.
        """
        # Imported locally, presumably to avoid a circular import — TODO confirm
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        # Unbound method is passed so _parse can re-invoke it per statement
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If none of the requested types could be parsed.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type it was attempted against
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # All attempts failed — surface every collected error, chained to the last one
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    # The two levels are mutually exclusive, so branch order doesn't matter.
    if self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )

    if self.error_level == ErrorLevel.WARN:
        for recorded_error in self.errors:
            logger.error(str(recorded_error))
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    # Prefer an explicit token, then the current/previous cursor position, then a dummy.
    anchor = token or self._curr or self._prev or Token.string("")

    begin = anchor.start
    finish = anchor.end + 1
    context_size = self.error_message_context

    # Slice the original SQL around the offending token; the token itself is underlined
    # via ANSI escape codes in the rendered message.
    before = self.sql[max(begin - context_size, 0) : begin]
    marked = self.sql[begin:finish]
    after = self.sql[finish : finish + context_size]

    error = ParseError.new(
        f"{message}. Line {anchor.line}, Col: {anchor.col}.\n"
        f"  {before}\033[4m{marked}\033[0m{after}",
        description=message,
        line=anchor.line,
        col=anchor.col,
        start_context=before,
        highlight=marked,
        end_context=after,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    node = exp_class(**kwargs)

    # Explicit comments win; otherwise pick up any pending comments tracked by the parser.
    if comments:
        node.add_comments(comments)
    else:
        self._add_comments(node)

    return self.validate_expression(node)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    # Skip validation entirely when errors are being ignored.
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for error_message in expression.error_messages(args):
        self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
def parse_set_operation(
    self, this: t.Optional[exp.Expression], consume_pipe: bool = False
) -> t.Optional[exp.Expression]:
    """
    Parses a set operation (UNION / EXCEPT / INTERSECT) whose left-hand side is `this`.

    Args:
        this: The already-parsed left operand of the set operation.
        consume_pipe: Forwarded to `_parse_select` for the right operand
            (presumably pipe-syntax support — confirm against caller).

    Returns:
        The set-operation expression, or None if no set-operation keyword follows
        (in which case the token cursor is restored to where it started).
    """
    # Remember the cursor so we can back out cleanly if this isn't a set operation.
    start = self._index
    # Optional join-style side/kind modifiers may precede the operator
    # (dialect-specific syntax — the helper is shared with join parsing).
    _, side_token, kind_token = self._parse_join_parts()

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        # Not a set operation: undo any side/kind consumption and bail.
        self._retreat(start)
        return None

    token_type = self._prev.token_type

    # Map the matched token to its Expression class.
    if token_type == TokenType.UNION:
        operation: t.Type[exp.SetOperation] = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except
    else:
        operation = exp.Intersect

    # Keep comments attached to the operator token so they survive round-tripping.
    comments = self._prev.comments

    # Explicit DISTINCT/ALL wins; otherwise fall back to the dialect's per-operation
    # default. A None default means the dialect requires an explicit quantifier.
    if self._match(TokenType.DISTINCT):
        distinct: t.Optional[bool] = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    # BY NAME / STRICT CORRESPONDING / CORRESPONDING all select column-name matching
    # instead of positional matching.
    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        # Bare CORRESPONDING with no explicit modifiers defaults kind to INNER
        # (dialect-specific semantics — confirm which dialects use this).
        if not side and not kind:
            kind = "INNER"

    on_column_list = None
    # Name-matching variants may restrict the matched columns via ON/BY (col, ...).
    if by_name and self._match_texts(("ON", "BY")):
        on_column_list = self._parse_wrapped_csv(self._parse_column)

    # Right operand; parse_set_operation=False because chaining is handled here.
    expression = self._parse_select(
        nested=True, parse_set_operation=False, consume_pipe=consume_pipe
    )

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )