sqlglot.dialects.dialect
1from __future__ import annotations 2 3import logging 4import typing as t 5from enum import Enum, auto 6from functools import reduce 7 8from sqlglot import exp 9from sqlglot.errors import ParseError 10from sqlglot.generator import Generator 11from sqlglot.helper import AutoName, flatten, is_int, seq_get 12from sqlglot.jsonpath import parse as parse_json_path 13from sqlglot.parser import Parser 14from sqlglot.time import TIMEZONES, format_time 15from sqlglot.tokens import Token, Tokenizer, TokenType 16from sqlglot.trie import new_trie 17 18DATE_ADD_OR_DIFF = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateDiff, exp.TsOrDsDiff] 19DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub] 20JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar] 21 22 23if t.TYPE_CHECKING: 24 from sqlglot._typing import B, E, F 25 26logger = logging.getLogger("sqlglot") 27 28UNESCAPED_SEQUENCES = { 29 "\\a": "\a", 30 "\\b": "\b", 31 "\\f": "\f", 32 "\\n": "\n", 33 "\\r": "\r", 34 "\\t": "\t", 35 "\\v": "\v", 36 "\\\\": "\\", 37} 38 39 40class Dialects(str, Enum): 41 """Dialects supported by SQLGLot.""" 42 43 DIALECT = "" 44 45 ATHENA = "athena" 46 BIGQUERY = "bigquery" 47 CLICKHOUSE = "clickhouse" 48 DATABRICKS = "databricks" 49 DORIS = "doris" 50 DRILL = "drill" 51 DUCKDB = "duckdb" 52 HIVE = "hive" 53 MATERIALIZE = "materialize" 54 MYSQL = "mysql" 55 ORACLE = "oracle" 56 POSTGRES = "postgres" 57 PRESTO = "presto" 58 PRQL = "prql" 59 REDSHIFT = "redshift" 60 RISINGWAVE = "risingwave" 61 SNOWFLAKE = "snowflake" 62 SPARK = "spark" 63 SPARK2 = "spark2" 64 SQLITE = "sqlite" 65 STARROCKS = "starrocks" 66 TABLEAU = "tableau" 67 TERADATA = "teradata" 68 TRINO = "trino" 69 TSQL = "tsql" 70 71 72class NormalizationStrategy(str, AutoName): 73 """Specifies the strategy according to which identifiers should be normalized.""" 74 75 LOWERCASE = auto() 76 """Unquoted identifiers are lowercased.""" 77 78 UPPERCASE = auto() 79 """Unquoted identifiers are uppercased.""" 80 81 CASE_SENSITIVE = auto() 82 """Always case-sensitive, regardless of quotes.""" 83 84 CASE_INSENSITIVE = auto() 85 """Always case-insensitive, regardless of quotes.""" 86 87 88class _Dialect(type): 89 classes: t.Dict[str, t.Type[Dialect]] = {} 90 91 def __eq__(cls, other: t.Any) -> bool: 92 if cls is other: 93 return True 94 if isinstance(other, str): 95 return cls is cls.get(other) 96 if isinstance(other, Dialect): 97 return cls is type(other) 98 99 return False 100 101 def __hash__(cls) -> int: 102 return hash(cls.__name__.lower()) 103 104 @classmethod 105 def __getitem__(cls, key: str) -> t.Type[Dialect]: 106 return cls.classes[key] 107 108 @classmethod 109 def get( 110 cls, key: str, default: t.Optional[t.Type[Dialect]] = None 111 ) -> t.Optional[t.Type[Dialect]]: 112 return cls.classes.get(key, default) 113 114 def __new__(cls, clsname, bases, attrs): 115 klass = super().__new__(cls, clsname, bases, attrs) 116 enum = Dialects.__members__.get(clsname.upper()) 117 cls.classes[enum.value if enum is not None else clsname.lower()] = klass 118 119 klass.TIME_TRIE = new_trie(klass.TIME_MAPPING) 120 klass.FORMAT_TRIE = ( 121 new_trie(klass.FORMAT_MAPPING) if klass.FORMAT_MAPPING else klass.TIME_TRIE 122 ) 123 klass.INVERSE_TIME_MAPPING = {v: k for k, v in klass.TIME_MAPPING.items()} 124 klass.INVERSE_TIME_TRIE = new_trie(klass.INVERSE_TIME_MAPPING) 125 126 base = seq_get(bases, 0) 127 base_tokenizer = (getattr(base, "tokenizer_class", Tokenizer),) 128 base_parser = (getattr(base, "parser_class", Parser),) 129 base_generator = (getattr(base, "generator_class", Generator),) 130 131 klass.tokenizer_class = klass.__dict__.get( 132 "Tokenizer", type("Tokenizer", base_tokenizer, {}) 133 ) 134 klass.parser_class = klass.__dict__.get("Parser", type("Parser", base_parser, {})) 135 klass.generator_class = klass.__dict__.get( 136 "Generator", type("Generator", base_generator, {}) 137 ) 138 139 klass.QUOTE_START, klass.QUOTE_END = list(klass.tokenizer_class._QUOTES.items())[0] 140 klass.IDENTIFIER_START, klass.IDENTIFIER_END = list( 141 klass.tokenizer_class._IDENTIFIERS.items() 142 )[0] 143 144 def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[str]]: 145 return next( 146 ( 147 (s, e) 148 for s, (e, t) in klass.tokenizer_class._FORMAT_STRINGS.items() 149 if t == token_type 150 ), 151 (None, None), 152 ) 153 154 klass.BIT_START, klass.BIT_END = get_start_end(TokenType.BIT_STRING) 155 klass.HEX_START, klass.HEX_END = get_start_end(TokenType.HEX_STRING) 156 klass.BYTE_START, klass.BYTE_END = get_start_end(TokenType.BYTE_STRING) 157 klass.UNICODE_START, klass.UNICODE_END = get_start_end(TokenType.UNICODE_STRING) 158 159 if "\\" in klass.tokenizer_class.STRING_ESCAPES: 160 klass.UNESCAPED_SEQUENCES = { 161 **UNESCAPED_SEQUENCES, 162 **klass.UNESCAPED_SEQUENCES, 163 } 164 165 klass.ESCAPED_SEQUENCES = {v: k for k, v in klass.UNESCAPED_SEQUENCES.items()} 166 167 klass.SUPPORTS_COLUMN_JOIN_MARKS = "(+)" in klass.tokenizer_class.KEYWORDS 168 169 if enum not in ("", "bigquery"): 170 klass.generator_class.SELECT_KINDS = () 171 172 if enum not in ("", "athena", "presto", "trino"): 173 klass.generator_class.TRY_SUPPORTED = False 174 klass.generator_class.SUPPORTS_UESCAPE = False 175 176 if enum not in ("", "databricks", "hive", "spark", "spark2"): 177 modifier_transforms = klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS.copy() 178 for modifier in ("cluster", "distribute", "sort"): 179 modifier_transforms.pop(modifier, None) 180 181 klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS = modifier_transforms 182 183 if enum not in ("", "doris", "mysql"): 184 klass.parser_class.ID_VAR_TOKENS = klass.parser_class.ID_VAR_TOKENS | { 185 TokenType.STRAIGHT_JOIN, 186 } 187 klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | { 188 TokenType.STRAIGHT_JOIN, 189 } 190 191 if not klass.SUPPORTS_SEMI_ANTI_JOIN: 192 klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | { 193 TokenType.ANTI, 194 TokenType.SEMI, 195 } 196 197 return klass 198 199 200class Dialect(metaclass=_Dialect): 201 INDEX_OFFSET = 0 202 """The base index offset for arrays.""" 203 204 WEEK_OFFSET = 0 205 """First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.""" 206 207 UNNEST_COLUMN_ONLY = False 208 """Whether `UNNEST` table aliases are treated as column aliases.""" 209 210 ALIAS_POST_TABLESAMPLE = False 211 """Whether the table alias comes after tablesample.""" 212 213 TABLESAMPLE_SIZE_IS_PERCENT = False 214 """Whether a size in the table sample clause represents percentage.""" 215 216 NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE 217 """Specifies the strategy according to which identifiers should be normalized.""" 218 219 IDENTIFIERS_CAN_START_WITH_DIGIT = False 220 """Whether an unquoted identifier can start with a digit.""" 221 222 DPIPE_IS_STRING_CONCAT = True 223 """Whether the DPIPE token (`||`) is a string concatenation operator.""" 224 225 STRICT_STRING_CONCAT = False 226 """Whether `CONCAT`'s arguments must be strings.""" 227 228 SUPPORTS_USER_DEFINED_TYPES = True 229 """Whether user-defined data types are supported.""" 230 231 SUPPORTS_SEMI_ANTI_JOIN = True 232 """Whether `SEMI` or `ANTI` joins are supported.""" 233 234 SUPPORTS_COLUMN_JOIN_MARKS = False 235 """Whether the old-style outer join (+) syntax is supported.""" 236 237 COPY_PARAMS_ARE_CSV = True 238 """Separator of COPY statement parameters.""" 239 240 NORMALIZE_FUNCTIONS: bool | str = "upper" 241 """ 242 Determines how function names are going to be normalized. 243 Possible values: 244 "upper" or True: Convert names to uppercase. 245 "lower": Convert names to lowercase. 246 False: Disables function name normalization. 247 """ 248 249 LOG_BASE_FIRST: t.Optional[bool] = True 250 """ 251 Whether the base comes first in the `LOG` function. 252 Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`) 253 """ 254 255 NULL_ORDERING = "nulls_are_small" 256 """ 257 Default `NULL` ordering method to use if not explicitly set. 258 Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"` 259 """ 260 261 TYPED_DIVISION = False 262 """ 263 Whether the behavior of `a / b` depends on the types of `a` and `b`. 264 False means `a / b` is always float division. 265 True means `a / b` is integer division if both `a` and `b` are integers. 266 """ 267 268 SAFE_DIVISION = False 269 """Whether division by zero throws an error (`False`) or returns NULL (`True`).""" 270 271 CONCAT_COALESCE = False 272 """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string.""" 273 274 HEX_LOWERCASE = False 275 """Whether the `HEX` function returns a lowercase hexadecimal string.""" 276 277 DATE_FORMAT = "'%Y-%m-%d'" 278 DATEINT_FORMAT = "'%Y%m%d'" 279 TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'" 280 281 TIME_MAPPING: t.Dict[str, str] = {} 282 """Associates this dialect's time formats with their equivalent Python `strftime` formats.""" 283 284 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time 285 # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE 286 FORMAT_MAPPING: t.Dict[str, str] = {} 287 """ 288 Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`. 289 If empty, the corresponding trie will be constructed off of `TIME_MAPPING`. 290 """ 291 292 UNESCAPED_SEQUENCES: t.Dict[str, str] = {} 293 """Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`).""" 294 295 PSEUDOCOLUMNS: t.Set[str] = set() 296 """ 297 Columns that are auto-generated by the engine corresponding to this dialect. 298 For example, such columns may be excluded from `SELECT *` queries. 299 """ 300 301 PREFER_CTE_ALIAS_COLUMN = False 302 """ 303 Some dialects, such as Snowflake, allow you to reference a CTE column alias in the 304 HAVING clause of the CTE. This flag will cause the CTE alias columns to override 305 any projection aliases in the subquery. 306 307 For example, 308 WITH y(c) AS ( 309 SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 310 ) SELECT c FROM y; 311 312 will be rewritten as 313 314 WITH y(c) AS ( 315 SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0 316 ) SELECT c FROM y; 317 """ 318 319 COPY_PARAMS_ARE_CSV = True 320 """ 321 Whether COPY statement parameters are separated by comma or whitespace 322 """ 323 324 # --- Autofilled --- 325 326 tokenizer_class = Tokenizer 327 parser_class = Parser 328 generator_class = Generator 329 330 # A trie of the time_mapping keys 331 TIME_TRIE: t.Dict = {} 332 FORMAT_TRIE: t.Dict = {} 333 334 INVERSE_TIME_MAPPING: t.Dict[str, str] = {} 335 INVERSE_TIME_TRIE: t.Dict = {} 336 337 ESCAPED_SEQUENCES: t.Dict[str, str] = {} 338 339 # Delimiters for string literals and identifiers 340 QUOTE_START = "'" 341 QUOTE_END = "'" 342 IDENTIFIER_START = '"' 343 IDENTIFIER_END = '"' 344 345 # Delimiters for bit, hex, byte and unicode literals 346 BIT_START: t.Optional[str] = None 347 BIT_END: t.Optional[str] = None 348 HEX_START: t.Optional[str] = None 349 HEX_END: t.Optional[str] = None 350 BYTE_START: t.Optional[str] = None 351 BYTE_END: t.Optional[str] = None 352 UNICODE_START: t.Optional[str] = None 353 UNICODE_END: t.Optional[str] = None 354 355 DATE_PART_MAPPING = { 356 "Y": "YEAR", 357 "YY": "YEAR", 358 "YYY": "YEAR", 359 "YYYY": "YEAR", 360 "YR": "YEAR", 361 "YEARS": "YEAR", 362 "YRS": "YEAR", 363 "MM": "MONTH", 364 "MON": "MONTH", 365 "MONS": "MONTH", 366 "MONTHS": "MONTH", 367 "D": "DAY", 368 "DD": "DAY", 369 "DAYS": "DAY", 370 "DAYOFMONTH": "DAY", 371 "DAY OF WEEK": "DAYOFWEEK", 372 "WEEKDAY": "DAYOFWEEK", 373 "DOW": "DAYOFWEEK", 374 "DW": "DAYOFWEEK", 375 "WEEKDAY_ISO": "DAYOFWEEKISO", 376 "DOW_ISO": "DAYOFWEEKISO", 377 "DW_ISO": "DAYOFWEEKISO", 378 "DAY OF YEAR": "DAYOFYEAR", 379 "DOY": "DAYOFYEAR", 380 "DY": "DAYOFYEAR", 381 "W": "WEEK", 382 "WK": "WEEK", 383 "WEEKOFYEAR": "WEEK", 384 "WOY": "WEEK", 385 "WY": "WEEK", 386 "WEEK_ISO": "WEEKISO", 387 "WEEKOFYEARISO": "WEEKISO", 388 "WEEKOFYEAR_ISO": "WEEKISO", 389 "Q": "QUARTER", 390 "QTR": "QUARTER", 391 "QTRS": "QUARTER", 392 "QUARTERS": "QUARTER", 393 "H": "HOUR", 394 "HH": "HOUR", 395 "HR": "HOUR", 396 "HOURS": "HOUR", 397 "HRS": "HOUR", 398 "M": "MINUTE", 399 "MI": "MINUTE", 400 "MIN": "MINUTE", 401 "MINUTES": "MINUTE", 402 "MINS": "MINUTE", 403 "S": "SECOND", 404 "SEC": "SECOND", 405 "SECONDS": "SECOND", 406 "SECS": "SECOND", 407 "MS": "MILLISECOND", 408 "MSEC": "MILLISECOND", 409 "MSECS": "MILLISECOND", 410 "MSECOND": "MILLISECOND", 411 "MSECONDS": "MILLISECOND", 412 "MILLISEC": "MILLISECOND", 413 "MILLISECS": "MILLISECOND", 414 "MILLISECON": "MILLISECOND", 415 "MILLISECONDS": "MILLISECOND", 416 "US": "MICROSECOND", 417 "USEC": "MICROSECOND", 418 "USECS": "MICROSECOND", 419 "MICROSEC": "MICROSECOND", 420 "MICROSECS": "MICROSECOND", 421 "USECOND": "MICROSECOND", 422 "USECONDS": "MICROSECOND", 423 "MICROSECONDS": "MICROSECOND", 424 "NS": "NANOSECOND", 425 "NSEC": "NANOSECOND", 426 "NANOSEC": "NANOSECOND", 427 "NSECOND": "NANOSECOND", 428 "NSECONDS": "NANOSECOND", 429 "NANOSECS": "NANOSECOND", 430 "EPOCH_SECOND": "EPOCH", 431 "EPOCH_SECONDS": "EPOCH", 432 "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND", 433 "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND", 434 "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND", 435 "TZH": "TIMEZONE_HOUR", 436 "TZM": "TIMEZONE_MINUTE", 437 "DEC": "DECADE", 438 "DECS": "DECADE", 439 "DECADES": "DECADE", 440 "MIL": "MILLENIUM", 441 "MILS": "MILLENIUM", 442 "MILLENIA": "MILLENIUM", 443 "C": "CENTURY", 444 "CENT": "CENTURY", 445 "CENTS": "CENTURY", 446 "CENTURIES": "CENTURY", 447 } 448 449 @classmethod 450 def get_or_raise(cls, dialect: DialectType) -> Dialect: 451 """ 452 Look up a dialect in the global dialect registry and return it if it exists. 453 454 Args: 455 dialect: The target dialect. If this is a string, it can be optionally followed by 456 additional key-value pairs that are separated by commas and are used to specify 457 dialect settings, such as whether the dialect's identifiers are case-sensitive. 458 459 Example: 460 >>> dialect = dialect_class = get_or_raise("duckdb") 461 >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive") 462 463 Returns: 464 The corresponding Dialect instance. 465 """ 466 467 if not dialect: 468 return cls() 469 if isinstance(dialect, _Dialect): 470 return dialect() 471 if isinstance(dialect, Dialect): 472 return dialect 473 if isinstance(dialect, str): 474 try: 475 dialect_name, *kv_pairs = dialect.split(",") 476 kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)} 477 except ValueError: 478 raise ValueError( 479 f"Invalid dialect format: '{dialect}'. " 480 "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'." 481 ) 482 483 result = cls.get(dialect_name.strip()) 484 if not result: 485 from difflib import get_close_matches 486 487 similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or "" 488 if similar: 489 similar = f" Did you mean {similar}?" 490 491 raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}") 492 493 return result(**kwargs) 494 495 raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.") 496 497 @classmethod 498 def format_time( 499 cls, expression: t.Optional[str | exp.Expression] 500 ) -> t.Optional[exp.Expression]: 501 """Converts a time format in this dialect to its equivalent Python `strftime` format.""" 502 if isinstance(expression, str): 503 return exp.Literal.string( 504 # the time formats are quoted 505 format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE) 506 ) 507 508 if expression and expression.is_string: 509 return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE)) 510 511 return expression 512 513 def __init__(self, **kwargs) -> None: 514 normalization_strategy = kwargs.pop("normalization_strategy", None) 515 516 if normalization_strategy is None: 517 self.normalization_strategy = self.NORMALIZATION_STRATEGY 518 else: 519 self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper()) 520 521 self.settings = kwargs 522 523 def __eq__(self, other: t.Any) -> bool: 524 # Does not currently take dialect state into account 525 return type(self) == other 526 527 def __hash__(self) -> int: 528 # Does not currently take dialect state into account 529 return hash(type(self)) 530 531 def normalize_identifier(self, expression: E) -> E: 532 """ 533 Transforms an identifier in a way that resembles how it'd be resolved by this dialect. 534 535 For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it 536 lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so 537 it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, 538 and so any normalization would be prohibited in order to avoid "breaking" the identifier. 539 540 There are also dialects like Spark, which are case-insensitive even when quotes are 541 present, and dialects like MySQL, whose resolution rules match those employed by the 542 underlying operating system, for example they may always be case-sensitive in Linux. 543 544 Finally, the normalization behavior of some engines can even be controlled through flags, 545 like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier. 546 547 SQLGlot aims to understand and handle all of these different behaviors gracefully, so 548 that it can analyze queries in the optimizer and successfully capture their semantics. 549 """ 550 if ( 551 isinstance(expression, exp.Identifier) 552 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 553 and ( 554 not expression.quoted 555 or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 556 ) 557 ): 558 expression.set( 559 "this", 560 ( 561 expression.this.upper() 562 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 563 else expression.this.lower() 564 ), 565 ) 566 567 return expression 568 569 def case_sensitive(self, text: str) -> bool: 570 """Checks if text contains any case sensitive characters, based on the dialect's rules.""" 571 if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE: 572 return False 573 574 unsafe = ( 575 str.islower 576 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 577 else str.isupper 578 ) 579 return any(unsafe(char) for char in text) 580 581 def can_identify(self, text: str, identify: str | bool = "safe") -> bool: 582 """Checks if text can be identified given an identify option. 583 584 Args: 585 text: The text to check. 586 identify: 587 `"always"` or `True`: Always returns `True`. 588 `"safe"`: Only returns `True` if the identifier is case-insensitive. 589 590 Returns: 591 Whether the given text can be identified. 592 """ 593 if identify is True or identify == "always": 594 return True 595 596 if identify == "safe": 597 return not self.case_sensitive(text) 598 599 return False 600 601 def quote_identifier(self, expression: E, identify: bool = True) -> E: 602 """ 603 Adds quotes to a given identifier. 604 605 Args: 606 expression: The expression of interest. If it's not an `Identifier`, this method is a no-op. 607 identify: If set to `False`, the quotes will only be added if the identifier is deemed 608 "unsafe", with respect to its characters and this dialect's normalization strategy. 609 """ 610 if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func): 611 name = expression.this 612 expression.set( 613 "quoted", 614 identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name), 615 ) 616 617 return expression 618 619 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 620 if isinstance(path, exp.Literal): 621 path_text = path.name 622 if path.is_number: 623 path_text = f"[{path_text}]" 624 625 try: 626 return parse_json_path(path_text) 627 except ParseError as e: 628 logger.warning(f"Invalid JSON path syntax. {str(e)}") 629 630 return path 631 632 def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]: 633 return self.parser(**opts).parse(self.tokenize(sql), sql) 634 635 def parse_into( 636 self, expression_type: exp.IntoType, sql: str, **opts 637 ) -> t.List[t.Optional[exp.Expression]]: 638 return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql) 639 640 def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str: 641 return self.generator(**opts).generate(expression, copy=copy) 642 643 def transpile(self, sql: str, **opts) -> t.List[str]: 644 return [ 645 self.generate(expression, copy=False, **opts) if expression else "" 646 for expression in self.parse(sql) 647 ] 648 649 def tokenize(self, sql: str) -> t.List[Token]: 650 return self.tokenizer.tokenize(sql) 651 652 @property 653 def tokenizer(self) -> Tokenizer: 654 if not hasattr(self, "_tokenizer"): 655 self._tokenizer = self.tokenizer_class(dialect=self) 656 return self._tokenizer 657 658 def parser(self, **opts) -> Parser: 659 return self.parser_class(dialect=self, **opts) 660 661 def generator(self, **opts) -> Generator: 662 return self.generator_class(dialect=self, **opts) 663 664 665DialectType = t.Union[str, Dialect, t.Type[Dialect], None] 666 667 668def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]: 669 return lambda self, expression: self.func(name, *flatten(expression.args.values())) 670 671 672def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str: 673 if expression.args.get("accuracy"): 674 self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy") 675 return self.func("APPROX_COUNT_DISTINCT", expression.this) 676 677 678def if_sql( 679 name: str = "IF", false_value: t.Optional[exp.Expression | str] = None 680) -> t.Callable[[Generator, exp.If], str]: 681 def _if_sql(self: Generator, expression: exp.If) -> str: 682 return self.func( 683 name, 684 expression.this, 685 expression.args.get("true"), 686 expression.args.get("false") or false_value, 687 ) 688 689 return _if_sql 690 691 692def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str: 693 this = expression.this 694 if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string: 695 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 696 697 return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>") 698 699 700def inline_array_sql(self: Generator, expression: exp.Array) -> str: 701 return f"[{self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)}]" 702 703 704def inline_array_unless_query(self: Generator, expression: exp.Array) -> str: 705 elem = seq_get(expression.expressions, 0) 706 if isinstance(elem, exp.Expression) and elem.find(exp.Query): 707 return self.func("ARRAY", elem) 708 return inline_array_sql(self, expression) 709 710 711def no_ilike_sql(self: Generator, expression: exp.ILike) -> str: 712 return self.like_sql( 713 exp.Like( 714 this=exp.Lower(this=expression.this), expression=exp.Lower(this=expression.expression) 715 ) 716 ) 717 718 719def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str: 720 zone = self.sql(expression, "this") 721 return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE" 722 723 724def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str: 725 if expression.args.get("recursive"): 726 self.unsupported("Recursive CTEs are unsupported") 727 expression.args["recursive"] = False 728 return self.with_sql(expression) 729 730 731def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str: 732 n = self.sql(expression, "this") 733 d = self.sql(expression, "expression") 734 return f"IF(({d}) <> 0, ({n}) / ({d}), NULL)" 735 736 737def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str: 738 self.unsupported("TABLESAMPLE unsupported") 739 return self.sql(expression.this) 740 741 742def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str: 743 self.unsupported("PIVOT unsupported") 744 return "" 745 746 747def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str: 748 return self.cast_sql(expression) 749 750 751def no_comment_column_constraint_sql( 752 self: Generator, expression: exp.CommentColumnConstraint 753) -> str: 754 self.unsupported("CommentColumnConstraint unsupported") 755 return "" 756 757 758def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str: 759 self.unsupported("MAP_FROM_ENTRIES unsupported") 760 return "" 761 762 763def str_position_sql( 764 self: Generator, expression: exp.StrPosition, generate_instance: bool = False 765) -> str: 766 this = self.sql(expression, "this") 767 substr = self.sql(expression, "substr") 768 position = self.sql(expression, "position") 769 instance = expression.args.get("instance") if generate_instance else None 770 position_offset = "" 771 772 if position: 773 # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects 774 this = self.func("SUBSTR", this, position) 775 position_offset = f" + {position} - 1" 776 777 return self.func("STRPOS", this, substr, instance) + position_offset 778 779 780def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str: 781 return ( 782 f"{self.sql(expression, 'this')}.{self.sql(exp.to_identifier(expression.expression.name))}" 783 ) 784 785 786def var_map_sql( 787 self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP" 788) -> str: 789 keys = expression.args["keys"] 790 values = expression.args["values"] 791 792 if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array): 793 self.unsupported("Cannot convert array columns into map.") 794 return self.func(map_func_name, keys, values) 795 796 args = [] 797 for key, value in zip(keys.expressions, values.expressions): 798 args.append(self.sql(key)) 799 args.append(self.sql(value)) 800 801 return self.func(map_func_name, *args) 802 803 804def build_formatted_time( 805 exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None 806) -> t.Callable[[t.List], E]: 807 """Helper used for time expressions. 808 809 Args: 810 exp_class: the expression class to instantiate. 811 dialect: target sql dialect. 812 default: the default format, True being time. 813 814 Returns: 815 A callable that can be used to return the appropriately formatted time expression. 816 """ 817 818 def _builder(args: t.List): 819 return exp_class( 820 this=seq_get(args, 0), 821 format=Dialect[dialect].format_time( 822 seq_get(args, 1) 823 or (Dialect[dialect].TIME_FORMAT if default is True else default or None) 824 ), 825 ) 826 827 return _builder 828 829 830def time_format( 831 dialect: DialectType = None, 832) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]: 833 def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]: 834 """ 835 Returns the time format for a given expression, unless it's equivalent 836 to the default time format of the dialect of interest. 837 """ 838 time_format = self.format_time(expression) 839 return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None 840 841 return _time_format 842 843 844def build_date_delta( 845 exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None 846) -> t.Callable[[t.List], E]: 847 def _builder(args: t.List) -> E: 848 unit_based = len(args) == 3 849 this = args[2] if unit_based else seq_get(args, 0) 850 unit = args[0] if unit_based else exp.Literal.string("DAY") 851 unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit 852 return exp_class(this=this, expression=seq_get(args, 1), unit=unit) 853 854 return _builder 855 856 857def build_date_delta_with_interval( 858 expression_class: t.Type[E], 859) -> t.Callable[[t.List], t.Optional[E]]: 860 def _builder(args: t.List) -> t.Optional[E]: 861 if len(args) < 2: 862 return None 863 864 interval = args[1] 865 866 if not isinstance(interval, exp.Interval): 867 raise ParseError(f"INTERVAL expression expected but got '{interval}'") 868 869 expression = interval.this 870 if expression and expression.is_string: 871 expression = exp.Literal.number(expression.this) 872 873 return expression_class(this=args[0], expression=expression, unit=unit_to_str(interval)) 874 875 return _builder 876 877 878def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 879 unit = seq_get(args, 0) 880 this = seq_get(args, 1) 881 882 if isinstance(this, exp.Cast) and this.is_type("date"): 883 return exp.DateTrunc(unit=unit, this=this) 884 return exp.TimestampTrunc(this=this, unit=unit) 885 886 887def date_add_interval_sql( 888 data_type: str, kind: str 889) -> t.Callable[[Generator, exp.Expression], str]: 890 def func(self: Generator, expression: exp.Expression) -> str: 891 this = self.sql(expression, "this") 892 interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression)) 893 return f"{data_type}_{kind}({this}, {self.sql(interval)})" 894 895 return func 896 897 898def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]: 899 def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str: 900 args = [unit_to_str(expression), expression.this] 901 if zone: 902 args.append(expression.args.get("zone")) 903 return self.func("DATE_TRUNC", *args) 904 905 return _timestamptrunc_sql 906 907 908def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str: 909 if not expression.expression: 910 from sqlglot.optimizer.annotate_types import annotate_types 911 912 target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP 913 return self.sql(exp.cast(expression.this, target_type)) 914 if expression.text("expression").lower() in TIMEZONES: 915 return self.sql( 916 exp.AtTimeZone( 917 this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), 918 zone=expression.expression, 919 ) 920 ) 921 return self.func("TIMESTAMP", expression.this, expression.expression) 922 923 924def locate_to_strposition(args: t.List) -> exp.Expression: 925 return exp.StrPosition( 926 this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2) 927 ) 928 929 930def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str: 931 return self.func( 932 "LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position") 933 ) 934 935 936def left_to_substring_sql(self: Generator, expression: exp.Left) -> str: 937 return self.sql( 938 exp.Substring( 939 this=expression.this, start=exp.Literal.number(1), length=expression.expression 940 ) 941 ) 942 943 944def right_to_substring_sql(self: Generator, expression: exp.Left) -> str: 945 return self.sql( 946 exp.Substring( 947 this=expression.this, 948 start=exp.Length(this=expression.this) - exp.paren(expression.expression - 1), 949 ) 950 ) 951 952 953def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str: 954 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)) 955 956 957def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str: 958 return self.sql(exp.cast(expression.this, exp.DataType.Type.DATE)) 959 960 961# Used for Presto and Duckdb which use functions that don't support charset, and assume utf-8 962def encode_decode_sql( 963 self: Generator, expression: exp.Expression, name: str, replace: bool = True 964) -> str: 965 charset = expression.args.get("charset") 966 if charset and charset.name.lower() != "utf-8": 967 self.unsupported(f"Expected utf-8 character set, got {charset}.") 968 969 return self.func(name, expression.this, expression.args.get("replace") if replace else None) 970 971 972def min_or_least(self: Generator, expression: exp.Min) -> str: 973 name = "LEAST" if expression.expressions else "MIN" 974 return rename_func(name)(self, expression) 975 976 977def max_or_greatest(self: Generator, expression: exp.Max) -> str: 978 name = "GREATEST" if expression.expressions else "MAX" 979 return rename_func(name)(self, expression) 980 981 982def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str: 983 cond = expression.this 984 985 if isinstance(expression.this, exp.Distinct): 986 cond = expression.this.expressions[0] 987 self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM") 988 989 return self.func("sum", exp.func("if", cond, 1, 0)) 990 991 992def trim_sql(self: Generator, expression: exp.Trim) -> str: 993 target = self.sql(expression, "this") 994 trim_type = self.sql(expression, "position") 995 remove_chars = self.sql(expression, "expression") 996 collation = self.sql(expression, "collation") 997 998 # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific 999 if not remove_chars and not collation: 1000 return self.trim_sql(expression) 1001 1002 trim_type = f"{trim_type} " if trim_type else "" 1003 remove_chars = f"{remove_chars} " if remove_chars else "" 1004 from_part = "FROM " if trim_type or remove_chars else "" 1005 collation = f" COLLATE {collation}" if collation else "" 1006 return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})" 1007 1008 1009def str_to_time_sql(self: Generator, expression: exp.Expression) -> str: 1010 return self.func("STRPTIME", expression.this, self.format_time(expression)) 1011 1012 1013def concat_to_dpipe_sql(self: Generator, expression: exp.Concat) -> str: 1014 return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions)) 1015 1016 1017def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str: 1018 delim, *rest_args = expression.expressions 1019 return self.sql( 1020 reduce( 1021 lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)), 1022 rest_args, 1023 ) 1024 ) 1025 1026 1027def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str: 1028 bad_args = list(filter(expression.args.get, ("position", "occurrence", "parameters"))) 1029 if bad_args: 1030 self.unsupported(f"REGEXP_EXTRACT does not support the following arg(s): {bad_args}") 1031 1032 return self.func( 1033 "REGEXP_EXTRACT", expression.this, expression.expression, expression.args.get("group") 1034 ) 1035 1036 1037def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str: 1038 bad_args = list(filter(expression.args.get, ("position", "occurrence", "modifiers"))) 1039 if bad_args: 1040 self.unsupported(f"REGEXP_REPLACE does not support the following arg(s): {bad_args}") 1041 1042 return self.func( 1043 "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"] 1044 ) 1045 1046 1047def pivot_column_names(aggregations: t.List[exp.Expression], dialect: DialectType) -> t.List[str]: 1048 names = [] 1049 for agg in aggregations: 1050 if isinstance(agg, exp.Alias): 1051 names.append(agg.alias) 1052 else: 1053 """ 1054 This case corresponds to aggregations without aliases being used as suffixes 1055 (e.g. col_avg(foo)). We need to unquote identifiers because they're going to 1056 be quoted in the base parser's `_parse_pivot` method, due to `to_identifier`. 1057 Otherwise, we'd end up with `col_avg(`foo`)` (notice the double quotes). 1058 """ 1059 agg_all_unquoted = agg.transform( 1060 lambda node: ( 1061 exp.Identifier(this=node.name, quoted=False) 1062 if isinstance(node, exp.Identifier) 1063 else node 1064 ) 1065 ) 1066 names.append(agg_all_unquoted.sql(dialect=dialect, normalize_functions="lower")) 1067 1068 return names 1069 1070 1071def binary_from_function(expr_type: t.Type[B]) -> t.Callable[[t.List], B]: 1072 return lambda args: expr_type(this=seq_get(args, 0), expression=seq_get(args, 1)) 1073 1074 1075# Used to represent DATE_TRUNC in Doris, Postgres and Starrocks dialects 1076def build_timestamp_trunc(args: t.List) -> exp.TimestampTrunc: 1077 return exp.TimestampTrunc(this=seq_get(args, 1), unit=seq_get(args, 0)) 1078 1079 1080def any_value_to_max_sql(self: Generator, expression: exp.AnyValue) -> str: 1081 return self.func("MAX", expression.this) 1082 1083 1084def bool_xor_sql(self: Generator, expression: exp.Xor) -> str: 1085 a = self.sql(expression.left) 1086 b = self.sql(expression.right) 1087 return f"({a} AND (NOT {b})) OR ((NOT {a}) AND {b})" 1088 1089 1090def is_parse_json(expression: exp.Expression) -> bool: 1091 return isinstance(expression, exp.ParseJSON) or ( 1092 isinstance(expression, exp.Cast) and expression.is_type("json") 1093 ) 1094 1095 1096def isnull_to_is_null(args: t.List) -> exp.Expression: 1097 return exp.Paren(this=exp.Is(this=seq_get(args, 0), expression=exp.null())) 1098 1099 1100def generatedasidentitycolumnconstraint_sql( 1101 self: Generator, expression: exp.GeneratedAsIdentityColumnConstraint 1102) -> str: 1103 start = self.sql(expression, "start") or "1" 1104 increment = self.sql(expression, "increment") or "1" 1105 return f"IDENTITY({start}, {increment})" 1106 1107 1108def arg_max_or_min_no_count(name: str) -> t.Callable[[Generator, exp.ArgMax | exp.ArgMin], str]: 1109 def _arg_max_or_min_sql(self: Generator, expression: exp.ArgMax | exp.ArgMin) -> str: 1110 if expression.args.get("count"): 1111 self.unsupported(f"Only two arguments are supported in function {name}.") 1112 1113 return self.func(name, expression.this, expression.expression) 1114 1115 return _arg_max_or_min_sql 1116 1117 1118def ts_or_ds_add_cast(expression: exp.TsOrDsAdd) -> exp.TsOrDsAdd: 1119 this = expression.this.copy() 1120 1121 return_type = expression.return_type 1122 if return_type.is_type(exp.DataType.Type.DATE): 1123 # If we need to cast to a DATE, we cast to TIMESTAMP first to make sure we 1124 # can truncate timestamp strings, because some dialects can't cast them to DATE 1125 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1126 1127 expression.this.replace(exp.cast(this, return_type)) 1128 return expression 1129 1130 1131def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE_ADD_OR_DIFF], str]: 1132 def _delta_sql(self: Generator, expression: DATE_ADD_OR_DIFF) -> str: 1133 if cast and isinstance(expression, exp.TsOrDsAdd): 1134 expression = ts_or_ds_add_cast(expression) 1135 1136 return self.func( 1137 name, 1138 unit_to_var(expression), 1139 expression.expression, 1140 expression.this, 1141 ) 1142 1143 return _delta_sql 1144 1145 1146def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]: 1147 unit = expression.args.get("unit") 1148 1149 if isinstance(unit, exp.Placeholder): 1150 return unit 1151 if unit: 1152 return exp.Literal.string(unit.name) 1153 return exp.Literal.string(default) if default else None 1154 1155 1156def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]: 1157 unit = expression.args.get("unit") 1158 1159 if isinstance(unit, (exp.Var, exp.Placeholder)): 1160 return unit 1161 return exp.Var(this=default) if default else None 1162 1163 1164@t.overload 1165def map_date_part(part: exp.Expression, dialect: DialectType = Dialect) -> exp.Var: 1166 pass 1167 1168 1169@t.overload 1170def map_date_part( 1171 part: t.Optional[exp.Expression], dialect: DialectType = Dialect 1172) -> t.Optional[exp.Expression]: 1173 pass 1174 1175 1176def map_date_part(part, dialect: DialectType = Dialect): 1177 mapped = ( 1178 Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None 1179 ) 1180 return exp.var(mapped) if mapped else part 1181 1182 1183def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str: 1184 trunc_curr_date = exp.func("date_trunc", "month", expression.this) 1185 plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month") 1186 minus_one_day = exp.func("date_sub", plus_one_month, 1, "day") 1187 1188 return self.sql(exp.cast(minus_one_day, exp.DataType.Type.DATE)) 1189 1190 1191def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str: 1192 """Remove table refs from columns in when statements.""" 1193 alias = expression.this.args.get("alias") 1194 1195 def normalize(identifier: t.Optional[exp.Identifier]) -> t.Optional[str]: 1196 return self.dialect.normalize_identifier(identifier).name if identifier else None 1197 1198 targets = {normalize(expression.this.this)} 1199 1200 if alias: 1201 targets.add(normalize(alias.this)) 1202 1203 for when in expression.expressions: 1204 when.transform( 1205 lambda node: ( 1206 exp.column(node.this) 1207 if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets 1208 else node 1209 ), 1210 copy=False, 1211 ) 1212 1213 return self.merge_sql(expression) 1214 1215 1216def build_json_extract_path( 1217 expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False 1218) -> t.Callable[[t.List], F]: 1219 def _builder(args: t.List) -> F: 1220 segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()] 1221 for arg in args[1:]: 1222 if not isinstance(arg, exp.Literal): 1223 # We use the fallback parser because we can't really transpile non-literals safely 1224 return expr_type.from_arg_list(args) 1225 1226 text = arg.name 1227 if is_int(text): 1228 index = int(text) 1229 segments.append( 1230 exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1) 1231 ) 1232 else: 1233 segments.append(exp.JSONPathKey(this=text)) 1234 1235 # This is done to avoid failing in the expression validator due to the arg count 1236 del args[2:] 1237 return expr_type( 1238 this=seq_get(args, 0), 1239 expression=exp.JSONPath(expressions=segments), 1240 only_json_types=arrow_req_json_type, 1241 ) 1242 1243 return _builder 1244 1245 1246def json_extract_segments( 1247 name: str, quoted_index: bool = True, op: t.Optional[str] = None 1248) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]: 1249 def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str: 1250 path = expression.expression 1251 if not isinstance(path, exp.JSONPath): 1252 return rename_func(name)(self, expression) 1253 1254 segments = [] 1255 for segment in path.expressions: 1256 path = self.sql(segment) 1257 if path: 1258 if isinstance(segment, exp.JSONPathPart) and ( 1259 quoted_index or not isinstance(segment, exp.JSONPathSubscript) 1260 ): 1261 path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}" 1262 1263 segments.append(path) 1264 1265 if op: 1266 return f" {op} ".join([self.sql(expression.this), *segments]) 1267 return self.func(name, expression.this, *segments) 1268 1269 return _json_extract_segments 1270 1271 1272def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str: 1273 if isinstance(expression.this, exp.JSONPathWildcard): 1274 self.unsupported("Unsupported wildcard in JSONPathKey expression") 1275 1276 return expression.name 1277 1278 1279def filter_array_using_unnest(self: Generator, expression: exp.ArrayFilter) -> str: 1280 cond = expression.expression 1281 if isinstance(cond, exp.Lambda) and len(cond.expressions) == 1: 1282 alias = cond.expressions[0] 1283 cond = cond.this 1284 elif isinstance(cond, exp.Predicate): 1285 alias = "_u" 1286 else: 1287 self.unsupported("Unsupported filter condition") 1288 return "" 1289 1290 unnest = exp.Unnest(expressions=[expression.this]) 1291 filtered = exp.select(alias).from_(exp.alias_(unnest, None, table=[alias])).where(cond) 1292 return self.sql(exp.Array(expressions=[filtered])) 1293 1294 1295def to_number_with_nls_param(self: Generator, expression: exp.ToNumber) -> str: 1296 return self.func( 1297 "TO_NUMBER", 1298 expression.this, 1299 expression.args.get("format"), 1300 expression.args.get("nlsparam"), 1301 ) 1302 1303 1304def build_default_decimal_type( 1305 precision: t.Optional[int] = None, scale: t.Optional[int] = None 1306) -> t.Callable[[exp.DataType], exp.DataType]: 1307 def _builder(dtype: exp.DataType) -> exp.DataType: 1308 if dtype.expressions or precision is None: 1309 return dtype 1310 1311 params = f"{precision}{f', {scale}' if scale is not None else ''}" 1312 return exp.DataType.build(f"DECIMAL({params})") 1313 1314 return _builder 1315 1316 1317def build_timestamp_from_parts(args: t.List) -> exp.Func: 1318 if len(args) == 2: 1319 # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept, 1320 # so we parse this into Anonymous for now instead of introducing complexity 1321 return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args) 1322 1323 return exp.TimestampFromParts.from_arg_list(args) 1324 1325 1326def sha256_sql(self: Generator, expression: exp.SHA2) -> str: 1327 return self.func(f"SHA{expression.text('length') or '256'}", expression.this)
41class Dialects(str, Enum): 42 """Dialects supported by SQLGLot.""" 43 44 DIALECT = "" 45 46 ATHENA = "athena" 47 BIGQUERY = "bigquery" 48 CLICKHOUSE = "clickhouse" 49 DATABRICKS = "databricks" 50 DORIS = "doris" 51 DRILL = "drill" 52 DUCKDB = "duckdb" 53 HIVE = "hive" 54 MATERIALIZE = "materialize" 55 MYSQL = "mysql" 56 ORACLE = "oracle" 57 POSTGRES = "postgres" 58 PRESTO = "presto" 59 PRQL = "prql" 60 REDSHIFT = "redshift" 61 RISINGWAVE = "risingwave" 62 SNOWFLAKE = "snowflake" 63 SPARK = "spark" 64 SPARK2 = "spark2" 65 SQLITE = "sqlite" 66 STARROCKS = "starrocks" 67 TABLEAU = "tableau" 68 TERADATA = "teradata" 69 TRINO = "trino" 70 TSQL = "tsql"
Dialects supported by SQLGLot.
Inherited Members
- enum.Enum
- name
- value
- builtins.str
- encode
- replace
- split
- rsplit
- join
- capitalize
- casefold
- title
- center
- count
- expandtabs
- find
- partition
- index
- ljust
- lower
- lstrip
- rfind
- rindex
- rjust
- rstrip
- rpartition
- splitlines
- strip
- swapcase
- translate
- upper
- startswith
- endswith
- removeprefix
- removesuffix
- isascii
- islower
- isupper
- istitle
- isspace
- isdecimal
- isdigit
- isnumeric
- isalpha
- isalnum
- isidentifier
- isprintable
- zfill
- format
- format_map
- maketrans
73class NormalizationStrategy(str, AutoName): 74 """Specifies the strategy according to which identifiers should be normalized.""" 75 76 LOWERCASE = auto() 77 """Unquoted identifiers are lowercased.""" 78 79 UPPERCASE = auto() 80 """Unquoted identifiers are uppercased.""" 81 82 CASE_SENSITIVE = auto() 83 """Always case-sensitive, regardless of quotes.""" 84 85 CASE_INSENSITIVE = auto() 86 """Always case-insensitive, regardless of quotes."""
Specifies the strategy according to which identifiers should be normalized.
Always case-sensitive, regardless of quotes.
Always case-insensitive, regardless of quotes.
Inherited Members
- enum.Enum
- name
- value
- builtins.str
- encode
- replace
- split
- rsplit
- join
- capitalize
- casefold
- title
- center
- count
- expandtabs
- find
- partition
- index
- ljust
- lower
- lstrip
- rfind
- rindex
- rjust
- rstrip
- rpartition
- splitlines
- strip
- swapcase
- translate
- upper
- startswith
- endswith
- removeprefix
- removesuffix
- isascii
- islower
- isupper
- istitle
- isspace
- isdecimal
- isdigit
- isnumeric
- isalpha
- isalnum
- isidentifier
- isprintable
- zfill
- format
- format_map
- maketrans
201class Dialect(metaclass=_Dialect): 202 INDEX_OFFSET = 0 203 """The base index offset for arrays.""" 204 205 WEEK_OFFSET = 0 206 """First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.""" 207 208 UNNEST_COLUMN_ONLY = False 209 """Whether `UNNEST` table aliases are treated as column aliases.""" 210 211 ALIAS_POST_TABLESAMPLE = False 212 """Whether the table alias comes after tablesample.""" 213 214 TABLESAMPLE_SIZE_IS_PERCENT = False 215 """Whether a size in the table sample clause represents percentage.""" 216 217 NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE 218 """Specifies the strategy according to which identifiers should be normalized.""" 219 220 IDENTIFIERS_CAN_START_WITH_DIGIT = False 221 """Whether an unquoted identifier can start with a digit.""" 222 223 DPIPE_IS_STRING_CONCAT = True 224 """Whether the DPIPE token (`||`) is a string concatenation operator.""" 225 226 STRICT_STRING_CONCAT = False 227 """Whether `CONCAT`'s arguments must be strings.""" 228 229 SUPPORTS_USER_DEFINED_TYPES = True 230 """Whether user-defined data types are supported.""" 231 232 SUPPORTS_SEMI_ANTI_JOIN = True 233 """Whether `SEMI` or `ANTI` joins are supported.""" 234 235 SUPPORTS_COLUMN_JOIN_MARKS = False 236 """Whether the old-style outer join (+) syntax is supported.""" 237 238 COPY_PARAMS_ARE_CSV = True 239 """Separator of COPY statement parameters.""" 240 241 NORMALIZE_FUNCTIONS: bool | str = "upper" 242 """ 243 Determines how function names are going to be normalized. 244 Possible values: 245 "upper" or True: Convert names to uppercase. 246 "lower": Convert names to lowercase. 247 False: Disables function name normalization. 248 """ 249 250 LOG_BASE_FIRST: t.Optional[bool] = True 251 """ 252 Whether the base comes first in the `LOG` function. 253 Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`) 254 """ 255 256 NULL_ORDERING = "nulls_are_small" 257 """ 258 Default `NULL` ordering method to use if not explicitly set. 259 Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"` 260 """ 261 262 TYPED_DIVISION = False 263 """ 264 Whether the behavior of `a / b` depends on the types of `a` and `b`. 265 False means `a / b` is always float division. 266 True means `a / b` is integer division if both `a` and `b` are integers. 267 """ 268 269 SAFE_DIVISION = False 270 """Whether division by zero throws an error (`False`) or returns NULL (`True`).""" 271 272 CONCAT_COALESCE = False 273 """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string.""" 274 275 HEX_LOWERCASE = False 276 """Whether the `HEX` function returns a lowercase hexadecimal string.""" 277 278 DATE_FORMAT = "'%Y-%m-%d'" 279 DATEINT_FORMAT = "'%Y%m%d'" 280 TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'" 281 282 TIME_MAPPING: t.Dict[str, str] = {} 283 """Associates this dialect's time formats with their equivalent Python `strftime` formats.""" 284 285 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time 286 # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE 287 FORMAT_MAPPING: t.Dict[str, str] = {} 288 """ 289 Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`. 290 If empty, the corresponding trie will be constructed off of `TIME_MAPPING`. 291 """ 292 293 UNESCAPED_SEQUENCES: t.Dict[str, str] = {} 294 """Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`).""" 295 296 PSEUDOCOLUMNS: t.Set[str] = set() 297 """ 298 Columns that are auto-generated by the engine corresponding to this dialect. 299 For example, such columns may be excluded from `SELECT *` queries. 300 """ 301 302 PREFER_CTE_ALIAS_COLUMN = False 303 """ 304 Some dialects, such as Snowflake, allow you to reference a CTE column alias in the 305 HAVING clause of the CTE. This flag will cause the CTE alias columns to override 306 any projection aliases in the subquery. 307 308 For example, 309 WITH y(c) AS ( 310 SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 311 ) SELECT c FROM y; 312 313 will be rewritten as 314 315 WITH y(c) AS ( 316 SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0 317 ) SELECT c FROM y; 318 """ 319 320 COPY_PARAMS_ARE_CSV = True 321 """ 322 Whether COPY statement parameters are separated by comma or whitespace 323 """ 324 325 # --- Autofilled --- 326 327 tokenizer_class = Tokenizer 328 parser_class = Parser 329 generator_class = Generator 330 331 # A trie of the time_mapping keys 332 TIME_TRIE: t.Dict = {} 333 FORMAT_TRIE: t.Dict = {} 334 335 INVERSE_TIME_MAPPING: t.Dict[str, str] = {} 336 INVERSE_TIME_TRIE: t.Dict = {} 337 338 ESCAPED_SEQUENCES: t.Dict[str, str] = {} 339 340 # Delimiters for string literals and identifiers 341 QUOTE_START = "'" 342 QUOTE_END = "'" 343 IDENTIFIER_START = '"' 344 IDENTIFIER_END = '"' 345 346 # Delimiters for bit, hex, byte and unicode literals 347 BIT_START: t.Optional[str] = None 348 BIT_END: t.Optional[str] = None 349 HEX_START: t.Optional[str] = None 350 HEX_END: t.Optional[str] = None 351 BYTE_START: t.Optional[str] = None 352 BYTE_END: t.Optional[str] = None 353 UNICODE_START: t.Optional[str] = None 354 UNICODE_END: t.Optional[str] = None 355 356 DATE_PART_MAPPING = { 357 "Y": "YEAR", 358 "YY": "YEAR", 359 "YYY": "YEAR", 360 "YYYY": "YEAR", 361 "YR": "YEAR", 362 "YEARS": "YEAR", 363 "YRS": "YEAR", 364 "MM": "MONTH", 365 "MON": "MONTH", 366 "MONS": "MONTH", 367 "MONTHS": "MONTH", 368 "D": "DAY", 369 "DD": "DAY", 370 "DAYS": "DAY", 371 "DAYOFMONTH": "DAY", 372 "DAY OF WEEK": "DAYOFWEEK", 373 "WEEKDAY": "DAYOFWEEK", 374 "DOW": "DAYOFWEEK", 375 "DW": "DAYOFWEEK", 376 "WEEKDAY_ISO": "DAYOFWEEKISO", 377 "DOW_ISO": "DAYOFWEEKISO", 378 "DW_ISO": "DAYOFWEEKISO", 379 "DAY OF YEAR": "DAYOFYEAR", 380 "DOY": "DAYOFYEAR", 381 "DY": "DAYOFYEAR", 382 "W": "WEEK", 383 "WK": "WEEK", 384 "WEEKOFYEAR": "WEEK", 385 "WOY": "WEEK", 386 "WY": "WEEK", 387 "WEEK_ISO": "WEEKISO", 388 "WEEKOFYEARISO": "WEEKISO", 389 "WEEKOFYEAR_ISO": "WEEKISO", 390 "Q": "QUARTER", 391 "QTR": "QUARTER", 392 "QTRS": "QUARTER", 393 "QUARTERS": "QUARTER", 394 "H": "HOUR", 395 "HH": "HOUR", 396 "HR": "HOUR", 397 "HOURS": "HOUR", 398 "HRS": "HOUR", 399 "M": "MINUTE", 400 "MI": "MINUTE", 401 "MIN": "MINUTE", 402 "MINUTES": "MINUTE", 403 "MINS": "MINUTE", 404 "S": "SECOND", 405 "SEC": "SECOND", 406 "SECONDS": "SECOND", 407 "SECS": "SECOND", 408 "MS": "MILLISECOND", 409 "MSEC": "MILLISECOND", 410 "MSECS": "MILLISECOND", 411 "MSECOND": "MILLISECOND", 412 "MSECONDS": "MILLISECOND", 413 "MILLISEC": "MILLISECOND", 414 "MILLISECS": "MILLISECOND", 415 "MILLISECON": "MILLISECOND", 416 "MILLISECONDS": "MILLISECOND", 417 "US": "MICROSECOND", 418 "USEC": "MICROSECOND", 419 "USECS": "MICROSECOND", 420 "MICROSEC": "MICROSECOND", 421 "MICROSECS": "MICROSECOND", 422 "USECOND": "MICROSECOND", 423 "USECONDS": "MICROSECOND", 424 "MICROSECONDS": "MICROSECOND", 425 "NS": "NANOSECOND", 426 "NSEC": "NANOSECOND", 427 "NANOSEC": "NANOSECOND", 428 "NSECOND": "NANOSECOND", 429 "NSECONDS": "NANOSECOND", 430 "NANOSECS": "NANOSECOND", 431 "EPOCH_SECOND": "EPOCH", 432 "EPOCH_SECONDS": "EPOCH", 433 "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND", 434 "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND", 435 "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND", 436 "TZH": "TIMEZONE_HOUR", 437 "TZM": "TIMEZONE_MINUTE", 438 "DEC": "DECADE", 439 "DECS": "DECADE", 440 "DECADES": "DECADE", 441 "MIL": "MILLENIUM", 442 "MILS": "MILLENIUM", 443 "MILLENIA": "MILLENIUM", 444 "C": "CENTURY", 445 "CENT": "CENTURY", 446 "CENTS": "CENTURY", 447 "CENTURIES": "CENTURY", 448 } 449 450 @classmethod 451 def get_or_raise(cls, dialect: DialectType) -> Dialect: 452 """ 453 Look up a dialect in the global dialect registry and return it if it exists. 454 455 Args: 456 dialect: The target dialect. If this is a string, it can be optionally followed by 457 additional key-value pairs that are separated by commas and are used to specify 458 dialect settings, such as whether the dialect's identifiers are case-sensitive. 459 460 Example: 461 >>> dialect = dialect_class = get_or_raise("duckdb") 462 >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive") 463 464 Returns: 465 The corresponding Dialect instance. 466 """ 467 468 if not dialect: 469 return cls() 470 if isinstance(dialect, _Dialect): 471 return dialect() 472 if isinstance(dialect, Dialect): 473 return dialect 474 if isinstance(dialect, str): 475 try: 476 dialect_name, *kv_pairs = dialect.split(",") 477 kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)} 478 except ValueError: 479 raise ValueError( 480 f"Invalid dialect format: '{dialect}'. " 481 "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'." 482 ) 483 484 result = cls.get(dialect_name.strip()) 485 if not result: 486 from difflib import get_close_matches 487 488 similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or "" 489 if similar: 490 similar = f" Did you mean {similar}?" 491 492 raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}") 493 494 return result(**kwargs) 495 496 raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.") 497 498 @classmethod 499 def format_time( 500 cls, expression: t.Optional[str | exp.Expression] 501 ) -> t.Optional[exp.Expression]: 502 """Converts a time format in this dialect to its equivalent Python `strftime` format.""" 503 if isinstance(expression, str): 504 return exp.Literal.string( 505 # the time formats are quoted 506 format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE) 507 ) 508 509 if expression and expression.is_string: 510 return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE)) 511 512 return expression 513 514 def __init__(self, **kwargs) -> None: 515 normalization_strategy = kwargs.pop("normalization_strategy", None) 516 517 if normalization_strategy is None: 518 self.normalization_strategy = self.NORMALIZATION_STRATEGY 519 else: 520 self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper()) 521 522 self.settings = kwargs 523 524 def __eq__(self, other: t.Any) -> bool: 525 # Does not currently take dialect state into account 526 return type(self) == other 527 528 def __hash__(self) -> int: 529 # Does not currently take dialect state into account 530 return hash(type(self)) 531 532 def normalize_identifier(self, expression: E) -> E: 533 """ 534 Transforms an identifier in a way that resembles how it'd be resolved by this dialect. 535 536 For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it 537 lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so 538 it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, 539 and so any normalization would be prohibited in order to avoid "breaking" the identifier. 540 541 There are also dialects like Spark, which are case-insensitive even when quotes are 542 present, and dialects like MySQL, whose resolution rules match those employed by the 543 underlying operating system, for example they may always be case-sensitive in Linux. 544 545 Finally, the normalization behavior of some engines can even be controlled through flags, 546 like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier. 547 548 SQLGlot aims to understand and handle all of these different behaviors gracefully, so 549 that it can analyze queries in the optimizer and successfully capture their semantics. 550 """ 551 if ( 552 isinstance(expression, exp.Identifier) 553 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 554 and ( 555 not expression.quoted 556 or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 557 ) 558 ): 559 expression.set( 560 "this", 561 ( 562 expression.this.upper() 563 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 564 else expression.this.lower() 565 ), 566 ) 567 568 return expression 569 570 def case_sensitive(self, text: str) -> bool: 571 """Checks if text contains any case sensitive characters, based on the dialect's rules.""" 572 if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE: 573 return False 574 575 unsafe = ( 576 str.islower 577 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 578 else str.isupper 579 ) 580 return any(unsafe(char) for char in text) 581 582 def can_identify(self, text: str, identify: str | bool = "safe") -> bool: 583 """Checks if text can be identified given an identify option. 584 585 Args: 586 text: The text to check. 587 identify: 588 `"always"` or `True`: Always returns `True`. 589 `"safe"`: Only returns `True` if the identifier is case-insensitive. 590 591 Returns: 592 Whether the given text can be identified. 593 """ 594 if identify is True or identify == "always": 595 return True 596 597 if identify == "safe": 598 return not self.case_sensitive(text) 599 600 return False 601 602 def quote_identifier(self, expression: E, identify: bool = True) -> E: 603 """ 604 Adds quotes to a given identifier. 605 606 Args: 607 expression: The expression of interest. If it's not an `Identifier`, this method is a no-op. 608 identify: If set to `False`, the quotes will only be added if the identifier is deemed 609 "unsafe", with respect to its characters and this dialect's normalization strategy. 610 """ 611 if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func): 612 name = expression.this 613 expression.set( 614 "quoted", 615 identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name), 616 ) 617 618 return expression 619 620 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 621 if isinstance(path, exp.Literal): 622 path_text = path.name 623 if path.is_number: 624 path_text = f"[{path_text}]" 625 626 try: 627 return parse_json_path(path_text) 628 except ParseError as e: 629 logger.warning(f"Invalid JSON path syntax. {str(e)}") 630 631 return path 632 633 def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]: 634 return self.parser(**opts).parse(self.tokenize(sql), sql) 635 636 def parse_into( 637 self, expression_type: exp.IntoType, sql: str, **opts 638 ) -> t.List[t.Optional[exp.Expression]]: 639 return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql) 640 641 def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str: 642 return self.generator(**opts).generate(expression, copy=copy) 643 644 def transpile(self, sql: str, **opts) -> t.List[str]: 645 return [ 646 self.generate(expression, copy=False, **opts) if expression else "" 647 for expression in self.parse(sql) 648 ] 649 650 def tokenize(self, sql: str) -> t.List[Token]: 651 return self.tokenizer.tokenize(sql) 652 653 @property 654 def tokenizer(self) -> Tokenizer: 655 if not hasattr(self, "_tokenizer"): 656 self._tokenizer = self.tokenizer_class(dialect=self) 657 return self._tokenizer 658 659 def parser(self, **opts) -> Parser: 660 return self.parser_class(dialect=self, **opts) 661 662 def generator(self, **opts) -> Generator: 663 return self.generator_class(dialect=self, **opts)
514 def __init__(self, **kwargs) -> None: 515 normalization_strategy = kwargs.pop("normalization_strategy", None) 516 517 if normalization_strategy is None: 518 self.normalization_strategy = self.NORMALIZATION_STRATEGY 519 else: 520 self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper()) 521 522 self.settings = kwargs
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether a size in the table sample clause represents percentage.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the behavior of a / b
depends on the types of a
and b
.
False means a / b
is always float division.
True means a / b
is integer division if both a
and b
are integers.
A NULL
arg in CONCAT
yields NULL
by default, but in some dialects it yields an empty string.
Associates this dialect's time formats with their equivalent Python strftime
formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy')
.
If empty, the corresponding trie will be constructed off of TIME_MAPPING
.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT *
queries.
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
450 @classmethod 451 def get_or_raise(cls, dialect: DialectType) -> Dialect: 452 """ 453 Look up a dialect in the global dialect registry and return it if it exists. 454 455 Args: 456 dialect: The target dialect. If this is a string, it can be optionally followed by 457 additional key-value pairs that are separated by commas and are used to specify 458 dialect settings, such as whether the dialect's identifiers are case-sensitive. 459 460 Example: 461 >>> dialect = dialect_class = get_or_raise("duckdb") 462 >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive") 463 464 Returns: 465 The corresponding Dialect instance. 466 """ 467 468 if not dialect: 469 return cls() 470 if isinstance(dialect, _Dialect): 471 return dialect() 472 if isinstance(dialect, Dialect): 473 return dialect 474 if isinstance(dialect, str): 475 try: 476 dialect_name, *kv_pairs = dialect.split(",") 477 kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)} 478 except ValueError: 479 raise ValueError( 480 f"Invalid dialect format: '{dialect}'. " 481 "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'." 482 ) 483 484 result = cls.get(dialect_name.strip()) 485 if not result: 486 from difflib import get_close_matches 487 488 similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or "" 489 if similar: 490 similar = f" Did you mean {similar}?" 491 492 raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}") 493 494 return result(**kwargs) 495 496 raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
Look up a dialect in the global dialect registry and return it if it exists.
Arguments:
- dialect: The target dialect. If this is a string, it can be optionally followed by additional key-value pairs that are separated by commas and are used to specify dialect settings, such as whether the dialect's identifiers are case-sensitive.
Example:
>>> dialect = dialect_class = get_or_raise("duckdb") >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
Returns:
The corresponding Dialect instance.
498 @classmethod 499 def format_time( 500 cls, expression: t.Optional[str | exp.Expression] 501 ) -> t.Optional[exp.Expression]: 502 """Converts a time format in this dialect to its equivalent Python `strftime` format.""" 503 if isinstance(expression, str): 504 return exp.Literal.string( 505 # the time formats are quoted 506 format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE) 507 ) 508 509 if expression and expression.is_string: 510 return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE)) 511 512 return expression
Converts a time format in this dialect to its equivalent Python strftime
format.
532 def normalize_identifier(self, expression: E) -> E: 533 """ 534 Transforms an identifier in a way that resembles how it'd be resolved by this dialect. 535 536 For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it 537 lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so 538 it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, 539 and so any normalization would be prohibited in order to avoid "breaking" the identifier. 540 541 There are also dialects like Spark, which are case-insensitive even when quotes are 542 present, and dialects like MySQL, whose resolution rules match those employed by the 543 underlying operating system, for example they may always be case-sensitive in Linux. 544 545 Finally, the normalization behavior of some engines can even be controlled through flags, 546 like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier. 547 548 SQLGlot aims to understand and handle all of these different behaviors gracefully, so 549 that it can analyze queries in the optimizer and successfully capture their semantics. 550 """ 551 if ( 552 isinstance(expression, exp.Identifier) 553 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 554 and ( 555 not expression.quoted 556 or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 557 ) 558 ): 559 expression.set( 560 "this", 561 ( 562 expression.this.upper() 563 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 564 else expression.this.lower() 565 ), 566 ) 567 568 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO
would be resolved as foo
in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO
. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
570 def case_sensitive(self, text: str) -> bool: 571 """Checks if text contains any case sensitive characters, based on the dialect's rules.""" 572 if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE: 573 return False 574 575 unsafe = ( 576 str.islower 577 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 578 else str.isupper 579 ) 580 return any(unsafe(char) for char in text)
Checks if text contains any case sensitive characters, based on the dialect's rules.
582 def can_identify(self, text: str, identify: str | bool = "safe") -> bool: 583 """Checks if text can be identified given an identify option. 584 585 Args: 586 text: The text to check. 587 identify: 588 `"always"` or `True`: Always returns `True`. 589 `"safe"`: Only returns `True` if the identifier is case-insensitive. 590 591 Returns: 592 Whether the given text can be identified. 593 """ 594 if identify is True or identify == "always": 595 return True 596 597 if identify == "safe": 598 return not self.case_sensitive(text) 599 600 return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify:
"always"
orTrue
: Always returnsTrue
."safe"
: Only returnsTrue
if the identifier is case-insensitive.
Returns:
Whether the given text can be identified.
602 def quote_identifier(self, expression: E, identify: bool = True) -> E: 603 """ 604 Adds quotes to a given identifier. 605 606 Args: 607 expression: The expression of interest. If it's not an `Identifier`, this method is a no-op. 608 identify: If set to `False`, the quotes will only be added if the identifier is deemed 609 "unsafe", with respect to its characters and this dialect's normalization strategy. 610 """ 611 if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func): 612 name = expression.this 613 expression.set( 614 "quoted", 615 identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name), 616 ) 617 618 return expression
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an
Identifier
, this method is a no-op. - identify: If set to
False
, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
620 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 621 if isinstance(path, exp.Literal): 622 path_text = path.name 623 if path.is_number: 624 path_text = f"[{path_text}]" 625 626 try: 627 return parse_json_path(path_text) 628 except ParseError as e: 629 logger.warning(f"Invalid JSON path syntax. {str(e)}") 630 631 return path
679def if_sql( 680 name: str = "IF", false_value: t.Optional[exp.Expression | str] = None 681) -> t.Callable[[Generator, exp.If], str]: 682 def _if_sql(self: Generator, expression: exp.If) -> str: 683 return self.func( 684 name, 685 expression.this, 686 expression.args.get("true"), 687 expression.args.get("false") or false_value, 688 ) 689 690 return _if_sql
693def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str: 694 this = expression.this 695 if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string: 696 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 697 698 return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>")
764def str_position_sql( 765 self: Generator, expression: exp.StrPosition, generate_instance: bool = False 766) -> str: 767 this = self.sql(expression, "this") 768 substr = self.sql(expression, "substr") 769 position = self.sql(expression, "position") 770 instance = expression.args.get("instance") if generate_instance else None 771 position_offset = "" 772 773 if position: 774 # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects 775 this = self.func("SUBSTR", this, position) 776 position_offset = f" + {position} - 1" 777 778 return self.func("STRPOS", this, substr, instance) + position_offset
787def var_map_sql( 788 self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP" 789) -> str: 790 keys = expression.args["keys"] 791 values = expression.args["values"] 792 793 if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array): 794 self.unsupported("Cannot convert array columns into map.") 795 return self.func(map_func_name, keys, values) 796 797 args = [] 798 for key, value in zip(keys.expressions, values.expressions): 799 args.append(self.sql(key)) 800 args.append(self.sql(value)) 801 802 return self.func(map_func_name, *args)
805def build_formatted_time( 806 exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None 807) -> t.Callable[[t.List], E]: 808 """Helper used for time expressions. 809 810 Args: 811 exp_class: the expression class to instantiate. 812 dialect: target sql dialect. 813 default: the default format, True being time. 814 815 Returns: 816 A callable that can be used to return the appropriately formatted time expression. 817 """ 818 819 def _builder(args: t.List): 820 return exp_class( 821 this=seq_get(args, 0), 822 format=Dialect[dialect].format_time( 823 seq_get(args, 1) 824 or (Dialect[dialect].TIME_FORMAT if default is True else default or None) 825 ), 826 ) 827 828 return _builder
Helper used for time expressions.
Arguments:
- exp_class: the expression class to instantiate.
- dialect: target sql dialect.
- default: the default format, True being time.
Returns:
A callable that can be used to return the appropriately formatted time expression.
831def time_format( 832 dialect: DialectType = None, 833) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]: 834 def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]: 835 """ 836 Returns the time format for a given expression, unless it's equivalent 837 to the default time format of the dialect of interest. 838 """ 839 time_format = self.format_time(expression) 840 return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None 841 842 return _time_format
845def build_date_delta( 846 exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None 847) -> t.Callable[[t.List], E]: 848 def _builder(args: t.List) -> E: 849 unit_based = len(args) == 3 850 this = args[2] if unit_based else seq_get(args, 0) 851 unit = args[0] if unit_based else exp.Literal.string("DAY") 852 unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit 853 return exp_class(this=this, expression=seq_get(args, 1), unit=unit) 854 855 return _builder
858def build_date_delta_with_interval( 859 expression_class: t.Type[E], 860) -> t.Callable[[t.List], t.Optional[E]]: 861 def _builder(args: t.List) -> t.Optional[E]: 862 if len(args) < 2: 863 return None 864 865 interval = args[1] 866 867 if not isinstance(interval, exp.Interval): 868 raise ParseError(f"INTERVAL expression expected but got '{interval}'") 869 870 expression = interval.this 871 if expression and expression.is_string: 872 expression = exp.Literal.number(expression.this) 873 874 return expression_class(this=args[0], expression=expression, unit=unit_to_str(interval)) 875 876 return _builder
888def date_add_interval_sql( 889 data_type: str, kind: str 890) -> t.Callable[[Generator, exp.Expression], str]: 891 def func(self: Generator, expression: exp.Expression) -> str: 892 this = self.sql(expression, "this") 893 interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression)) 894 return f"{data_type}_{kind}({this}, {self.sql(interval)})" 895 896 return func
899def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]: 900 def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str: 901 args = [unit_to_str(expression), expression.this] 902 if zone: 903 args.append(expression.args.get("zone")) 904 return self.func("DATE_TRUNC", *args) 905 906 return _timestamptrunc_sql
909def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str: 910 if not expression.expression: 911 from sqlglot.optimizer.annotate_types import annotate_types 912 913 target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP 914 return self.sql(exp.cast(expression.this, target_type)) 915 if expression.text("expression").lower() in TIMEZONES: 916 return self.sql( 917 exp.AtTimeZone( 918 this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), 919 zone=expression.expression, 920 ) 921 ) 922 return self.func("TIMESTAMP", expression.this, expression.expression)
963def encode_decode_sql( 964 self: Generator, expression: exp.Expression, name: str, replace: bool = True 965) -> str: 966 charset = expression.args.get("charset") 967 if charset and charset.name.lower() != "utf-8": 968 self.unsupported(f"Expected utf-8 character set, got {charset}.") 969 970 return self.func(name, expression.this, expression.args.get("replace") if replace else None)
983def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str: 984 cond = expression.this 985 986 if isinstance(expression.this, exp.Distinct): 987 cond = expression.this.expressions[0] 988 self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM") 989 990 return self.func("sum", exp.func("if", cond, 1, 0))
993def trim_sql(self: Generator, expression: exp.Trim) -> str: 994 target = self.sql(expression, "this") 995 trim_type = self.sql(expression, "position") 996 remove_chars = self.sql(expression, "expression") 997 collation = self.sql(expression, "collation") 998 999 # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific 1000 if not remove_chars and not collation: 1001 return self.trim_sql(expression) 1002 1003 trim_type = f"{trim_type} " if trim_type else "" 1004 remove_chars = f"{remove_chars} " if remove_chars else "" 1005 from_part = "FROM " if trim_type or remove_chars else "" 1006 collation = f" COLLATE {collation}" if collation else "" 1007 return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
1028def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str: 1029 bad_args = list(filter(expression.args.get, ("position", "occurrence", "parameters"))) 1030 if bad_args: 1031 self.unsupported(f"REGEXP_EXTRACT does not support the following arg(s): {bad_args}") 1032 1033 return self.func( 1034 "REGEXP_EXTRACT", expression.this, expression.expression, expression.args.get("group") 1035 )
1038def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str: 1039 bad_args = list(filter(expression.args.get, ("position", "occurrence", "modifiers"))) 1040 if bad_args: 1041 self.unsupported(f"REGEXP_REPLACE does not support the following arg(s): {bad_args}") 1042 1043 return self.func( 1044 "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"] 1045 )
1048def pivot_column_names(aggregations: t.List[exp.Expression], dialect: DialectType) -> t.List[str]: 1049 names = [] 1050 for agg in aggregations: 1051 if isinstance(agg, exp.Alias): 1052 names.append(agg.alias) 1053 else: 1054 """ 1055 This case corresponds to aggregations without aliases being used as suffixes 1056 (e.g. col_avg(foo)). We need to unquote identifiers because they're going to 1057 be quoted in the base parser's `_parse_pivot` method, due to `to_identifier`. 1058 Otherwise, we'd end up with `col_avg(`foo`)` (notice the double quotes). 1059 """ 1060 agg_all_unquoted = agg.transform( 1061 lambda node: ( 1062 exp.Identifier(this=node.name, quoted=False) 1063 if isinstance(node, exp.Identifier) 1064 else node 1065 ) 1066 ) 1067 names.append(agg_all_unquoted.sql(dialect=dialect, normalize_functions="lower")) 1068 1069 return names
1109def arg_max_or_min_no_count(name: str) -> t.Callable[[Generator, exp.ArgMax | exp.ArgMin], str]: 1110 def _arg_max_or_min_sql(self: Generator, expression: exp.ArgMax | exp.ArgMin) -> str: 1111 if expression.args.get("count"): 1112 self.unsupported(f"Only two arguments are supported in function {name}.") 1113 1114 return self.func(name, expression.this, expression.expression) 1115 1116 return _arg_max_or_min_sql
1119def ts_or_ds_add_cast(expression: exp.TsOrDsAdd) -> exp.TsOrDsAdd: 1120 this = expression.this.copy() 1121 1122 return_type = expression.return_type 1123 if return_type.is_type(exp.DataType.Type.DATE): 1124 # If we need to cast to a DATE, we cast to TIMESTAMP first to make sure we 1125 # can truncate timestamp strings, because some dialects can't cast them to DATE 1126 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1127 1128 expression.this.replace(exp.cast(this, return_type)) 1129 return expression
1132def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE_ADD_OR_DIFF], str]: 1133 def _delta_sql(self: Generator, expression: DATE_ADD_OR_DIFF) -> str: 1134 if cast and isinstance(expression, exp.TsOrDsAdd): 1135 expression = ts_or_ds_add_cast(expression) 1136 1137 return self.func( 1138 name, 1139 unit_to_var(expression), 1140 expression.expression, 1141 expression.this, 1142 ) 1143 1144 return _delta_sql
1147def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]: 1148 unit = expression.args.get("unit") 1149 1150 if isinstance(unit, exp.Placeholder): 1151 return unit 1152 if unit: 1153 return exp.Literal.string(unit.name) 1154 return exp.Literal.string(default) if default else None
1184def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str: 1185 trunc_curr_date = exp.func("date_trunc", "month", expression.this) 1186 plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month") 1187 minus_one_day = exp.func("date_sub", plus_one_month, 1, "day") 1188 1189 return self.sql(exp.cast(minus_one_day, exp.DataType.Type.DATE))
1192def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str: 1193 """Remove table refs from columns in when statements.""" 1194 alias = expression.this.args.get("alias") 1195 1196 def normalize(identifier: t.Optional[exp.Identifier]) -> t.Optional[str]: 1197 return self.dialect.normalize_identifier(identifier).name if identifier else None 1198 1199 targets = {normalize(expression.this.this)} 1200 1201 if alias: 1202 targets.add(normalize(alias.this)) 1203 1204 for when in expression.expressions: 1205 when.transform( 1206 lambda node: ( 1207 exp.column(node.this) 1208 if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets 1209 else node 1210 ), 1211 copy=False, 1212 ) 1213 1214 return self.merge_sql(expression)
Remove table refs from columns in when statements.
1217def build_json_extract_path( 1218 expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False 1219) -> t.Callable[[t.List], F]: 1220 def _builder(args: t.List) -> F: 1221 segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()] 1222 for arg in args[1:]: 1223 if not isinstance(arg, exp.Literal): 1224 # We use the fallback parser because we can't really transpile non-literals safely 1225 return expr_type.from_arg_list(args) 1226 1227 text = arg.name 1228 if is_int(text): 1229 index = int(text) 1230 segments.append( 1231 exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1) 1232 ) 1233 else: 1234 segments.append(exp.JSONPathKey(this=text)) 1235 1236 # This is done to avoid failing in the expression validator due to the arg count 1237 del args[2:] 1238 return expr_type( 1239 this=seq_get(args, 0), 1240 expression=exp.JSONPath(expressions=segments), 1241 only_json_types=arrow_req_json_type, 1242 ) 1243 1244 return _builder
1247def json_extract_segments( 1248 name: str, quoted_index: bool = True, op: t.Optional[str] = None 1249) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]: 1250 def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str: 1251 path = expression.expression 1252 if not isinstance(path, exp.JSONPath): 1253 return rename_func(name)(self, expression) 1254 1255 segments = [] 1256 for segment in path.expressions: 1257 path = self.sql(segment) 1258 if path: 1259 if isinstance(segment, exp.JSONPathPart) and ( 1260 quoted_index or not isinstance(segment, exp.JSONPathSubscript) 1261 ): 1262 path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}" 1263 1264 segments.append(path) 1265 1266 if op: 1267 return f" {op} ".join([self.sql(expression.this), *segments]) 1268 return self.func(name, expression.this, *segments) 1269 1270 return _json_extract_segments
1280def filter_array_using_unnest(self: Generator, expression: exp.ArrayFilter) -> str: 1281 cond = expression.expression 1282 if isinstance(cond, exp.Lambda) and len(cond.expressions) == 1: 1283 alias = cond.expressions[0] 1284 cond = cond.this 1285 elif isinstance(cond, exp.Predicate): 1286 alias = "_u" 1287 else: 1288 self.unsupported("Unsupported filter condition") 1289 return "" 1290 1291 unnest = exp.Unnest(expressions=[expression.this]) 1292 filtered = exp.select(alias).from_(exp.alias_(unnest, None, table=[alias])).where(cond) 1293 return self.sql(exp.Array(expressions=[filtered]))
1305def build_default_decimal_type( 1306 precision: t.Optional[int] = None, scale: t.Optional[int] = None 1307) -> t.Callable[[exp.DataType], exp.DataType]: 1308 def _builder(dtype: exp.DataType) -> exp.DataType: 1309 if dtype.expressions or precision is None: 1310 return dtype 1311 1312 params = f"{precision}{f', {scale}' if scale is not None else ''}" 1313 return exp.DataType.build(f"DECIMAL({params})") 1314 1315 return _builder
1318def build_timestamp_from_parts(args: t.List) -> exp.Func: 1319 if len(args) == 2: 1320 # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept, 1321 # so we parse this into Anonymous for now instead of introducing complexity 1322 return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args) 1323 1324 return exp.TimestampFromParts.from_arg_list(args)