sqlglot.dialects.bigquery
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6 7from sqlglot import exp, generator, parser, tokens, transforms 8from sqlglot._typing import E 9from sqlglot.dialects.dialect import ( 10 Dialect, 11 NormalizationStrategy, 12 annotate_with_type_lambda, 13 arg_max_or_min_no_count, 14 binary_from_function, 15 date_add_interval_sql, 16 datestrtodate_sql, 17 build_formatted_time, 18 filter_array_using_unnest, 19 if_sql, 20 inline_array_unless_query, 21 max_or_greatest, 22 min_or_least, 23 no_ilike_sql, 24 build_date_delta_with_interval, 25 regexp_replace_sql, 26 rename_func, 27 sha256_sql, 28 timestrtotime_sql, 29 ts_or_ds_add_cast, 30 unit_to_var, 31 strposition_sql, 32 groupconcat_sql, 33 space_sql, 34) 35from sqlglot.helper import seq_get, split_num_words 36from sqlglot.tokens import TokenType 37from sqlglot.generator import unsupported_args 38 39if t.TYPE_CHECKING: 40 from sqlglot._typing import Lit 41 42 from sqlglot.optimizer.annotate_types import TypeAnnotator 43 44logger = logging.getLogger("sqlglot") 45 46 47JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray] 48 49DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY") 50 51 52def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str: 53 if not expression.find_ancestor(exp.From, exp.Join): 54 return self.values_sql(expression) 55 56 structs = [] 57 alias = expression.args.get("alias") 58 for tup in expression.find_all(exp.Tuple): 59 field_aliases = ( 60 alias.columns 61 if alias and alias.columns 62 else (f"_c{i}" for i in range(len(tup.expressions))) 63 ) 64 expressions = [ 65 exp.PropertyEQ(this=exp.to_identifier(name), expression=fld) 66 for name, fld in zip(field_aliases, tup.expressions) 67 ] 68 structs.append(exp.Struct(expressions=expressions)) 69 70 # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression 71 alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None 72 return self.unnest_sql( 73 exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only) 74 ) 75 76 77def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str: 78 this = expression.this 79 if isinstance(this, exp.Schema): 80 this = f"{self.sql(this, 'this')} <{self.expressions(this)}>" 81 else: 82 this = self.sql(this) 83 return f"RETURNS {this}" 84 85 86def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str: 87 returns = expression.find(exp.ReturnsProperty) 88 if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"): 89 expression.set("kind", "TABLE FUNCTION") 90 91 if isinstance(expression.expression, (exp.Subquery, exp.Literal)): 92 expression.set("expression", expression.expression.this) 93 94 return self.create_sql(expression) 95 96 97# https://issuetracker.google.com/issues/162294746 98# workaround for bigquery bug when grouping by an expression and then ordering 99# WITH x AS (SELECT 1 y) 100# SELECT y + 1 z 101# FROM x 102# GROUP BY x + 1 103# ORDER by z 104def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: 105 if isinstance(expression, exp.Select): 106 group = expression.args.get("group") 107 order = expression.args.get("order") 108 109 if group and order: 110 aliases = { 111 select.this: select.args["alias"] 112 for select in expression.selects 113 if isinstance(select, exp.Alias) 114 } 115 116 for grouped in group.expressions: 117 if grouped.is_int: 118 continue 119 alias = aliases.get(grouped) 120 if alias: 121 grouped.replace(exp.column(alias)) 122 123 return expression 124 125 126def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: 127 """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" 128 if isinstance(expression, exp.CTE) and expression.alias_column_names: 129 cte_query = expression.this 130 131 if cte_query.is_star: 132 logger.warning( 133 "Can't push down CTE column names for star queries. Run the query through" 134 " the optimizer or use 'qualify' to expand the star projections first." 135 ) 136 return expression 137 138 column_names = expression.alias_column_names 139 expression.args["alias"].set("columns", None) 140 141 for name, select in zip(column_names, cte_query.selects): 142 to_replace = select 143 144 if isinstance(select, exp.Alias): 145 select = select.this 146 147 # Inner aliases are shadowed by the CTE column names 148 to_replace.replace(exp.alias_(select, name)) 149 150 return expression 151 152 153def _build_parse_timestamp(args: t.List) -> exp.StrToTime: 154 this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)]) 155 this.set("zone", seq_get(args, 2)) 156 return this 157 158 159def _build_timestamp(args: t.List) -> exp.Timestamp: 160 timestamp = exp.Timestamp.from_arg_list(args) 161 timestamp.set("with_tz", True) 162 return timestamp 163 164 165def _build_date(args: t.List) -> exp.Date | exp.DateFromParts: 166 expr_type = exp.DateFromParts if len(args) == 3 else exp.Date 167 return expr_type.from_arg_list(args) 168 169 170def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5: 171 # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation 172 arg = seq_get(args, 0) 173 return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg) 174 175 176def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: 177 return self.sql( 178 exp.Exists( 179 this=exp.select("1") 180 .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) 181 .where(exp.column("_col").eq(expression.right)) 182 ) 183 ) 184 185 186def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: 187 return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) 188 189 190def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: 191 expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)) 192 expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)) 193 unit = unit_to_var(expression) 194 return self.func("DATE_DIFF", expression.this, expression.expression, unit) 195 196 197def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: 198 scale = expression.args.get("scale") 199 timestamp = expression.this 200 201 if scale in (None, exp.UnixToTime.SECONDS): 202 return self.func("TIMESTAMP_SECONDS", timestamp) 203 if scale == exp.UnixToTime.MILLIS: 204 return self.func("TIMESTAMP_MILLIS", timestamp) 205 if scale == exp.UnixToTime.MICROS: 206 return self.func("TIMESTAMP_MICROS", timestamp) 207 208 unix_seconds = exp.cast( 209 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 210 ) 211 return self.func("TIMESTAMP_SECONDS", unix_seconds) 212 213 214def _build_time(args: t.List) -> exp.Func: 215 if len(args) == 1: 216 return exp.TsOrDsToTime(this=args[0]) 217 if len(args) == 2: 218 return exp.Time.from_arg_list(args) 219 return exp.TimeFromParts.from_arg_list(args) 220 221 222def _build_datetime(args: t.List) -> exp.Func: 223 if len(args) == 1: 224 return exp.TsOrDsToDatetime.from_arg_list(args) 225 if len(args) == 2: 226 return exp.Datetime.from_arg_list(args) 227 return exp.TimestampFromParts.from_arg_list(args) 228 229 230def _build_regexp_extract( 231 expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None 232) -> t.Callable[[t.List], E]: 233 def _builder(args: t.List) -> E: 234 try: 235 group = re.compile(args[1].name).groups == 1 236 except re.error: 237 group = False 238 239 # Default group is used for the transpilation of REGEXP_EXTRACT_ALL 240 return expr_type( 241 this=seq_get(args, 0), 242 expression=seq_get(args, 1), 243 position=seq_get(args, 2), 244 occurrence=seq_get(args, 3), 245 group=exp.Literal.number(1) if group else default_group, 246 ) 247 248 return _builder 249 250 251def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 252 def _builder(args: t.List, dialect: Dialect) -> E: 253 if len(args) == 1: 254 # The default value for the JSONPath is '$' i.e all of the data 255 args.append(exp.Literal.string("$")) 256 return parser.build_extract_json_with_path(expr_type)(args, dialect) 257 258 return _builder 259 260 261def _str_to_datetime_sql( 262 self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime 263) -> str: 264 this = self.sql(expression, "this") 265 dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP" 266 267 if expression.args.get("safe"): 268 fmt = self.format_time( 269 expression, 270 self.dialect.INVERSE_FORMAT_MAPPING, 271 self.dialect.INVERSE_FORMAT_TRIE, 272 ) 273 return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})" 274 275 fmt = self.format_time(expression) 276 return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone")) 277 278 279def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E: 280 """ 281 Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention: 282 +---------+---------+---------+------------+---------+ 283 | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 284 +---------+---------+---------+------------+---------+ 285 | OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 286 +---------+---------+---------+------------+---------+ 287 """ 288 self._annotate_args(expression) 289 290 this: exp.Expression = expression.this 291 292 self._set_type( 293 expression, 294 exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type, 295 ) 296 return expression 297 298 299@unsupported_args("ins_cost", "del_cost", "sub_cost") 300def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str: 301 max_dist = expression.args.get("max_dist") 302 if max_dist: 303 max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist) 304 305 return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist) 306 307 308def _build_levenshtein(args: t.List) -> exp.Levenshtein: 309 max_dist = seq_get(args, 2) 310 return exp.Levenshtein( 311 this=seq_get(args, 0), 312 expression=seq_get(args, 1), 313 max_dist=max_dist.expression if max_dist else None, 314 ) 315 316 317def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]: 318 def _builder(args: t.List) -> exp.TimeToStr: 319 return exp.TimeToStr( 320 this=expr_type(this=seq_get(args, 1)), 321 format=seq_get(args, 0), 322 zone=seq_get(args, 2), 323 ) 324 325 return _builder 326 327 328def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous: 329 if len(args) == 3: 330 return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args) 331 332 # Lowercase the operands in case of transpilation, as exp.Contains 333 # is case-sensitive on other dialects 334 this = exp.Lower(this=seq_get(args, 0)) 335 expr = exp.Lower(this=seq_get(args, 1)) 336 337 return exp.Contains(this=this, expression=expr) 338 339 340def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str: 341 name = (expression._meta and expression.meta.get("name")) or expression.sql_name() 342 upper = name.upper() 343 344 dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS 345 346 if dquote_escaping: 347 self._quote_json_path_key_using_brackets = False 348 349 sql = rename_func(upper)(self, expression) 350 351 if dquote_escaping: 352 self._quote_json_path_key_using_brackets = True 353 354 return sql 355 356 357def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat: 358 annotated = self._annotate_by_args(expression, "expressions") 359 360 # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING 361 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat 362 if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN): 363 annotated.type = exp.DataType.Type.VARCHAR 364 365 return annotated 366 367 368def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array: 369 array_args = expression.expressions 370 371 # BigQuery behaves as follows: 372 # 373 # SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t -- foo, STRUCT<STRING> 374 # SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo')) -- foo, ARRAY<STRING> 375 if ( 376 len(array_args) == 1 377 and isinstance(select := array_args[0].unnest(), exp.Select) 378 and (query_type := select.meta.get("query_type")) is not None 379 and query_type.is_type(exp.DataType.Type.STRUCT) 380 and len(query_type.expressions) == 1 381 and isinstance(col_def := query_type.expressions[0], exp.ColumnDef) 382 and (projection_type := col_def.kind) is not None 383 and not projection_type.is_type(exp.DataType.Type.UNKNOWN) 384 ): 385 array_type = exp.DataType( 386 this=exp.DataType.Type.ARRAY, 387 expressions=[projection_type.copy()], 388 nested=True, 389 ) 390 return self._annotate_with_type(expression, array_type) 391 392 return self._annotate_by_args(expression, "expressions", array=True) 393 394 395class BigQuery(Dialect): 396 WEEK_OFFSET = -1 397 UNNEST_COLUMN_ONLY = True 398 SUPPORTS_USER_DEFINED_TYPES = False 399 SUPPORTS_SEMI_ANTI_JOIN = False 400 LOG_BASE_FIRST = False 401 HEX_LOWERCASE = True 402 FORCE_EARLY_ALIAS_REF_EXPANSION = True 403 PRESERVE_ORIGINAL_NAMES = True 404 HEX_STRING_IS_INTEGER_TYPE = True 405 406 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 407 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 408 409 # bigquery udfs are case sensitive 410 NORMALIZE_FUNCTIONS = False 411 412 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 413 TIME_MAPPING = { 414 "%D": "%m/%d/%y", 415 "%E6S": "%S.%f", 416 "%e": "%-d", 417 } 418 419 FORMAT_MAPPING = { 420 "DD": "%d", 421 "MM": "%m", 422 "MON": "%b", 423 "MONTH": "%B", 424 "YYYY": "%Y", 425 "YY": "%y", 426 "HH": "%I", 427 "HH12": "%I", 428 "HH24": "%H", 429 "MI": "%M", 430 "SS": "%S", 431 "SSSSS": "%f", 432 "TZH": "%z", 433 } 434 435 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 436 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 437 # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix 438 # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column 439 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE", "_TABLE_SUFFIX", "_FILE_NAME"} 440 441 # All set operations require either a DISTINCT or ALL specifier 442 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 443 444 # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types 445 TYPE_TO_EXPRESSIONS = { 446 **Dialect.TYPE_TO_EXPRESSIONS, 447 exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP], 448 } 449 TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP) 450 451 ANNOTATORS = { 452 **Dialect.ANNOTATORS, 453 **{ 454 expr_type: annotate_with_type_lambda(data_type) 455 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 456 for expr_type in expressions 457 }, 458 **{ 459 expr_type: lambda self, e: _annotate_math_functions(self, e) 460 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 461 }, 462 **{ 463 expr_type: lambda self, e: self._annotate_by_args(e, "this") 464 for expr_type in ( 465 exp.Left, 466 exp.Right, 467 exp.Lower, 468 exp.Upper, 469 exp.Pad, 470 exp.Trim, 471 exp.RegexpExtract, 472 exp.RegexpReplace, 473 exp.Repeat, 474 exp.Substring, 475 ) 476 }, 477 exp.Array: _annotate_array, 478 exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 479 exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 480 exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 481 exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 482 exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 483 exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 484 exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 485 exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 486 exp.CodePointsToString: lambda self, e: self._annotate_with_type( 487 e, exp.DataType.Type.VARCHAR 488 ), 489 exp.Concat: _annotate_concat, 490 exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 491 exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 492 exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 493 exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE), 494 exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"), 495 exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type( 496 e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery") 497 ), 498 exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 499 exp.JSONExtractScalar: lambda self, e: self._annotate_with_type( 500 e, exp.DataType.Type.VARCHAR 501 ), 502 exp.JSONValueArray: lambda self, e: self._annotate_with_type( 503 e, exp.DataType.build("ARRAY<VARCHAR>") 504 ), 505 exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 506 exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"), 507 exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 508 exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME), 509 exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"), 510 exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 511 exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 512 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 513 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 514 exp.TimestampFromParts: lambda self, e: self._annotate_with_type( 515 e, exp.DataType.Type.DATETIME 516 ), 517 exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"), 518 exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 519 exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 520 exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 521 exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 522 } 523 524 def normalize_identifier(self, expression: E) -> E: 525 if ( 526 isinstance(expression, exp.Identifier) 527 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 528 ): 529 parent = expression.parent 530 while isinstance(parent, exp.Dot): 531 parent = parent.parent 532 533 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 534 # by default. The following check uses a heuristic to detect tables based on whether 535 # they are qualified. This should generally be correct, because tables in BigQuery 536 # must be qualified with at least a dataset, unless @@dataset_id is set. 537 case_sensitive = ( 538 isinstance(parent, exp.UserDefinedFunction) 539 or ( 540 isinstance(parent, exp.Table) 541 and parent.db 542 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 543 ) 544 or expression.meta.get("is_table") 545 ) 546 if not case_sensitive: 547 expression.set("this", expression.this.lower()) 548 549 return t.cast(E, expression) 550 551 return super().normalize_identifier(expression) 552 553 class Tokenizer(tokens.Tokenizer): 554 QUOTES = ["'", '"', '"""', "'''"] 555 COMMENTS = ["--", "#", ("/*", "*/")] 556 IDENTIFIERS = ["`"] 557 STRING_ESCAPES = ["\\"] 558 559 HEX_STRINGS = [("0x", ""), ("0X", "")] 560 561 BYTE_STRINGS = [ 562 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 563 ] 564 565 RAW_STRINGS = [ 566 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 567 ] 568 569 NESTED_COMMENTS = False 570 571 KEYWORDS = { 572 **tokens.Tokenizer.KEYWORDS, 573 "ANY TYPE": TokenType.VARIANT, 574 "BEGIN": TokenType.COMMAND, 575 "BEGIN TRANSACTION": TokenType.BEGIN, 576 "BYTEINT": TokenType.INT, 577 "BYTES": TokenType.BINARY, 578 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 579 "DATETIME": TokenType.TIMESTAMP, 580 "DECLARE": TokenType.DECLARE, 581 "ELSEIF": TokenType.COMMAND, 582 "EXCEPTION": TokenType.COMMAND, 583 "EXPORT": TokenType.EXPORT, 584 "FLOAT64": TokenType.DOUBLE, 585 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 586 "MODEL": TokenType.MODEL, 587 "NOT DETERMINISTIC": TokenType.VOLATILE, 588 "RECORD": TokenType.STRUCT, 589 "TIMESTAMP": TokenType.TIMESTAMPTZ, 590 } 591 KEYWORDS.pop("DIV") 592 KEYWORDS.pop("VALUES") 593 KEYWORDS.pop("/*+") 594 595 class Parser(parser.Parser): 596 PREFIXED_PIVOT_COLUMNS = True 597 LOG_DEFAULTS_TO_LN = True 598 SUPPORTS_IMPLICIT_UNNEST = True 599 JOINS_HAVE_EQUAL_PRECEDENCE = True 600 601 # BigQuery does not allow ASC/DESC to be used as an identifier 602 ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC} 603 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 604 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 605 COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - { 606 TokenType.ASC, 607 TokenType.DESC, 608 } 609 UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 610 611 FUNCTIONS = { 612 **parser.Parser.FUNCTIONS, 613 "CONTAINS_SUBSTR": _build_contains_substring, 614 "DATE": _build_date, 615 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 616 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 617 "DATE_TRUNC": lambda args: exp.DateTrunc( 618 unit=seq_get(args, 1), 619 this=seq_get(args, 0), 620 zone=seq_get(args, 2), 621 ), 622 "DATETIME": _build_datetime, 623 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 624 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 625 "DIV": binary_from_function(exp.IntDiv), 626 "EDIT_DISTANCE": _build_levenshtein, 627 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 628 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 629 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 630 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 631 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 632 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 633 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 634 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 635 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 636 "MD5": exp.MD5Digest.from_arg_list, 637 "TO_HEX": _build_to_hex, 638 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 639 [seq_get(args, 1), seq_get(args, 0)] 640 ), 641 "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")( 642 [seq_get(args, 1), seq_get(args, 0)] 643 ), 644 "PARSE_TIMESTAMP": _build_parse_timestamp, 645 "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")( 646 [seq_get(args, 1), seq_get(args, 0)] 647 ), 648 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 649 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 650 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 651 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 652 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 653 ), 654 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 655 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 656 "SPLIT": lambda args: exp.Split( 657 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 658 this=seq_get(args, 0), 659 expression=seq_get(args, 1) or exp.Literal.string(","), 660 ), 661 "STRPOS": exp.StrPosition.from_arg_list, 662 "TIME": _build_time, 663 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 664 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 665 "TIMESTAMP": _build_timestamp, 666 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 667 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 668 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 669 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 670 ), 671 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 672 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 673 ), 674 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 675 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 676 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 677 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 678 "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime), 679 "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))), 680 } 681 682 FUNCTION_PARSERS = { 683 **parser.Parser.FUNCTION_PARSERS, 684 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 685 "JSON_ARRAY": lambda self: self.expression( 686 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 687 ), 688 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 689 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 690 } 691 FUNCTION_PARSERS.pop("TRIM") 692 693 NO_PAREN_FUNCTIONS = { 694 **parser.Parser.NO_PAREN_FUNCTIONS, 695 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 696 } 697 698 NESTED_TYPE_TOKENS = { 699 *parser.Parser.NESTED_TYPE_TOKENS, 700 TokenType.TABLE, 701 } 702 703 PROPERTY_PARSERS = { 704 **parser.Parser.PROPERTY_PARSERS, 705 "NOT DETERMINISTIC": lambda self: self.expression( 706 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 707 ), 708 "OPTIONS": lambda self: self._parse_with_property(), 709 } 710 711 CONSTRAINT_PARSERS = { 712 **parser.Parser.CONSTRAINT_PARSERS, 713 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 714 } 715 716 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 717 RANGE_PARSERS.pop(TokenType.OVERLAPS) 718 719 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 720 721 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 722 723 STATEMENT_PARSERS = { 724 **parser.Parser.STATEMENT_PARSERS, 725 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 726 TokenType.END: lambda self: self._parse_as_command(self._prev), 727 TokenType.FOR: lambda self: self._parse_for_in(), 728 TokenType.EXPORT: lambda self: self._parse_export_data(), 729 TokenType.DECLARE: lambda self: self._parse_declare(), 730 } 731 732 BRACKET_OFFSETS = { 733 "OFFSET": (0, False), 734 "ORDINAL": (1, False), 735 "SAFE_OFFSET": (0, True), 736 "SAFE_ORDINAL": (1, True), 737 } 738 739 def _parse_for_in(self) -> exp.ForIn: 740 this = self._parse_range() 741 self._match_text_seq("DO") 742 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 743 744 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 745 this = super()._parse_table_part(schema=schema) or self._parse_number() 746 747 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 748 if isinstance(this, exp.Identifier): 749 table_name = this.name 750 while self._match(TokenType.DASH, advance=False) and self._next: 751 start = self._curr 752 while self._is_connected() and not self._match_set( 753 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 754 ): 755 self._advance() 756 757 if start == self._curr: 758 break 759 760 table_name += self._find_sql(start, self._prev) 761 762 this = exp.Identifier( 763 this=table_name, quoted=this.args.get("quoted") 764 ).update_positions(this) 765 elif isinstance(this, exp.Literal): 766 table_name = this.name 767 768 if self._is_connected() and self._parse_var(any_token=True): 769 table_name += self._prev.text 770 771 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 772 773 return this 774 775 def _parse_table_parts( 776 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 777 ) -> exp.Table: 778 table = super()._parse_table_parts( 779 schema=schema, is_db_reference=is_db_reference, wildcard=True 780 ) 781 782 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 783 if not table.catalog: 784 if table.db: 785 previous_db = table.args["db"] 786 parts = table.db.split(".") 787 if len(parts) == 2 and not table.args["db"].quoted: 788 table.set( 789 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 790 ) 791 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 792 else: 793 previous_this = table.this 794 parts = table.name.split(".") 795 if len(parts) == 2 and not table.this.quoted: 796 table.set( 797 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 798 ) 799 table.set( 800 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 801 ) 802 803 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 804 alias = table.this 805 catalog, db, this, *rest = ( 806 exp.to_identifier(p, quoted=True) 807 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 808 ) 809 810 for part in (catalog, db, this): 811 if part: 812 part.update_positions(table.this) 813 814 if rest and this: 815 this = exp.Dot.build([this, *rest]) # type: ignore 816 817 table = exp.Table( 818 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 819 ) 820 table.meta["quoted_table"] = True 821 else: 822 alias = None 823 824 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 825 # dataset, so if the project identifier is omitted we need to fix the ast so that 826 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 827 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 828 # views, because it would seem like the "catalog" part is set, when it'd actually 829 # be the region/dataset. Merging the two identifiers into a single one is done to 830 # avoid producing a 4-part Table reference, which would cause issues in the schema 831 # module, when there are 3-part table names mixed with information schema views. 832 # 833 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 834 table_parts = table.parts 835 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 836 # We need to alias the table here to avoid breaking existing qualified columns. 837 # This is expected to be safe, because if there's an actual alias coming up in 838 # the token stream, it will overwrite this one. If there isn't one, we are only 839 # exposing the name that can be used to reference the view explicitly (a no-op). 840 exp.alias_( 841 table, 842 t.cast(exp.Identifier, alias or table_parts[-1]), 843 table=True, 844 copy=False, 845 ) 846 847 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 848 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 849 line=table_parts[-2].meta.get("line"), 850 col=table_parts[-1].meta.get("col"), 851 start=table_parts[-2].meta.get("start"), 852 end=table_parts[-1].meta.get("end"), 853 ) 854 table.set("this", new_this) 855 table.set("db", seq_get(table_parts, -3)) 856 table.set("catalog", seq_get(table_parts, -4)) 857 858 return table 859 860 def _parse_column(self) -> t.Optional[exp.Expression]: 861 column = super()._parse_column() 862 if isinstance(column, exp.Column): 863 parts = column.parts 864 if any("." in p.name for p in parts): 865 catalog, db, table, this, *rest = ( 866 exp.to_identifier(p, quoted=True) 867 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 868 ) 869 870 if rest and this: 871 this = exp.Dot.build([this, *rest]) # type: ignore 872 873 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 874 column.meta["quoted_column"] = True 875 876 return column 877 878 @t.overload 879 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 880 881 @t.overload 882 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 883 884 def _parse_json_object(self, agg=False): 885 json_object = super()._parse_json_object() 886 array_kv_pair = seq_get(json_object.expressions, 0) 887 888 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 889 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 890 if ( 891 array_kv_pair 892 and isinstance(array_kv_pair.this, exp.Array) 893 and isinstance(array_kv_pair.expression, exp.Array) 894 ): 895 keys = array_kv_pair.this.expressions 896 values = array_kv_pair.expression.expressions 897 898 json_object.set( 899 "expressions", 900 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 901 ) 902 903 return json_object 904 905 def _parse_bracket( 906 self, this: t.Optional[exp.Expression] = None 907 ) -> t.Optional[exp.Expression]: 908 bracket = super()._parse_bracket(this) 909 910 if this is bracket: 911 return bracket 912 913 if isinstance(bracket, exp.Bracket): 914 for expression in bracket.expressions: 915 name = expression.name.upper() 916 917 if name not in self.BRACKET_OFFSETS: 918 break 919 920 offset, safe = self.BRACKET_OFFSETS[name] 921 bracket.set("offset", offset) 922 bracket.set("safe", safe) 923 expression.replace(expression.expressions[0]) 924 925 return bracket 926 927 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 928 unnest = super()._parse_unnest(with_alias=with_alias) 929 930 if not unnest: 931 return None 932 933 unnest_expr = seq_get(unnest.expressions, 0) 934 if unnest_expr: 935 from sqlglot.optimizer.annotate_types import annotate_types 936 937 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 938 939 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 940 # in contrast to other dialects such as DuckDB which flattens only the array by default 941 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 942 array_elem.is_type(exp.DataType.Type.STRUCT) 943 for array_elem in unnest_expr._type.expressions 944 ): 945 unnest.set("explode_array", True) 946 947 return unnest 948 949 def _parse_make_interval(self) -> exp.MakeInterval: 950 expr = exp.MakeInterval() 951 952 for arg_key in expr.arg_types: 953 value = self._parse_lambda() 954 955 if not value: 956 break 957 958 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 959 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 960 if isinstance(value, exp.Kwarg): 961 arg_key = value.this.name 962 963 expr.set(arg_key, value) 964 965 self._match(TokenType.COMMA) 966 967 return expr 968 969 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 970 expr = self.expression( 971 exp.FeaturesAtTime, 972 this=(self._match(TokenType.TABLE) and self._parse_table()) 973 or self._parse_select(nested=True), 974 ) 975 976 while self._match(TokenType.COMMA): 977 arg = self._parse_lambda() 978 979 # Get the LHS of the Kwarg and set the arg to that value, e.g 980 # "num_rows => 1" sets the expr's `num_rows` arg 981 if arg: 982 expr.set(arg.this.name, arg) 983 984 return expr 985 986 def _parse_export_data(self) -> exp.Export: 987 self._match_text_seq("DATA") 988 989 return self.expression( 990 exp.Export, 991 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 992 options=self._parse_properties(), 993 this=self._match_text_seq("AS") and self._parse_select(), 994 ) 995 996 class Generator(generator.Generator): 997 INTERVAL_ALLOWS_PLURAL_FORM = False 998 JOIN_HINTS = False 999 QUERY_HINTS = False 1000 TABLE_HINTS = False 1001 LIMIT_FETCH = "LIMIT" 1002 RENAME_TABLE_WITH_DB = False 1003 NVL2_SUPPORTED = False 1004 UNNEST_WITH_ORDINALITY = False 1005 COLLATE_IS_FUNC = True 1006 LIMIT_ONLY_LITERALS = True 1007 SUPPORTS_TABLE_ALIAS_COLUMNS = False 1008 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 1009 JSON_KEY_VALUE_PAIR_SEP = "," 1010 NULL_ORDERING_SUPPORTED = False 1011 IGNORE_NULLS_IN_FUNC = True 1012 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 1013 CAN_IMPLEMENT_ARRAY_ANY = True 1014 SUPPORTS_TO_NUMBER = False 1015 NAMED_PLACEHOLDER_TOKEN = "@" 1016 HEX_FUNC = "TO_HEX" 1017 WITH_PROPERTIES_PREFIX = "OPTIONS" 1018 SUPPORTS_EXPLODING_PROJECTIONS = False 1019 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1020 SUPPORTS_UNIX_SECONDS = True 1021 1022 TS_OR_DS_TYPES = ( 1023 exp.TsOrDsToDatetime, 1024 exp.TsOrDsToTimestamp, 1025 exp.TsOrDsToTime, 1026 exp.TsOrDsToDate, 1027 ) 1028 1029 TRANSFORMS = { 1030 **generator.Generator.TRANSFORMS, 1031 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1032 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1033 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1034 exp.Array: inline_array_unless_query, 1035 exp.ArrayContains: _array_contains_sql, 1036 exp.ArrayFilter: filter_array_using_unnest, 1037 exp.ArrayRemove: filter_array_using_unnest, 1038 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1039 exp.CollateProperty: lambda self, e: ( 1040 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1041 if e.args.get("default") 1042 else f"COLLATE {self.sql(e, 'this')}" 1043 ), 1044 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1045 exp.CountIf: rename_func("COUNTIF"), 1046 exp.Create: _create_sql, 1047 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1048 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1049 exp.DateDiff: lambda self, e: self.func( 1050 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1051 ), 1052 exp.DateFromParts: rename_func("DATE"), 1053 exp.DateStrToDate: datestrtodate_sql, 1054 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1055 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1056 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1057 exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), 1058 exp.FromTimeZone: lambda self, e: self.func( 1059 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1060 ), 1061 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1062 exp.GroupConcat: lambda self, e: groupconcat_sql( 1063 self, e, func_name="STRING_AGG", within_group=False 1064 ), 1065 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1066 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1067 exp.If: if_sql(false_value="NULL"), 1068 exp.ILike: no_ilike_sql, 1069 exp.IntDiv: rename_func("DIV"), 1070 exp.Int64: rename_func("INT64"), 1071 exp.JSONExtract: _json_extract_sql, 1072 exp.JSONExtractArray: _json_extract_sql, 1073 exp.JSONExtractScalar: _json_extract_sql, 1074 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1075 exp.Levenshtein: _levenshtein_sql, 1076 exp.Max: max_or_greatest, 1077 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1078 exp.MD5Digest: rename_func("MD5"), 1079 exp.Min: min_or_least, 1080 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1081 exp.RegexpExtract: lambda self, e: self.func( 1082 "REGEXP_EXTRACT", 1083 e.this, 1084 e.expression, 1085 e.args.get("position"), 1086 e.args.get("occurrence"), 1087 ), 1088 exp.RegexpExtractAll: lambda self, e: self.func( 1089 "REGEXP_EXTRACT_ALL", e.this, e.expression 1090 ), 1091 exp.RegexpReplace: regexp_replace_sql, 1092 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1093 exp.ReturnsProperty: _returnsproperty_sql, 1094 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1095 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1096 exp.ParseDatetime: lambda self, e: self.func( 1097 "PARSE_DATETIME", self.format_time(e), e.this 1098 ), 1099 exp.Select: transforms.preprocess( 1100 [ 1101 transforms.explode_projection_to_unnest(), 1102 transforms.unqualify_unnest, 1103 transforms.eliminate_distinct_on, 1104 _alias_ordered_group, 1105 transforms.eliminate_semi_and_anti_joins, 1106 ] 1107 ), 1108 exp.SHA: rename_func("SHA1"), 1109 exp.SHA2: sha256_sql, 1110 exp.Space: space_sql, 1111 exp.StabilityProperty: lambda self, e: ( 1112 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1113 ), 1114 exp.String: rename_func("STRING"), 1115 exp.StrPosition: lambda self, e: ( 1116 strposition_sql( 1117 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1118 ) 1119 ), 1120 exp.StrToDate: _str_to_datetime_sql, 1121 exp.StrToTime: _str_to_datetime_sql, 1122 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1123 exp.TimeFromParts: rename_func("TIME"), 1124 exp.TimestampFromParts: rename_func("DATETIME"), 1125 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1126 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1127 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1128 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1129 exp.TimeStrToTime: timestrtotime_sql, 1130 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1131 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1132 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1133 exp.TsOrDsToTime: rename_func("TIME"), 1134 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1135 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1136 exp.Unhex: rename_func("FROM_HEX"), 1137 exp.UnixDate: rename_func("UNIX_DATE"), 1138 exp.UnixToTime: _unix_to_time_sql, 1139 exp.Uuid: lambda *_: "GENERATE_UUID()", 1140 exp.Values: _derived_table_values_to_unnest, 1141 exp.VariancePop: rename_func("VAR_POP"), 1142 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1143 } 1144 1145 SUPPORTED_JSON_PATH_PARTS = { 1146 exp.JSONPathKey, 1147 exp.JSONPathRoot, 1148 exp.JSONPathSubscript, 1149 } 1150 1151 TYPE_MAPPING = { 1152 **generator.Generator.TYPE_MAPPING, 1153 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1154 exp.DataType.Type.BIGINT: "INT64", 1155 exp.DataType.Type.BINARY: "BYTES", 1156 exp.DataType.Type.BLOB: "BYTES", 1157 exp.DataType.Type.BOOLEAN: "BOOL", 1158 exp.DataType.Type.CHAR: "STRING", 1159 exp.DataType.Type.DECIMAL: "NUMERIC", 1160 exp.DataType.Type.DOUBLE: "FLOAT64", 1161 exp.DataType.Type.FLOAT: "FLOAT64", 1162 exp.DataType.Type.INT: "INT64", 1163 exp.DataType.Type.NCHAR: "STRING", 1164 exp.DataType.Type.NVARCHAR: "STRING", 1165 exp.DataType.Type.SMALLINT: "INT64", 1166 exp.DataType.Type.TEXT: "STRING", 1167 exp.DataType.Type.TIMESTAMP: "DATETIME", 1168 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1169 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1170 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1171 exp.DataType.Type.TINYINT: "INT64", 1172 exp.DataType.Type.ROWVERSION: "BYTES", 1173 exp.DataType.Type.UUID: "STRING", 1174 exp.DataType.Type.VARBINARY: "BYTES", 1175 exp.DataType.Type.VARCHAR: "STRING", 1176 exp.DataType.Type.VARIANT: "ANY TYPE", 1177 } 1178 1179 PROPERTIES_LOCATION = { 1180 **generator.Generator.PROPERTIES_LOCATION, 1181 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1182 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1183 } 1184 1185 # WINDOW comes after QUALIFY 1186 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1187 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1188 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1189 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1190 } 1191 1192 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1193 RESERVED_KEYWORDS = { 1194 "all", 1195 "and", 1196 "any", 1197 "array", 1198 "as", 1199 "asc", 1200 "assert_rows_modified", 1201 "at", 1202 "between", 1203 "by", 1204 "case", 1205 "cast", 1206 "collate", 1207 "contains", 1208 "create", 1209 "cross", 1210 "cube", 1211 "current", 1212 "default", 1213 "define", 1214 "desc", 1215 "distinct", 1216 "else", 1217 "end", 1218 "enum", 1219 "escape", 1220 "except", 1221 "exclude", 1222 "exists", 1223 "extract", 1224 "false", 1225 "fetch", 1226 "following", 1227 "for", 1228 "from", 1229 "full", 1230 "group", 1231 "grouping", 1232 "groups", 1233 "hash", 1234 "having", 1235 "if", 1236 "ignore", 1237 "in", 1238 "inner", 1239 "intersect", 1240 "interval", 1241 "into", 1242 "is", 1243 "join", 1244 "lateral", 1245 "left", 1246 "like", 1247 "limit", 1248 "lookup", 1249 "merge", 1250 "natural", 1251 "new", 1252 "no", 1253 "not", 1254 "null", 1255 "nulls", 1256 "of", 1257 "on", 1258 "or", 1259 "order", 1260 "outer", 1261 "over", 1262 "partition", 1263 "preceding", 1264 "proto", 1265 "qualify", 1266 "range", 1267 "recursive", 1268 "respect", 1269 "right", 1270 "rollup", 1271 "rows", 1272 "select", 1273 "set", 1274 "some", 1275 "struct", 1276 "tablesample", 1277 "then", 1278 "to", 1279 "treat", 1280 "true", 1281 "unbounded", 1282 "union", 1283 "unnest", 1284 "using", 1285 "when", 1286 "where", 1287 "window", 1288 "with", 1289 "within", 1290 } 1291 1292 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1293 unit = expression.unit 1294 unit_sql = unit.name if unit.is_string else self.sql(unit) 1295 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1296 1297 def mod_sql(self, expression: exp.Mod) -> str: 1298 this = expression.this 1299 expr = expression.expression 1300 return self.func( 1301 "MOD", 1302 this.unnest() if isinstance(this, exp.Paren) else this, 1303 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1304 ) 1305 1306 def column_parts(self, expression: exp.Column) -> str: 1307 if expression.meta.get("quoted_column"): 1308 # If a column reference is of the form `dataset.table`.name, we need 1309 # to preserve the quoted table path, otherwise the reference breaks 1310 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1311 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1312 return f"{table_path}.{self.sql(expression, 'this')}" 1313 1314 return super().column_parts(expression) 1315 1316 def table_parts(self, expression: exp.Table) -> str: 1317 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1318 # we need to make sure the correct quoting is used in each case. 1319 # 1320 # For example, if there is a CTE x that clashes with a schema name, then the former will 1321 # return the table y in that schema, whereas the latter will return the CTE's y column: 1322 # 1323 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1324 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1325 if expression.meta.get("quoted_table"): 1326 table_parts = ".".join(p.name for p in expression.parts) 1327 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1328 1329 return super().table_parts(expression) 1330 1331 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1332 this = expression.this 1333 if isinstance(this, exp.TsOrDsToDatetime): 1334 func_name = "FORMAT_DATETIME" 1335 elif isinstance(this, exp.TsOrDsToTimestamp): 1336 func_name = "FORMAT_TIMESTAMP" 1337 elif isinstance(this, exp.TsOrDsToTime): 1338 func_name = "FORMAT_TIME" 1339 else: 1340 func_name = "FORMAT_DATE" 1341 1342 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1343 return self.func( 1344 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1345 ) 1346 1347 def eq_sql(self, expression: exp.EQ) -> str: 1348 # Operands of = cannot be NULL in BigQuery 1349 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1350 if not isinstance(expression.parent, exp.Update): 1351 return "NULL" 1352 1353 return self.binary(expression, "=") 1354 1355 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1356 parent = expression.parent 1357 1358 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1359 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1360 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1361 return self.func( 1362 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1363 ) 1364 1365 return super().attimezone_sql(expression) 1366 1367 def trycast_sql(self, expression: exp.TryCast) -> str: 1368 return self.cast_sql(expression, safe_prefix="SAFE_") 1369 1370 def bracket_sql(self, expression: exp.Bracket) -> str: 1371 this = expression.this 1372 expressions = expression.expressions 1373 1374 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1375 arg = expressions[0] 1376 if arg.type is None: 1377 from sqlglot.optimizer.annotate_types import annotate_types 1378 1379 arg = annotate_types(arg, dialect=self.dialect) 1380 1381 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1382 # BQ doesn't support bracket syntax with string values for structs 1383 return f"{self.sql(this)}.{arg.name}" 1384 1385 expressions_sql = self.expressions(expression, flat=True) 1386 offset = expression.args.get("offset") 1387 1388 if offset == 0: 1389 expressions_sql = f"OFFSET({expressions_sql})" 1390 elif offset == 1: 1391 expressions_sql = f"ORDINAL({expressions_sql})" 1392 elif offset is not None: 1393 self.unsupported(f"Unsupported array offset: {offset}") 1394 1395 if expression.args.get("safe"): 1396 expressions_sql = f"SAFE_{expressions_sql}" 1397 1398 return f"{self.sql(this)}[{expressions_sql}]" 1399 1400 def in_unnest_op(self, expression: exp.Unnest) -> str: 1401 return self.sql(expression) 1402 1403 def version_sql(self, expression: exp.Version) -> str: 1404 if expression.name == "TIMESTAMP": 1405 expression.set("this", "SYSTEM_TIME") 1406 return super().version_sql(expression) 1407 1408 def contains_sql(self, expression: exp.Contains) -> str: 1409 this = expression.this 1410 expr = expression.expression 1411 1412 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1413 this = this.this 1414 expr = expr.this 1415 1416 return self.func("CONTAINS_SUBSTR", this, expr) 1417 1418 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1419 this = expression.this 1420 1421 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1422 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1423 # because they aren't literals and so the above syntax is invalid BigQuery. 1424 if isinstance(this, exp.Array): 1425 elem = seq_get(this.expressions, 0) 1426 if not (elem and elem.find(exp.Query)): 1427 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1428 1429 return super().cast_sql(expression, safe_prefix=safe_prefix) 1430 1431 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1432 variables = self.expressions(expression, "this") 1433 default = self.sql(expression, "default") 1434 default = f" DEFAULT {default}" if default else "" 1435 kind = self.sql(expression, "kind") 1436 kind = f" {kind}" if kind else "" 1437 1438 return f"{variables}{kind}{default}"
396class BigQuery(Dialect): 397 WEEK_OFFSET = -1 398 UNNEST_COLUMN_ONLY = True 399 SUPPORTS_USER_DEFINED_TYPES = False 400 SUPPORTS_SEMI_ANTI_JOIN = False 401 LOG_BASE_FIRST = False 402 HEX_LOWERCASE = True 403 FORCE_EARLY_ALIAS_REF_EXPANSION = True 404 PRESERVE_ORIGINAL_NAMES = True 405 HEX_STRING_IS_INTEGER_TYPE = True 406 407 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 408 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 409 410 # bigquery udfs are case sensitive 411 NORMALIZE_FUNCTIONS = False 412 413 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 414 TIME_MAPPING = { 415 "%D": "%m/%d/%y", 416 "%E6S": "%S.%f", 417 "%e": "%-d", 418 } 419 420 FORMAT_MAPPING = { 421 "DD": "%d", 422 "MM": "%m", 423 "MON": "%b", 424 "MONTH": "%B", 425 "YYYY": "%Y", 426 "YY": "%y", 427 "HH": "%I", 428 "HH12": "%I", 429 "HH24": "%H", 430 "MI": "%M", 431 "SS": "%S", 432 "SSSSS": "%f", 433 "TZH": "%z", 434 } 435 436 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 437 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 438 # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix 439 # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column 440 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE", "_TABLE_SUFFIX", "_FILE_NAME"} 441 442 # All set operations require either a DISTINCT or ALL specifier 443 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 444 445 # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types 446 TYPE_TO_EXPRESSIONS = { 447 **Dialect.TYPE_TO_EXPRESSIONS, 448 exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP], 449 } 450 TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP) 451 452 ANNOTATORS = { 453 **Dialect.ANNOTATORS, 454 **{ 455 expr_type: annotate_with_type_lambda(data_type) 456 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 457 for expr_type in expressions 458 }, 459 **{ 460 expr_type: lambda self, e: _annotate_math_functions(self, e) 461 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 462 }, 463 **{ 464 expr_type: lambda self, e: self._annotate_by_args(e, "this") 465 for expr_type in ( 466 exp.Left, 467 exp.Right, 468 exp.Lower, 469 exp.Upper, 470 exp.Pad, 471 exp.Trim, 472 exp.RegexpExtract, 473 exp.RegexpReplace, 474 exp.Repeat, 475 exp.Substring, 476 ) 477 }, 478 exp.Array: _annotate_array, 479 exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 480 exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 481 exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 482 exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 483 exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 484 exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 485 exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 486 exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 487 exp.CodePointsToString: lambda self, e: self._annotate_with_type( 488 e, exp.DataType.Type.VARCHAR 489 ), 490 exp.Concat: _annotate_concat, 491 exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 492 exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 493 exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 494 exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE), 495 exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"), 496 exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type( 497 e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery") 498 ), 499 exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 500 exp.JSONExtractScalar: lambda self, e: self._annotate_with_type( 501 e, exp.DataType.Type.VARCHAR 502 ), 503 exp.JSONValueArray: lambda self, e: self._annotate_with_type( 504 e, exp.DataType.build("ARRAY<VARCHAR>") 505 ), 506 exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 507 exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"), 508 exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 509 exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME), 510 exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"), 511 exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 512 exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 513 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 514 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 515 exp.TimestampFromParts: lambda self, e: self._annotate_with_type( 516 e, exp.DataType.Type.DATETIME 517 ), 518 exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"), 519 exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 520 exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 521 exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 522 exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 523 } 524 525 def normalize_identifier(self, expression: E) -> E: 526 if ( 527 isinstance(expression, exp.Identifier) 528 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 529 ): 530 parent = expression.parent 531 while isinstance(parent, exp.Dot): 532 parent = parent.parent 533 534 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 535 # by default. The following check uses a heuristic to detect tables based on whether 536 # they are qualified. This should generally be correct, because tables in BigQuery 537 # must be qualified with at least a dataset, unless @@dataset_id is set. 538 case_sensitive = ( 539 isinstance(parent, exp.UserDefinedFunction) 540 or ( 541 isinstance(parent, exp.Table) 542 and parent.db 543 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 544 ) 545 or expression.meta.get("is_table") 546 ) 547 if not case_sensitive: 548 expression.set("this", expression.this.lower()) 549 550 return t.cast(E, expression) 551 552 return super().normalize_identifier(expression) 553 554 class Tokenizer(tokens.Tokenizer): 555 QUOTES = ["'", '"', '"""', "'''"] 556 COMMENTS = ["--", "#", ("/*", "*/")] 557 IDENTIFIERS = ["`"] 558 STRING_ESCAPES = ["\\"] 559 560 HEX_STRINGS = [("0x", ""), ("0X", "")] 561 562 BYTE_STRINGS = [ 563 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 564 ] 565 566 RAW_STRINGS = [ 567 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 568 ] 569 570 NESTED_COMMENTS = False 571 572 KEYWORDS = { 573 **tokens.Tokenizer.KEYWORDS, 574 "ANY TYPE": TokenType.VARIANT, 575 "BEGIN": TokenType.COMMAND, 576 "BEGIN TRANSACTION": TokenType.BEGIN, 577 "BYTEINT": TokenType.INT, 578 "BYTES": TokenType.BINARY, 579 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 580 "DATETIME": TokenType.TIMESTAMP, 581 "DECLARE": TokenType.DECLARE, 582 "ELSEIF": TokenType.COMMAND, 583 "EXCEPTION": TokenType.COMMAND, 584 "EXPORT": TokenType.EXPORT, 585 "FLOAT64": TokenType.DOUBLE, 586 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 587 "MODEL": TokenType.MODEL, 588 "NOT DETERMINISTIC": TokenType.VOLATILE, 589 "RECORD": TokenType.STRUCT, 590 "TIMESTAMP": TokenType.TIMESTAMPTZ, 591 } 592 KEYWORDS.pop("DIV") 593 KEYWORDS.pop("VALUES") 594 KEYWORDS.pop("/*+") 595 596 class Parser(parser.Parser): 597 PREFIXED_PIVOT_COLUMNS = True 598 LOG_DEFAULTS_TO_LN = True 599 SUPPORTS_IMPLICIT_UNNEST = True 600 JOINS_HAVE_EQUAL_PRECEDENCE = True 601 602 # BigQuery does not allow ASC/DESC to be used as an identifier 603 ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC} 604 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 605 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 606 COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - { 607 TokenType.ASC, 608 TokenType.DESC, 609 } 610 UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 611 612 FUNCTIONS = { 613 **parser.Parser.FUNCTIONS, 614 "CONTAINS_SUBSTR": _build_contains_substring, 615 "DATE": _build_date, 616 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 617 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 618 "DATE_TRUNC": lambda args: exp.DateTrunc( 619 unit=seq_get(args, 1), 620 this=seq_get(args, 0), 621 zone=seq_get(args, 2), 622 ), 623 "DATETIME": _build_datetime, 624 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 625 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 626 "DIV": binary_from_function(exp.IntDiv), 627 "EDIT_DISTANCE": _build_levenshtein, 628 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 629 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 630 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 631 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 632 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 633 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 634 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 635 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 636 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 637 "MD5": exp.MD5Digest.from_arg_list, 638 "TO_HEX": _build_to_hex, 639 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 640 [seq_get(args, 1), seq_get(args, 0)] 641 ), 642 "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")( 643 [seq_get(args, 1), seq_get(args, 0)] 644 ), 645 "PARSE_TIMESTAMP": _build_parse_timestamp, 646 "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")( 647 [seq_get(args, 1), seq_get(args, 0)] 648 ), 649 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 650 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 651 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 652 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 653 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 654 ), 655 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 656 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 657 "SPLIT": lambda args: exp.Split( 658 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 659 this=seq_get(args, 0), 660 expression=seq_get(args, 1) or exp.Literal.string(","), 661 ), 662 "STRPOS": exp.StrPosition.from_arg_list, 663 "TIME": _build_time, 664 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 665 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 666 "TIMESTAMP": _build_timestamp, 667 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 668 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 669 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 670 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 671 ), 672 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 673 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 674 ), 675 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 676 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 677 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 678 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 679 "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime), 680 "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))), 681 } 682 683 FUNCTION_PARSERS = { 684 **parser.Parser.FUNCTION_PARSERS, 685 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 686 "JSON_ARRAY": lambda self: self.expression( 687 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 688 ), 689 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 690 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 691 } 692 FUNCTION_PARSERS.pop("TRIM") 693 694 NO_PAREN_FUNCTIONS = { 695 **parser.Parser.NO_PAREN_FUNCTIONS, 696 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 697 } 698 699 NESTED_TYPE_TOKENS = { 700 *parser.Parser.NESTED_TYPE_TOKENS, 701 TokenType.TABLE, 702 } 703 704 PROPERTY_PARSERS = { 705 **parser.Parser.PROPERTY_PARSERS, 706 "NOT DETERMINISTIC": lambda self: self.expression( 707 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 708 ), 709 "OPTIONS": lambda self: self._parse_with_property(), 710 } 711 712 CONSTRAINT_PARSERS = { 713 **parser.Parser.CONSTRAINT_PARSERS, 714 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 715 } 716 717 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 718 RANGE_PARSERS.pop(TokenType.OVERLAPS) 719 720 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 721 722 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 723 724 STATEMENT_PARSERS = { 725 **parser.Parser.STATEMENT_PARSERS, 726 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 727 TokenType.END: lambda self: self._parse_as_command(self._prev), 728 TokenType.FOR: lambda self: self._parse_for_in(), 729 TokenType.EXPORT: lambda self: self._parse_export_data(), 730 TokenType.DECLARE: lambda self: self._parse_declare(), 731 } 732 733 BRACKET_OFFSETS = { 734 "OFFSET": (0, False), 735 "ORDINAL": (1, False), 736 "SAFE_OFFSET": (0, True), 737 "SAFE_ORDINAL": (1, True), 738 } 739 740 def _parse_for_in(self) -> exp.ForIn: 741 this = self._parse_range() 742 self._match_text_seq("DO") 743 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 744 745 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 746 this = super()._parse_table_part(schema=schema) or self._parse_number() 747 748 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 749 if isinstance(this, exp.Identifier): 750 table_name = this.name 751 while self._match(TokenType.DASH, advance=False) and self._next: 752 start = self._curr 753 while self._is_connected() and not self._match_set( 754 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 755 ): 756 self._advance() 757 758 if start == self._curr: 759 break 760 761 table_name += self._find_sql(start, self._prev) 762 763 this = exp.Identifier( 764 this=table_name, quoted=this.args.get("quoted") 765 ).update_positions(this) 766 elif isinstance(this, exp.Literal): 767 table_name = this.name 768 769 if self._is_connected() and self._parse_var(any_token=True): 770 table_name += self._prev.text 771 772 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 773 774 return this 775 776 def _parse_table_parts( 777 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 778 ) -> exp.Table: 779 table = super()._parse_table_parts( 780 schema=schema, is_db_reference=is_db_reference, wildcard=True 781 ) 782 783 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 784 if not table.catalog: 785 if table.db: 786 previous_db = table.args["db"] 787 parts = table.db.split(".") 788 if len(parts) == 2 and not table.args["db"].quoted: 789 table.set( 790 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 791 ) 792 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 793 else: 794 previous_this = table.this 795 parts = table.name.split(".") 796 if len(parts) == 2 and not table.this.quoted: 797 table.set( 798 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 799 ) 800 table.set( 801 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 802 ) 803 804 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 805 alias = table.this 806 catalog, db, this, *rest = ( 807 exp.to_identifier(p, quoted=True) 808 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 809 ) 810 811 for part in (catalog, db, this): 812 if part: 813 part.update_positions(table.this) 814 815 if rest and this: 816 this = exp.Dot.build([this, *rest]) # type: ignore 817 818 table = exp.Table( 819 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 820 ) 821 table.meta["quoted_table"] = True 822 else: 823 alias = None 824 825 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 826 # dataset, so if the project identifier is omitted we need to fix the ast so that 827 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 828 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 829 # views, because it would seem like the "catalog" part is set, when it'd actually 830 # be the region/dataset. Merging the two identifiers into a single one is done to 831 # avoid producing a 4-part Table reference, which would cause issues in the schema 832 # module, when there are 3-part table names mixed with information schema views. 833 # 834 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 835 table_parts = table.parts 836 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 837 # We need to alias the table here to avoid breaking existing qualified columns. 838 # This is expected to be safe, because if there's an actual alias coming up in 839 # the token stream, it will overwrite this one. If there isn't one, we are only 840 # exposing the name that can be used to reference the view explicitly (a no-op). 841 exp.alias_( 842 table, 843 t.cast(exp.Identifier, alias or table_parts[-1]), 844 table=True, 845 copy=False, 846 ) 847 848 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 849 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 850 line=table_parts[-2].meta.get("line"), 851 col=table_parts[-1].meta.get("col"), 852 start=table_parts[-2].meta.get("start"), 853 end=table_parts[-1].meta.get("end"), 854 ) 855 table.set("this", new_this) 856 table.set("db", seq_get(table_parts, -3)) 857 table.set("catalog", seq_get(table_parts, -4)) 858 859 return table 860 861 def _parse_column(self) -> t.Optional[exp.Expression]: 862 column = super()._parse_column() 863 if isinstance(column, exp.Column): 864 parts = column.parts 865 if any("." in p.name for p in parts): 866 catalog, db, table, this, *rest = ( 867 exp.to_identifier(p, quoted=True) 868 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 869 ) 870 871 if rest and this: 872 this = exp.Dot.build([this, *rest]) # type: ignore 873 874 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 875 column.meta["quoted_column"] = True 876 877 return column 878 879 @t.overload 880 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 881 882 @t.overload 883 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 884 885 def _parse_json_object(self, agg=False): 886 json_object = super()._parse_json_object() 887 array_kv_pair = seq_get(json_object.expressions, 0) 888 889 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 890 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 891 if ( 892 array_kv_pair 893 and isinstance(array_kv_pair.this, exp.Array) 894 and isinstance(array_kv_pair.expression, exp.Array) 895 ): 896 keys = array_kv_pair.this.expressions 897 values = array_kv_pair.expression.expressions 898 899 json_object.set( 900 "expressions", 901 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 902 ) 903 904 return json_object 905 906 def _parse_bracket( 907 self, this: t.Optional[exp.Expression] = None 908 ) -> t.Optional[exp.Expression]: 909 bracket = super()._parse_bracket(this) 910 911 if this is bracket: 912 return bracket 913 914 if isinstance(bracket, exp.Bracket): 915 for expression in bracket.expressions: 916 name = expression.name.upper() 917 918 if name not in self.BRACKET_OFFSETS: 919 break 920 921 offset, safe = self.BRACKET_OFFSETS[name] 922 bracket.set("offset", offset) 923 bracket.set("safe", safe) 924 expression.replace(expression.expressions[0]) 925 926 return bracket 927 928 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 929 unnest = super()._parse_unnest(with_alias=with_alias) 930 931 if not unnest: 932 return None 933 934 unnest_expr = seq_get(unnest.expressions, 0) 935 if unnest_expr: 936 from sqlglot.optimizer.annotate_types import annotate_types 937 938 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 939 940 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 941 # in contrast to other dialects such as DuckDB which flattens only the array by default 942 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 943 array_elem.is_type(exp.DataType.Type.STRUCT) 944 for array_elem in unnest_expr._type.expressions 945 ): 946 unnest.set("explode_array", True) 947 948 return unnest 949 950 def _parse_make_interval(self) -> exp.MakeInterval: 951 expr = exp.MakeInterval() 952 953 for arg_key in expr.arg_types: 954 value = self._parse_lambda() 955 956 if not value: 957 break 958 959 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 960 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 961 if isinstance(value, exp.Kwarg): 962 arg_key = value.this.name 963 964 expr.set(arg_key, value) 965 966 self._match(TokenType.COMMA) 967 968 return expr 969 970 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 971 expr = self.expression( 972 exp.FeaturesAtTime, 973 this=(self._match(TokenType.TABLE) and self._parse_table()) 974 or self._parse_select(nested=True), 975 ) 976 977 while self._match(TokenType.COMMA): 978 arg = self._parse_lambda() 979 980 # Get the LHS of the Kwarg and set the arg to that value, e.g 981 # "num_rows => 1" sets the expr's `num_rows` arg 982 if arg: 983 expr.set(arg.this.name, arg) 984 985 return expr 986 987 def _parse_export_data(self) -> exp.Export: 988 self._match_text_seq("DATA") 989 990 return self.expression( 991 exp.Export, 992 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 993 options=self._parse_properties(), 994 this=self._match_text_seq("AS") and self._parse_select(), 995 ) 996 997 class Generator(generator.Generator): 998 INTERVAL_ALLOWS_PLURAL_FORM = False 999 JOIN_HINTS = False 1000 QUERY_HINTS = False 1001 TABLE_HINTS = False 1002 LIMIT_FETCH = "LIMIT" 1003 RENAME_TABLE_WITH_DB = False 1004 NVL2_SUPPORTED = False 1005 UNNEST_WITH_ORDINALITY = False 1006 COLLATE_IS_FUNC = True 1007 LIMIT_ONLY_LITERALS = True 1008 SUPPORTS_TABLE_ALIAS_COLUMNS = False 1009 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 1010 JSON_KEY_VALUE_PAIR_SEP = "," 1011 NULL_ORDERING_SUPPORTED = False 1012 IGNORE_NULLS_IN_FUNC = True 1013 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 1014 CAN_IMPLEMENT_ARRAY_ANY = True 1015 SUPPORTS_TO_NUMBER = False 1016 NAMED_PLACEHOLDER_TOKEN = "@" 1017 HEX_FUNC = "TO_HEX" 1018 WITH_PROPERTIES_PREFIX = "OPTIONS" 1019 SUPPORTS_EXPLODING_PROJECTIONS = False 1020 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1021 SUPPORTS_UNIX_SECONDS = True 1022 1023 TS_OR_DS_TYPES = ( 1024 exp.TsOrDsToDatetime, 1025 exp.TsOrDsToTimestamp, 1026 exp.TsOrDsToTime, 1027 exp.TsOrDsToDate, 1028 ) 1029 1030 TRANSFORMS = { 1031 **generator.Generator.TRANSFORMS, 1032 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1033 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1034 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1035 exp.Array: inline_array_unless_query, 1036 exp.ArrayContains: _array_contains_sql, 1037 exp.ArrayFilter: filter_array_using_unnest, 1038 exp.ArrayRemove: filter_array_using_unnest, 1039 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1040 exp.CollateProperty: lambda self, e: ( 1041 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1042 if e.args.get("default") 1043 else f"COLLATE {self.sql(e, 'this')}" 1044 ), 1045 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1046 exp.CountIf: rename_func("COUNTIF"), 1047 exp.Create: _create_sql, 1048 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1049 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1050 exp.DateDiff: lambda self, e: self.func( 1051 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1052 ), 1053 exp.DateFromParts: rename_func("DATE"), 1054 exp.DateStrToDate: datestrtodate_sql, 1055 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1056 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1057 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1058 exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), 1059 exp.FromTimeZone: lambda self, e: self.func( 1060 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1061 ), 1062 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1063 exp.GroupConcat: lambda self, e: groupconcat_sql( 1064 self, e, func_name="STRING_AGG", within_group=False 1065 ), 1066 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1067 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1068 exp.If: if_sql(false_value="NULL"), 1069 exp.ILike: no_ilike_sql, 1070 exp.IntDiv: rename_func("DIV"), 1071 exp.Int64: rename_func("INT64"), 1072 exp.JSONExtract: _json_extract_sql, 1073 exp.JSONExtractArray: _json_extract_sql, 1074 exp.JSONExtractScalar: _json_extract_sql, 1075 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1076 exp.Levenshtein: _levenshtein_sql, 1077 exp.Max: max_or_greatest, 1078 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1079 exp.MD5Digest: rename_func("MD5"), 1080 exp.Min: min_or_least, 1081 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1082 exp.RegexpExtract: lambda self, e: self.func( 1083 "REGEXP_EXTRACT", 1084 e.this, 1085 e.expression, 1086 e.args.get("position"), 1087 e.args.get("occurrence"), 1088 ), 1089 exp.RegexpExtractAll: lambda self, e: self.func( 1090 "REGEXP_EXTRACT_ALL", e.this, e.expression 1091 ), 1092 exp.RegexpReplace: regexp_replace_sql, 1093 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1094 exp.ReturnsProperty: _returnsproperty_sql, 1095 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1096 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1097 exp.ParseDatetime: lambda self, e: self.func( 1098 "PARSE_DATETIME", self.format_time(e), e.this 1099 ), 1100 exp.Select: transforms.preprocess( 1101 [ 1102 transforms.explode_projection_to_unnest(), 1103 transforms.unqualify_unnest, 1104 transforms.eliminate_distinct_on, 1105 _alias_ordered_group, 1106 transforms.eliminate_semi_and_anti_joins, 1107 ] 1108 ), 1109 exp.SHA: rename_func("SHA1"), 1110 exp.SHA2: sha256_sql, 1111 exp.Space: space_sql, 1112 exp.StabilityProperty: lambda self, e: ( 1113 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1114 ), 1115 exp.String: rename_func("STRING"), 1116 exp.StrPosition: lambda self, e: ( 1117 strposition_sql( 1118 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1119 ) 1120 ), 1121 exp.StrToDate: _str_to_datetime_sql, 1122 exp.StrToTime: _str_to_datetime_sql, 1123 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1124 exp.TimeFromParts: rename_func("TIME"), 1125 exp.TimestampFromParts: rename_func("DATETIME"), 1126 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1127 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1128 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1129 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1130 exp.TimeStrToTime: timestrtotime_sql, 1131 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1132 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1133 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1134 exp.TsOrDsToTime: rename_func("TIME"), 1135 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1136 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1137 exp.Unhex: rename_func("FROM_HEX"), 1138 exp.UnixDate: rename_func("UNIX_DATE"), 1139 exp.UnixToTime: _unix_to_time_sql, 1140 exp.Uuid: lambda *_: "GENERATE_UUID()", 1141 exp.Values: _derived_table_values_to_unnest, 1142 exp.VariancePop: rename_func("VAR_POP"), 1143 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1144 } 1145 1146 SUPPORTED_JSON_PATH_PARTS = { 1147 exp.JSONPathKey, 1148 exp.JSONPathRoot, 1149 exp.JSONPathSubscript, 1150 } 1151 1152 TYPE_MAPPING = { 1153 **generator.Generator.TYPE_MAPPING, 1154 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1155 exp.DataType.Type.BIGINT: "INT64", 1156 exp.DataType.Type.BINARY: "BYTES", 1157 exp.DataType.Type.BLOB: "BYTES", 1158 exp.DataType.Type.BOOLEAN: "BOOL", 1159 exp.DataType.Type.CHAR: "STRING", 1160 exp.DataType.Type.DECIMAL: "NUMERIC", 1161 exp.DataType.Type.DOUBLE: "FLOAT64", 1162 exp.DataType.Type.FLOAT: "FLOAT64", 1163 exp.DataType.Type.INT: "INT64", 1164 exp.DataType.Type.NCHAR: "STRING", 1165 exp.DataType.Type.NVARCHAR: "STRING", 1166 exp.DataType.Type.SMALLINT: "INT64", 1167 exp.DataType.Type.TEXT: "STRING", 1168 exp.DataType.Type.TIMESTAMP: "DATETIME", 1169 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1170 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1171 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1172 exp.DataType.Type.TINYINT: "INT64", 1173 exp.DataType.Type.ROWVERSION: "BYTES", 1174 exp.DataType.Type.UUID: "STRING", 1175 exp.DataType.Type.VARBINARY: "BYTES", 1176 exp.DataType.Type.VARCHAR: "STRING", 1177 exp.DataType.Type.VARIANT: "ANY TYPE", 1178 } 1179 1180 PROPERTIES_LOCATION = { 1181 **generator.Generator.PROPERTIES_LOCATION, 1182 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1183 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1184 } 1185 1186 # WINDOW comes after QUALIFY 1187 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1188 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1189 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1190 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1191 } 1192 1193 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1194 RESERVED_KEYWORDS = { 1195 "all", 1196 "and", 1197 "any", 1198 "array", 1199 "as", 1200 "asc", 1201 "assert_rows_modified", 1202 "at", 1203 "between", 1204 "by", 1205 "case", 1206 "cast", 1207 "collate", 1208 "contains", 1209 "create", 1210 "cross", 1211 "cube", 1212 "current", 1213 "default", 1214 "define", 1215 "desc", 1216 "distinct", 1217 "else", 1218 "end", 1219 "enum", 1220 "escape", 1221 "except", 1222 "exclude", 1223 "exists", 1224 "extract", 1225 "false", 1226 "fetch", 1227 "following", 1228 "for", 1229 "from", 1230 "full", 1231 "group", 1232 "grouping", 1233 "groups", 1234 "hash", 1235 "having", 1236 "if", 1237 "ignore", 1238 "in", 1239 "inner", 1240 "intersect", 1241 "interval", 1242 "into", 1243 "is", 1244 "join", 1245 "lateral", 1246 "left", 1247 "like", 1248 "limit", 1249 "lookup", 1250 "merge", 1251 "natural", 1252 "new", 1253 "no", 1254 "not", 1255 "null", 1256 "nulls", 1257 "of", 1258 "on", 1259 "or", 1260 "order", 1261 "outer", 1262 "over", 1263 "partition", 1264 "preceding", 1265 "proto", 1266 "qualify", 1267 "range", 1268 "recursive", 1269 "respect", 1270 "right", 1271 "rollup", 1272 "rows", 1273 "select", 1274 "set", 1275 "some", 1276 "struct", 1277 "tablesample", 1278 "then", 1279 "to", 1280 "treat", 1281 "true", 1282 "unbounded", 1283 "union", 1284 "unnest", 1285 "using", 1286 "when", 1287 "where", 1288 "window", 1289 "with", 1290 "within", 1291 } 1292 1293 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1294 unit = expression.unit 1295 unit_sql = unit.name if unit.is_string else self.sql(unit) 1296 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1297 1298 def mod_sql(self, expression: exp.Mod) -> str: 1299 this = expression.this 1300 expr = expression.expression 1301 return self.func( 1302 "MOD", 1303 this.unnest() if isinstance(this, exp.Paren) else this, 1304 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1305 ) 1306 1307 def column_parts(self, expression: exp.Column) -> str: 1308 if expression.meta.get("quoted_column"): 1309 # If a column reference is of the form `dataset.table`.name, we need 1310 # to preserve the quoted table path, otherwise the reference breaks 1311 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1312 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1313 return f"{table_path}.{self.sql(expression, 'this')}" 1314 1315 return super().column_parts(expression) 1316 1317 def table_parts(self, expression: exp.Table) -> str: 1318 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1319 # we need to make sure the correct quoting is used in each case. 1320 # 1321 # For example, if there is a CTE x that clashes with a schema name, then the former will 1322 # return the table y in that schema, whereas the latter will return the CTE's y column: 1323 # 1324 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1325 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1326 if expression.meta.get("quoted_table"): 1327 table_parts = ".".join(p.name for p in expression.parts) 1328 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1329 1330 return super().table_parts(expression) 1331 1332 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1333 this = expression.this 1334 if isinstance(this, exp.TsOrDsToDatetime): 1335 func_name = "FORMAT_DATETIME" 1336 elif isinstance(this, exp.TsOrDsToTimestamp): 1337 func_name = "FORMAT_TIMESTAMP" 1338 elif isinstance(this, exp.TsOrDsToTime): 1339 func_name = "FORMAT_TIME" 1340 else: 1341 func_name = "FORMAT_DATE" 1342 1343 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1344 return self.func( 1345 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1346 ) 1347 1348 def eq_sql(self, expression: exp.EQ) -> str: 1349 # Operands of = cannot be NULL in BigQuery 1350 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1351 if not isinstance(expression.parent, exp.Update): 1352 return "NULL" 1353 1354 return self.binary(expression, "=") 1355 1356 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1357 parent = expression.parent 1358 1359 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1360 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1361 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1362 return self.func( 1363 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1364 ) 1365 1366 return super().attimezone_sql(expression) 1367 1368 def trycast_sql(self, expression: exp.TryCast) -> str: 1369 return self.cast_sql(expression, safe_prefix="SAFE_") 1370 1371 def bracket_sql(self, expression: exp.Bracket) -> str: 1372 this = expression.this 1373 expressions = expression.expressions 1374 1375 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1376 arg = expressions[0] 1377 if arg.type is None: 1378 from sqlglot.optimizer.annotate_types import annotate_types 1379 1380 arg = annotate_types(arg, dialect=self.dialect) 1381 1382 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1383 # BQ doesn't support bracket syntax with string values for structs 1384 return f"{self.sql(this)}.{arg.name}" 1385 1386 expressions_sql = self.expressions(expression, flat=True) 1387 offset = expression.args.get("offset") 1388 1389 if offset == 0: 1390 expressions_sql = f"OFFSET({expressions_sql})" 1391 elif offset == 1: 1392 expressions_sql = f"ORDINAL({expressions_sql})" 1393 elif offset is not None: 1394 self.unsupported(f"Unsupported array offset: {offset}") 1395 1396 if expression.args.get("safe"): 1397 expressions_sql = f"SAFE_{expressions_sql}" 1398 1399 return f"{self.sql(this)}[{expressions_sql}]" 1400 1401 def in_unnest_op(self, expression: exp.Unnest) -> str: 1402 return self.sql(expression) 1403 1404 def version_sql(self, expression: exp.Version) -> str: 1405 if expression.name == "TIMESTAMP": 1406 expression.set("this", "SYSTEM_TIME") 1407 return super().version_sql(expression) 1408 1409 def contains_sql(self, expression: exp.Contains) -> str: 1410 this = expression.this 1411 expr = expression.expression 1412 1413 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1414 this = this.this 1415 expr = expr.this 1416 1417 return self.func("CONTAINS_SUBSTR", this, expr) 1418 1419 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1420 this = expression.this 1421 1422 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1423 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1424 # because they aren't literals and so the above syntax is invalid BigQuery. 1425 if isinstance(this, exp.Array): 1426 elem = seq_get(this.expressions, 0) 1427 if not (elem and elem.find(exp.Query)): 1428 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1429 1430 return super().cast_sql(expression, safe_prefix=safe_prefix) 1431 1432 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1433 variables = self.expressions(expression, "this") 1434 default = self.sql(expression, "default") 1435 default = f" DEFAULT {default}" if default else "" 1436 kind = self.sql(expression, "kind") 1437 kind = f" {kind}" if kind else "" 1438 1439 return f"{variables}{kind}{default}"
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata, can be useful for roundtripping deprecated vs new functions that share an AST node e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL
must be explicitly specified.
525 def normalize_identifier(self, expression: E) -> E: 526 if ( 527 isinstance(expression, exp.Identifier) 528 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 529 ): 530 parent = expression.parent 531 while isinstance(parent, exp.Dot): 532 parent = parent.parent 533 534 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 535 # by default. The following check uses a heuristic to detect tables based on whether 536 # they are qualified. This should generally be correct, because tables in BigQuery 537 # must be qualified with at least a dataset, unless @@dataset_id is set. 538 case_sensitive = ( 539 isinstance(parent, exp.UserDefinedFunction) 540 or ( 541 isinstance(parent, exp.Table) 542 and parent.db 543 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 544 ) 545 or expression.meta.get("is_table") 546 ) 547 if not case_sensitive: 548 expression.set("this", expression.this.lower()) 549 550 return t.cast(E, expression) 551 552 return super().normalize_identifier(expression)
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
Mapping of an escaped sequence (\n) to its unescaped version (
).
554 class Tokenizer(tokens.Tokenizer): 555 QUOTES = ["'", '"', '"""', "'''"] 556 COMMENTS = ["--", "#", ("/*", "*/")] 557 IDENTIFIERS = ["`"] 558 STRING_ESCAPES = ["\\"] 559 560 HEX_STRINGS = [("0x", ""), ("0X", "")] 561 562 BYTE_STRINGS = [ 563 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 564 ] 565 566 RAW_STRINGS = [ 567 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 568 ] 569 570 NESTED_COMMENTS = False 571 572 KEYWORDS = { 573 **tokens.Tokenizer.KEYWORDS, 574 "ANY TYPE": TokenType.VARIANT, 575 "BEGIN": TokenType.COMMAND, 576 "BEGIN TRANSACTION": TokenType.BEGIN, 577 "BYTEINT": TokenType.INT, 578 "BYTES": TokenType.BINARY, 579 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 580 "DATETIME": TokenType.TIMESTAMP, 581 "DECLARE": TokenType.DECLARE, 582 "ELSEIF": TokenType.COMMAND, 583 "EXCEPTION": TokenType.COMMAND, 584 "EXPORT": TokenType.EXPORT, 585 "FLOAT64": TokenType.DOUBLE, 586 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 587 "MODEL": TokenType.MODEL, 588 "NOT DETERMINISTIC": TokenType.VOLATILE, 589 "RECORD": TokenType.STRUCT, 590 "TIMESTAMP": TokenType.TIMESTAMPTZ, 591 } 592 KEYWORDS.pop("DIV") 593 KEYWORDS.pop("VALUES") 594 KEYWORDS.pop("/*+")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
596 class Parser(parser.Parser): 597 PREFIXED_PIVOT_COLUMNS = True 598 LOG_DEFAULTS_TO_LN = True 599 SUPPORTS_IMPLICIT_UNNEST = True 600 JOINS_HAVE_EQUAL_PRECEDENCE = True 601 602 # BigQuery does not allow ASC/DESC to be used as an identifier 603 ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC} 604 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 605 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 606 COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - { 607 TokenType.ASC, 608 TokenType.DESC, 609 } 610 UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 611 612 FUNCTIONS = { 613 **parser.Parser.FUNCTIONS, 614 "CONTAINS_SUBSTR": _build_contains_substring, 615 "DATE": _build_date, 616 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 617 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 618 "DATE_TRUNC": lambda args: exp.DateTrunc( 619 unit=seq_get(args, 1), 620 this=seq_get(args, 0), 621 zone=seq_get(args, 2), 622 ), 623 "DATETIME": _build_datetime, 624 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 625 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 626 "DIV": binary_from_function(exp.IntDiv), 627 "EDIT_DISTANCE": _build_levenshtein, 628 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 629 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 630 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 631 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 632 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 633 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 634 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 635 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 636 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 637 "MD5": exp.MD5Digest.from_arg_list, 638 "TO_HEX": _build_to_hex, 639 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 640 [seq_get(args, 1), seq_get(args, 0)] 641 ), 642 "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")( 643 [seq_get(args, 1), seq_get(args, 0)] 644 ), 645 "PARSE_TIMESTAMP": _build_parse_timestamp, 646 "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")( 647 [seq_get(args, 1), seq_get(args, 0)] 648 ), 649 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 650 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 651 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 652 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 653 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 654 ), 655 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 656 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 657 "SPLIT": lambda args: exp.Split( 658 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 659 this=seq_get(args, 0), 660 expression=seq_get(args, 1) or exp.Literal.string(","), 661 ), 662 "STRPOS": exp.StrPosition.from_arg_list, 663 "TIME": _build_time, 664 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 665 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 666 "TIMESTAMP": _build_timestamp, 667 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 668 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 669 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 670 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 671 ), 672 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 673 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 674 ), 675 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 676 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 677 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 678 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 679 "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime), 680 "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))), 681 } 682 683 FUNCTION_PARSERS = { 684 **parser.Parser.FUNCTION_PARSERS, 685 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 686 "JSON_ARRAY": lambda self: self.expression( 687 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 688 ), 689 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 690 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 691 } 692 FUNCTION_PARSERS.pop("TRIM") 693 694 NO_PAREN_FUNCTIONS = { 695 **parser.Parser.NO_PAREN_FUNCTIONS, 696 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 697 } 698 699 NESTED_TYPE_TOKENS = { 700 *parser.Parser.NESTED_TYPE_TOKENS, 701 TokenType.TABLE, 702 } 703 704 PROPERTY_PARSERS = { 705 **parser.Parser.PROPERTY_PARSERS, 706 "NOT DETERMINISTIC": lambda self: self.expression( 707 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 708 ), 709 "OPTIONS": lambda self: self._parse_with_property(), 710 } 711 712 CONSTRAINT_PARSERS = { 713 **parser.Parser.CONSTRAINT_PARSERS, 714 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 715 } 716 717 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 718 RANGE_PARSERS.pop(TokenType.OVERLAPS) 719 720 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 721 722 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 723 724 STATEMENT_PARSERS = { 725 **parser.Parser.STATEMENT_PARSERS, 726 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 727 TokenType.END: lambda self: self._parse_as_command(self._prev), 728 TokenType.FOR: lambda self: self._parse_for_in(), 729 TokenType.EXPORT: lambda self: self._parse_export_data(), 730 TokenType.DECLARE: lambda self: self._parse_declare(), 731 } 732 733 BRACKET_OFFSETS = { 734 "OFFSET": (0, False), 735 "ORDINAL": (1, False), 736 "SAFE_OFFSET": (0, True), 737 "SAFE_ORDINAL": (1, True), 738 } 739 740 def _parse_for_in(self) -> exp.ForIn: 741 this = self._parse_range() 742 self._match_text_seq("DO") 743 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 744 745 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 746 this = super()._parse_table_part(schema=schema) or self._parse_number() 747 748 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 749 if isinstance(this, exp.Identifier): 750 table_name = this.name 751 while self._match(TokenType.DASH, advance=False) and self._next: 752 start = self._curr 753 while self._is_connected() and not self._match_set( 754 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 755 ): 756 self._advance() 757 758 if start == self._curr: 759 break 760 761 table_name += self._find_sql(start, self._prev) 762 763 this = exp.Identifier( 764 this=table_name, quoted=this.args.get("quoted") 765 ).update_positions(this) 766 elif isinstance(this, exp.Literal): 767 table_name = this.name 768 769 if self._is_connected() and self._parse_var(any_token=True): 770 table_name += self._prev.text 771 772 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 773 774 return this 775 776 def _parse_table_parts( 777 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 778 ) -> exp.Table: 779 table = super()._parse_table_parts( 780 schema=schema, is_db_reference=is_db_reference, wildcard=True 781 ) 782 783 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 784 if not table.catalog: 785 if table.db: 786 previous_db = table.args["db"] 787 parts = table.db.split(".") 788 if len(parts) == 2 and not table.args["db"].quoted: 789 table.set( 790 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 791 ) 792 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 793 else: 794 previous_this = table.this 795 parts = table.name.split(".") 796 if len(parts) == 2 and not table.this.quoted: 797 table.set( 798 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 799 ) 800 table.set( 801 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 802 ) 803 804 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 805 alias = table.this 806 catalog, db, this, *rest = ( 807 exp.to_identifier(p, quoted=True) 808 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 809 ) 810 811 for part in (catalog, db, this): 812 if part: 813 part.update_positions(table.this) 814 815 if rest and this: 816 this = exp.Dot.build([this, *rest]) # type: ignore 817 818 table = exp.Table( 819 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 820 ) 821 table.meta["quoted_table"] = True 822 else: 823 alias = None 824 825 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 826 # dataset, so if the project identifier is omitted we need to fix the ast so that 827 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 828 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 829 # views, because it would seem like the "catalog" part is set, when it'd actually 830 # be the region/dataset. Merging the two identifiers into a single one is done to 831 # avoid producing a 4-part Table reference, which would cause issues in the schema 832 # module, when there are 3-part table names mixed with information schema views. 833 # 834 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 835 table_parts = table.parts 836 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 837 # We need to alias the table here to avoid breaking existing qualified columns. 838 # This is expected to be safe, because if there's an actual alias coming up in 839 # the token stream, it will overwrite this one. If there isn't one, we are only 840 # exposing the name that can be used to reference the view explicitly (a no-op). 841 exp.alias_( 842 table, 843 t.cast(exp.Identifier, alias or table_parts[-1]), 844 table=True, 845 copy=False, 846 ) 847 848 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 849 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 850 line=table_parts[-2].meta.get("line"), 851 col=table_parts[-1].meta.get("col"), 852 start=table_parts[-2].meta.get("start"), 853 end=table_parts[-1].meta.get("end"), 854 ) 855 table.set("this", new_this) 856 table.set("db", seq_get(table_parts, -3)) 857 table.set("catalog", seq_get(table_parts, -4)) 858 859 return table 860 861 def _parse_column(self) -> t.Optional[exp.Expression]: 862 column = super()._parse_column() 863 if isinstance(column, exp.Column): 864 parts = column.parts 865 if any("." in p.name for p in parts): 866 catalog, db, table, this, *rest = ( 867 exp.to_identifier(p, quoted=True) 868 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 869 ) 870 871 if rest and this: 872 this = exp.Dot.build([this, *rest]) # type: ignore 873 874 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 875 column.meta["quoted_column"] = True 876 877 return column 878 879 @t.overload 880 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 881 882 @t.overload 883 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 884 885 def _parse_json_object(self, agg=False): 886 json_object = super()._parse_json_object() 887 array_kv_pair = seq_get(json_object.expressions, 0) 888 889 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 890 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 891 if ( 892 array_kv_pair 893 and isinstance(array_kv_pair.this, exp.Array) 894 and isinstance(array_kv_pair.expression, exp.Array) 895 ): 896 keys = array_kv_pair.this.expressions 897 values = array_kv_pair.expression.expressions 898 899 json_object.set( 900 "expressions", 901 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 902 ) 903 904 return json_object 905 906 def _parse_bracket( 907 self, this: t.Optional[exp.Expression] = None 908 ) -> t.Optional[exp.Expression]: 909 bracket = super()._parse_bracket(this) 910 911 if this is bracket: 912 return bracket 913 914 if isinstance(bracket, exp.Bracket): 915 for expression in bracket.expressions: 916 name = expression.name.upper() 917 918 if name not in self.BRACKET_OFFSETS: 919 break 920 921 offset, safe = self.BRACKET_OFFSETS[name] 922 bracket.set("offset", offset) 923 bracket.set("safe", safe) 924 expression.replace(expression.expressions[0]) 925 926 return bracket 927 928 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 929 unnest = super()._parse_unnest(with_alias=with_alias) 930 931 if not unnest: 932 return None 933 934 unnest_expr = seq_get(unnest.expressions, 0) 935 if unnest_expr: 936 from sqlglot.optimizer.annotate_types import annotate_types 937 938 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 939 940 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 941 # in contrast to other dialects such as DuckDB which flattens only the array by default 942 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 943 array_elem.is_type(exp.DataType.Type.STRUCT) 944 for array_elem in unnest_expr._type.expressions 945 ): 946 unnest.set("explode_array", True) 947 948 return unnest 949 950 def _parse_make_interval(self) -> exp.MakeInterval: 951 expr = exp.MakeInterval() 952 953 for arg_key in expr.arg_types: 954 value = self._parse_lambda() 955 956 if not value: 957 break 958 959 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 960 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 961 if isinstance(value, exp.Kwarg): 962 arg_key = value.this.name 963 964 expr.set(arg_key, value) 965 966 self._match(TokenType.COMMA) 967 968 return expr 969 970 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 971 expr = self.expression( 972 exp.FeaturesAtTime, 973 this=(self._match(TokenType.TABLE) and self._parse_table()) 974 or self._parse_select(nested=True), 975 ) 976 977 while self._match(TokenType.COMMA): 978 arg = self._parse_lambda() 979 980 # Get the LHS of the Kwarg and set the arg to that value, e.g 981 # "num_rows => 1" sets the expr's `num_rows` arg 982 if arg: 983 expr.set(arg.this.name, arg) 984 985 return expr 986 987 def _parse_export_data(self) -> exp.Export: 988 self._match_text_seq("DATA") 989 990 return self.expression( 991 exp.Export, 992 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 993 options=self._parse_properties(), 994 this=self._match_text_seq("AS") and self._parse_select(), 995 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
997 class Generator(generator.Generator): 998 INTERVAL_ALLOWS_PLURAL_FORM = False 999 JOIN_HINTS = False 1000 QUERY_HINTS = False 1001 TABLE_HINTS = False 1002 LIMIT_FETCH = "LIMIT" 1003 RENAME_TABLE_WITH_DB = False 1004 NVL2_SUPPORTED = False 1005 UNNEST_WITH_ORDINALITY = False 1006 COLLATE_IS_FUNC = True 1007 LIMIT_ONLY_LITERALS = True 1008 SUPPORTS_TABLE_ALIAS_COLUMNS = False 1009 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 1010 JSON_KEY_VALUE_PAIR_SEP = "," 1011 NULL_ORDERING_SUPPORTED = False 1012 IGNORE_NULLS_IN_FUNC = True 1013 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 1014 CAN_IMPLEMENT_ARRAY_ANY = True 1015 SUPPORTS_TO_NUMBER = False 1016 NAMED_PLACEHOLDER_TOKEN = "@" 1017 HEX_FUNC = "TO_HEX" 1018 WITH_PROPERTIES_PREFIX = "OPTIONS" 1019 SUPPORTS_EXPLODING_PROJECTIONS = False 1020 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1021 SUPPORTS_UNIX_SECONDS = True 1022 1023 TS_OR_DS_TYPES = ( 1024 exp.TsOrDsToDatetime, 1025 exp.TsOrDsToTimestamp, 1026 exp.TsOrDsToTime, 1027 exp.TsOrDsToDate, 1028 ) 1029 1030 TRANSFORMS = { 1031 **generator.Generator.TRANSFORMS, 1032 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1033 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1034 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1035 exp.Array: inline_array_unless_query, 1036 exp.ArrayContains: _array_contains_sql, 1037 exp.ArrayFilter: filter_array_using_unnest, 1038 exp.ArrayRemove: filter_array_using_unnest, 1039 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1040 exp.CollateProperty: lambda self, e: ( 1041 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1042 if e.args.get("default") 1043 else f"COLLATE {self.sql(e, 'this')}" 1044 ), 1045 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1046 exp.CountIf: rename_func("COUNTIF"), 1047 exp.Create: _create_sql, 1048 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1049 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1050 exp.DateDiff: lambda self, e: self.func( 1051 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1052 ), 1053 exp.DateFromParts: rename_func("DATE"), 1054 exp.DateStrToDate: datestrtodate_sql, 1055 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1056 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1057 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1058 exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), 1059 exp.FromTimeZone: lambda self, e: self.func( 1060 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1061 ), 1062 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1063 exp.GroupConcat: lambda self, e: groupconcat_sql( 1064 self, e, func_name="STRING_AGG", within_group=False 1065 ), 1066 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1067 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1068 exp.If: if_sql(false_value="NULL"), 1069 exp.ILike: no_ilike_sql, 1070 exp.IntDiv: rename_func("DIV"), 1071 exp.Int64: rename_func("INT64"), 1072 exp.JSONExtract: _json_extract_sql, 1073 exp.JSONExtractArray: _json_extract_sql, 1074 exp.JSONExtractScalar: _json_extract_sql, 1075 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1076 exp.Levenshtein: _levenshtein_sql, 1077 exp.Max: max_or_greatest, 1078 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1079 exp.MD5Digest: rename_func("MD5"), 1080 exp.Min: min_or_least, 1081 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1082 exp.RegexpExtract: lambda self, e: self.func( 1083 "REGEXP_EXTRACT", 1084 e.this, 1085 e.expression, 1086 e.args.get("position"), 1087 e.args.get("occurrence"), 1088 ), 1089 exp.RegexpExtractAll: lambda self, e: self.func( 1090 "REGEXP_EXTRACT_ALL", e.this, e.expression 1091 ), 1092 exp.RegexpReplace: regexp_replace_sql, 1093 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1094 exp.ReturnsProperty: _returnsproperty_sql, 1095 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1096 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1097 exp.ParseDatetime: lambda self, e: self.func( 1098 "PARSE_DATETIME", self.format_time(e), e.this 1099 ), 1100 exp.Select: transforms.preprocess( 1101 [ 1102 transforms.explode_projection_to_unnest(), 1103 transforms.unqualify_unnest, 1104 transforms.eliminate_distinct_on, 1105 _alias_ordered_group, 1106 transforms.eliminate_semi_and_anti_joins, 1107 ] 1108 ), 1109 exp.SHA: rename_func("SHA1"), 1110 exp.SHA2: sha256_sql, 1111 exp.Space: space_sql, 1112 exp.StabilityProperty: lambda self, e: ( 1113 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1114 ), 1115 exp.String: rename_func("STRING"), 1116 exp.StrPosition: lambda self, e: ( 1117 strposition_sql( 1118 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1119 ) 1120 ), 1121 exp.StrToDate: _str_to_datetime_sql, 1122 exp.StrToTime: _str_to_datetime_sql, 1123 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1124 exp.TimeFromParts: rename_func("TIME"), 1125 exp.TimestampFromParts: rename_func("DATETIME"), 1126 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1127 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1128 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1129 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1130 exp.TimeStrToTime: timestrtotime_sql, 1131 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1132 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1133 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1134 exp.TsOrDsToTime: rename_func("TIME"), 1135 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1136 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1137 exp.Unhex: rename_func("FROM_HEX"), 1138 exp.UnixDate: rename_func("UNIX_DATE"), 1139 exp.UnixToTime: _unix_to_time_sql, 1140 exp.Uuid: lambda *_: "GENERATE_UUID()", 1141 exp.Values: _derived_table_values_to_unnest, 1142 exp.VariancePop: rename_func("VAR_POP"), 1143 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1144 } 1145 1146 SUPPORTED_JSON_PATH_PARTS = { 1147 exp.JSONPathKey, 1148 exp.JSONPathRoot, 1149 exp.JSONPathSubscript, 1150 } 1151 1152 TYPE_MAPPING = { 1153 **generator.Generator.TYPE_MAPPING, 1154 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1155 exp.DataType.Type.BIGINT: "INT64", 1156 exp.DataType.Type.BINARY: "BYTES", 1157 exp.DataType.Type.BLOB: "BYTES", 1158 exp.DataType.Type.BOOLEAN: "BOOL", 1159 exp.DataType.Type.CHAR: "STRING", 1160 exp.DataType.Type.DECIMAL: "NUMERIC", 1161 exp.DataType.Type.DOUBLE: "FLOAT64", 1162 exp.DataType.Type.FLOAT: "FLOAT64", 1163 exp.DataType.Type.INT: "INT64", 1164 exp.DataType.Type.NCHAR: "STRING", 1165 exp.DataType.Type.NVARCHAR: "STRING", 1166 exp.DataType.Type.SMALLINT: "INT64", 1167 exp.DataType.Type.TEXT: "STRING", 1168 exp.DataType.Type.TIMESTAMP: "DATETIME", 1169 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1170 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1171 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1172 exp.DataType.Type.TINYINT: "INT64", 1173 exp.DataType.Type.ROWVERSION: "BYTES", 1174 exp.DataType.Type.UUID: "STRING", 1175 exp.DataType.Type.VARBINARY: "BYTES", 1176 exp.DataType.Type.VARCHAR: "STRING", 1177 exp.DataType.Type.VARIANT: "ANY TYPE", 1178 } 1179 1180 PROPERTIES_LOCATION = { 1181 **generator.Generator.PROPERTIES_LOCATION, 1182 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1183 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1184 } 1185 1186 # WINDOW comes after QUALIFY 1187 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1188 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1189 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1190 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1191 } 1192 1193 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1194 RESERVED_KEYWORDS = { 1195 "all", 1196 "and", 1197 "any", 1198 "array", 1199 "as", 1200 "asc", 1201 "assert_rows_modified", 1202 "at", 1203 "between", 1204 "by", 1205 "case", 1206 "cast", 1207 "collate", 1208 "contains", 1209 "create", 1210 "cross", 1211 "cube", 1212 "current", 1213 "default", 1214 "define", 1215 "desc", 1216 "distinct", 1217 "else", 1218 "end", 1219 "enum", 1220 "escape", 1221 "except", 1222 "exclude", 1223 "exists", 1224 "extract", 1225 "false", 1226 "fetch", 1227 "following", 1228 "for", 1229 "from", 1230 "full", 1231 "group", 1232 "grouping", 1233 "groups", 1234 "hash", 1235 "having", 1236 "if", 1237 "ignore", 1238 "in", 1239 "inner", 1240 "intersect", 1241 "interval", 1242 "into", 1243 "is", 1244 "join", 1245 "lateral", 1246 "left", 1247 "like", 1248 "limit", 1249 "lookup", 1250 "merge", 1251 "natural", 1252 "new", 1253 "no", 1254 "not", 1255 "null", 1256 "nulls", 1257 "of", 1258 "on", 1259 "or", 1260 "order", 1261 "outer", 1262 "over", 1263 "partition", 1264 "preceding", 1265 "proto", 1266 "qualify", 1267 "range", 1268 "recursive", 1269 "respect", 1270 "right", 1271 "rollup", 1272 "rows", 1273 "select", 1274 "set", 1275 "some", 1276 "struct", 1277 "tablesample", 1278 "then", 1279 "to", 1280 "treat", 1281 "true", 1282 "unbounded", 1283 "union", 1284 "unnest", 1285 "using", 1286 "when", 1287 "where", 1288 "window", 1289 "with", 1290 "within", 1291 } 1292 1293 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1294 unit = expression.unit 1295 unit_sql = unit.name if unit.is_string else self.sql(unit) 1296 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1297 1298 def mod_sql(self, expression: exp.Mod) -> str: 1299 this = expression.this 1300 expr = expression.expression 1301 return self.func( 1302 "MOD", 1303 this.unnest() if isinstance(this, exp.Paren) else this, 1304 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1305 ) 1306 1307 def column_parts(self, expression: exp.Column) -> str: 1308 if expression.meta.get("quoted_column"): 1309 # If a column reference is of the form `dataset.table`.name, we need 1310 # to preserve the quoted table path, otherwise the reference breaks 1311 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1312 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1313 return f"{table_path}.{self.sql(expression, 'this')}" 1314 1315 return super().column_parts(expression) 1316 1317 def table_parts(self, expression: exp.Table) -> str: 1318 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1319 # we need to make sure the correct quoting is used in each case. 1320 # 1321 # For example, if there is a CTE x that clashes with a schema name, then the former will 1322 # return the table y in that schema, whereas the latter will return the CTE's y column: 1323 # 1324 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1325 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1326 if expression.meta.get("quoted_table"): 1327 table_parts = ".".join(p.name for p in expression.parts) 1328 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1329 1330 return super().table_parts(expression) 1331 1332 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1333 this = expression.this 1334 if isinstance(this, exp.TsOrDsToDatetime): 1335 func_name = "FORMAT_DATETIME" 1336 elif isinstance(this, exp.TsOrDsToTimestamp): 1337 func_name = "FORMAT_TIMESTAMP" 1338 elif isinstance(this, exp.TsOrDsToTime): 1339 func_name = "FORMAT_TIME" 1340 else: 1341 func_name = "FORMAT_DATE" 1342 1343 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1344 return self.func( 1345 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1346 ) 1347 1348 def eq_sql(self, expression: exp.EQ) -> str: 1349 # Operands of = cannot be NULL in BigQuery 1350 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1351 if not isinstance(expression.parent, exp.Update): 1352 return "NULL" 1353 1354 return self.binary(expression, "=") 1355 1356 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1357 parent = expression.parent 1358 1359 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1360 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1361 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1362 return self.func( 1363 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1364 ) 1365 1366 return super().attimezone_sql(expression) 1367 1368 def trycast_sql(self, expression: exp.TryCast) -> str: 1369 return self.cast_sql(expression, safe_prefix="SAFE_") 1370 1371 def bracket_sql(self, expression: exp.Bracket) -> str: 1372 this = expression.this 1373 expressions = expression.expressions 1374 1375 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1376 arg = expressions[0] 1377 if arg.type is None: 1378 from sqlglot.optimizer.annotate_types import annotate_types 1379 1380 arg = annotate_types(arg, dialect=self.dialect) 1381 1382 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1383 # BQ doesn't support bracket syntax with string values for structs 1384 return f"{self.sql(this)}.{arg.name}" 1385 1386 expressions_sql = self.expressions(expression, flat=True) 1387 offset = expression.args.get("offset") 1388 1389 if offset == 0: 1390 expressions_sql = f"OFFSET({expressions_sql})" 1391 elif offset == 1: 1392 expressions_sql = f"ORDINAL({expressions_sql})" 1393 elif offset is not None: 1394 self.unsupported(f"Unsupported array offset: {offset}") 1395 1396 if expression.args.get("safe"): 1397 expressions_sql = f"SAFE_{expressions_sql}" 1398 1399 return f"{self.sql(this)}[{expressions_sql}]" 1400 1401 def in_unnest_op(self, expression: exp.Unnest) -> str: 1402 return self.sql(expression) 1403 1404 def version_sql(self, expression: exp.Version) -> str: 1405 if expression.name == "TIMESTAMP": 1406 expression.set("this", "SYSTEM_TIME") 1407 return super().version_sql(expression) 1408 1409 def contains_sql(self, expression: exp.Contains) -> str: 1410 this = expression.this 1411 expr = expression.expression 1412 1413 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1414 this = this.this 1415 expr = expr.this 1416 1417 return self.func("CONTAINS_SUBSTR", this, expr) 1418 1419 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1420 this = expression.this 1421 1422 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1423 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1424 # because they aren't literals and so the above syntax is invalid BigQuery. 1425 if isinstance(this, exp.Array): 1426 elem = seq_get(this.expressions, 0) 1427 if not (elem and elem.find(exp.Query)): 1428 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1429 1430 return super().cast_sql(expression, safe_prefix=safe_prefix) 1431 1432 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1433 variables = self.expressions(expression, "this") 1434 default = self.sql(expression, "default") 1435 default = f" DEFAULT {default}" if default else "" 1436 kind = self.sql(expression, "kind") 1437 kind = f" {kind}" if kind else "" 1438 1439 return f"{variables}{kind}{default}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHEREclause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1307 def column_parts(self, expression: exp.Column) -> str: 1308 if expression.meta.get("quoted_column"): 1309 # If a column reference is of the form `dataset.table`.name, we need 1310 # to preserve the quoted table path, otherwise the reference breaks 1311 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1312 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1313 return f"{table_path}.{self.sql(expression, 'this')}" 1314 1315 return super().column_parts(expression)
1317 def table_parts(self, expression: exp.Table) -> str: 1318 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1319 # we need to make sure the correct quoting is used in each case. 1320 # 1321 # For example, if there is a CTE x that clashes with a schema name, then the former will 1322 # return the table y in that schema, whereas the latter will return the CTE's y column: 1323 # 1324 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1325 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1326 if expression.meta.get("quoted_table"): 1327 table_parts = ".".join(p.name for p in expression.parts) 1328 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1329 1330 return super().table_parts(expression)
1332 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1333 this = expression.this 1334 if isinstance(this, exp.TsOrDsToDatetime): 1335 func_name = "FORMAT_DATETIME" 1336 elif isinstance(this, exp.TsOrDsToTimestamp): 1337 func_name = "FORMAT_TIMESTAMP" 1338 elif isinstance(this, exp.TsOrDsToTime): 1339 func_name = "FORMAT_TIME" 1340 else: 1341 func_name = "FORMAT_DATE" 1342 1343 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1344 return self.func( 1345 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1346 )
1348 def eq_sql(self, expression: exp.EQ) -> str: 1349 # Operands of = cannot be NULL in BigQuery 1350 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1351 if not isinstance(expression.parent, exp.Update): 1352 return "NULL" 1353 1354 return self.binary(expression, "=")
1356 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1357 parent = expression.parent 1358 1359 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1360 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1361 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1362 return self.func( 1363 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1364 ) 1365 1366 return super().attimezone_sql(expression)
1371 def bracket_sql(self, expression: exp.Bracket) -> str: 1372 this = expression.this 1373 expressions = expression.expressions 1374 1375 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1376 arg = expressions[0] 1377 if arg.type is None: 1378 from sqlglot.optimizer.annotate_types import annotate_types 1379 1380 arg = annotate_types(arg, dialect=self.dialect) 1381 1382 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1383 # BQ doesn't support bracket syntax with string values for structs 1384 return f"{self.sql(this)}.{arg.name}" 1385 1386 expressions_sql = self.expressions(expression, flat=True) 1387 offset = expression.args.get("offset") 1388 1389 if offset == 0: 1390 expressions_sql = f"OFFSET({expressions_sql})" 1391 elif offset == 1: 1392 expressions_sql = f"ORDINAL({expressions_sql})" 1393 elif offset is not None: 1394 self.unsupported(f"Unsupported array offset: {offset}") 1395 1396 if expression.args.get("safe"): 1397 expressions_sql = f"SAFE_{expressions_sql}" 1398 1399 return f"{self.sql(this)}[{expressions_sql}]"
1419 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1420 this = expression.this 1421 1422 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1423 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1424 # because they aren't literals and so the above syntax is invalid BigQuery. 1425 if isinstance(this, exp.Array): 1426 elem = seq_get(this.expressions, 0) 1427 if not (elem and elem.find(exp.Query)): 1428 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1429 1430 return super().cast_sql(expression, safe_prefix=safe_prefix)
1432 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1433 variables = self.expressions(expression, "this") 1434 default = self.sql(expression, "default") 1435 default = f" DEFAULT {default}" if default else "" 1436 kind = self.sql(expression, "kind") 1437 kind = f" {kind}" if kind else "" 1438 1439 return f"{variables}{kind}{default}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql