diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ccb2ed1bc..8d4740394 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -459,40 +459,6 @@ pub enum CastFormat { ValueAtTimeZone(Value, Value), } -/// Represents the syntax/style used in a map access. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MapAccessSyntax { - /// Access using bracket notation. `mymap[mykey]` - Bracket, - /// Access using period notation. `mymap.mykey` - Period, -} - -/// Expression used to access a value in a nested structure. -/// -/// Example: `SAFE_OFFSET(0)` in -/// ```sql -/// SELECT mymap[SAFE_OFFSET(0)]; -/// ``` -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MapAccessKey { - pub key: Expr, - pub syntax: MapAccessSyntax, -} - -impl fmt::Display for MapAccessKey { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.syntax { - MapAccessSyntax::Bracket => write!(f, "[{}]", self.key), - MapAccessSyntax::Period => write!(f, ".{}", self.key), - } - } -} - /// An element of a JSON path. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -629,6 +595,28 @@ pub enum Expr { Identifier(Ident), /// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col` CompoundIdentifier(Vec), + /// Multi-part expression access. 
+ /// + /// This structure represents an access chain in structured / nested types + /// such as maps, arrays, and lists: + /// - Array + /// - A 1-dim array `a[1]` will be represented like: + /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1)])` + /// - A 2-dim array `a[1][2]` will be represented like: + /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1), Subscript(2)])` + /// - Map or Struct (Bracket-style) + /// - A map `a['field1']` will be represented like: + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1')])` + /// - A 2-dim map `a['field1']['field2']` will be represented like: + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Subscript('field2')])` + /// - Struct (Dot-style) (only takes effect when the chain contains both subscript and dot accesses) + /// - A struct access `a['field1'].field2` will be represented like: + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Dot(Ident('field2'))])` + /// - If a struct access looks like `a.field1.field2`, it will be represented by `CompoundIdentifier([a, field1, field2])` + CompoundFieldAccess { + root: Box<Expr>, + access_chain: Vec<AccessExpr>, + }, /// Access data nested in a value containing semi-structured data, such as /// the `VARIANT` type on Snowflake. for example `src:customer[0].name`. /// @@ -882,14 +870,6 @@ pub enum Expr { data_type: DataType, value: String, }, - /// Access a map-like object by field (e.g. `column['field']` or `column[4]` - /// Note that depending on the dialect, struct like accesses may be - /// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess) - /// - MapAccess { - column: Box, - keys: Vec, - }, /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), /// Arbitrary expr method call @@ -978,11 +958,6 @@ pub enum Expr { /// ``` /// [1]: https://duckdb.org/docs/sql/data_types/map#creating-maps Map(Map), - /// An access of nested data using subscript syntax, for example `array[2]`. 
- Subscript { - expr: Box, - subscript: Box, - }, /// An array expression e.g. `ARRAY[1, 2]` Array(Array), /// An interval expression e.g. `INTERVAL '1' YEAR` @@ -1099,6 +1074,27 @@ impl fmt::Display for Subscript { } } +/// An element of a [`Expr::CompoundFieldAccess`]. +/// Either a dot-style field access or a bracket-style subscript. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AccessExpr { + /// Accesses a field using dot notation, e.g. the `.bar` in `foo[0].bar`. + Dot(Expr), + /// Accesses a field or array element using bracket notation, e.g. `foo['bar']`. + Subscript(Subscript), +} + +impl fmt::Display for AccessExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + AccessExpr::Dot(expr) => write!(f, ".{}", expr), + AccessExpr::Subscript(subscript) => write!(f, "[{}]", subscript), + } + } +} + /// A lambda function. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1294,12 +1290,16 @@ impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Expr::Identifier(s) => write!(f, "{s}"), - Expr::MapAccess { column, keys } => { - write!(f, "{column}{}", display_separated(keys, "")) - } Expr::Wildcard(_) => f.write_str("*"), Expr::QualifiedWildcard(prefix, _) => write!(f, "{}.*", prefix), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), + Expr::CompoundFieldAccess { root, access_chain } => { + write!(f, "{}", root)?; + for field in access_chain { + write!(f, "{}", field)?; + } + Ok(()) + } Expr::IsTrue(ast) => write!(f, "{ast} IS TRUE"), Expr::IsNotTrue(ast) => write!(f, "{ast} IS NOT TRUE"), Expr::IsFalse(ast) => write!(f, "{ast} IS FALSE"), @@ -1719,12 +1719,6 @@ impl fmt::Display for Expr { Expr::Map(map) => { write!(f, "{map}") } - Expr::Subscript { - expr, - subscript: key, - 
} => { - write!(f, "{expr}[{key}]") - } Expr::Array(set) => { write!(f, "{set}") } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index c2c7c14f0..1c28fd81a 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -20,20 +20,20 @@ use core::iter; use crate::tokenizer::Span; use super::{ - dcl::SecondaryRoles, AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Array, - Assignment, AssignmentTarget, CloseCursor, ClusteredIndex, ColumnDef, ColumnOption, - ColumnOptionDef, ConflictTarget, ConnectBy, ConstraintCharacteristics, CopySource, CreateIndex, - CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, ExceptSelectItem, ExcludeSelectItem, - Expr, ExprWithAlias, Fetch, FromTable, Function, FunctionArg, FunctionArgExpr, - FunctionArgumentClause, FunctionArgumentList, FunctionArguments, GroupByExpr, HavingBound, - IlikeSelectItem, Insert, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, - JsonPath, JsonPathElem, LateralView, MatchRecognizePattern, Measure, NamedWindowDefinition, - ObjectName, Offset, OnConflict, OnConflictAction, OnInsert, OrderBy, OrderByExpr, Partition, - PivotValueSource, ProjectionSelect, Query, ReferentialAction, RenameSelectItem, - ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, - Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, - TableFactor, TableOptionsClustered, TableWithJoins, Use, Value, Values, ViewColumnDef, - WildcardAdditionalOptions, With, WithFill, + dcl::SecondaryRoles, AccessExpr, AlterColumnOperation, AlterIndexOperation, + AlterTableOperation, Array, Assignment, AssignmentTarget, CloseCursor, ClusteredIndex, + ColumnDef, ColumnOption, ColumnOptionDef, ConflictTarget, ConnectBy, ConstraintCharacteristics, + CopySource, CreateIndex, CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, + ExceptSelectItem, ExcludeSelectItem, Expr, ExprWithAlias, Fetch, FromTable, Function, + FunctionArg, FunctionArgExpr, 
FunctionArgumentClause, FunctionArgumentList, FunctionArguments, + GroupByExpr, HavingBound, IlikeSelectItem, Insert, Interpolate, InterpolateExpr, Join, + JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView, MatchRecognizePattern, + Measure, NamedWindowDefinition, ObjectName, Offset, OnConflict, OnConflictAction, OnInsert, + OrderBy, OrderByExpr, Partition, PivotValueSource, ProjectionSelect, Query, ReferentialAction, + RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, + SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, + TableConstraint, TableFactor, TableOptionsClustered, TableWithJoins, Use, Value, Values, + ViewColumnDef, WildcardAdditionalOptions, With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. @@ -1261,6 +1261,9 @@ impl Spanned for Expr { Expr::Identifier(ident) => ident.span, Expr::CompoundIdentifier(vec) => union_spans(vec.iter().map(|i| i.span)), Expr::CompositeAccess { expr, key } => expr.span().union(&key.span), + Expr::CompoundFieldAccess { root, access_chain } => { + union_spans(iter::once(root.span()).chain(access_chain.iter().map(|i| i.span()))) + } Expr::IsFalse(expr) => expr.span(), Expr::IsNotFalse(expr) => expr.span(), Expr::IsTrue(expr) => expr.span(), @@ -1335,9 +1338,6 @@ impl Spanned for Expr { Expr::Nested(expr) => expr.span(), Expr::Value(value) => value.span(), Expr::TypedString { .. } => Span::empty(), - Expr::MapAccess { column, keys } => column - .span() - .union(&union_spans(keys.iter().map(|i| i.key.span()))), Expr::Function(function) => function.span(), Expr::GroupingSets(vec) => { union_spans(vec.iter().flat_map(|i| i.iter().map(|k| k.span()))) @@ -1433,7 +1433,6 @@ impl Spanned for Expr { Expr::Named { .. 
} => Span::empty(), Expr::Dictionary(_) => Span::empty(), Expr::Map(_) => Span::empty(), - Expr::Subscript { expr, subscript } => expr.span().union(&subscript.span()), Expr::Interval(interval) => interval.value.span(), Expr::Wildcard(token) => token.0.span, Expr::QualifiedWildcard(object_name, token) => union_spans( @@ -1472,6 +1471,15 @@ impl Spanned for Subscript { } } +impl Spanned for AccessExpr { + fn span(&self) -> Span { + match self { + AccessExpr::Dot(ident) => ident.span(), + AccessExpr::Subscript(subscript) => subscript.span(), + } + } +} + impl Spanned for ObjectName { fn span(&self) -> Span { let ObjectName(segments) = self; diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 77d2ccff1..0596d7c3c 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -234,6 +234,10 @@ impl Dialect for SnowflakeDialect { RESERVED_FOR_IDENTIFIER.contains(&kw) } } + + fn supports_partiql(&self) -> bool { + true + } } /// Parse snowflake create table statement. 
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7d70460b4..13efdf58f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1146,53 +1146,39 @@ impl<'a> Parser<'a> { w_span: Span, ) -> Result { match self.peek_token().token { - Token::LParen | Token::Period => { - let mut id_parts: Vec = vec![w.to_ident(w_span)]; - let mut ending_wildcard: Option = None; - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), - Token::Mul => { - // Postgres explicitly allows funcnm(tablenm.*) and the - // function array_agg traverses this control flow - if dialect_of!(self is PostgreSqlDialect) { - ending_wildcard = Some(next_token); - break; - } else { - return self.expected("an identifier after '.'", next_token); - } - } - Token::SingleQuotedString(s) => id_parts.push(Ident::with_quote('\'', s)), - _ => { - return self.expected("an identifier or a '*' after '.'", next_token); - } - } - } - - if let Some(wildcard_token) = ending_wildcard { - Ok(Expr::QualifiedWildcard( - ObjectName(id_parts), - AttachedToken(wildcard_token), - )) - } else if self.consume_token(&Token::LParen) { - if dialect_of!(self is SnowflakeDialect | MsSqlDialect) - && self.consume_tokens(&[Token::Plus, Token::RParen]) - { - Ok(Expr::OuterJoin(Box::new( - match <[Ident; 1]>::try_from(id_parts) { - Ok([ident]) => Expr::Identifier(ident), - Err(parts) => Expr::CompoundIdentifier(parts), - }, - ))) - } else { - self.prev_token(); - self.parse_function(ObjectName(id_parts)) - } + Token::Period => { + self.parse_compound_field_access(Expr::Identifier(w.to_ident(w_span)), vec![]) + } + Token::LParen => { + let id_parts = vec![w.to_ident(w_span)]; + if let Some(expr) = self.parse_outer_join_expr(&id_parts) { + Ok(expr) } else { - Ok(Expr::CompoundIdentifier(id_parts)) + let mut expr = self.parse_function(ObjectName(id_parts))?; + // consume all period if it's a method chain + expr = 
self.try_parse_method(expr)?; + let fields = vec![]; + self.parse_compound_field_access(expr, fields) } } + Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) => + { + let ident = Expr::Identifier(w.to_ident(w_span)); + let mut fields = vec![]; + self.parse_multi_dim_subscript(&mut fields)?; + self.parse_compound_field_access(ident, fields) + } + // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html + Token::SingleQuotedString(_) + | Token::DoubleQuotedString(_) + | Token::HexStringLiteral(_) + if w.value.starts_with('_') => + { + Ok(Expr::IntroducedString { + introducer: w.value.clone(), + value: self.parse_introduced_string_value()?, + }) + } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) @@ -1429,6 +1415,144 @@ impl<'a> Parser<'a> { } } + /// Try to parse an [Expr::CompoundFieldAccess] like `a.b.c` or `a.b[1].c`. + /// If all the fields are `Expr::Identifier`s, return an [Expr::CompoundIdentifier] instead. + /// If only the root exists, return the root. + /// If the dialect supports PartiQL ([Dialect::supports_partiql]), parsing stops before a [Token::LBracket] that follows a field, leaving it for the JsonAccess parsing. + pub fn parse_compound_field_access( + &mut self, + root: Expr, + mut chain: Vec<AccessExpr>, + ) -> Result<Expr, ParserError> { + let mut ending_wildcard: Option<TokenWithSpan> = None; + let mut ending_lbracket = false; + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => { + let expr = Expr::Identifier(w.to_ident(next_token.span)); + chain.push(AccessExpr::Dot(expr)); + if self.peek_token().token == Token::LBracket { + if self.dialect.supports_partiql() { + self.next_token(); + ending_lbracket = true; + break; + } else { + self.parse_multi_dim_subscript(&mut chain)? 
+ } + } + } + Token::Mul => { + // Postgres explicitly allows funcnm(tablenm.*) and the + // function array_agg traverses this control flow + if dialect_of!(self is PostgreSqlDialect) { + ending_wildcard = Some(next_token); + break; + } else { + return self.expected("an identifier after '.'", next_token); + } + } + Token::SingleQuotedString(s) => { + let expr = Expr::Identifier(Ident::with_quote('\'', s)); + chain.push(AccessExpr::Dot(expr)); + } + _ => { + return self.expected("an identifier or a '*' after '.'", next_token); + } + } + } + + // `ending_lbracket` is only set for partiql dialects: go back one Token::LBracket so it is left for the JsonAccess parsing + if ending_lbracket { + self.prev_token(); + } + + if let Some(wildcard_token) = ending_wildcard { + if !Self::is_all_ident(&root, &chain) { + return self.expected("an identifier or a '*' after '.'", self.peek_token()); + }; + Ok(Expr::QualifiedWildcard( + ObjectName(Self::exprs_to_idents(root, chain)?), + AttachedToken(wildcard_token), + )) + } else if self.peek_token().token == Token::LParen { + if !Self::is_all_ident(&root, &chain) { + // consume LParen + self.next_token(); + return self.expected("an identifier or a '*' after '.'", self.peek_token()); + }; + let id_parts = Self::exprs_to_idents(root, chain)?; + if let Some(expr) = self.parse_outer_join_expr(&id_parts) { + Ok(expr) + } else { + self.parse_function(ObjectName(id_parts)) + } + } else { + if Self::is_all_ident(&root, &chain) { + return Ok(Expr::CompoundIdentifier(Self::exprs_to_idents( + root, chain, + )?)); + } + if chain.is_empty() { + return Ok(root); + } + Ok(Expr::CompoundFieldAccess { + root: Box::new(root), + access_chain: chain, + }) + } + } + + /// Check if the root is an identifier and all fields are identifiers. 
+ fn is_all_ident(root: &Expr, fields: &[AccessExpr]) -> bool { + if !matches!(root, Expr::Identifier(_)) { + return false; + } + fields + .iter() + .all(|x| matches!(x, AccessExpr::Dot(Expr::Identifier(_)))) + } + + /// Convert a root and a list of fields to a list of identifiers, erroring on the first element that is not a plain identifier. + fn exprs_to_idents(root: Expr, fields: Vec<AccessExpr>) -> Result<Vec<Ident>, ParserError> { + let mut idents = vec![]; + if let Expr::Identifier(root) = root { + idents.push(root); + for x in fields { + if let AccessExpr::Dot(Expr::Identifier(ident)) = x { + idents.push(ident); + } else { + return parser_err!( + format!("Expected identifier, found: {}", x), + x.span().start + ); + } + } + Ok(idents) + } else { + parser_err!( + format!("Expected identifier, found: {}", root), + root.span().start + ) + } + } + + /// Try to parse the outer-join operator `(+)` after `id_parts` (Snowflake / MsSql only), returning `None` if it is absent + fn parse_outer_join_expr(&mut self, id_parts: &[Ident]) -> Option<Expr> { + if dialect_of!(self is SnowflakeDialect | MsSqlDialect) + && self.consume_tokens(&[Token::LParen, Token::Plus, Token::RParen]) + { + Some(Expr::OuterJoin(Box::new( + match id_parts { + [ident] => Expr::Identifier(ident.clone()), + parts => Expr::CompoundIdentifier(parts.to_vec()), + }, + ))) + } else { + None + } + } + pub fn parse_utility_options(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let options = self.parse_comma_separated(Self::parse_utility_option)?; @@ -3045,13 +3169,18 @@ impl<'a> Parser<'a> { expr: Box::new(expr), }) } else if Token::LBracket == tok { - if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { - self.parse_subscript(expr) - } else if dialect_of!(self is SnowflakeDialect) || self.dialect.supports_partiql() { + if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) + { + let mut chain = vec![]; + // back to LBracket + self.prev_token(); + self.parse_multi_dim_subscript(&mut chain)?; + self.parse_compound_field_access(expr, chain) + 
} else if self.dialect.supports_partiql() { self.prev_token(); self.parse_json_access(expr) } else { - self.parse_map_access(expr) + parser_err!("Array subscripting is not supported", tok.span.start) } } else if dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == tok { self.prev_token(); @@ -3147,15 +3276,24 @@ impl<'a> Parser<'a> { }) } + /// Parse consecutive bracketed subscripts like `[1:3][1][1]`, appending one [AccessExpr::Subscript] per bracket to `chain` + pub fn parse_multi_dim_subscript( + &mut self, + chain: &mut Vec<AccessExpr>, + ) -> Result<(), ParserError> { + while self.consume_token(&Token::LBracket) { + self.parse_subscript(chain)?; + } + Ok(()) + } + /// Parses an array subscript like `[1:3]` /// /// Parser is right after `[` - pub fn parse_subscript(&mut self, expr: Expr) -> Result { + fn parse_subscript(&mut self, chain: &mut Vec<AccessExpr>) -> Result<(), ParserError> { let subscript = self.parse_subscript_inner()?; - Ok(Expr::Subscript { - expr: Box::new(expr), - subscript: Box::new(subscript), - }) + chain.push(AccessExpr::Subscript(subscript)); + Ok(()) } fn parse_json_path_object_key(&mut self) -> Result { @@ -3217,46 +3355,6 @@ impl<'a> Parser<'a> { Ok(JsonPath { path }) } - pub fn parse_map_access(&mut self, expr: Expr) -> Result { - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - - let mut keys = vec![MapAccessKey { - key, - syntax: MapAccessSyntax::Bracket, - }]; - loop { - let key = match self.peek_token().token { - Token::LBracket => { - self.next_token(); // consume `[` - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - MapAccessKey { - key, - syntax: MapAccessSyntax::Bracket, - } - } - // Access on BigQuery nested and repeated expressions can - // mix notations in the same expression. 
- // https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns - Token::Period if dialect_of!(self is BigQueryDialect) => { - self.next_token(); // consume `.` - MapAccessKey { - key: self.parse_expr()?, - syntax: MapAccessSyntax::Period, - } - } - _ => break, - }; - keys.push(key); - } - - Ok(Expr::MapAccess { - column: Box::new(expr), - keys, - }) - } - /// Parses the parens following the `[ NOT ] IN` operator. pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result { // BigQuery allows `IN UNNEST(array_expression)` diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 0311eba16..94bbdbde8 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -23,7 +23,7 @@ use std::ops::Deref; use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; use sqlparser::parser::{ParserError, ParserOptions}; -use sqlparser::tokenizer::Span; +use sqlparser::tokenizer::{Location, Span}; use test_utils::*; #[test] @@ -1954,27 +1954,47 @@ fn parse_map_access_expr() { let sql = "users[-1][safe_offset(2)].a.b"; let expr = bigquery().verified_expr(sql); - fn map_access_key(key: Expr, syntax: MapAccessSyntax) -> MapAccessKey { - MapAccessKey { key, syntax } - } - let expected = Expr::MapAccess { - column: Expr::Identifier(Ident::new("users")).into(), - keys: vec![ - map_access_key( - Expr::UnaryOp { + let expected = Expr::CompoundFieldAccess { + root: Box::new(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 1), Location::of(1, 6)), + "users", + ))), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Index { + index: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Expr::Value(number("1")).into(), }, - MapAccessSyntax::Bracket, - ), - map_access_key( - call("safe_offset", [Expr::Value(number("2"))]), - MapAccessSyntax::Bracket, - ), - map_access_key( - Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("b")]), - MapAccessSyntax::Period, - ), + }), + 
AccessExpr::Subscript(Subscript::Index { + index: Expr::Function(Function { + name: ObjectName(vec![Ident::with_span( + Span::new(Location::of(1, 11), Location::of(1, 22)), + "safe_offset", + )]), + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + number("2"), + )))], + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + uses_odbc_syntax: false, + }), + }), + AccessExpr::Dot(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 24), Location::of(1, 25)), + "a", + ))), + AccessExpr::Dot(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 26), Location::of(1, 27)), + "b", + ))), ], }; assert_eq!(expr, expected); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index d60506d90..2f1b043b6 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -25,7 +25,7 @@ use helpers::attached_token::AttachedToken; use sqlparser::tokenizer::Span; use test_utils::*; -use sqlparser::ast::Expr::{BinaryOp, Identifier, MapAccess}; +use sqlparser::ast::Expr::{BinaryOp, Identifier}; use sqlparser::ast::SelectItem::UnnamedExpr; use sqlparser::ast::TableFactor::Table; use sqlparser::ast::Value::Number; @@ -44,22 +44,21 @@ fn parse_map_access_expr() { select_token: AttachedToken::empty(), top: None, top_before_distinct: false, - projection: vec![UnnamedExpr(MapAccess { - column: Box::new(Identifier(Ident { + projection: vec![UnnamedExpr(Expr::CompoundFieldAccess { + root: Box::new(Identifier(Ident { value: "string_values".to_string(), quote_style: None, span: Span::empty(), })), - keys: vec![MapAccessKey { - key: call( + access_chain: vec![AccessExpr::Subscript(Subscript::Index { + index: call( "indexOf", [ Expr::Identifier(Ident::new("string_names")), Expr::Value(Value::SingleQuotedString("endpoint".to_string())) ] ), - syntax: 
MapAccessSyntax::Bracket - }], + })], })], into: None, from: vec![TableWithJoins { @@ -76,18 +75,17 @@ fn parse_map_access_expr() { }), op: BinaryOperator::And, right: Box::new(BinaryOp { - left: Box::new(MapAccess { - column: Box::new(Identifier(Ident::new("string_value"))), - keys: vec![MapAccessKey { - key: call( + left: Box::new(Expr::CompoundFieldAccess { + root: Box::new(Identifier(Ident::new("string_value"))), + access_chain: vec![AccessExpr::Subscript(Subscript::Index { + index: call( "indexOf", [ Expr::Identifier(Ident::new("string_name")), Expr::Value(Value::SingleQuotedString("app".to_string())) ] ), - syntax: MapAccessSyntax::Bracket - }], + })], }), op: BinaryOperator::NotEq, right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 0f1813c2f..008640e90 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -37,8 +37,8 @@ use sqlparser::dialect::{ }; use sqlparser::keywords::{Keyword, ALL_KEYWORDS}; use sqlparser::parser::{Parser, ParserError, ParserOptions}; -use sqlparser::tokenizer::Span; use sqlparser::tokenizer::Tokenizer; +use sqlparser::tokenizer::{Location, Span}; use test_utils::{ all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, call, expr_from_projection, join, number, only, table, table_alias, table_from_name, TestedDialects, @@ -2933,6 +2933,31 @@ fn parse_window_function_null_treatment_arg() { ); } +#[test] +fn test_compound_expr() { + let supported_dialects = TestedDialects::new(vec![ + Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), + Box::new(BigQueryDialect {}), + ]); + let sqls = [ + "SELECT abc[1].f1 FROM t", + "SELECT abc[1].f1.f2 FROM t", + "SELECT f1.abc[1] FROM t", + "SELECT f1.f2.abc[1] FROM t", + "SELECT f1.abc[1].f2 FROM t", + "SELECT named_struct('a', 1, 'b', 2).a", + "SELECT named_struct('a', 1, 'b', 2).a", + "SELECT make_array(1, 2, 3)[1]", + "SELECT make_array(named_struct('a', 1))[1].a", 
+ "SELECT abc[1][-1].a.b FROM t", + "SELECT abc[1][-1].a.b[1] FROM t", + ]; + for sql in sqls { + supported_dialects.verified_stmt(sql); + } +} + #[test] fn parse_negative_value() { let sql1 = "SELECT -1"; @@ -10137,20 +10162,39 @@ fn parse_map_access_expr() { Box::new(ClickHouseDialect {}), ]); let expr = dialects.verified_expr(sql); - let expected = Expr::MapAccess { - column: Expr::Identifier(Ident::new("users")).into(), - keys: vec![ - MapAccessKey { - key: Expr::UnaryOp { + let expected = Expr::CompoundFieldAccess { + root: Box::new(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 1), Location::of(1, 6)), + "users", + ))), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Index { + index: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Expr::Value(number("1")).into(), }, - syntax: MapAccessSyntax::Bracket, - }, - MapAccessKey { - key: call("safe_offset", [Expr::Value(number("2"))]), - syntax: MapAccessSyntax::Bracket, - }, + }), + AccessExpr::Subscript(Subscript::Index { + index: Expr::Function(Function { + name: ObjectName(vec![Ident::with_span( + Span::new(Location::of(1, 11), Location::of(1, 22)), + "safe_offset", + )]), + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + number("2"), + )))], + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + uses_odbc_syntax: false, + }), + }), ], }; assert_eq!(expr, expected); @@ -10940,8 +10984,8 @@ fn test_map_syntax() { check( "MAP {'a': 10, 'b': 20}['a']", - Expr::Subscript { - expr: Box::new(Expr::Map(Map { + Expr::CompoundFieldAccess { + root: Box::new(Expr::Map(Map { entries: vec![ MapEntry { key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), @@ -10953,9 +10997,9 @@ fn test_map_syntax() { }, ], })), - subscript: Box::new(Subscript::Index { + access_chain: 
vec![AccessExpr::Subscript(Subscript::Index { index: Expr::Value(Value::SingleQuotedString("a".to_owned())), - }), + })], }, ); diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index d441cd195..db4ffb6f6 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -630,8 +630,8 @@ fn test_array_index() { _ => panic!("Expected an expression with alias"), }; assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Array(Array { + &Expr::CompoundFieldAccess { + root: Box::new(Expr::Array(Array { elem: vec![ Expr::Value(Value::SingleQuotedString("a".to_owned())), Expr::Value(Value::SingleQuotedString("b".to_owned())), @@ -639,9 +639,9 @@ fn test_array_index() { ], named: false })), - subscript: Box::new(Subscript::Index { + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index: Expr::Value(number("3")) - }) + })] }, expr ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index aaf4e65db..557e70bff 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2095,11 +2095,11 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Identifier(Ident::new("foo"))), - subscript: Box::new(Subscript::Index { + &Expr::CompoundFieldAccess { + root: Box::new(Expr::Identifier(Ident::new("foo"))), + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index: num[0].clone() - }), + })], }, expr_from_projection(only(&select.projection)), ); @@ -2107,16 +2107,16 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0][0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Identifier(Ident::new("foo"))), - subscript: Box::new(Subscript::Index { + &Expr::CompoundFieldAccess { + root: Box::new(Expr::Identifier(Ident::new("foo"))), + access_chain: vec![ + 
AccessExpr::Subscript(Subscript::Index { index: num[0].clone() }), - }), - subscript: Box::new(Subscript::Index { - index: num[0].clone() - }), + AccessExpr::Subscript(Subscript::Index { + index: num[0].clone() + }) + ], }, expr_from_projection(only(&select.projection)), ); @@ -2124,29 +2124,27 @@ fn parse_array_index_expr() { let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Identifier(Ident::new("bar"))), - subscript: Box::new(Subscript::Index { - index: num[0].clone() - }) + &Expr::CompoundFieldAccess { + root: Box::new(Expr::Identifier(Ident::new("bar"))), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Index { + index: num[0].clone() }), - subscript: Box::new(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: Expr::Identifier(Ident { value: "baz".to_string(), quote_style: Some('"'), span: Span::empty(), }) - }) - }), - subscript: Box::new(Subscript::Index { - index: Expr::Identifier(Ident { - value: "fooz".to_string(), - quote_style: Some('"'), - span: Span::empty(), - }) - }) + }), + AccessExpr::Subscript(Subscript::Index { + index: Expr::Identifier(Ident { + value: "fooz".to_string(), + quote_style: Some('"'), + span: Span::empty(), + }) + }), + ], }, expr_from_projection(only(&select.projection)), ); @@ -2154,33 +2152,33 @@ fn parse_array_index_expr() { let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Nested(Box::new(Expr::Cast { - kind: CastKind::Cast, - expr: Box::new(Expr::Array(Array { - elem: vec![Expr::Array(Array { - elem: vec![num[2].clone(), num[3].clone(),], - named: true, - })], + &Expr::CompoundFieldAccess { + root: Box::new(Expr::Nested(Box::new(Expr::Cast { + kind: 
CastKind::Cast, + expr: Box::new(Expr::Array(Array { + elem: vec![Expr::Array(Array { + elem: vec![num[2].clone(), num[3].clone(),], named: true, - })), - data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Int(None)), - None - ))), + })], + named: true, + })), + data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Int(None)), None - )), - format: None, - }))), - subscript: Box::new(Subscript::Index { + ))), + None + )), + format: None, + }))), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Index { index: num[1].clone() }), - }), - subscript: Box::new(Subscript::Index { - index: num[2].clone() - }), + AccessExpr::Subscript(Subscript::Index { + index: num[2].clone() + }), + ], }, expr_from_projection(only(&select.projection)), ); @@ -2269,9 +2267,13 @@ fn parse_array_subscript() { ), ]; for (sql, expect) in tests { - let Expr::Subscript { subscript, .. } = pg_and_generic().verified_expr(sql) else { + let Expr::CompoundFieldAccess { access_chain, .. 
} = pg_and_generic().verified_expr(sql) + else { panic!("expected subscript expr"); }; + let Some(AccessExpr::Subscript(subscript)) = access_chain.last() else { + panic!("expected subscript"); + }; assert_eq!(expect, *subscript); } @@ -2282,25 +2284,25 @@ fn parse_array_subscript() { fn parse_array_multi_subscript() { let expr = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]"); assert_eq!( - Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(call( - "make_array", - vec![ - Expr::Value(number("1")), - Expr::Value(number("2")), - Expr::Value(number("3")) - ] - )), - subscript: Box::new(Subscript::Slice { + Expr::CompoundFieldAccess { + root: Box::new(call( + "make_array", + vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")) + ] + )), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Slice { lower_bound: Some(Expr::Value(number("1"))), upper_bound: Some(Expr::Value(number("2"))), stride: None, }), - }), - subscript: Box::new(Subscript::Index { - index: Expr::Value(number("2")), - }), + AccessExpr::Subscript(Subscript::Index { + index: Expr::Value(number("2")), + }), + ], }, expr, );