From 87a8547730bee3f3e9b46adae61e8567c93fb22f Mon Sep 17 00:00:00 2001 From: changsun20 <110759360+changsun20@users.noreply.github.com> Date: Mon, 10 Mar 2025 18:45:42 -0400 Subject: [PATCH 01/11] feat: diagnostic to more than one column error --- datafusion/expr/src/expr_fn.rs | 5 ++ datafusion/expr/src/expr_schema.rs | 1 + datafusion/expr/src/logical_plan/plan.rs | 10 ++- datafusion/expr/src/logical_plan/tree_node.rs | 2 + .../optimizer/src/analyzer/type_coercion.rs | 7 ++ datafusion/sql/src/expr/subquery.rs | 61 +++++++++++++- datafusion/sql/src/relation/mod.rs | 2 + datafusion/sql/tests/cases/diagnostic.rs | 79 +++++++++++++++++++ .../substrait/src/logical_plan/consumer.rs | 3 + 9 files changed, 166 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index a8e7fd76d037..0211a063bd86 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -252,6 +252,7 @@ pub fn exists(subquery: Arc) -> Expr { subquery: Subquery { subquery, outer_ref_columns, + spans: None, }, negated: false, }) @@ -264,6 +265,7 @@ pub fn not_exists(subquery: Arc) -> Expr { subquery: Subquery { subquery, outer_ref_columns, + spans: None, }, negated: true, }) @@ -277,6 +279,7 @@ pub fn in_subquery(expr: Expr, subquery: Arc) -> Expr { Subquery { subquery, outer_ref_columns, + spans: None, }, false, )) @@ -290,6 +293,7 @@ pub fn not_in_subquery(expr: Expr, subquery: Arc) -> Expr { Subquery { subquery, outer_ref_columns, + spans: None, }, true, )) @@ -301,6 +305,7 @@ pub fn scalar_subquery(subquery: Arc) -> Expr { Expr::ScalarSubquery(Subquery { subquery, outer_ref_columns, + spans: None, }) } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 0a14cb5c60a0..1c59221f762d 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -608,6 +608,7 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result { self.assert_no_expressions(expr)?; let input = self.only_input(inputs)?; @@ -948,6 +950,7 @@ impl LogicalPlan { Ok(LogicalPlan::Subquery(Subquery { subquery: Arc::new(subquery), outer_ref_columns: outer_ref_columns.clone(), + spans: spans.clone(), })) } LogicalPlan::SubqueryAlias(SubqueryAlias { alias, .. }) => { @@ -3617,6 +3620,8 @@ pub struct Subquery { pub subquery: Arc, /// The outer references used in the subquery pub outer_ref_columns: Vec, + /// Span information for subquery projection columns + pub spans: Option, } impl Normalizeable for Subquery { @@ -3651,6 +3656,7 @@ impl Subquery { Subquery { subquery: plan, outer_ref_columns: self.outer_ref_columns.clone(), + spans: None, } } } diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index dfc18c74c70a..87b49a3a2e1b 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -159,10 +159,12 @@ impl TreeNode for LogicalPlan { LogicalPlan::Subquery(Subquery { subquery, outer_ref_columns, + spans, }) => subquery.map_elements(f)?.update_data(|subquery| { LogicalPlan::Subquery(Subquery { subquery, outer_ref_columns, + spans, }) }), LogicalPlan::SubqueryAlias(SubqueryAlias { diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 538ef98ac7be..6d48ce3e80a2 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -311,12 +311,14 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> { Expr::ScalarSubquery(Subquery { subquery, outer_ref_columns, + spans, }) => { let new_plan = analyze_internal(self.schema, Arc::unwrap_or_clone(subquery))?.data; Ok(Transformed::yes(Expr::ScalarSubquery(Subquery { subquery: Arc::new(new_plan), outer_ref_columns, + spans, }))) } Expr::Exists(Exists { subquery, negated }) => { @@ -329,6 +331,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> { subquery: Subquery { subquery: Arc::new(new_plan), outer_ref_columns: subquery.outer_ref_columns, + spans: subquery.spans, }, negated, }))) @@ -352,6 +355,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> { let new_subquery = Subquery { subquery: Arc::new(new_plan), outer_ref_columns: subquery.outer_ref_columns, + spans: subquery.spans, }; Ok(Transformed::yes(Expr::InSubquery(InSubquery::new( Box::new(expr.cast_to(&common_type, self.schema)?), @@ -2089,6 +2093,7 @@ mod test { Subquery { subquery: empty_int32, outer_ref_columns: vec![], + spans: None, }, false, )); @@ -2114,6 +2119,7 @@ mod test { Subquery { subquery: empty_int64, outer_ref_columns: vec![], + spans: None, }, false, )); @@ -2138,6 +2144,7 @@ mod test { Subquery { subquery: empty_inside, outer_ref_columns: vec![], + spans: None, }, false, )); diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index 481f024787fe..4f2b6a6466f2 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -16,12 +16,12 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{DFSchema, Result}; +use datafusion_common::{plan_err, DFSchema, Diagnostic, Result, Span, Spans}; use datafusion_expr::expr::Exists; use datafusion_expr::expr::InSubquery; use datafusion_expr::{Expr, Subquery}; use sqlparser::ast::Expr as SQLExpr; -use sqlparser::ast::Query; +use sqlparser::ast::{Query, SelectItem, SetExpr}; use std::sync::Arc; impl SqlToRel<'_, S> { @@ -41,6 +41,7 @@ impl SqlToRel<'_, S> { subquery: Subquery { subquery: Arc::new(sub_plan), outer_ref_columns, + spans: None, }, negated, })) @@ -65,6 +66,7 @@ impl SqlToRel<'_, S> { Subquery { subquery: Arc::new(sub_plan), outer_ref_columns, + spans: None, }, negated, ))) @@ -78,12 +80,67 @@ impl SqlToRel<'_, S> { ) -> Result { let old_outer_query_schema = planner_context.set_outer_query_schema(Some(input_schema.clone().into())); + let mut spans = Spans::new(); + if let SetExpr::Select(select) = subquery.body.as_ref() { + for item in &select.projection { + if let SelectItem::ExprWithAlias { alias, .. } = item { + if let Some(span) = Span::try_from_sqlparser_span(alias.span) { + spans.add_span(span); + } + } + } + } let sub_plan = self.query_to_plan(subquery, planner_context)?; let outer_ref_columns = sub_plan.all_out_ref_exprs(); planner_context.set_outer_query_schema(old_outer_query_schema); + if sub_plan.schema().fields().len() > 1 { + let fields = sub_plan.schema().fields(); + let error_message = format!( + "Scalar subquery should only return one column, but found {}: {}", + fields.len(), + sub_plan.schema().field_names().join(", ") + ); + + return plan_err!("{}", &error_message).map_err(|err| { + let spans_vec = spans.clone(); + let primary_span = spans_vec.first().clone(); + + let mut diagnostic = Diagnostic::new_error( + "Scalar subquery returns multiple columns", + primary_span, + ); + + let columns_info = format!( + "Found {} columns: {}", + fields.len(), + sub_plan.schema().field_names().join(", ") + ); + + if spans_vec.0.len() > 1 { + diagnostic.add_note(columns_info.clone(), primary_span); + for (i, span) in spans_vec.iter().skip(1).enumerate() { + diagnostic.add_note( + format!("Extra column {}", i + 1), + Some(span.clone()), + ); + } + } else { + diagnostic.add_note(columns_info, primary_span); + } + + diagnostic.add_help( + "Select only one column in the subquery or use a row constructor", + None, + ); + + err.with_diagnostic(diagnostic) + }); + } + Ok(Expr::ScalarSubquery(Subquery { subquery: Arc::new(sub_plan), outer_ref_columns, + spans: Some(spans), })) } } diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 800dd151a124..3160c71c78a5 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -211,6 +211,7 @@ impl SqlToRel<'_, S> { LogicalPlan::Subquery(Subquery { subquery: input, outer_ref_columns, + spans: None, }), alias, ) @@ -218,6 +219,7 @@ impl SqlToRel<'_, S> { plan => Ok(LogicalPlan::Subquery(Subquery { subquery: Arc::new(plan), outer_ref_columns, + spans: None, })), } } diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index d70484f718c8..a44556eb4860 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -286,3 +286,82 @@ fn test_invalid_function() -> Result<()> { assert_eq!(diag.span, Some(spans["whole"])); Ok(()) } +#[test] +fn test_scalar_subquery_multiple_columns() -> Result<(), Box> { + let query = "SELECT (SELECT 1 AS /*x*/x/*x*/, 2 AS /*y*/y/*y*/) AS col"; + let spans = get_spans(query); + let diag = do_query(query); + + assert_eq!(diag.message, "Scalar subquery returns multiple columns"); + + let column_notes: Vec<_> = diag + .notes + .iter() + .filter(|note| note.message.contains("Found 2 columns")) + .collect(); + assert!(!column_notes.is_empty(), "Expected note about column count"); + + let extra_column_notes: Vec<_> = diag + .notes + .iter() + .filter(|note| note.message.contains("Extra column")) + .collect(); + assert_eq!( + extra_column_notes.len(), + 1, + "Expected note about extra column" + ); + + let expected_spans = vec![ + ( + "x".to_string(), + Span::new( + Location { + line: 1, + column: 26, + }, + Location { + line: 1, + column: 27, + }, + ), + ), + ( + "y".to_string(), + Span::new( + Location { + line: 1, + column: 44, + }, + Location { + line: 1, + column: 45, + }, + ), + ), + ] + .into_iter() + .collect::>(); + + assert_eq!( + spans, expected_spans, + "Primary diagnostic span should match the first column's span" + ); + + let extra_column_span = extra_column_notes + .first() + .and_then(|note| note.span.as_ref()); + assert_eq!( + extra_column_span, + spans.get("y"), + "Extra column note span should match the second column's span" + ); + assert!( + diag.helps + .iter() + .any(|help| help.message.contains("Select only one column")), + "Expected help message" + ); + + Ok(()) +} diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index ffeff3e9df47..0e8c86690dd6 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -2257,6 +2257,7 @@ pub async fn from_subquery( subquery: Subquery { subquery: Arc::new(haystack_expr), outer_ref_columns: outer_refs, + spans: None, }, negated: false, })) @@ -2275,6 +2276,7 @@ pub async fn from_subquery( Ok(Expr::ScalarSubquery(Subquery { subquery: Arc::new(plan), outer_ref_columns, + spans: None, })) } SubqueryType::SetPredicate(predicate) => { @@ -2290,6 +2292,7 @@ pub async fn from_subquery( Subquery { subquery: Arc::new(plan), outer_ref_columns, + spans: None, }, false, ))) From 363a94fa40e0fc7160df5d807e27a59a3b7c3ac4 Mon Sep 17 00:00:00 2001 From: changsun20 <110759360+changsun20@users.noreply.github.com> Date: Mon, 10 Mar 2025 19:57:21 -0400 Subject: [PATCH 02/11] test: improve test case --- datafusion/sql/tests/cases/diagnostic.rs | 68 ++++++++---------------- 1 file changed, 23 insertions(+), 45 deletions(-) diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index a44556eb4860..3fa8de45f50a 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -286,6 +286,7 @@ fn test_invalid_function() -> Result<()> { assert_eq!(diag.span, Some(spans["whole"])); Ok(()) } + #[test] fn test_scalar_subquery_multiple_columns() -> Result<(), Box> { let query = "SELECT (SELECT 1 AS /*x*/x/*x*/, 2 AS /*y*/y/*y*/) AS col"; @@ -294,12 +295,15 @@ fn test_scalar_subquery_multiple_columns() -> Result<(), Box = diag + let column_count_notes: Vec<_> = diag .notes .iter() .filter(|note| note.message.contains("Found 2 columns")) .collect(); - assert!(!column_notes.is_empty(), "Expected note about column count"); + assert!( + !column_count_notes.is_empty(), + "Expected note about column count" + ); let extra_column_notes: Vec<_> = diag .notes @@ -309,53 +313,27 @@ fn test_scalar_subquery_multiple_columns() -> Result<(), Box>(); - - assert_eq!( - spans, expected_spans, - "Primary diagnostic span should match the first column's span" - ); + if let Some(first_note) = diag.notes.first() { + assert_eq!( + first_note.span, + Some(spans["x"]), + "Primary diagnostic span should match the first column's span" + ); + } - let extra_column_span = extra_column_notes + if let Some(extra_column_span) = extra_column_notes .first() - .and_then(|note| note.span.as_ref()); - assert_eq!( - extra_column_span, - spans.get("y"), - "Extra column note span should match the second column's span" - ); + .and_then(|note| note.span.as_ref()) + { + assert_eq!( + *extra_column_span, spans["y"], + "Extra column note span should match the second column's span" + ); + } + assert!( diag.helps .iter() From bab6dd6b4c0831bf0b7047f4e3604eb61b85623f Mon Sep 17 00:00:00 2001 From: changsun20 <110759360+changsun20@users.noreply.github.com> Date: Mon, 10 Mar 2025 21:47:03 -0400 Subject: [PATCH 03/11] feat: multiple "in" queries diagnostic --- datafusion/sql/src/expr/subquery.rs | 64 ++++++++++++++++++++++-- datafusion/sql/tests/cases/diagnostic.rs | 58 +++++++++++++++++++++ 2 files changed, 119 insertions(+), 3 deletions(-) diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index 4f2b6a6466f2..4ac410701b35 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -57,16 +57,74 @@ impl SqlToRel<'_, S> { ) -> Result { let old_outer_query_schema = planner_context.set_outer_query_schema(Some(input_schema.clone().into())); + + let mut spans = Spans::new(); + if let SetExpr::Select(select) = subquery.body.as_ref() { + for item in &select.projection { + if let SelectItem::UnnamedExpr(sql_expr) = item { + if let SQLExpr::Identifier(ident) = sql_expr { + if let Some(span) = Span::try_from_sqlparser_span(ident.span) { + spans.add_span(span); + } + } + } + } + } + let sub_plan = self.query_to_plan(subquery, planner_context)?; let outer_ref_columns = sub_plan.all_out_ref_exprs(); planner_context.set_outer_query_schema(old_outer_query_schema); - let expr = Box::new(self.sql_to_expr(expr, input_schema, planner_context)?); + + let sub_schema = sub_plan.schema(); + let expr_obj = self.sql_to_expr(expr.clone(), input_schema, planner_context)?; + + if sub_schema.fields().len() > 1 { + let fields = sub_schema.fields(); + let error_message = format!( + "IN subquery should only return one column, but found {}: {}", + fields.len(), + sub_schema.field_names().join(", ") + ); + + return plan_err!("{}", &error_message).map_err(|err| { + let spans_vec = spans.clone(); + let primary_span = spans_vec.first().clone(); + + let mut diagnostic = Diagnostic::new_error( + "IN subquery returns multiple columns", + primary_span, + ); + + let columns_info = format!( + "Found {} columns: {}", + fields.len(), + sub_schema.field_names().join(", ") + ); + + if spans_vec.0.len() > 1 { + diagnostic.add_note(columns_info.clone(), primary_span); + for (i, span) in spans_vec.iter().skip(1).enumerate() { + diagnostic.add_note( + format!("Extra column {}", i + 1), + Some(span.clone()), + ); + } + } else { + diagnostic.add_note(columns_info, primary_span); + } + + diagnostic.add_help("Select only one column in the IN subquery", None); + + err.with_diagnostic(diagnostic) + }); + } + Ok(Expr::InSubquery(InSubquery::new( - expr, + Box::new(expr_obj), Subquery { subquery: Arc::new(sub_plan), outer_ref_columns, - spans: None, + spans: Some(spans), }, negated, ))) diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 3fa8de45f50a..614010dff720 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -343,3 +343,61 @@ fn test_scalar_subquery_multiple_columns() -> Result<(), Box Result<(), Box> { + // This query uses an IN subquery with multiple columns - this should trigger an error + let query = "SELECT * FROM person WHERE id IN (SELECT /*id*/id/*id*/, /*first*/first_name/*first*/ FROM person)"; + let spans = get_spans(query); + let diag = do_query(query); + + assert_eq!(diag.message, "IN subquery returns multiple columns"); + + let column_count_notes: Vec<_> = diag + .notes + .iter() + .filter(|note| note.message.contains("Found 2 columns")) + .collect(); + assert!( + !column_count_notes.is_empty(), + "Expected note about column count" + ); + + let extra_column_notes: Vec<_> = diag + .notes + .iter() + .filter(|note| note.message.contains("Extra column")) + .collect(); + assert_eq!( + extra_column_notes.len(), + 1, + "Expected one note about extra column" + ); + + if let Some(first_note) = diag.notes.first() { + assert_eq!( + first_note.span, + Some(spans["id"]), + "Primary diagnostic span should match the first column's span" + ); + } + + if let Some(extra_column_span) = extra_column_notes + .first() + .and_then(|note| note.span.as_ref()) + { + assert_eq!( + *extra_column_span, spans["first"], + "Extra column note span should match the second column's span" + ); + } + + assert!( + diag.helps + .iter() + .any(|help| help.message.contains("Select only one column")), + "Expected help message" + ); + + Ok(()) +} From bb7c75df324b19a98cb73a18394c6dff4c268308 Mon Sep 17 00:00:00 2001 From: changsun20 <110759360+changsun20@users.noreply.github.com> Date: Mon, 10 Mar 2025 22:12:32 -0400 Subject: [PATCH 04/11] refactor: reuse code in subquery.rs --- datafusion/sql/src/expr/subquery.rs | 167 ++++++++++------------- datafusion/sql/tests/cases/diagnostic.rs | 7 +- 2 files changed, 80 insertions(+), 94 deletions(-) diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index 4ac410701b35..45b37cbff826 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -17,9 +17,8 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::{plan_err, DFSchema, Diagnostic, Result, Span, Spans}; -use datafusion_expr::expr::Exists; -use datafusion_expr::expr::InSubquery; -use datafusion_expr::{Expr, Subquery}; +use datafusion_expr::expr::{Exists, InSubquery}; +use datafusion_expr::{Expr, LogicalPlan, Subquery}; use sqlparser::ast::Expr as SQLExpr; use sqlparser::ast::{Query, SelectItem, SetExpr}; use std::sync::Arc; @@ -61,11 +60,9 @@ impl SqlToRel<'_, S> { let mut spans = Spans::new(); if let SetExpr::Select(select) = subquery.body.as_ref() { for item in &select.projection { - if let SelectItem::UnnamedExpr(sql_expr) = item { - if let SQLExpr::Identifier(ident) = sql_expr { - if let Some(span) = Span::try_from_sqlparser_span(ident.span) { - spans.add_span(span); - } + if let SelectItem::UnnamedExpr(SQLExpr::Identifier(ident)) = item { + if let Some(span) = Span::try_from_sqlparser_span(ident.span) { + spans.add_span(span); } } } @@ -75,49 +72,14 @@ impl SqlToRel<'_, S> { let outer_ref_columns = sub_plan.all_out_ref_exprs(); planner_context.set_outer_query_schema(old_outer_query_schema); - let sub_schema = sub_plan.schema(); - let expr_obj = self.sql_to_expr(expr.clone(), input_schema, planner_context)?; - - if sub_schema.fields().len() > 1 { - let fields = sub_schema.fields(); - let error_message = format!( - "IN subquery should only return one column, but found {}: {}", - fields.len(), - sub_schema.field_names().join(", ") - ); - - return plan_err!("{}", &error_message).map_err(|err| { - let spans_vec = spans.clone(); - let primary_span = spans_vec.first().clone(); - - let mut diagnostic = Diagnostic::new_error( - "IN subquery returns multiple columns", - primary_span, - ); + self.validate_single_column( + &sub_plan, + spans.clone(), + "IN subquery should only return one column", + "Select only one column in the IN subquery", + )?; - let columns_info = format!( - "Found {} columns: {}", - fields.len(), - sub_schema.field_names().join(", ") - ); - - if spans_vec.0.len() > 1 { - diagnostic.add_note(columns_info.clone(), primary_span); - for (i, span) in spans_vec.iter().skip(1).enumerate() { - diagnostic.add_note( - format!("Extra column {}", i + 1), - Some(span.clone()), - ); - } - } else { - diagnostic.add_note(columns_info, primary_span); - } - - diagnostic.add_help("Select only one column in the IN subquery", None); - - err.with_diagnostic(diagnostic) - }); - } + let expr_obj = self.sql_to_expr(expr, input_schema, planner_context)?; Ok(Expr::InSubquery(InSubquery::new( Box::new(expr_obj), @@ -151,49 +113,13 @@ impl SqlToRel<'_, S> { let sub_plan = self.query_to_plan(subquery, planner_context)?; let outer_ref_columns = sub_plan.all_out_ref_exprs(); planner_context.set_outer_query_schema(old_outer_query_schema); - if sub_plan.schema().fields().len() > 1 { - let fields = sub_plan.schema().fields(); - let error_message = format!( - "Scalar subquery should only return one column, but found {}: {}", - fields.len(), - sub_plan.schema().field_names().join(", ") - ); - - return plan_err!("{}", &error_message).map_err(|err| { - let spans_vec = spans.clone(); - let primary_span = spans_vec.first().clone(); - - let mut diagnostic = Diagnostic::new_error( - "Scalar subquery returns multiple columns", - primary_span, - ); - - let columns_info = format!( - "Found {} columns: {}", - fields.len(), - sub_plan.schema().field_names().join(", ") - ); - if spans_vec.0.len() > 1 { - diagnostic.add_note(columns_info.clone(), primary_span); - for (i, span) in spans_vec.iter().skip(1).enumerate() { - diagnostic.add_note( - format!("Extra column {}", i + 1), - Some(span.clone()), - ); - } - } else { - diagnostic.add_note(columns_info, primary_span); - } - - diagnostic.add_help( - "Select only one column in the subquery or use a row constructor", - None, - ); - - err.with_diagnostic(diagnostic) - }); - } + self.validate_single_column( + &sub_plan, + spans.clone(), + "Scalar subquery should only return one column", + "Select only one column in the subquery or use a row constructor", + )?; Ok(Expr::ScalarSubquery(Subquery { subquery: Arc::new(sub_plan), @@ -201,4 +127,61 @@ impl SqlToRel<'_, S> { spans: Some(spans), })) } + + fn validate_single_column( + &self, + sub_plan: &LogicalPlan, + spans: Spans, + error_message: &str, + help_message: &str, + ) -> Result<()> { + if sub_plan.schema().fields().len() > 1 { + let sub_schema = sub_plan.schema(); + let fields = sub_schema.fields(); + let field_names = sub_schema.field_names(); + + plan_err!("{}: {}", error_message, field_names.join(", ")).map_err(|err| { + let diagnostic = self.build_multi_column_diagnostic( + fields.len(), + &field_names, + spans, + error_message, + help_message, + ); + err.with_diagnostic(diagnostic) + }) + } else { + Ok(()) + } + } + + fn build_multi_column_diagnostic( + &self, + column_count: usize, + column_names: &[String], + spans: Spans, + error_message: &str, + help_message: &str, + ) -> Diagnostic { + let primary_span = spans.first().clone(); + let columns_info = format!( + "Found {} columns: {}", + column_count, + column_names.join(", ") + ); + + let mut diagnostic = Diagnostic::new_error(error_message, primary_span); + if spans.0.len() > 1 { + diagnostic.add_note(columns_info, primary_span); + for (i, span) in spans.iter().skip(1).enumerate() { + diagnostic + .add_note(format!("Extra column {}", i + 1), Some(span.clone())); + } + } else { + diagnostic.add_note(columns_info, primary_span); + } + + diagnostic.add_help(help_message, None); + diagnostic + } } diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 614010dff720..6ba81fe0b717 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -293,7 +293,10 @@ fn test_scalar_subquery_multiple_columns() -> Result<(), Box = diag .notes @@ -351,7 +354,7 @@ fn test_in_subquery_multiple_columns() -> Result<(), Box> let spans = get_spans(query); let diag = do_query(query); - assert_eq!(diag.message, "IN subquery returns multiple columns"); + assert_eq!(diag.message, "IN subquery should only return one column"); let column_count_notes: Vec<_> = diag .notes From be4e13c92f01f39e0f58b95f281be679110430d5 Mon Sep 17 00:00:00 2001 From: changsun20 <110759360+changsun20@users.noreply.github.com> Date: Tue, 11 Mar 2025 00:22:32 -0400 Subject: [PATCH 05/11] chore: clearer note message --- datafusion/sql/src/expr/subquery.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index 45b37cbff826..a095298e0a21 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -118,7 +118,7 @@ impl SqlToRel<'_, S> { &sub_plan, spans.clone(), "Scalar subquery should only return one column", - "Select only one column in the subquery or use a row constructor", + "Select only one column in the subquery", )?; Ok(Expr::ScalarSubquery(Subquery { From 7a3f573a91878b57a0f509e45ff1370dfae81c70 Mon Sep 17 00:00:00 2001 From: changsun20 <110759360+changsun20@users.noreply.github.com> Date: Tue, 11 Mar 2025 22:47:45 -0400 Subject: [PATCH 06/11] fix: use plain Spans --- datafusion/expr/src/expr_fn.rs | 12 +++++++----- datafusion/expr/src/expr_schema.rs | 4 ++-- datafusion/expr/src/logical_plan/plan.rs | 8 ++++---- datafusion/sql/src/expr/subquery.rs | 8 ++++---- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 0211a063bd86..d0f809e8f851 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -37,7 +37,9 @@ use arrow::compute::kernels::cast_utils::{ parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month, }; use arrow::datatypes::{DataType, Field}; -use datafusion_common::{plan_err, Column, Result, ScalarValue, TableReference}; +use datafusion_common::{ + plan_err, Column, Result, ScalarValue, Span, Spans, TableReference, +}; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use sqlparser::ast::NullTreatment; @@ -252,7 +254,7 @@ pub fn exists(subquery: Arc) -> Expr { subquery: Subquery { subquery, outer_ref_columns, - spans: None, + spans: Spans::new(), }, negated: false, }) @@ -265,7 +267,7 @@ pub fn not_exists(subquery: Arc) -> Expr { subquery: Subquery { subquery, outer_ref_columns, - spans: None, + spans: Spans::new(), }, negated: true, }) @@ -279,7 +281,7 @@ pub fn in_subquery(expr: Expr, subquery: Arc) -> Expr { Subquery { subquery, outer_ref_columns, - spans: None, + spans: Spans::new(), }, false, )) @@ -305,7 +307,7 @@ pub fn scalar_subquery(subquery: Arc) -> Expr { Expr::ScalarSubquery(Subquery { subquery, outer_ref_columns, - spans: None, + spans: Spans::new(), }) } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 1c59221f762d..ae480a3c8b59 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -30,7 +30,7 @@ use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field}; use datafusion_common::{ not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema, - Result, TableReference, + Result, Spans, TableReference, }; use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer; use datafusion_functions_window_common::field::WindowUDFFieldArgs; @@ -608,7 +608,7 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result, /// Span information for subquery projection columns - pub spans: Option, + pub spans: Spans, } impl Normalizeable for Subquery { @@ -3656,7 +3656,7 @@ impl Subquery { Subquery { subquery: plan, outer_ref_columns: self.outer_ref_columns.clone(), - spans: None, + spans: Spans::new(), } } } diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index a095298e0a21..9c83dc5bd627 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -40,7 +40,7 @@ impl SqlToRel<'_, S> { subquery: Subquery { subquery: Arc::new(sub_plan), outer_ref_columns, - spans: None, + spans: Spans::new(), }, negated, })) @@ -86,7 +86,7 @@ impl SqlToRel<'_, S> { Subquery { subquery: Arc::new(sub_plan), outer_ref_columns, - spans: Some(spans), + spans: spans, }, negated, ))) @@ -117,14 +117,14 @@ impl SqlToRel<'_, S> { self.validate_single_column( &sub_plan, spans.clone(), - "Scalar subquery should only return one column", + "The subquery should only return one column", "Select only one column in the subquery", )?; Ok(Expr::ScalarSubquery(Subquery { subquery: Arc::new(sub_plan), outer_ref_columns, - spans: Some(spans), + spans: spans, })) } From 7b474343a87ee741bd3c73c932e9ef337ef3046c Mon Sep 17 00:00:00 2001 From: changsun20 <110759360+changsun20@users.noreply.github.com> Date: Wed, 12 Mar 2025 00:15:49 -0400 Subject: [PATCH 07/11] fix: better error messages --- datafusion/expr/src/expr_fn.rs | 6 +- datafusion/expr/src/logical_plan/plan.rs | 4 +- .../optimizer/src/analyzer/type_coercion.rs | 8 +- datafusion/sql/src/expr/subquery.rs | 38 +++--- datafusion/sql/src/relation/mod.rs | 6 +- datafusion/sql/tests/cases/diagnostic.rs | 114 +++++------------- .../substrait/src/logical_plan/consumer.rs | 9 +- 7 files changed, 63 insertions(+), 122 deletions(-) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index d0f809e8f851..c08738c183e4 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -37,9 +37,7 @@ use arrow::compute::kernels::cast_utils::{ parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month, }; use arrow::datatypes::{DataType, Field}; -use datafusion_common::{ - plan_err, Column, Result, ScalarValue, Span, Spans, TableReference, -}; +use datafusion_common::{plan_err, Column, Result, ScalarValue, Spans, TableReference}; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use sqlparser::ast::NullTreatment; @@ -295,7 +293,7 @@ pub fn not_in_subquery(expr: Expr, subquery: Arc) -> Expr { Subquery { subquery, outer_ref_columns, - spans: None, + spans: Spans::new(), }, true, )) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index fcaa84951e88..931b7fcbdb41 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -56,8 +56,8 @@ use datafusion_common::tree_node::{ use datafusion_common::{ aggregate_functional_dependencies, internal_err, plan_err, Column, Constraints, DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence, - FunctionalDependencies, ParamValues, Result, ScalarValue, Span, Spans, - TableReference, UnnestOptions, + FunctionalDependencies, ParamValues, Result, ScalarValue, Spans, TableReference, + UnnestOptions, }; use indexmap::IndexSet; diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 6d48ce3e80a2..c9c0b7a3b789 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1053,7 +1053,7 @@ mod test { use crate::test::{assert_analyzed_plan_eq, assert_analyzed_plan_with_config_eq}; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{TransformedResult, TreeNode}; - use datafusion_common::{DFSchema, DFSchemaRef, Result, ScalarValue}; + use datafusion_common::{DFSchema, DFSchemaRef, Result, ScalarValue, Spans}; use datafusion_expr::expr::{self, InSubquery, Like, ScalarFunction}; use datafusion_expr::logical_plan::{EmptyRelation, Projection, Sort}; use datafusion_expr::test::function_stub::avg_udaf; @@ -2093,7 +2093,7 @@ mod test { Subquery { subquery: empty_int32, outer_ref_columns: vec![], - spans: None, + spans: Spans::new(), }, false, )); @@ -2119,7 +2119,7 @@ mod test { Subquery { subquery: empty_int64, outer_ref_columns: vec![], - spans: None, + spans: Spans::new(), }, false, )); @@ -2144,7 +2144,7 @@ mod test { Subquery { subquery: empty_inside, outer_ref_columns: vec![], - spans: None, + spans: Spans::new(), }, false, )); diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index 9c83dc5bd627..152080b3ca75 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -75,7 +75,7 @@ impl SqlToRel<'_, S> { self.validate_single_column( &sub_plan, spans.clone(), - "IN subquery should only return one column", + "Too many columns! The IN subquery should only return one column", "Select only one column in the IN subquery", )?; @@ -117,7 +117,7 @@ impl SqlToRel<'_, S> { self.validate_single_column( &sub_plan, spans.clone(), - "The subquery should only return one column", + "Too many columns! The subquery should only return one column", "Select only one column in the subquery", )?; @@ -137,13 +137,10 @@ impl SqlToRel<'_, S> { ) -> Result<()> { if sub_plan.schema().fields().len() > 1 { let sub_schema = sub_plan.schema(); - let fields = sub_schema.fields(); let field_names = sub_schema.field_names(); plan_err!("{}: {}", error_message, field_names.join(", ")).map_err(|err| { let diagnostic = self.build_multi_column_diagnostic( - fields.len(), - &field_names, spans, error_message, help_message, @@ -157,30 +154,25 @@ impl SqlToRel<'_, S> { fn build_multi_column_diagnostic( &self, - column_count: usize, - column_names: &[String], spans: Spans, error_message: &str, help_message: &str, ) -> Diagnostic { - let primary_span = spans.first().clone(); - let columns_info = format!( - "Found {} columns: {}", - column_count, - column_names.join(", ") - ); - - let mut diagnostic = Diagnostic::new_error(error_message, primary_span); - if spans.0.len() > 1 { - diagnostic.add_note(columns_info, primary_span); - for (i, span) in spans.iter().skip(1).enumerate() { - diagnostic - .add_note(format!("Extra column {}", i + 1), Some(span.clone())); - } + // Create a span that encompasses all columns if possible + let full_span = if spans.0.len() > 1 { + spans.0.first().and_then(|first_span| { + spans.0.last().map(|last_span| Span { + start: first_span.start, + end: last_span.end, + }) + }) } else { - diagnostic.add_note(columns_info, primary_span); + spans.first() + }; + let mut diagnostic = Diagnostic::new_error(error_message, full_span); + for (i, span) in spans.iter().skip(1).enumerate() { + diagnostic.add_note(format!("Extra column {}", i + 1), Some(span.clone())); } - diagnostic.add_help(help_message, None); diagnostic } diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 3160c71c78a5..8078261d9152 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -21,7 +21,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{ - not_impl_err, plan_err, DFSchema, Diagnostic, Result, Span, TableReference, + not_impl_err, plan_err, DFSchema, Diagnostic, Result, Span, Spans, TableReference, }; use datafusion_expr::builder::subquery_alias; use datafusion_expr::{expr::Unnest, Expr, LogicalPlan, LogicalPlanBuilder}; @@ -211,7 +211,7 @@ impl SqlToRel<'_, S> { LogicalPlan::Subquery(Subquery { subquery: input, outer_ref_columns, - spans: None, + spans: Spans::new(), }), alias, ) @@ -219,7 +219,7 @@ impl SqlToRel<'_, S> { plan => Ok(LogicalPlan::Subquery(Subquery { subquery: Arc::new(plan), outer_ref_columns, - spans: None, + spans: Spans::new(), })), } } diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index 6ba81fe0b717..c350fea28769 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -286,7 +286,6 @@ fn test_invalid_function() -> Result<()> { assert_eq!(diag.span, Some(spans["whole"])); Ok(()) } - #[test] fn test_scalar_subquery_multiple_columns() -> Result<(), Box> { let query = "SELECT (SELECT 1 AS /*x*/x/*x*/, 2 AS /*y*/y/*y*/) AS col"; @@ -295,53 +294,27 @@ fn test_scalar_subquery_multiple_columns() -> Result<(), Box = diag - .notes - .iter() - .filter(|note| note.message.contains("Found 2 columns")) - .collect(); - assert!( - !column_count_notes.is_empty(), - "Expected note about column count" + "Too many columns! The subquery should only return one column" ); - let extra_column_notes: Vec<_> = diag - .notes - .iter() - .filter(|note| note.message.contains("Extra column")) - .collect(); + let expected_span = Some(Span { + start: spans["x"].start, + end: spans["y"].end, + }); + assert_eq!(diag.span, expected_span); assert_eq!( - extra_column_notes.len(), - 1, - "Expected one note about extra column" + diag.notes + .iter() + .map(|n| (n.message.as_str(), n.span)) + .collect::>(), + vec![("Extra column 1", Some(spans["y"]))] ); - - if let Some(first_note) = diag.notes.first() { - assert_eq!( - first_note.span, - Some(spans["x"]), - "Primary diagnostic span should match the first column's span" - ); - } - - if let Some(extra_column_span) = extra_column_notes - .first() - .and_then(|note| note.span.as_ref()) - { - assert_eq!( - *extra_column_span, spans["y"], - "Extra column note span should match the second column's span" - ); - } - - assert!( + assert_eq!( diag.helps .iter() - .any(|help| help.message.contains("Select only one column")), - "Expected help message" + .map(|h| h.message.as_str()) + .collect::>(), + vec!["Select only one column in the subquery"] ); Ok(()) @@ -354,52 +327,29 @@ fn test_in_subquery_multiple_columns() -> Result<(), Box> let spans = get_spans(query); let diag = do_query(query); - assert_eq!(diag.message, "IN subquery should only return one column"); - - let column_count_notes: Vec<_> = diag - .notes - .iter() - .filter(|note| note.message.contains("Found 2 columns")) - .collect(); - assert!( - !column_count_notes.is_empty(), - "Expected note about column count" + assert_eq!( + diag.message, + "Too many columns! The IN subquery should only return one column" ); - let extra_column_notes: Vec<_> = diag - .notes - .iter() - .filter(|note| note.message.contains("Extra column")) - .collect(); + let expected_span = Some(Span { + start: spans["id"].start, + end: spans["first"].end, + }); + assert_eq!(diag.span, expected_span); assert_eq!( - extra_column_notes.len(), - 1, - "Expected one note about extra column" + diag.notes + .iter() + .map(|n| (n.message.as_str(), n.span)) + .collect::>(), + vec![("Extra column 1", Some(spans["first"]))] ); - - if let Some(first_note) = diag.notes.first() { - assert_eq!( - first_note.span, - Some(spans["id"]), - "Primary diagnostic span should match the first column's span" - ); - } - - if let Some(extra_column_span) = extra_column_notes - .first() - .and_then(|note| note.span.as_ref()) - { - assert_eq!( - *extra_column_span, spans["first"], - "Extra column note span should match the second column's span" - ); - } - - assert!( + assert_eq!( diag.helps .iter() - .any(|help| help.message.contains("Select only one column")), - "Expected help message" + .map(|h| h.message.as_str()) + .collect::>(), + vec!["Select only one column in the IN subquery"] ); Ok(()) diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 0e8c86690dd6..d62ba7043d7d 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -24,7 +24,8 @@ use datafusion::arrow::datatypes::{ }; use datafusion::common::{ not_impl_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, - substrait_datafusion_err, substrait_err, DFSchema, DFSchemaRef, TableReference, + substrait_datafusion_err, substrait_err, DFSchema, DFSchemaRef, Spans, + TableReference, }; use datafusion::datasource::provider_as_source; use datafusion::logical_expr::expr::{Exists, InSubquery, Sort, WindowFunctionParams}; @@ -2257,7 +2258,7 @@ pub async fn from_subquery( subquery: Subquery { subquery: Arc::new(haystack_expr), outer_ref_columns: outer_refs, - spans: None, + spans: Spans::new(), }, negated: false, })) @@ -2276,7 +2277,7 @@ pub async fn from_subquery( Ok(Expr::ScalarSubquery(Subquery { subquery: Arc::new(plan), outer_ref_columns, - spans: None, + spans: Spans::new(), })) } SubqueryType::SetPredicate(predicate) => { @@ -2292,7 +2293,7 @@ pub async fn from_subquery( Subquery { subquery: Arc::new(plan), outer_ref_columns, - spans: None, + spans: Spans::new(), }, false, ))) From ca53574f5d3512008280dba3eb48524d12838ce3 Mon Sep 17 00:00:00 2001 From: changsun20 <110759360+changsun20@users.noreply.github.com> Date: Wed, 12 Mar 2025 00:19:55 -0400 Subject: [PATCH 08/11] fix: remove the specific subquery type --- datafusion/sql/src/expr/subquery.rs | 4 ++-- datafusion/sql/tests/cases/diagnostic.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index 152080b3ca75..df81436ee775 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -75,8 +75,8 @@ impl SqlToRel<'_, S> { self.validate_single_column( &sub_plan, spans.clone(), - "Too many columns! The IN subquery should only return one column", - "Select only one column in the IN subquery", + "Too many columns! The subquery should only return one column", + "Select only one column in the subquery", )?; let expr_obj = self.sql_to_expr(expr, input_schema, planner_context)?; diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs index c350fea28769..5481f046e70a 100644 --- a/datafusion/sql/tests/cases/diagnostic.rs +++ b/datafusion/sql/tests/cases/diagnostic.rs @@ -329,7 +329,7 @@ fn test_in_subquery_multiple_columns() -> Result<(), Box> assert_eq!( diag.message, - "Too many columns! The IN subquery should only return one column" + "Too many columns! The subquery should only return one column" ); let expected_span = Some(Span { @@ -349,7 +349,7 @@ fn test_in_subquery_multiple_columns() -> Result<(), Box> .iter() .map(|h| h.message.as_str()) .collect::>(), - vec!["Select only one column in the IN subquery"] + vec!["Select only one column in the subquery"] ); Ok(()) From 0f9a52ed725dd77373f2aa8f41d29a109ceba7ad Mon Sep 17 00:00:00 2001 From: changsun20 <110759360+changsun20@users.noreply.github.com> Date: Wed, 12 Mar 2025 01:10:19 -0400 Subject: [PATCH 09/11] fix: use iter_union to create full_span --- datafusion/sql/src/expr/subquery.rs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index df81436ee775..b8dedcc7431a 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -158,21 +158,13 @@ impl SqlToRel<'_, S> { error_message: &str, help_message: &str, ) -> Diagnostic { - // Create a span that encompasses all columns if possible - let full_span = if spans.0.len() > 1 { - spans.0.first().and_then(|first_span| { - spans.0.last().map(|last_span| Span { - start: first_span.start, - end: last_span.end, - }) - }) - } else { - spans.first() - }; + let full_span = Span::union_iter(spans.0.iter().cloned()); let mut diagnostic = Diagnostic::new_error(error_message, full_span); + for (i, span) in spans.iter().skip(1).enumerate() { diagnostic.add_note(format!("Extra column {}", i + 1), Some(span.clone())); } + diagnostic.add_help(help_message, None); diagnostic } From fc7315fc020e7d0d105ff19c1a4d2bff226cd60b Mon Sep 17 00:00:00 2001 From: changsun20 <110759360+changsun20@users.noreply.github.com> Date: Wed, 12 Mar 2025 11:49:13 -0400 Subject: [PATCH 10/11] fix: CI fail problems --- datafusion/optimizer/src/scalar_subquery_to_join.rs | 8 ++------ datafusion/sql/src/expr/subquery.rs | 6 +++--- datafusion/sqllogictest/test_files/subquery.slt | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 3a8aef267be5..5ca215619c10 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -731,9 +731,7 @@ mod tests { .project(vec![col("customer.c_custkey")])? .build()?; - let expected = "Invalid (non-executable) plan after Analyzer\ - \ncaused by\ - \nError during planning: Scalar subquery should only return one column"; + let expected = "Error during planning: Too many columns! The subquery should only return one column"; assert_analyzer_check_err(vec![], plan, expected); Ok(()) } @@ -793,9 +791,7 @@ mod tests { .project(vec![col("customer.c_custkey")])? .build()?; - let expected = "Invalid (non-executable) plan after Analyzer\ - \ncaused by\ - \nError during planning: Scalar subquery should only return one column"; + let expected = "Error during planning: Too many columns! The subquery should only return one column"; assert_analyzer_check_err(vec![], plan, expected); Ok(()) } diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index b8dedcc7431a..225c5d74c2ab 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -86,7 +86,7 @@ impl SqlToRel<'_, S> { Subquery { subquery: Arc::new(sub_plan), outer_ref_columns, - spans: spans, + spans, }, negated, ))) @@ -124,7 +124,7 @@ impl SqlToRel<'_, S> { Ok(Expr::ScalarSubquery(Subquery { subquery: Arc::new(sub_plan), outer_ref_columns, - spans: spans, + spans, })) } @@ -162,7 +162,7 @@ impl SqlToRel<'_, S> { let mut diagnostic = Diagnostic::new_error(error_message, full_span); for (i, span) in spans.iter().skip(1).enumerate() { - diagnostic.add_note(format!("Extra column {}", i + 1), Some(span.clone())); + diagnostic.add_note(format!("Extra column {}", i + 1), Some(*span)); } diagnostic.add_help(help_message, None); diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 207bb72fd549..adc0b06dea34 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -438,7 +438,7 @@ logical_plan 08)----------TableScan: t1 projection=[t1_int] #invalid_scalar_subquery -statement error DataFusion error: Invalid \(non-executable\) plan after Analyzer\ncaused by\nError during planning: Scalar subquery should only return one column, but found 2: t2.t2_id, t2.t2_name +statement error DataFusion error: Error during planning: Too many columns! The subquery should only return one column: t2.t2_id, t2.t2_name SELECT t1_id, t1_name, t1_int, (select t2_id, t2_name FROM t2 WHERE t2.t2_id = t1.t1_int) FROM t1 #subquery_not_allowed From ddcf91370939db99a4e6068b4e1056ec202a6b36 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 13 Mar 2025 16:25:43 -0400 Subject: [PATCH 11/11] Update expected error message --- datafusion/optimizer/src/scalar_subquery_to_join.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 5ca215619c10..33f10400d341 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -731,7 +731,7 @@ mod tests { .project(vec![col("customer.c_custkey")])? .build()?; - let expected = "Error during planning: Too many columns! The subquery should only return one column"; + let expected = "Error during planning: Scalar subquery should only return one column, but found 4: orders.o_orderkey, orders.o_custkey, orders.o_orderstatus, orders.o_totalprice"; assert_analyzer_check_err(vec![], plan, expected); Ok(()) } @@ -791,7 +791,7 @@ mod tests { .project(vec![col("customer.c_custkey")])? .build()?; - let expected = "Error during planning: Too many columns! The subquery should only return one column"; + let expected = "Error during planning: Scalar subquery should only return one column, but found 2: orders.o_custkey, orders.o_orderkey"; assert_analyzer_check_err(vec![], plan, expected); Ok(()) }