diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index 4bc3300b75747..5bbe85705ac18 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -87,11 +87,11 @@ object CTESubstitution extends Rule[LogicalPlan] { LegacyBehaviorPolicy.withName(conf.getConf(LEGACY_CTE_PRECEDENCE_POLICY)) match { case LegacyBehaviorPolicy.EXCEPTION => assertNoNameConflictsInCTE(plan) - traverseAndSubstituteCTE(plan, forceInline, Seq.empty, cteDefs) + traverseAndSubstituteCTE(plan, forceInline, Seq.empty, cteDefs, None) case LegacyBehaviorPolicy.LEGACY => (legacyTraverseAndSubstituteCTE(plan, cteDefs), None) case LegacyBehaviorPolicy.CORRECTED => - traverseAndSubstituteCTE(plan, forceInline, Seq.empty, cteDefs) + traverseAndSubstituteCTE(plan, forceInline, Seq.empty, cteDefs, None) } if (cteDefs.isEmpty) { substituted @@ -162,7 +162,7 @@ object CTESubstitution extends Rule[LogicalPlan] { messageParameters = Map.empty) } val resolvedCTERelations = resolveCTERelations(relations, isLegacy = true, - forceInline = false, Seq.empty, cteDefs, allowRecursion) + forceInline = false, Seq.empty, cteDefs, None, allowRecursion) substituteCTE(child, alwaysInline = true, resolvedCTERelations, None) } } @@ -202,6 +202,8 @@ object CTESubstitution extends Rule[LogicalPlan] { * @param forceInline always inline the CTE relations if this is true * @param outerCTEDefs already resolved outer CTE definitions with names * @param cteDefs all accumulated CTE definitions + * @param recursiveCTERelationAncestor contains information of whether we are in a recursive CTE, + * as well as what CTE that is. * @return the plan where CTE substitution is applied and optionally the last substituted `With` * where CTE definitions will be gathered to */ @@ -209,7 +211,9 @@ object CTESubstitution extends Rule[LogicalPlan] { plan: LogicalPlan, forceInline: Boolean, outerCTEDefs: Seq[(String, CTERelationDef)], - cteDefs: ArrayBuffer[CTERelationDef]): (LogicalPlan, Option[LogicalPlan]) = { + cteDefs: ArrayBuffer[CTERelationDef], + recursiveCTERelationAncestor: Option[(String, CTERelationDef)] + ): (LogicalPlan, Option[LogicalPlan]) = { var firstSubstituted: Option[LogicalPlan] = None val newPlan = plan.resolveOperatorsDownWithPruning( _.containsAnyPattern(UNRESOLVED_WITH, PLAN_EXPRESSION)) { @@ -220,18 +224,31 @@ object CTESubstitution extends Rule[LogicalPlan] { errorClass = "RECURSIVE_CTE_WHEN_INLINING_IS_FORCED", messageParameters = Map.empty) } - val resolvedCTERelations = + + val tempCteDefs = ArrayBuffer.empty[CTERelationDef] + val resolvedCTERelations = if (recursiveCTERelationAncestor.isDefined) { + resolveCTERelations(relations, isLegacy = false, forceInline = false, outerCTEDefs, + tempCteDefs, recursiveCTERelationAncestor, allowRecursion) ++ outerCTEDefs + } else { resolveCTERelations(relations, isLegacy = false, forceInline, outerCTEDefs, cteDefs, - allowRecursion) ++ outerCTEDefs + recursiveCTERelationAncestor, allowRecursion) ++ outerCTEDefs + } val substituted = substituteCTE( - traverseAndSubstituteCTE(child, forceInline, resolvedCTERelations, cteDefs)._1, + traverseAndSubstituteCTE(child, forceInline, resolvedCTERelations, cteDefs, + recursiveCTERelationAncestor)._1, + // If we are resolving CTEs in a recursive CTE, we need to inline it in case the + // CTE contains the self reference. forceInline, resolvedCTERelations, None) if (firstSubstituted.isEmpty) { firstSubstituted = Some(substituted) } - substituted + if (recursiveCTERelationAncestor.isDefined) { + withCTEDefs(substituted, tempCteDefs.toSeq) + } else { + substituted + } case other => other.transformExpressionsWithPruning(_.containsPattern(PLAN_EXPRESSION)) { @@ -247,6 +264,7 @@ object CTESubstitution extends Rule[LogicalPlan] { forceInline: Boolean, outerCTEDefs: Seq[(String, CTERelationDef)], cteDefs: ArrayBuffer[CTERelationDef], + recursiveCTERelationAncestor: Option[(String, CTERelationDef)], allowRecursion: Boolean): Seq[(String, CTERelationDef)] = { val alwaysInline = isLegacy || forceInline var resolvedCTERelations = if (alwaysInline) { @@ -255,6 +273,21 @@ object CTESubstitution extends Rule[LogicalPlan] { outerCTEDefs } for ((name, relation) <- relations) { + // If recursion is allowed (RECURSIVE keyword specified) + // then it has higher priority than outer or previous relations. + // Therefore, we construct a `CTERelationDef` for the current relation. + // Later if we encounter unresolved relation which we need to find which CTE Def it is + // referencing to, we first check if it is a reference to this one. If yes, then we set the + // reference as being recursive. + val recursiveCTERelation = if (allowRecursion) { + Some(name -> CTERelationDef(relation)) + } else { + // If there is an outer recursive CTE relative to this one, and this one isn't recursive, + // then the self reference with the first-check priority is going to be the CteRelationDef + // of this recursive ancestor. + recursiveCTERelationAncestor + } + val innerCTEResolved = if (isLegacy) { // In legacy mode, outer CTE relations take precedence. Here we don't resolve the inner // `With` nodes, later we will substitute `UnresolvedRelation`s with outer CTE relations. @@ -305,26 +338,20 @@ object CTESubstitution extends Rule[LogicalPlan] { } else { resolvedCTERelations } - traverseAndSubstituteCTE(relation, forceInline, nonConflictingCTERelations, cteDefs)._1 + traverseAndSubstituteCTE(relation, forceInline, nonConflictingCTERelations, + cteDefs, recursiveCTERelation)._1 } - // If recursion is allowed (RECURSIVE keyword specified) - // then it has higher priority than outer or previous relations. - // Therefore, we construct a `CTERelationDef` for the current relation. - // Later if we encounter unresolved relation which we need to find which CTE Def it is - // referencing to, we first check if it is a reference to this one. If yes, then we set the - // reference as being recursive. - val recursiveCTERelation = if (allowRecursion) { - Some(name -> CTERelationDef(relation)) - } else { - None - } // CTE definition can reference a previous one or itself if recursion allowed. val substituted = substituteCTE(innerCTEResolved, alwaysInline, resolvedCTERelations, recursiveCTERelation) - val cteRelation = recursiveCTERelation + val cteRelation = if (allowRecursion) { + recursiveCTERelation .map(_._2.copy(child = substituted)) .getOrElse(CTERelationDef(substituted)) + } else { + CTERelationDef(substituted) + } if (!alwaysInline) { cteDefs += cteRelation } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala index 5a5acb24c6971..6ec465f7ffe76 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala @@ -83,6 +83,26 @@ object ResolveWithCTE extends Rule[LogicalPlan] { cteDef.copy(child = alias.copy(child = loop)) } + // Simple case of duplicating (UNION ALL) clause. + case alias @ SubqueryAlias(_, withCTE @ WithCTE( + Union(Seq(anchor, recursion), false, false), innerCteDefs)) => + if (!anchor.resolved) { + cteDef + } else { + // We need to check whether any of the inner CTEs has a self reference and replace + // it if needed + val newInnerCteDefs = innerCteDefs.map { innerCteDef => + innerCteDef.copy(child = rewriteRecursiveCTERefs( + innerCteDef.child, anchor, cteDef.id, None)) + } + val loop = UnionLoop( + cteDef.id, + anchor, + rewriteRecursiveCTERefs(recursion, anchor, cteDef.id, None)) + cteDef.copy(child = alias.copy(child = withCTE.copy( + plan = loop, cteDefs = newInnerCteDefs))) + } + // The case of CTE name followed by a parenthesized list of column name(s), eg. // WITH RECURSIVE t(n). case alias @ SubqueryAlias(_, @@ -100,7 +120,31 @@ object ResolveWithCTE extends Rule[LogicalPlan] { cteDef.copy(child = alias.copy(child = columnAlias.copy(child = loop))) } - // If the recursion is described with an UNION (deduplicating) clause then the + // The case of CTE name followed by a parenthesized list of column name(s), eg. + // WITH RECURSIVE t(n). + case alias @ SubqueryAlias(_, + columnAlias @ UnresolvedSubqueryColumnAliases( + colNames, + withCTE @ WithCTE(Union(Seq(anchor, recursion), false, false), innerCteDefs) + )) => + if (!anchor.resolved) { + cteDef + } else { + // We need to check whether any of the inner CTEs has a self reference and replace + // it if needed + val newInnerCteDefs = innerCteDefs.map { innerCteDef => + innerCteDef.copy(child = rewriteRecursiveCTERefs( + innerCteDef.child, anchor, cteDef.id, Some(colNames))) + } + val loop = UnionLoop( + cteDef.id, + anchor, + rewriteRecursiveCTERefs(recursion, anchor, cteDef.id, Some(colNames))) + cteDef.copy(child = alias.copy(child = columnAlias.copy( + child = withCTE.copy(plan = loop, cteDefs = newInnerCteDefs)))) + } + + // If the recursion is described with a UNION (deduplicating) clause then the // recursive term should not return those rows that have been calculated previously, // and we exclude those rows from the current iteration result. case alias @ SubqueryAlias(_, @@ -123,6 +167,34 @@ object ResolveWithCTE extends Rule[LogicalPlan] { cteDef.copy(child = alias.copy(child = loop)) } + // UNION case with CTEs inside. + case alias @ SubqueryAlias(_, withCTE @ WithCTE( + Distinct(Union(Seq(anchor, recursion), false, false)), innerCteDefs)) => + cteDef.failAnalysis( + errorClass = "UNION_NOT_SUPPORTED_IN_RECURSIVE_CTE", + messageParameters = Map.empty) + if (!anchor.resolved) { + cteDef + } else { + // We need to check whether any of the inner CTEs has a self reference and replace + // it if needed + val newInnerCteDefs = innerCteDefs.map { innerCteDef => + innerCteDef.copy(child = rewriteRecursiveCTERefs( + innerCteDef.child, anchor, cteDef.id, None)) + } + val loop = UnionLoop( + cteDef.id, + Distinct(anchor), + Except( + rewriteRecursiveCTERefs(recursion, anchor, cteDef.id, None), + UnionLoopRef(cteDef.id, anchor.output, true), + isAll = false + ) + ) + cteDef.copy(child = alias.copy(child = withCTE.copy( + plan = loop, cteDefs = newInnerCteDefs))) + } + // The case of CTE name followed by a parenthesized list of column name(s). case alias @ SubqueryAlias(_, columnAlias@UnresolvedSubqueryColumnAliases( @@ -147,6 +219,37 @@ object ResolveWithCTE extends Rule[LogicalPlan] { cteDef.copy(child = alias.copy(child = columnAlias.copy(child = loop))) } + // The case of CTE name followed by a parenthesized list of column name(s). + case alias @ SubqueryAlias(_, + columnAlias@UnresolvedSubqueryColumnAliases( + colNames, + WithCTE(Distinct(Union(Seq(anchor, recursion), false, false)), innerCteDefs) + )) => + cteDef.failAnalysis( + errorClass = "UNION_NOT_SUPPORTED_IN_RECURSIVE_CTE", + messageParameters = Map.empty) + if (!anchor.resolved) { + cteDef + } else { + // We need to check whether any of the inner CTEs has a self reference and replace + // it if needed + val newInnerCteDefs = innerCteDefs.map { innerCteDef => + innerCteDef.copy(child = rewriteRecursiveCTERefs( + innerCteDef.child, anchor, cteDef.id, Some(colNames))) + } + val loop = UnionLoop( + cteDef.id, + Distinct(anchor), + Except( + rewriteRecursiveCTERefs(recursion, anchor, cteDef.id, Some(colNames)), + UnionLoopRef(cteDef.id, anchor.output, true), + isAll = false + ) + ) + cteDef.copy(child = alias.copy(child = columnAlias.copy( + child = withCTE.copy(plan = loop, cteDefs = newInnerCteDefs)))) + } + case other => // We do not support cases of sole Union (needs a SubqueryAlias above it), nor // Project (as UnresolvedSubqueryColumnAliases have not been substituted with the diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out index 33e486e6dc884..f95f7a00bb4c1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out @@ -378,28 +378,32 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException WITH RECURSIVE t1 AS ( SELECT 1 AS level - UNION ( + UNION ALL ( WITH t2 AS (SELECT level + 1 FROM t1 WHERE level < 10) SELECT * FROM t2 ) ) SELECT * FROM t1 -- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", - "sqlState" : "42P01", - "messageParameters" : { - "relationName" : "`t1`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 100, - "stopIndex" : 101, - "fragment" : "t1" - } ] -} +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- UnionLoop xxxx +: :- Project [1 AS level#x] +: : +- OneRowRelation +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t2 +: : +- Project [(level#x + 1) AS (level + 1)#x] +: : +- Filter (level#x < 10) +: : +- SubqueryAlias t1 +: : +- UnionLoopRef xxxx, [level#x], false +: +- Project [(level + 1)#x] +: +- SubqueryAlias t2 +: +- CTERelationRef xxxx, true, [(level + 1)#x], false, false ++- Project [level#x] + +- SubqueryAlias t1 + +- CTERelationRef xxxx, true, [level#x], false, false -- !query @@ -424,21 +428,34 @@ WITH ) SELECT * FROM t2 -- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", - "sqlState" : "42P01", - "messageParameters" : { - "relationName" : "`t1`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 159, - "stopIndex" : 160, - "fragment" : "t1" - } ] -} +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- Project [1 AS 1#x] +: +- OneRowRelation +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- UnionLoop xxxx +: :- Project [1 AS level#x] +: : +- OneRowRelation +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t3 +: : +- Project [(level#x + 1) AS (level + 1)#x] +: : +- Filter (level#x < 10) +: : +- SubqueryAlias t1 +: : +- UnionLoopRef xxxx, [level#x], false +: +- Project [(level + 1)#x] +: +- SubqueryAlias t3 +: +- CTERelationRef xxxx, true, [(level + 1)#x], false, false +:- CTERelationDef xxxx, false +: +- SubqueryAlias t2 +: +- Project [level#x] +: +- SubqueryAlias t1 +: +- CTERelationRef xxxx, true, [level#x], false, false ++- Project [level#x] + +- SubqueryAlias t2 + +- CTERelationRef xxxx, true, [level#x], false, false -- !query @@ -962,24 +979,25 @@ SELECT * FROM r2 -- !query analysis WithCTE :- CTERelationDef xxxx, false -: +- SubqueryAlias r1 -: +- UnionLoop xxxx -: :- Project [0 AS innerlevel#x] -: : +- OneRowRelation -: +- Project [(innerlevel#x + 1) AS (innerlevel + 1)#x] -: +- Filter (innerlevel#x < 3) -: +- SubqueryAlias r1 -: +- UnionLoopRef xxxx, [innerlevel#x], false -:- CTERelationDef xxxx, false : +- SubqueryAlias r2 -: +- UnionLoop xxxx -: :- Project [0 AS outerlevel#x, innerlevel#x] +: +- WithCTE +: :- CTERelationDef xxxx, false : : +- SubqueryAlias r1 -: : +- CTERelationRef xxxx, true, [innerlevel#x], false, false -: +- Project [(outerlevel#x + 1) AS (outerlevel + 1)#x, innerlevel#x] -: +- Filter (outerlevel#x < 3) -: +- SubqueryAlias r2 -: +- UnionLoopRef xxxx, [outerlevel#x, innerlevel#x], false +: : +- UnionLoop xxxx +: : :- Project [0 AS innerlevel#x] +: : : +- OneRowRelation +: : +- Project [(innerlevel#x + 1) AS (innerlevel + 1)#x] +: : +- Filter (innerlevel#x < 3) +: : +- SubqueryAlias r1 +: : +- UnionLoopRef xxxx, [innerlevel#x], false +: +- UnionLoop xxxx +: :- Project [0 AS outerlevel#x, innerlevel#x] +: : +- SubqueryAlias r1 +: : +- CTERelationRef xxxx, true, [innerlevel#x], false, false +: +- Project [(outerlevel#x + 1) AS (outerlevel + 1)#x, innerlevel#x] +: +- Filter (outerlevel#x < 3) +: +- SubqueryAlias r2 +: +- UnionLoopRef xxxx, [outerlevel#x, innerlevel#x], false +- Project [outerlevel#x, innerlevel#x] +- SubqueryAlias r2 +- CTERelationRef xxxx, true, [outerlevel#x, innerlevel#x], false, false @@ -1001,25 +1019,26 @@ SELECT * FROM r WithCTE :- CTERelationDef xxxx, false : +- SubqueryAlias r -: +- Project [col1#x AS level#x] -: +- UnionLoop xxxx -: :- LocalRelation [col1#x] -: +- Project [(level#x + 1) AS (level + 1)#x] -: +- Filter (level#x < 3) -: +- SubqueryAlias r -: +- Project [col1#x AS level#x] -: +- UnionLoopRef xxxx, [col1#x], false -:- CTERelationDef xxxx, false -: +- SubqueryAlias r : +- Project [level#x AS level#x] -: +- Union false, false -: :- Project [level#x] +: +- WithCTE +: :- CTERelationDef xxxx, false : : +- SubqueryAlias r -: : +- CTERelationRef xxxx, true, [level#x], false, false -: +- Project [(level#x + 1) AS (level + 1)#x] -: +- Filter (level#x < 3) -: +- SubqueryAlias r -: +- CTERelationRef xxxx, true, [level#x], false, false +: : +- Project [col1#x AS level#x] +: : +- UnionLoop xxxx +: : :- LocalRelation [col1#x] +: : +- Project [(level#x + 1) AS (level + 1)#x] +: : +- Filter (level#x < 3) +: : +- SubqueryAlias r +: : +- Project [col1#x AS level#x] +: : +- UnionLoopRef xxxx, [col1#x], false +: +- Union false, false +: :- Project [level#x] +: : +- SubqueryAlias r +: : +- CTERelationRef xxxx, true, [level#x], false, false +: +- Project [(level#x + 1) AS (level + 1)#x] +: +- Filter (level#x < 3) +: +- SubqueryAlias r +: +- CTERelationRef xxxx, true, [level#x], false, false +- Project [level#x] +- SubqueryAlias r +- CTERelationRef xxxx, true, [level#x], false, false @@ -1041,21 +1060,22 @@ SELECT * FROM r WithCTE :- CTERelationDef xxxx, false : +- SubqueryAlias r -: +- Project [col1#x AS level#x] -: +- UnionLoop xxxx -: :- LocalRelation [col1#x] -: +- Project [(level#x + 1) AS (level + 1)#x] -: +- Filter (level#x < 3) -: +- SubqueryAlias r -: +- Project [col1#x AS level#x] -: +- UnionLoopRef xxxx, [col1#x], false -:- CTERelationDef xxxx, false -: +- SubqueryAlias r : +- Project [level#x AS level#x] : +- UnionLoop xxxx -: :- Project [level#x] -: : +- SubqueryAlias r -: : +- CTERelationRef xxxx, true, [level#x], false, false +: :- WithCTE +: : :- CTERelationDef xxxx, false +: : : +- SubqueryAlias r +: : : +- Project [col1#x AS level#x] +: : : +- UnionLoop xxxx +: : : :- LocalRelation [col1#x] +: : : +- Project [(level#x + 1) AS (level + 1)#x] +: : : +- Filter (level#x < 3) +: : : +- SubqueryAlias r +: : : +- Project [col1#x AS level#x] +: : : +- UnionLoopRef xxxx, [col1#x], false +: : +- Project [level#x] +: : +- SubqueryAlias r +: : +- CTERelationRef xxxx, true, [level#x], false, false : +- Project [(level#x + 1) AS (level + 1)#x] : +- Filter (level#x < 3) : +- SubqueryAlias r @@ -1300,3 +1320,151 @@ WithCTE +- Project [n#x] +- SubqueryAlias t2 +- CTERelationRef xxxx, true, [n#x], false, false + + +-- !query +WITH RECURSIVE t1 (n) AS ( + VALUES(1) + UNION ALL + ( + WITH t2(j) AS ( + SELECT n + 1 FROM t1 + ), + t3(k) AS ( + SELECT j FROM t2 + ) + SELECT k FROM t3 WHERE k <= 5 + ) +) +SELECT n FROM t1 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- Project [col1#x AS n#x] +: +- UnionLoop xxxx +: :- LocalRelation [col1#x] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t2 +: : +- Project [(n + 1)#x AS j#x] +: : +- Project [(n#x + 1) AS (n + 1)#x] +: : +- SubqueryAlias t1 +: : +- Project [col1#x AS n#x] +: : +- UnionLoopRef xxxx, [col1#x], false +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t3 +: : +- Project [j#x AS k#x] +: : +- Project [j#x] +: : +- SubqueryAlias t2 +: : +- CTERelationRef xxxx, true, [j#x], false, false +: +- Project [k#x] +: +- Filter (k#x <= 5) +: +- SubqueryAlias t3 +: +- CTERelationRef xxxx, true, [k#x], false, false ++- Project [n#x] + +- SubqueryAlias t1 + +- CTERelationRef xxxx, true, [n#x], false, false + + +-- !query +WITH RECURSIVE r2(outerlevel1, innerlevel1) AS ( + WITH RECURSIVE r1 AS ( + SELECT 0 AS innerlevel + UNION ALL + SELECT innerlevel + 1 FROM r1 WHERE innerlevel < 3 + ) + SELECT 0 AS outerlevel, innerlevel FROM r1 + UNION ALL + SELECT outerlevel1 + 1, innerlevel1 FROM r2 WHERE outerlevel1 < 3 +) +SELECT * FROM r2 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias r2 +: +- Project [outerlevel#x AS outerlevel1#x, innerlevel#x AS innerlevel1#x] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias r1 +: : +- UnionLoop xxxx +: : :- Project [0 AS innerlevel#x] +: : : +- OneRowRelation +: : +- Project [(innerlevel#x + 1) AS (innerlevel + 1)#x] +: : +- Filter (innerlevel#x < 3) +: : +- SubqueryAlias r1 +: : +- UnionLoopRef xxxx, [innerlevel#x], false +: +- UnionLoop xxxx +: :- Project [0 AS outerlevel#x, innerlevel#x] +: : +- SubqueryAlias r1 +: : +- CTERelationRef xxxx, true, [innerlevel#x], false, false +: +- Project [(outerlevel1#x + 1) AS (outerlevel1 + 1)#x, innerlevel1#x] +: +- Filter (outerlevel1#x < 3) +: +- SubqueryAlias r2 +: +- Project [outerlevel#x AS outerlevel1#x, innerlevel#x AS innerlevel1#x] +: +- UnionLoopRef xxxx, [outerlevel#x, innerlevel#x], false ++- Project [outerlevel1#x, innerlevel1#x] + +- SubqueryAlias r2 + +- CTERelationRef xxxx, true, [outerlevel1#x, innerlevel1#x], false, false + + +-- !query +WITH RECURSIVE t1(n) AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- Project [1#x AS n#x] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t2 +: : +- Project [n#x AS n#x] +: : +- Project [n#x] +: : +- SubqueryAlias t1 +: : +- Project [1#x AS n#x] +: : +- UnionLoopRef xxxx, [1#x], false +: +- UnionLoop xxxx +: :- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [(n#x + 1) AS (n + 1)#x] +: +- Filter (n#x < 5) +: +- SubqueryAlias t2 +: +- CTERelationRef xxxx, true, [n#x], false, false ++- Project [n#x] + +- SubqueryAlias t1 + +- CTERelationRef xxxx, true, [n#x], false, false + + +-- !query +WITH RECURSIVE t1 AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 AS n + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t1 +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias t2 +: : +- Project [n#x AS n#x] +: : +- Project [n#x] +: : +- SubqueryAlias t1 +: : +- UnionLoopRef xxxx, [n#x], false +: +- UnionLoop xxxx +: :- Project [1 AS n#x] +: : +- OneRowRelation +: +- Project [(n#x + 1) AS (n + 1)#x] +: +- Filter (n#x < 5) +: +- SubqueryAlias t2 +: +- CTERelationRef xxxx, true, [n#x], false, false ++- Project [n#x] + +- SubqueryAlias t1 + +- CTERelationRef xxxx, true, [n#x], false, false diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out index 2fdced93217df..32388ed10e735 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out @@ -1503,27 +1503,28 @@ SELECT * FROM t -- !query analysis WithCTE :- CTERelationDef xxxx, false -: +- SubqueryAlias s -: +- Project [col1#x AS i#x] -: +- UnionLoop xxxx -: :- LocalRelation [col1#x] -: +- Project [(i#x + 1) AS (i + 1)#x] -: +- Filter (i#x < 10) -: +- SubqueryAlias s -: +- Project [col1#x AS i#x] -: +- UnionLoopRef xxxx, [col1#x], false -:- CTERelationDef xxxx, false : +- SubqueryAlias t : +- Project [i#x AS j#x] -: +- UnionLoop xxxx -: :- Project [i#x] +: +- WithCTE +: :- CTERelationDef xxxx, false : : +- SubqueryAlias s -: : +- CTERelationRef xxxx, true, [i#x], false, false -: +- Project [(j#x + 1) AS (j + 1)#x] -: +- Filter (j#x < 10) -: +- SubqueryAlias t -: +- Project [i#x AS j#x] -: +- UnionLoopRef xxxx, [i#x], false +: : +- Project [col1#x AS i#x] +: : +- UnionLoop xxxx +: : :- LocalRelation [col1#x] +: : +- Project [(i#x + 1) AS (i + 1)#x] +: : +- Filter (i#x < 10) +: : +- SubqueryAlias s +: : +- Project [col1#x AS i#x] +: : +- UnionLoopRef xxxx, [col1#x], false +: +- UnionLoop xxxx +: :- Project [i#x] +: : +- SubqueryAlias s +: : +- CTERelationRef xxxx, true, [i#x], false, false +: +- Project [(j#x + 1) AS (j + 1)#x] +: +- Filter (j#x < 10) +: +- SubqueryAlias t +: +- Project [i#x AS j#x] +: +- UnionLoopRef xxxx, [i#x], false +- Project [j#x] +- SubqueryAlias t +- CTERelationRef xxxx, true, [j#x], false, false @@ -1622,7 +1623,7 @@ SELECT * FROM outermost ORDER BY 1 -- !query analysis org.apache.spark.sql.AnalysisException { - "errorClass" : "UNION_NOT_SUPPORTED_IN_RECURSIVE_CTE", + "errorClass" : "INVALID_RECURSIVE_REFERENCE.NUMBER", "sqlState" : "42836", "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql index c45f62196430a..97638b9424645 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql @@ -143,20 +143,18 @@ WITH SELECT * FROM t2; --- recursive reference is not allowed in a nested CTE --- TABLE_OR_VIEW_NOT_FOUND is thrown now, although it some check should be added to exactly inform --- that this is not allowed +-- recursive reference in a nested CTE WITH RECURSIVE t1 AS ( SELECT 1 AS level - UNION ( + UNION ALL ( WITH t2 AS (SELECT level + 1 FROM t1 WHERE level < 10) SELECT * FROM t2 ) ) SELECT * FROM t1; --- recursive reference and conflicting outer CTEs are not allowed in a nested CTE +-- recursive reference and conflicting outer CTEs in a nested CTE SET spark.sql.legacy.ctePrecedencePolicy=CORRECTED; WITH t1 AS (SELECT 1), @@ -499,4 +497,51 @@ t2(n) AS ( UNION ALL SELECT n + 1 FROM t2, t1 WHERE n + 1 = a ) -SELECT * FROM t2; \ No newline at end of file +SELECT * FROM t2; + +-- Multiple CTEs within rCTE +WITH RECURSIVE t1 (n) AS ( + VALUES(1) + UNION ALL + ( + WITH t2(j) AS ( + SELECT n + 1 FROM t1 + ), + t3(k) AS ( + SELECT j FROM t2 + ) + SELECT k FROM t3 WHERE k <= 5 + ) +) +SELECT n FROM t1; + +-- Column aliases with CTEs inside rCTEs +WITH RECURSIVE r2(outerlevel1, innerlevel1) AS ( + WITH RECURSIVE r1 AS ( + SELECT 0 AS innerlevel + UNION ALL + SELECT innerlevel + 1 FROM r1 WHERE innerlevel < 3 + ) + SELECT 0 AS outerlevel, innerlevel FROM r1 + UNION ALL + SELECT outerlevel1 + 1, innerlevel1 FROM r2 WHERE outerlevel1 < 3 +) +SELECT * FROM r2; + +-- An inner cte that is defined for both the anchor and recursion but called only in the recursion +-- with subquery alias +WITH RECURSIVE t1(n) AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1; + +-- An inner cte that is defined for both the anchor and recursion but called only in the recursion +-- without query alias +WITH RECURSIVE t1 AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 AS n + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out index cc4e01e11ca6e..6b12d6a159bc9 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out @@ -358,30 +358,25 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException WITH RECURSIVE t1 AS ( SELECT 1 AS level - UNION ( + UNION ALL ( WITH t2 AS (SELECT level + 1 FROM t1 WHERE level < 10) SELECT * FROM t2 ) ) SELECT * FROM t1 -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", - "sqlState" : "42P01", - "messageParameters" : { - "relationName" : "`t1`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 100, - "stopIndex" : 101, - "fragment" : "t1" - } ] -} +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 -- !query @@ -408,23 +403,18 @@ WITH ) SELECT * FROM t2 -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", - "sqlState" : "42P01", - "messageParameters" : { - "relationName" : "`t1`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 159, - "stopIndex" : 160, - "fragment" : "t1" - } ] -} +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 -- !query @@ -1198,3 +1188,95 @@ struct 1 2 3 + + +-- !query +WITH RECURSIVE t1 (n) AS ( + VALUES(1) + UNION ALL + ( + WITH t2(j) AS ( + SELECT n + 1 FROM t1 + ), + t3(k) AS ( + SELECT j FROM t2 + ) + SELECT k FROM t3 WHERE k <= 5 + ) +) +SELECT n FROM t1 +-- !query schema +struct +-- !query output +1 +2 +3 +4 +5 + + +-- !query +WITH RECURSIVE r2(outerlevel1, innerlevel1) AS ( + WITH RECURSIVE r1 AS ( + SELECT 0 AS innerlevel + UNION ALL + SELECT innerlevel + 1 FROM r1 WHERE innerlevel < 3 + ) + SELECT 0 AS outerlevel, innerlevel FROM r1 + UNION ALL + SELECT outerlevel1 + 1, innerlevel1 FROM r2 WHERE outerlevel1 < 3 +) +SELECT * FROM r2 +-- !query schema +struct +-- !query output +0 0 +0 1 +0 2 +0 3 +1 0 +1 1 +1 2 +1 3 +2 0 +2 1 +2 2 +2 3 +3 0 +3 1 +3 2 +3 3 + + +-- !query +WITH RECURSIVE t1(n) AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1 +-- !query schema +struct +-- !query output +1 +2 +3 +4 +5 + + +-- !query +WITH RECURSIVE t1 AS ( + WITH t2(n) AS (SELECT * FROM t1) + SELECT 1 AS n + UNION ALL + SELECT n+1 FROM t2 WHERE n < 5) +SELECT * FROM t1 +-- !query schema +struct +-- !query output +1 +2 +3 +4 +5 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out index 53d1be3b14473..cfb021aeca75e 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out @@ -1381,7 +1381,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "UNION_NOT_SUPPORTED_IN_RECURSIVE_CTE", + "errorClass" : "INVALID_RECURSIVE_REFERENCE.NUMBER", "sqlState" : "42836", "queryContext" : [ { "objectType" : "",