 
 package org.apache.spark.sql.connect.pipelines
 
-import scala.jdk.CollectionConverters._
-import scala.util.Using
-
 import io.grpc.stub.StreamObserver
-
 import org.apache.spark.connect.proto
-import org.apache.spark.connect.proto.{ExecutePlanResponse, PipelineCommandResult, Relation, ResolvedIdentifier}
+import org.apache.spark.connect.proto._
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.classic.DataFrame
 import org.apache.spark.sql.connect.common.DataTypeProtoConverter
 import org.apache.spark.sql.connect.service.SessionHolder
 import org.apache.spark.sql.pipelines.Language.Python
 import org.apache.spark.sql.pipelines.common.RunState.{CANCELED, FAILED}
-import org.apache.spark.sql.pipelines.graph.{AllTables, FlowAnalysis, GraphIdentifierManager, GraphRegistrationContext, IdentifierHelper, NoTables, PipelineUpdateContextImpl, QueryContext, QueryOrigin, QueryOriginType, Sink, SinkImpl, SomeTables, SqlGraphRegistrationContext, Table, TableFilter, TemporaryView, UnresolvedFlow}
+import org.apache.spark.sql.pipelines.graph._
 import org.apache.spark.sql.pipelines.logging.{PipelineEvent, RunProgress}
 import org.apache.spark.sql.types.StructType
 
+import scala.jdk.CollectionConverters._
+import scala.util.Using
+
 /** Handler for SparkConnect PipelineCommands */
 private[connect] object PipelinesHandler extends Logging {
 
@@ -47,8 +47,6 @@ private[connect] object PipelinesHandler extends Logging {
    *   Command to be handled
    * @param responseObserver
    *   The response observer where the response will be sent
-   * @param sparkSession
-   *   The spark session
    * @param transformRelationFunc
    *   Function used to convert a relation to a LogicalPlan. This is used when determining the
    *   LogicalPlan that a flow returns.
@@ -108,7 +106,6 @@ private[connect] object PipelinesHandler extends Logging {
           identifierBuilder.addNamespace(ns)
         }
         identifierBuilder.setTableName(resolvedFlow.identifier)
-        val identifier = identifierBuilder.build()
         PipelineCommandResult
           .newBuilder()
           .setDefineFlowResult(
@@ -129,6 +126,31 @@ private[connect] object PipelinesHandler extends Logging {
     }
   }
 
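+  /**
+   * Analyzes a SQL query against the dataflow graph registered under the given graph ID and
+   * returns the resulting DataFrame. The query is resolved using the graph's default catalog
+   * and database.
+   */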
+  def executeSQL(
+      sessionHolder: SessionHolder,
+      plan: LogicalPlan,
+      pipelineAnalysisContext: PipelineAnalysisContext
+  ): DataFrame = {
+    val graphRegistrationContext = {
+      sessionHolder.dataflowGraphRegistry.getDataflowGraphOrThrow(
+        pipelineAnalysisContext.getDataflowGraphId)
+    }
+    val pipelineSqlProcessor = new PipelineSqlProcessor(graphRegistrationContext)
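+    // Resolve the query with the graph's default catalog and database as the current
+    // query context.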
+    val context = ExternalQueryAnalysisContext(
+      queryContext = QueryContext(
+        currentCatalog = Option(graphRegistrationContext.defaultCatalog),
+        currentDatabase = Option(graphRegistrationContext.defaultDatabase)),
+      spark = sessionHolder.session)
+    pipelineSqlProcessor.processSparkSqlQuery(queryPlan = plan, context = context)
+  }
+
   private def createDataflowGraph(
       cmd: proto.PipelineCommand.CreateDataflowGraph,
       sessionHolder: SessionHolder): String = {
@@ -161,7 +176,7 @@ private[connect] object PipelinesHandler extends Logging {
 
     val graphElementRegistry =
       sessionHolder.dataflowGraphRegistry.getDataflowGraphOrThrow(dataflowGraphId)
-    val sqlGraphElementRegistrationContext = new SqlGraphRegistrationContext(graphElementRegistry)
+    val sqlGraphElementRegistrationContext = new PipelineSqlProcessor(graphElementRegistry)
     sqlGraphElementRegistrationContext.processSqlFile(
       cmd.getSqlText,
       cmd.getSqlFilePath,
@@ -293,8 +308,7 @@ private[connect] object PipelinesHandler extends Logging {
     val rawDestinationIdentifier = GraphIdentifierManager
       .parseTableIdentifier(name = flow.getTargetDatasetName, spark = sessionHolder.session)
     val flowWritesToView =
-      graphElementRegistry
-        .getViews()
+      graphElementRegistry.getViews
         .filter(_.isInstanceOf[TemporaryView])
         .exists(_.identifier == rawDestinationIdentifier)
     val flowWritesToSink =
@@ -304,7 +318,7 @@ private[connect] object PipelinesHandler extends Logging {
     // If the flow is created implicitly as part of defining a view, or if it writes to a
     // sink, then we do not qualify the flow identifier and the flow destination. This is
     // because views and sinks are not permitted to have multipart identifiers.
-    val isImplicitFlowForTempView = (isImplicitFlow && flowWritesToView)
+    val isImplicitFlowForTempView = isImplicitFlow && flowWritesToView
     val Seq(flowIdentifier, destinationIdentifier) =
       Seq(rawFlowIdentifier, rawDestinationIdentifier).map { rawIdentifier =>
         if (isImplicitFlowForTempView || flowWritesToSink) {
@@ -330,8 +344,8 @@ private[connect] object PipelinesHandler extends Logging {
         once = false,
         queryContext = QueryContext(Option(defaultCatalog), Option(defaultDatabase)),
         origin = QueryOrigin(
-          filePath = Option.when(flow.getSourceCodeLocation.hasFileName)(
-            flow.getSourceCodeLocation.getFileName),
+          filePath = Option
+            .when(flow.getSourceCodeLocation.hasFileName)(flow.getSourceCodeLocation.getFileName),
           line = Option.when(flow.getSourceCodeLocation.hasLineNumber)(
             flow.getSourceCodeLocation.getLineNumber),
           objectType = Option(QueryOriginType.Flow.toString),