[SPARK-52223][CONNECT] Add SDP Spark Connect Protos #50942


Closed
wants to merge 10 commits
359 changes: 180 additions & 179 deletions python/pyspark/sql/connect/proto/base_pb2.py

Large diffs are not rendered by default.

25 changes: 25 additions & 0 deletions python/pyspark/sql/connect/proto/base_pb2.pyi
@@ -44,6 +44,7 @@ import pyspark.sql.connect.proto.commands_pb2
import pyspark.sql.connect.proto.common_pb2
import pyspark.sql.connect.proto.expressions_pb2
import pyspark.sql.connect.proto.ml_pb2
import pyspark.sql.connect.proto.pipelines_pb2
import pyspark.sql.connect.proto.relations_pb2
import pyspark.sql.connect.proto.types_pb2
import sys
@@ -1583,6 +1584,8 @@ class ExecutePlanResponse(google.protobuf.message.Message):
EXECUTION_PROGRESS_FIELD_NUMBER: builtins.int
CHECKPOINT_COMMAND_RESULT_FIELD_NUMBER: builtins.int
ML_COMMAND_RESULT_FIELD_NUMBER: builtins.int
PIPELINE_EVENT_RESULT_FIELD_NUMBER: builtins.int
PIPELINE_COMMAND_RESULT_FIELD_NUMBER: builtins.int
EXTENSION_FIELD_NUMBER: builtins.int
METRICS_FIELD_NUMBER: builtins.int
OBSERVED_METRICS_FIELD_NUMBER: builtins.int
@@ -1650,6 +1653,14 @@
def ml_command_result(self) -> pyspark.sql.connect.proto.ml_pb2.MlCommandResult:
"""ML command response"""
@property
def pipeline_event_result(self) -> pyspark.sql.connect.proto.pipelines_pb2.PipelineEventResult:
"""Response containing pipeline event that is streamed back to the client during a pipeline run"""
@property
def pipeline_command_result(
self,
) -> pyspark.sql.connect.proto.pipelines_pb2.PipelineCommandResult:
"""Pipeline command response"""
@property
def extension(self) -> google.protobuf.any_pb2.Any:
"""Support arbitrary result objects."""
@property
@@ -1692,6 +1703,10 @@
execution_progress: global___ExecutePlanResponse.ExecutionProgress | None = ...,
checkpoint_command_result: global___CheckpointCommandResult | None = ...,
ml_command_result: pyspark.sql.connect.proto.ml_pb2.MlCommandResult | None = ...,
pipeline_event_result: pyspark.sql.connect.proto.pipelines_pb2.PipelineEventResult
| None = ...,
pipeline_command_result: pyspark.sql.connect.proto.pipelines_pb2.PipelineCommandResult
| None = ...,
extension: google.protobuf.any_pb2.Any | None = ...,
metrics: global___ExecutePlanResponse.Metrics | None = ...,
observed_metrics: collections.abc.Iterable[global___ExecutePlanResponse.ObservedMetrics]
@@ -1717,6 +1732,10 @@
b"metrics",
"ml_command_result",
b"ml_command_result",
"pipeline_command_result",
b"pipeline_command_result",
"pipeline_event_result",
b"pipeline_event_result",
"response_type",
b"response_type",
"result_complete",
@@ -1758,6 +1777,10 @@
b"observed_metrics",
"operation_id",
b"operation_id",
"pipeline_command_result",
b"pipeline_command_result",
"pipeline_event_result",
b"pipeline_event_result",
"response_id",
b"response_id",
"response_type",
@@ -1798,6 +1821,8 @@
"execution_progress",
"checkpoint_command_result",
"ml_command_result",
"pipeline_event_result",
"pipeline_command_result",
"extension",
]
| None
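
The two fields added above extend the response_type oneof of ExecutePlanResponse. As a rough sketch of how a client might consume them (the handler function and the response stream are hypothetical; only the field and message names come from the stubs in this diff):

# Hypothetical consumer of an ExecutePlanResponse stream (sketch only).
from pyspark.sql.connect.proto import base_pb2


def handle_responses(responses):
    for response in responses:  # each item is a base_pb2.ExecutePlanResponse
        which = response.WhichOneof("response_type")
        if which == "pipeline_event_result":
            # Events are streamed back to the client during a pipeline run.
            event = response.pipeline_event_result.event
            print(event.timestamp, event.message)
        elif which == "pipeline_command_result":
            result = response.pipeline_command_result
            if result.HasField("create_dataflow_graph_result"):
                print(result.create_dataflow_graph_result.dataflow_graph_id)

Because both results live in the same oneof as the existing variants, at most one of them is set on any single response message.
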
211 changes: 106 additions & 105 deletions python/pyspark/sql/connect/proto/commands_pb2.py

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions python/pyspark/sql/connect/proto/commands_pb2.pyi
@@ -43,6 +43,7 @@ import google.protobuf.message
import pyspark.sql.connect.proto.common_pb2
import pyspark.sql.connect.proto.expressions_pb2
import pyspark.sql.connect.proto.ml_pb2
import pyspark.sql.connect.proto.pipelines_pb2
import pyspark.sql.connect.proto.relations_pb2
import sys
import typing
@@ -107,6 +108,7 @@ class Command(google.protobuf.message.Message):
MERGE_INTO_TABLE_COMMAND_FIELD_NUMBER: builtins.int
ML_COMMAND_FIELD_NUMBER: builtins.int
EXECUTE_EXTERNAL_COMMAND_FIELD_NUMBER: builtins.int
PIPELINE_COMMAND_FIELD_NUMBER: builtins.int
EXTENSION_FIELD_NUMBER: builtins.int
@property
def register_function(
@@ -153,6 +155,8 @@
@property
def execute_external_command(self) -> global___ExecuteExternalCommand: ...
@property
def pipeline_command(self) -> pyspark.sql.connect.proto.pipelines_pb2.PipelineCommand: ...
@property
def extension(self) -> google.protobuf.any_pb2.Any:
"""This field is used to mark extensions to the protocol. When plugins generate arbitrary
Commands they can add them here. During the planning the correct resolution is done.
@@ -183,6 +187,7 @@
merge_into_table_command: global___MergeIntoTableCommand | None = ...,
ml_command: pyspark.sql.connect.proto.ml_pb2.MlCommand | None = ...,
execute_external_command: global___ExecuteExternalCommand | None = ...,
pipeline_command: pyspark.sql.connect.proto.pipelines_pb2.PipelineCommand | None = ...,
extension: google.protobuf.any_pb2.Any | None = ...,
) -> None: ...
def HasField(
@@ -206,6 +211,8 @@
b"merge_into_table_command",
"ml_command",
b"ml_command",
"pipeline_command",
b"pipeline_command",
"register_data_source",
b"register_data_source",
"register_function",
@@ -251,6 +258,8 @@
b"merge_into_table_command",
"ml_command",
b"ml_command",
"pipeline_command",
b"pipeline_command",
"register_data_source",
b"register_data_source",
"register_function",
@@ -297,6 +306,7 @@
"merge_into_table_command",
"ml_command",
"execute_external_command",
"pipeline_command",
"extension",
]
| None
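
With this change, Command gains a pipeline_command member in its command_type oneof. A minimal sketch of wrapping a pipeline command in a Command, assuming only the generated modules in this PR (the session and RPC plumbing that would actually send it are omitted):

from pyspark.sql.connect.proto import commands_pb2

# Build a Command whose command_type oneof carries a pipeline command.
command = commands_pb2.Command()
create = command.pipeline_command.create_dataflow_graph
create.default_catalog = "spark_catalog"  # illustrative values
create.default_database = "default"
create.sql_conf["spark.sql.ansi.enabled"] = "true"
assert command.WhichOneof("command_type") == "pipeline_command"
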
91 changes: 91 additions & 0 deletions python/pyspark/sql/connect/proto/pipelines_pb2.py
@@ -0,0 +1,91 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# NO CHECKED-IN PROTOBUF GENCODE
# source: spark/connect/pipelines.proto
# Protobuf Python Version: 5.28.3
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import runtime_version as _runtime_version
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder

_runtime_version.ValidateProtobufRuntimeVersion(
_runtime_version.Domain.PUBLIC, 5, 28, 3, "", "spark/connect/pipelines.proto"
)
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


from pyspark.sql.connect.proto import relations_pb2 as spark_dot_connect_dot_relations__pb2
from pyspark.sql.connect.proto import types_pb2 as spark_dot_connect_dot_types__pb2


DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
b'\n\x1dspark/connect/pipelines.proto\x12\rspark.connect\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"\x8c\x11\n\x0fPipelineCommand\x12h\n\x15\x63reate_dataflow_graph\x18\x01 \x01(\x0b\x32\x32.spark.connect.PipelineCommand.CreateDataflowGraphH\x00R\x13\x63reateDataflowGraph\x12U\n\x0e\x64\x65\x66ine_dataset\x18\x02 \x01(\x0b\x32,.spark.connect.PipelineCommand.DefineDatasetH\x00R\rdefineDataset\x12L\n\x0b\x64\x65\x66ine_flow\x18\x03 \x01(\x0b\x32).spark.connect.PipelineCommand.DefineFlowH\x00R\ndefineFlow\x12\x62\n\x13\x64rop_dataflow_graph\x18\x04 \x01(\x0b\x32\x30.spark.connect.PipelineCommand.DropDataflowGraphH\x00R\x11\x64ropDataflowGraph\x12\x46\n\tstart_run\x18\x05 \x01(\x0b\x32\'.spark.connect.PipelineCommand.StartRunH\x00R\x08startRun\x12\x62\n\x19\x64\x65\x66ine_sql_graph_elements\x18\x06 \x01(\x0b\x32%.spark.connect.DefineSqlGraphElementsH\x00R\x16\x64\x65\x66ineSqlGraphElements\x1a\x87\x03\n\x13\x43reateDataflowGraph\x12,\n\x0f\x64\x65\x66\x61ult_catalog\x18\x01 \x01(\tH\x00R\x0e\x64\x65\x66\x61ultCatalog\x88\x01\x01\x12.\n\x10\x64\x65\x66\x61ult_database\x18\x02 \x01(\tH\x01R\x0f\x64\x65\x66\x61ultDatabase\x88\x01\x01\x12Z\n\x08sql_conf\x18\x05 \x03(\x0b\x32?.spark.connect.PipelineCommand.CreateDataflowGraph.SqlConfEntryR\x07sqlConf\x1a:\n\x0cSqlConfEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1aQ\n\x08Response\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x12\n\x10_default_catalogB\x13\n\x11_default_database\x1aZ\n\x11\x44ropDataflowGraph\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x42\x14\n\x12_dataflow_graph_id\x1a\xd1\x04\n\rDefineDataset\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12&\n\x0c\x64\x61taset_name\x18\x02 \x01(\tH\x01R\x0b\x64\x61tasetName\x88\x01\x01\x12\x42\n\x0c\x64\x61taset_type\x18\x03 \x01(\x0e\x32\x1a.spark.connect.DatasetTypeH\x02R\x0b\x64\x61tasetType\x88\x01\x01\x12\x1d\n\x07\x63omment\x18\x04 \x01(\tH\x03R\x07\x63omment\x88\x01\x01\x12l\n\x10table_properties\x18\x05 \x03(\x0b\x32\x41.spark.connect.PipelineCommand.DefineDataset.TablePropertiesEntryR\x0ftableProperties\x12%\n\x0epartition_cols\x18\x06 \x03(\tR\rpartitionCols\x12\x34\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x04R\x06schema\x88\x01\x01\x12\x1b\n\x06\x66ormat\x18\x08 \x01(\tH\x05R\x06\x66ormat\x88\x01\x01\x1a\x42\n\x14TablePropertiesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x14\n\x12_dataflow_graph_idB\x0f\n\r_dataset_nameB\x0f\n\r_dataset_typeB\n\n\x08_commentB\t\n\x07_schemaB\t\n\x07_format\x1a\xbc\x03\n\nDefineFlow\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12 \n\tflow_name\x18\x02 \x01(\tH\x01R\x08\x66lowName\x88\x01\x01\x12\x33\n\x13target_dataset_name\x18\x03 \x01(\tH\x02R\x11targetDatasetName\x88\x01\x01\x12\x30\n\x04plan\x18\x04 \x01(\x0b\x32\x17.spark.connect.RelationH\x03R\x04plan\x88\x01\x01\x12Q\n\x08sql_conf\x18\x05 \x03(\x0b\x32\x36.spark.connect.PipelineCommand.DefineFlow.SqlConfEntryR\x07sqlConf\x12\x17\n\x04once\x18\x06 \x01(\x08H\x04R\x04once\x88\x01\x01\x1a:\n\x0cSqlConfEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x14\n\x12_dataflow_graph_idB\x0c\n\n_flow_nameB\x16\n\x14_target_dataset_nameB\x07\n\x05_planB\x07\n\x05_once\x1aQ\n\x08StartRun\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x0e\n\x0c\x63ommand_type"\xc7\x01\n\x16\x44\x65\x66ineSqlGraphElements\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x12\'\n\rsql_file_path\x18\x02 \x01(\tH\x01R\x0bsqlFilePath\x88\x01\x01\x12\x1e\n\x08sql_text\x18\x03 \x01(\tH\x02R\x07sqlText\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\x10\n\x0e_sql_file_pathB\x0b\n\t_sql_text"\x8e\x02\n\x15PipelineCommandResult\x12\x81\x01\n\x1c\x63reate_dataflow_graph_result\x18\x01 \x01(\x0b\x32>.spark.connect.PipelineCommandResult.CreateDataflowGraphResultH\x00R\x19\x63reateDataflowGraphResult\x1a\x62\n\x19\x43reateDataflowGraphResult\x12/\n\x11\x64\x61taflow_graph_id\x18\x01 \x01(\tH\x00R\x0f\x64\x61taflowGraphId\x88\x01\x01\x42\x14\n\x12_dataflow_graph_idB\r\n\x0bresult_type"I\n\x13PipelineEventResult\x12\x32\n\x05\x65vent\x18\x01 \x01(\x0b\x32\x1c.spark.connect.PipelineEventR\x05\x65vent"k\n\rPipelineEvent\x12!\n\ttimestamp\x18\x01 \x01(\tH\x00R\ttimestamp\x88\x01\x01\x12\x1d\n\x07message\x18\x02 \x01(\tH\x01R\x07message\x88\x01\x01\x42\x0c\n\n_timestampB\n\n\x08_message*a\n\x0b\x44\x61tasetType\x12\x1c\n\x18\x44\x41TASET_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MATERIALIZED_VIEW\x10\x01\x12\t\n\x05TABLE\x10\x02\x12\x12\n\x0eTEMPORARY_VIEW\x10\x03\x42"\n\x1eorg.apache.spark.connect.protoP\x01\x62\x06proto3'
)

_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(
DESCRIPTOR, "pyspark.sql.connect.proto.pipelines_pb2", _globals
)
if not _descriptor._USE_C_DESCRIPTORS:
_globals["DESCRIPTOR"]._loaded_options = None
_globals["DESCRIPTOR"]._serialized_options = b"\n\036org.apache.spark.connect.protoP\001"
_globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH_SQLCONFENTRY"]._loaded_options = None
_globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH_SQLCONFENTRY"]._serialized_options = b"8\001"
_globals["_PIPELINECOMMAND_DEFINEDATASET_TABLEPROPERTIESENTRY"]._loaded_options = None
_globals["_PIPELINECOMMAND_DEFINEDATASET_TABLEPROPERTIESENTRY"]._serialized_options = b"8\001"
_globals["_PIPELINECOMMAND_DEFINEFLOW_SQLCONFENTRY"]._loaded_options = None
_globals["_PIPELINECOMMAND_DEFINEFLOW_SQLCONFENTRY"]._serialized_options = b"8\001"
_globals["_DATASETTYPE"]._serialized_start = 2956
_globals["_DATASETTYPE"]._serialized_end = 3053
_globals["_PIPELINECOMMAND"]._serialized_start = 107
_globals["_PIPELINECOMMAND"]._serialized_end = 2295
_globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH"]._serialized_start = 670
_globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH"]._serialized_end = 1061
_globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH_SQLCONFENTRY"]._serialized_start = 879
_globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH_SQLCONFENTRY"]._serialized_end = 937
_globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH_RESPONSE"]._serialized_start = 939
_globals["_PIPELINECOMMAND_CREATEDATAFLOWGRAPH_RESPONSE"]._serialized_end = 1020
_globals["_PIPELINECOMMAND_DROPDATAFLOWGRAPH"]._serialized_start = 1063
_globals["_PIPELINECOMMAND_DROPDATAFLOWGRAPH"]._serialized_end = 1153
_globals["_PIPELINECOMMAND_DEFINEDATASET"]._serialized_start = 1156
_globals["_PIPELINECOMMAND_DEFINEDATASET"]._serialized_end = 1749
_globals["_PIPELINECOMMAND_DEFINEDATASET_TABLEPROPERTIESENTRY"]._serialized_start = 1593
_globals["_PIPELINECOMMAND_DEFINEDATASET_TABLEPROPERTIESENTRY"]._serialized_end = 1659
_globals["_PIPELINECOMMAND_DEFINEFLOW"]._serialized_start = 1752
_globals["_PIPELINECOMMAND_DEFINEFLOW"]._serialized_end = 2196
_globals["_PIPELINECOMMAND_DEFINEFLOW_SQLCONFENTRY"]._serialized_start = 879
_globals["_PIPELINECOMMAND_DEFINEFLOW_SQLCONFENTRY"]._serialized_end = 937
_globals["_PIPELINECOMMAND_STARTRUN"]._serialized_start = 2198
_globals["_PIPELINECOMMAND_STARTRUN"]._serialized_end = 2279
_globals["_DEFINESQLGRAPHELEMENTS"]._serialized_start = 2298
_globals["_DEFINESQLGRAPHELEMENTS"]._serialized_end = 2497
_globals["_PIPELINECOMMANDRESULT"]._serialized_start = 2500
_globals["_PIPELINECOMMANDRESULT"]._serialized_end = 2770
_globals["_PIPELINECOMMANDRESULT_CREATEDATAFLOWGRAPHRESULT"]._serialized_start = 2657
_globals["_PIPELINECOMMANDRESULT_CREATEDATAFLOWGRAPHRESULT"]._serialized_end = 2755
_globals["_PIPELINEEVENTRESULT"]._serialized_start = 2772
_globals["_PIPELINEEVENTRESULT"]._serialized_end = 2845
_globals["_PIPELINEEVENT"]._serialized_start = 2847
_globals["_PIPELINEEVENT"]._serialized_end = 2954
# @@protoc_insertion_point(module_scope)
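
The serialized descriptor above encodes the new pipeline schema: PipelineCommand with a command_type oneof over create_dataflow_graph, define_dataset, define_flow, drop_dataflow_graph, start_run, and define_sql_graph_elements; PipelineCommandResult plus the streamed PipelineEventResult/PipelineEvent on the response side; and the DatasetType enum (MATERIALIZED_VIEW, TABLE, TEMPORARY_VIEW). A minimal sketch of constructing two of these commands follows; the graph id is a placeholder for the value a server would return in CreateDataflowGraph.Response:

from pyspark.sql.connect.proto import pipelines_pb2

graph_id = "placeholder-graph-id"  # normally obtained from CreateDataflowGraph.Response

# Register a table-typed dataset in the dataflow graph.
define = pipelines_pb2.PipelineCommand(
    define_dataset=pipelines_pb2.PipelineCommand.DefineDataset(
        dataflow_graph_id=graph_id,
        dataset_name="events",
        dataset_type=pipelines_pb2.DatasetType.TABLE,
        comment="raw events",
        partition_cols=["date"],
    )
)
define.define_dataset.table_properties["quality"] = "bronze"

# Start a run over the registered graph.
start = pipelines_pb2.PipelineCommand(
    start_run=pipelines_pb2.PipelineCommand.StartRun(dataflow_graph_id=graph_id)
)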