[SPARK-54971] Add WITH SCHEMA EVOLUTION syntax for SQL INSERT #53732
Changes from all commits
654e4ae
a10ea97
6963dd7
4308f30
bde23ab
5a2062c
877b88f
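
The hunks below cover only the analyzer side; the parser change that introduces the new clause is not shown in this excerpt. Purely as an illustration of the user-facing feature named in the title, here is a hedged end-to-end sketch in Scala: the placement of `WITH SCHEMA EVOLUTION` inside the INSERT statement is an assumption (by analogy with `MERGE WITH SCHEMA EVOLUTION`), and the catalog/table names are made up.

```scala
import org.apache.spark.sql.SparkSession

// Illustration only. The exact grammar position of WITH SCHEMA EVOLUTION is an
// assumption here (modeled on MERGE WITH SCHEMA EVOLUTION); "demo.target" is a
// made-up table whose catalog must support schema evolution via the
// "mergeSchema" write option for the extra column to be added.
object InsertWithSchemaEvolutionSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("insert-schema-evolution-sketch").getOrCreate()

    // Source rows carry a column ("new_col") that the target table does not have yet.
    spark.sql("SELECT 1 AS id, 'a' AS name, true AS new_col").createOrReplaceTempView("src")

    // Assumed placement of the new clause:
    spark.sql("INSERT INTO demo.target WITH SCHEMA EVOLUTION SELECT * FROM src")

    spark.stop()
  }
}
```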
```diff
@@ -1096,7 +1096,7 @@ class Analyzer(
     def apply(plan: LogicalPlan)
       : LogicalPlan = plan.resolveOperatorsUpWithPruning(AlwaysProcess.fn, ruleId) {
-      case i @ InsertIntoStatement(table, _, _, _, _, _, _) =>
+      case i @ InsertIntoStatement(table, _, _, _, _, _, _, _) =>
         val relation = table match {
           case u: UnresolvedRelation if !u.isStreaming =>
             resolveRelation(u).getOrElse(u)
```
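For orientation: the extra `_` in the patterns in this and the next hunk corresponds to one more field on `InsertIntoStatement`, which later in the diff is read as `i.withSchemaEvolution`. A self-contained shape sketch follows; the pre-existing field list is recalled from Spark's catalyst `statements.scala` rather than shown in this diff, the default value of the new flag is assumed, and `LogicalPlan` is a stand-in type.

```scala
// Shape sketch only, with a stand-in LogicalPlan; not catalyst's real classes.
object InsertIntoStatementShape {
  trait LogicalPlan

  // Existing fields as recalled from Spark's statements.scala; the final
  // withSchemaEvolution flag is the field this PR appends (default assumed).
  case class InsertIntoStatement(
      table: LogicalPlan,
      partitionSpec: Map[String, Option[String]],
      userSpecifiedCols: Seq[String],
      query: LogicalPlan,
      overwrite: Boolean,
      ifPartitionNotExists: Boolean,
      byName: Boolean = false,
      withSchemaEvolution: Boolean = false)
}
```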
```diff
@@ -1231,7 +1231,7 @@ class Analyzer(
   object ResolveInsertInto extends ResolveInsertionBase {
     override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning(
       AlwaysProcess.fn, ruleId) {
-      case i @ InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _, _, _)
+      case i @ InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _, _, _, _)
           if i.query.resolved =>
         // ifPartitionNotExists is append with validation, but validation is not supported
         if (i.ifPartitionNotExists) {
```
```diff
@@ -1249,27 +1249,50 @@ class Analyzer(
       val partCols = partitionColumnNames(r.table)
       validatePartitionSpec(partCols, i.partitionSpec)

+      val schemaEvolutionWriteOption: Map[String, String] =
+        if (i.withSchemaEvolution) Map("mergeSchema" -> "true") else Map.empty
+
       val staticPartitions = i.partitionSpec.filter(_._2.isDefined).transform((_, v) => v.get)
       val query = addStaticPartitionColumns(r, projectByName.getOrElse(i.query), staticPartitions,
         isByName)

       if (!i.overwrite) {
         if (isByName) {
-          AppendData.byName(r, query)
+          AppendData.byName(
```
Member:
why not properly model the schemaEvolution flag in `AppendData` / etc?

Contributor (Author):
@szehon-ho You mean we should add a flag to `AppendData` etc.?

Contributor:
Having a dedicated flag would be cleaner. But dataframe operations have always been using the `mergeSchema` write option, so I would use the write option here as well.

Member:
Makes sense, thanks
```diff
+            r,
+            query,
+            writeOptions = schemaEvolutionWriteOption)
         } else {
-          AppendData.byPosition(r, query)
+          AppendData.byPosition(
+            r,
+            query,
+            writeOptions = schemaEvolutionWriteOption)
         }
       } else if (conf.partitionOverwriteMode == PartitionOverwriteMode.DYNAMIC) {
         if (isByName) {
-          OverwritePartitionsDynamic.byName(r, query)
+          OverwritePartitionsDynamic.byName(
+            r,
+            query,
+            writeOptions = schemaEvolutionWriteOption)
         } else {
-          OverwritePartitionsDynamic.byPosition(r, query)
+          OverwritePartitionsDynamic.byPosition(
+            r,
+            query,
+            writeOptions = schemaEvolutionWriteOption)
         }
       } else {
         if (isByName) {
-          OverwriteByExpression.byName(r, query, staticDeleteExpression(r, staticPartitions))
+          OverwriteByExpression.byName(
+            table = r,
+            df = query,
+            deleteExpr = staticDeleteExpression(r, staticPartitions),
+            writeOptions = schemaEvolutionWriteOption)
         } else {
-          OverwriteByExpression.byPosition(r, query, staticDeleteExpression(r, staticPartitions))
+          OverwriteByExpression.byPosition(
+            table = r,
+            query = query,
+            deleteExpr = staticDeleteExpression(r, staticPartitions),
+            writeOptions = schemaEvolutionWriteOption)
         }
       }
     }
```
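The review thread above points out that DataFrame writes have long expressed schema merging through the `mergeSchema` write option, which is why the analyzer maps `withSchemaEvolution` onto `Map("mergeSchema" -> "true")` instead of a new plan flag. A minimal DataFrame-side sketch of that option; the table name is illustrative, and whether the option is honored depends on the target data source.

```scala
import org.apache.spark.sql.SparkSession

// Minimal sketch of the DataFrame-side counterpart of the "mergeSchema" -> "true"
// write option attached by the analyzer above. "demo.target" is a made-up table
// that is assumed to exist; support for mergeSchema is connector-dependent.
object MergeSchemaWriteSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("merge-schema-write-sketch").getOrCreate()
    import spark.implicits._

    // Source DataFrame carries an extra column relative to the target table.
    val df = Seq((1, "a", true)).toDF("id", "name", "new_col")

    // DataFrameWriterV2 append with schema merging requested as a write option.
    df.writeTo("demo.target")
      .option("mergeSchema", "true")
      .append()

    spark.stop()
  }
}
```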
Member (@cloud-fan):
why not add a new bool field to `AppendData`, like what we did for `InsertIntoStatement`? `MergeIntoTable` also has a `withSchemaEvolution` flag.

Contributor (Author):
Hey @cloud-fan, thank you very much for your review! We also raised this point, and this was our discussion on it: #53732 (comment)
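
For completeness, the alternative raised in both threads, a dedicated flag on the V2 write nodes analogous to `MergeIntoTable.withSchemaEvolution`, would look roughly like the hypothetical sketch below. This is not what the PR implements (it uses write options instead), and the types are stand-ins rather than catalyst's real classes.

```scala
// Hypothetical sketch of the alternative discussed in review; NOT what this PR
// does (the PR passes Map("mergeSchema" -> "true") through writeOptions).
// All types here are stand-ins, not catalyst's real classes.
object DedicatedFlagSketch {
  trait LogicalPlan
  trait NamedRelation extends LogicalPlan

  // A V2 append node carrying an explicit schema-evolution flag, analogous to
  // the withSchemaEvolution field on MergeIntoTable.
  case class AppendData(
      table: NamedRelation,
      query: LogicalPlan,
      writeOptions: Map[String, String] = Map.empty,
      isByName: Boolean = true,
      withSchemaEvolution: Boolean = false)
}
```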