-
Couldn't load subscription status.
- Fork 4.8k
HIVE-29272: Query-based MINOR compaction should not consider minOpenW… #6143
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,8 @@ | |
| import org.apache.hadoop.hive.ql.io.AcidUtils; | ||
| import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; | ||
| import org.apache.hive.common.util.HiveStringUtils; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| import java.util.Arrays; | ||
| import java.util.ArrayList; | ||
|
|
@@ -39,6 +41,9 @@ | |
| import java.util.stream.Collectors; | ||
|
|
||
| abstract class CompactionQueryBuilder { | ||
|
|
||
| private static final Logger LOG = LoggerFactory.getLogger(CompactionQueryBuilder.class.getName()); | ||
|
|
||
| // required fields, set in constructor | ||
| protected Operation operation; | ||
| protected String resultTableName; | ||
|
|
@@ -317,15 +322,20 @@ protected void addTblProperties(StringBuilder query, Map<String, String> tblProp | |
|
|
||
| private void buildAddClauseForAlter(StringBuilder query) { | ||
| if (validWriteIdList == null || dir == null) { | ||
| LOG.info("There is no delta to be added as partition to the temp external table used by the minor compaction. " + | ||
| "This may result an empty compaction directory."); | ||
| query.setLength(0); | ||
| return; // avoid NPEs, don't throw an exception but return an empty query | ||
| } | ||
| long minWriteID = validWriteIdList.getMinOpenWriteId() == null ? 1 : validWriteIdList.getMinOpenWriteId(); | ||
| long highWatermark = validWriteIdList.getHighWatermark(); | ||
| List<AcidUtils.ParsedDelta> deltas = dir.getCurrentDirectories().stream().filter( | ||
| delta -> delta.isDeleteDelta() == isDeleteDelta && delta.getMaxWriteId() <= highWatermark && delta.getMinWriteId() >= minWriteID) | ||
| delta -> delta.isDeleteDelta() == isDeleteDelta && delta.getMaxWriteId() <= highWatermark) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if we remove the minOpenWriteId, we won't compact inflight writes. ValidCompactorWriteIdList doest the trick tblValidWriteIds = TxnUtils.createValidCompactWriteIdList(msc.getValidWriteIds( so basically highWatermark is already capped by minOpenWriteId - 1. similar code from AbortedTxnCleaner 1 thing I have doubts is this: MergeCompactor#getOutputDirPath minOpenWriteId > highWatermark, unless it's not null, right? i think that is wrong and should be derived from actual compacted/merged deltas |
||
| .collect(Collectors.toList()); | ||
| if (deltas.isEmpty()) { | ||
| String warnMsg = String.format("No %s delta is found below the highWaterMark %s to be added as partition " + | ||
| "to the temp external table, used by the minor compaction. This may result an empty compaction directory.", | ||
| isDeleteDelta ? "delete" : "", highWatermark); | ||
| LOG.warn(warnMsg); | ||
| query.setLength(0); // no alter query needed; clear StringBuilder | ||
| return; | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe use warn level?