@@ -3709,4 +3709,97 @@ public void testMajorCompactionUpdateMissingColumnStatsOfPartition() throws Exception

    Assert.assertEquals(3, StatsSetupConst.getColumnsHavingStats(partition.getParameters()).size());
  }

  @Test
  public void testMinorWithAbortedAndOpenTnx() throws Exception {
    String dbName = "default";
    String tableName = "testAbortedAndOpenTnxTbl";
    // Create test table
    TestDataProvider testDataProvider = new TestDataProvider();
    testDataProvider.createFullAcidTable(tableName, false, false);
    IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
    Table table = metaStoreClient.getTable(dbName, tableName);
    FileSystem fs = FileSystem.get(conf);

    // Abort the first insert transaction
    driver.getConf().setBoolVar(HiveConf.ConfVars.HIVE_TEST_MODE_ROLLBACK_TXN, true);
    testDataProvider.insertOnlyTestData(tableName, 1);
    driver.getConf().setBoolVar(HiveConf.ConfVars.HIVE_TEST_MODE_ROLLBACK_TXN, false);
    // Do three successful inserts to create 3 deltas
    testDataProvider.insertOnlyTestData(tableName, 3);

    // Start an insert and leave it open while the compaction is running
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    StreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName).withTable(tableName)
        .withAgentInfo("UT_" + Thread.currentThread().getName()).withHiveConf(conf).withRecordWriter(writer)
        .withTransactionBatchSize(1).connect();
    connection.beginTransaction();
    connection.write("4,4".getBytes());
    // Run query-based MINOR compaction
    CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
    // Finish the open transaction
    connection.commitTransaction();
    connection.close();
    List<String> expectedData = testDataProvider.getAllData(tableName, false);
    // Run the cleaner. It is expected to delete all deltas except the one created by the compaction
    // and the one belonging to the open transaction.
    CompactorTestUtil.runCleaner(conf);

    verifySuccessfulCompaction(1);
    List<String> resultData = testDataProvider.getAllData(tableName);
    Assert.assertEquals(expectedData, resultData);
    List<String> deltas = CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null);
    Assert.assertEquals(2, deltas.size());
    Assert.assertEquals("Delta directory names are not matching after compaction",
        Arrays.asList("delta_0000002_0000004_v0000007", "delta_0000005_0000005"), deltas);
    for (String delta : deltas) {
      // Check that none of the delta directories are empty
      List<String> files = CompactorTestUtil.getBucketFileNames(fs, table, null, delta);
      Assert.assertFalse(files.isEmpty());
    }
  }

  @Test
  public void testMinorWithOpenTnx() throws Exception {
    String dbName = "default";
    String tableName = "testAbortedAndOpenTnxTbl";
    // Create test table
    TestDataProvider testDataProvider = new TestDataProvider();
    testDataProvider.createFullAcidTable(tableName, false, false);
    IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
    Table table = metaStoreClient.getTable(dbName, tableName);
    FileSystem fs = FileSystem.get(conf);

    // Do three successful inserts to create 3 deltas
    testDataProvider.insertOnlyTestData(tableName, 3);

    // Start an insert and leave it open while the compaction is running
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    StreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName).withTable(tableName)
        .withAgentInfo("UT_" + Thread.currentThread().getName()).withHiveConf(conf).withRecordWriter(writer)
        .withTransactionBatchSize(1).connect();
    connection.beginTransaction();
    connection.write("4,4".getBytes());
    // Run query-based MINOR compaction
    CompactorTestUtil.runCompaction(conf, dbName, tableName, CompactionType.MINOR, true);
    // Finish the open transaction
    connection.commitTransaction();
    connection.close();
    List<String> expectedData = testDataProvider.getAllData(tableName, false);
    // Run the cleaner. It is expected to delete all deltas except the one created by the compaction
    // and the one belonging to the open transaction.
    CompactorTestUtil.runCleaner(conf);

    verifySuccessfulCompaction(1);
    List<String> resultData = testDataProvider.getAllData(tableName);
    Assert.assertEquals(expectedData, resultData);
    List<String> deltas = CompactorTestUtil.getBaseOrDeltaNames(fs, AcidUtils.deltaFileFilter, table, null);
    Assert.assertEquals(2, deltas.size());
    Assert.assertEquals("Delta directory names are not matching after compaction",
        Arrays.asList("delta_0000001_0000003_v0000006", "delta_0000004_0000004"), deltas);
    for (String delta : deltas) {
      // Check that none of the delta directories are empty
      List<String> files = CompactorTestUtil.getBucketFileNames(fs, table, null, delta);
      Assert.assertFalse(files.isEmpty());
    }
  }

}
@@ -30,6 +30,8 @@
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hive.common.util.HiveStringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.ArrayList;
@@ -39,6 +41,9 @@
import java.util.stream.Collectors;

abstract class CompactionQueryBuilder {

  private static final Logger LOG = LoggerFactory.getLogger(CompactionQueryBuilder.class.getName());

  // required fields, set in constructor
  protected Operation operation;
  protected String resultTableName;
@@ -317,15 +322,20 @@ protected void addTblProperties(StringBuilder query, Map<String, String> tblProp

  private void buildAddClauseForAlter(StringBuilder query) {
    if (validWriteIdList == null || dir == null) {
      LOG.info("There is no delta to be added as partition to the temp external table used by the minor compaction. " +
          "This may result in an empty compaction directory.");

Member: maybe use warn level?

      query.setLength(0);
      return; // avoid NPEs, don't throw an exception but return an empty query
    }
    long minWriteID = validWriteIdList.getMinOpenWriteId() == null ? 1 : validWriteIdList.getMinOpenWriteId();
    long highWatermark = validWriteIdList.getHighWatermark();
    List<AcidUtils.ParsedDelta> deltas = dir.getCurrentDirectories().stream().filter(
        delta -> delta.isDeleteDelta() == isDeleteDelta && delta.getMaxWriteId() <= highWatermark && delta.getMinWriteId() >= minWriteID)
        delta -> delta.isDeleteDelta() == isDeleteDelta && delta.getMaxWriteId() <= highWatermark)
@deniskuzZ (Member) commented on Oct 28, 2025:

If we remove the minOpenWriteId, we won't compact inflight writes. ValidCompactorWriteIdList does the trick:

tblValidWriteIds = TxnUtils.createValidCompactWriteIdList(msc.getValidWriteIds(
    Collections.singletonList(fullTableName),
    validTxnList.writeToString()).get(0));

if (tableValidWriteIds.isSetMinOpenWriteId()) {
  long minOpenWriteId = tableValidWriteIds.getMinOpenWriteId();
  return new ValidCompactorWriteIdList(fullTableName, exceptions, bitSet, minOpenWriteId - 1, minOpenWriteId);
} else {
  return new ValidCompactorWriteIdList(fullTableName, exceptions, bitSet, tableValidWriteIds.getWriteIdHighWaterMark());
}

So basically the highWatermark is already capped at minOpenWriteId - 1.

Similar code from AbortedTxnCleaner:

info.highestWriteId = Math.min(
    isNull(validWriteIdList.getMinOpenWriteId()) ?
        Long.MAX_VALUE : validWriteIdList.getMinOpenWriteId() - 1,
    validWriteIdList.getHighWatermark());

One thing I have doubts about is this:

MergeCompactor#getOutputDirPath:
long minOpenWriteId = writeIds.getMinOpenWriteId() == null ? 1 : writeIds.getMinOpenWriteId();

minOpenWriteId > highWatermark, unless it's null, right?

I think that is wrong and should be derived from the actual compacted/merged deltas.
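
For illustration only (not part of this patch): a minimal, self-contained sketch of the capping behavior described above, using hypothetical write ids. The class name and the concrete values are made up; only the Math.min/null-check pattern mirrors the AbortedTxnCleaner snippet quoted above.

// Hypothetical, standalone sketch: how the compactor's view of the highWatermark
// ends up capped at minOpenWriteId - 1, so deltas from still-open transactions are skipped.
public class HighWatermarkCapSketch {
  public static void main(String[] args) {
    long committedHighWatermark = 8L; // highest allocated write id for the table (hypothetical)
    Long minOpenWriteId = 5L;         // lowest write id still open (hypothetical); may be null
    long capped = Math.min(
        minOpenWriteId == null ? Long.MAX_VALUE : minOpenWriteId - 1,
        committedHighWatermark);
    System.out.println(capped);       // prints 4: deltas above write id 4 wait for a later compaction
  }
}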

        .collect(Collectors.toList());
    if (deltas.isEmpty()) {
      String warnMsg = String.format("No %s delta is found below the highWaterMark %s to be added as partition " +
          "to the temp external table, used by the minor compaction. This may result in an empty compaction directory.",
          isDeleteDelta ? "delete" : "", highWatermark);
      LOG.warn(warnMsg);
      query.setLength(0); // no alter query needed; clear StringBuilder
      return;
    }
@@ -80,7 +80,7 @@ protected HiveConf setUpDriverSession(HiveConf hiveConf) {
   * Clean up the empty table dir of 'tmpTableName'.
   */
  @Override protected void commitCompaction(String tmpTableName, HiveConf conf) throws IOException, HiveException {
    Util.cleanupEmptyTableDir(conf, tmpTableName);
    Util.cleanupEmptyTableDir(conf, tmpTableName + "_result");
  }

  /**
@@ -172,7 +172,7 @@ public void testAlter() {
    queryBuilder.setIsDeleteDelta(true);
    String query = queryBuilder.build();
    String expectedQuery =
        "ALTER table comp_test_result_table add partition (file_name='test_delta_1') location '/compaction/test/table/test_delta_1' partition (file_name='test_delta_3') location '/compaction/test/table/test_delta_3' ";
        "ALTER table comp_test_result_table add partition (file_name='test_delta_1') location '/compaction/test/table/test_delta_1' partition (file_name='test_delta_2') location '/compaction/test/table/test_delta_2' partition (file_name='test_delta_3') location '/compaction/test/table/test_delta_3' ";
    Assert.assertEquals(expectedQuery, query);
  }

@@ -150,7 +150,7 @@ public void testAlter() {
    queryBuilder.setIsDeleteDelta(true);
    String query = queryBuilder.build();
    String expectedQuery =
        "ALTER table comp_test_result_table add partition (file_name='test_delta_1') location '/compaction/test/table/test_delta_1' partition (file_name='test_delta_3') location '/compaction/test/table/test_delta_3' ";
        "ALTER table comp_test_result_table add partition (file_name='test_delta_1') location '/compaction/test/table/test_delta_1' partition (file_name='test_delta_2') location '/compaction/test/table/test_delta_2' partition (file_name='test_delta_3') location '/compaction/test/table/test_delta_3' ";
    Assert.assertEquals(expectedQuery, query);
  }

@@ -372,7 +372,7 @@ public void testAlterMajorCompaction() {
    queryBuilder.setIsDeleteDelta(true);
    String query = queryBuilder.build();
    String expectedQuery =
        "ALTER table comp_test_result_table add partition (file_name='test_delta_1') location '/compaction/test/table/test_delta_1' partition (file_name='test_delta_3') location '/compaction/test/table/test_delta_3' ";
        "ALTER table comp_test_result_table add partition (file_name='test_delta_1') location '/compaction/test/table/test_delta_1' partition (file_name='test_delta_2') location '/compaction/test/table/test_delta_2' partition (file_name='test_delta_3') location '/compaction/test/table/test_delta_3' ";
    Assert.assertEquals(expectedQuery, query);
  }

@@ -386,7 +386,7 @@ public void testAlterMinorCompaction() {
    queryBuilder.setIsDeleteDelta(true);
    String query = queryBuilder.build();
    String expectedQuery =
        "ALTER table comp_test_result_table add partition (file_name='test_delta_1') location '/compaction/test/table/test_delta_1' partition (file_name='test_delta_3') location '/compaction/test/table/test_delta_3' ";
        "ALTER table comp_test_result_table add partition (file_name='test_delta_1') location '/compaction/test/table/test_delta_1' partition (file_name='test_delta_2') location '/compaction/test/table/test_delta_2' partition (file_name='test_delta_3') location '/compaction/test/table/test_delta_3' ";
    Assert.assertEquals(expectedQuery, query);
  }

@@ -126,7 +126,7 @@ public void testAlter() {
    queryBuilder.setIsDeleteDelta(true);
    String query = queryBuilder.build();
    String expectedQuery =
        "ALTER table comp_test_result_table add partition (file_name='test_delta_1') location '/compaction/test/table/test_delta_1' partition (file_name='test_delta_3') location '/compaction/test/table/test_delta_3' ";
        "ALTER table comp_test_result_table add partition (file_name='test_delta_1') location '/compaction/test/table/test_delta_1' partition (file_name='test_delta_2') location '/compaction/test/table/test_delta_2' partition (file_name='test_delta_3') location '/compaction/test/table/test_delta_3' ";
    Assert.assertEquals(expectedQuery, query);
  }
