Skip to content

Commit

Permalink
[improvement](statistics)Support get index row count and table delta …
Browse files Browse the repository at this point in the history
…rows. (#38492)

Provide get row count for a given index id and get table delta rows API
in TableStatsMeta class. This is for Nereids stats calculator to fetch
row count.
  • Loading branch information
Jibing-Li authored and dataroaring committed Aug 6, 2024
1 parent b295cec commit a4853a6
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 7 deletions.
7 changes: 6 additions & 1 deletion fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -4599,7 +4599,12 @@ show_param ::=
/* show table stats */
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames opt_col_list:cols
{:
RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, cached);
RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, cached, null);
:}
/* show index stats */
| KW_INDEX KW_STATS table_name:tbl ident:id
{:
RESULT = new ShowTableStatsStmt(tbl, null, null, false, id);
:}
/* show column stats */
| KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ public void analyze(Analyzer analyzer) throws UserException {
throw new AnalysisException(optional.get() + " is invalid statistics");
}

if (!properties.containsKey(StatsType.ROW_COUNT.getValue())) {
throw new AnalysisException("Set column stats must set row_count. e.g. 'row_count'='5'");
}

// get statsTypeToValue
properties.forEach((key, value) -> {
StatsType statsType = StatsType.fromString(key);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ public class ShowTableStatsStmt extends ShowStmt {
.add("row_count")
.build();

private static final ImmutableList<String> INDEX_TITLE_NAMES =
new ImmutableList.Builder<String>()
.add("table_name")
.add("index_name")
.add("row_count")
.build();

private static final ImmutableList<String> COLUMN_PARTITION_TITLE_NAMES =
new ImmutableList.Builder<String>()
.add("index_name")
Expand All @@ -82,15 +89,17 @@ public class ShowTableStatsStmt extends ShowStmt {
private final List<String> columnNames;
private final PartitionNames partitionNames;
private final boolean cached;
private final String indexName;

private TableIf table;

public ShowTableStatsStmt(TableName tableName, List<String> columnNames,
PartitionNames partitionNames, boolean cached) {
PartitionNames partitionNames, boolean cached, String indexName) {
this.tableName = tableName;
this.columnNames = columnNames;
this.partitionNames = partitionNames;
this.cached = cached;
this.indexName = indexName;
}

public TableName getTableName() {
Expand Down Expand Up @@ -141,8 +150,10 @@ public ShowResultSetMetaData getMetaData() {
ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder();

ImmutableList<String> titles;
// If columnNames != null, partitionNames is also not null. Guaranteed in analyze()
if (columnNames != null) {
if (indexName != null) {
titles = INDEX_TITLE_NAMES;
} else if (columnNames != null) {
// If columnNames != null, partitionNames is also not null. Guaranteed in analyze()
titles = COLUMN_PARTITION_TITLE_NAMES;
} else if (partitionNames != null) {
titles = PARTITION_TITLE_NAMES;
Expand All @@ -160,6 +171,9 @@ public TableIf getTable() {
}

public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
if (indexName != null) {
return constructIndexResultSet(tableStatistic);
}
if (partitionNames == null) {
return constructTableResultSet(tableStatistic);
}
Expand Down Expand Up @@ -238,6 +252,28 @@ public ShowResultSet constructPartitionResultSet(TableStatsMeta tableStatistic)
return new ShowResultSet(getMetaData(), result);
}

public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
List<List<String>> result = Lists.newArrayList();
if (!(table instanceof OlapTable)) {
return new ShowResultSet(getMetaData(), result);
}
OlapTable olapTable = (OlapTable) table;
Long indexId = olapTable.getIndexIdByName(indexName);
if (indexId == null) {
throw new RuntimeException(String.format("Index %s not exist.", indexName));
}
long rowCount = tableStatistic.getRowCount(olapTable.getIndexIdByName(indexName));
if (rowCount == -1) {
return new ShowResultSet(getMetaData(), result);
}
List<String> row = Lists.newArrayList();
row.add(table.getName());
row.add(indexName);
row.add(String.valueOf(rowCount));
result.add(row);
return new ShowResultSet(getMetaData(), result);
}

public ShowResultSet constructColumnPartitionResultSet(TableStatsMeta tableStatistic) {
List<List<String>> result = Lists.newArrayList();
if (!(table instanceof OlapTable)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import java.util.Set;
import java.util.StringJoiner;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class AnalysisInfo implements Writable {

Expand Down Expand Up @@ -196,6 +197,8 @@ public enum ScheduleType {
@SerializedName("ep")
public final boolean enablePartition;

public final ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();

public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId, long dbId, long tblId,
Set<Pair<String, String>> jobColumns, Set<String> partitionNames, String colName, Long indexId,
JobType jobType, AnalysisMethod analysisMethod, AnalysisType analysisType,
Expand Down Expand Up @@ -350,4 +353,8 @@ public void markFailed() {
public TableIf getTable() {
return StatisticsUtil.findTable(catalogId, dbId, tblId);
}

public void addIndexRowCount(long indexId, long rowCount) {
indexesRowCount.put(indexId, rowCount);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,14 @@ protected void runQuery(String sql) {
try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) {
stmtExecutor = new StmtExecutor(a.connectContext, sql);
ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0));
// Update index row count after analyze.
if (this instanceof OlapAnalysisTask) {
AnalysisInfo jobInfo = Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId);
// For sync job, get jobInfo from job.jobInfo.
jobInfo = jobInfo == null ? job.jobInfo : jobInfo;
long indexId = info.indexId == -1 ? ((OlapTable) tbl).getBaseIndexId() : info.indexId;
jobInfo.addIndexRowCount(indexId, colStatsData.count);
}
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
queryId = DebugUtil.printId(stmtExecutor.getContext().queryId());
job.appendBuf(this, Collections.singletonList(colStatsData));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.analysis.TableName;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
Expand Down Expand Up @@ -313,12 +314,13 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt
String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE);
String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE);
long indexId = alterColumnStatsStmt.getIndexId();
if (rowCount == null) {
throw new RuntimeException("Row count is null.");
}
ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
String colName = alterColumnStatsStmt.getColumnName();
Column column = objects.table.getColumn(colName);
if (rowCount != null) {
builder.setCount(Double.parseDouble(rowCount));
}
builder.setCount(Double.parseDouble(rowCount));
if (ndv != null) {
double dNdv = Double.parseDouble(ndv);
builder.setNdv(dNdv);
Expand Down Expand Up @@ -372,10 +374,15 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt
AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder()
.setTblUpdateTime(System.currentTimeMillis())
.setColName("")
.setRowCount((long) Double.parseDouble(rowCount))
.setJobColumns(Sets.newHashSet())
.setUserInject(true)
.setJobType(AnalysisInfo.JobType.MANUAL)
.build();
if (objects.table instanceof OlapTable) {
indexId = indexId == -1 ? ((OlapTable) objects.table).getBaseIndexId() : indexId;
mockedJobInfo.addIndexRowCount(indexId, (long) Double.parseDouble(rowCount));
}
Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table);
} else {
// update partition granularity statistics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
Expand Down Expand Up @@ -82,6 +85,9 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {
@SerializedName("pur")
public ConcurrentMap<Long, Long> partitionUpdateRows = new ConcurrentHashMap<>();

@SerializedName("irc")
public ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();

@VisibleForTesting
public TableStatsMeta() {
tblId = 0;
Expand Down Expand Up @@ -154,6 +160,10 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
}
jobType = analyzedJob.jobType;
if (tableIf != null) {
if (tableIf instanceof OlapTable) {
indexesRowCount.putAll(analyzedJob.indexesRowCount);
clearStaleIndexRowCount((OlapTable) tableIf);
}
rowCount = analyzedJob.rowCount;
if (rowCount == 0 && AnalysisMethod.SAMPLE.equals(analyzedJob.analysisMethod)) {
return;
Expand All @@ -178,5 +188,37 @@ public void gsonPostProcess() throws IOException {
if (partitionUpdateRows == null) {
partitionUpdateRows = new ConcurrentHashMap<>();
}
if (indexesRowCount == null) {
indexesRowCount = new ConcurrentHashMap<>();
}
}

public long getRowCount(long indexId) {
return indexesRowCount.getOrDefault(indexId, -1L);
}

private void clearStaleIndexRowCount(OlapTable table) {
Iterator<Long> iterator = indexesRowCount.keySet().iterator();
List<Long> indexIds = table.getIndexIds();
while (iterator.hasNext()) {
long key = iterator.next();
if (indexIds.contains(key)) {
iterator.remove();
}
}
}

public long getBaseIndexDeltaRowCount(OlapTable table) {
if (colToColStatsMeta == null) {
return -1;
}
long maxUpdateRows = 0;
String baseIndexName = table.getIndexNameById(table.getBaseIndexId());
for (Map.Entry<Pair<String, String>, ColStatsMeta> entry : colToColStatsMeta.entrySet()) {
if (entry.getKey().first.equals(baseIndexName) && entry.getValue().updatedRows > maxUpdateRows) {
maxUpdateRows = entry.getValue().updatedRows;
}
}
return updatedRows.get() - maxUpdateRows;
}
}
39 changes: 39 additions & 0 deletions regression-test/suites/statistics/test_analyze_mv.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,28 @@ suite("test_analyze_mv") {

sql """analyze table mvTestDup with sync;"""

// Test show index row count
def result_row = sql """show index stats mvTestDup mvTestDup"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mvTestDup", result_row[0][1])
assertEquals("6", result_row[0][2])
result_row = sql """show index stats mvTestDup mv1"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv1", result_row[0][1])
assertEquals("6", result_row[0][2])
result_row = sql """show index stats mvTestDup mv2"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv2", result_row[0][1])
assertEquals("6", result_row[0][2])
result_row = sql """show index stats mvTestDup mv3"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv3", result_row[0][1])
assertEquals("4", result_row[0][2])

// Compare show whole table column stats result with show single column.
def result_all = sql """show column stats mvTestDup"""
assertEquals(12, result_all.size())
Expand Down Expand Up @@ -411,6 +433,23 @@ suite("test_analyze_mv") {
assertEquals("4001", result_sample[0][8])
assertEquals("FULL", result_sample[0][9])

// Test alter table index row count.
sql """alter table mvTestDup modify column `value2` set stats ('row_count'='1.5E8', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
result_row = sql """show index stats mvTestDup mvTestDup;"""
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mvTestDup", result_row[0][1])
assertEquals("150000000", result_row[0][2])
sql """alter table mvTestDup index mv1 modify column `mv_key1` set stats ('row_count'='3443', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
result_row = sql """show index stats mvTestDup mv1;"""
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv1", result_row[0][1])
assertEquals("3443", result_row[0][2])
sql """alter table mvTestDup index mv3 modify column `mva_MAX__``value2``` set stats ('row_count'='234234', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
result_row = sql """show index stats mvTestDup mv3;"""
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv3", result_row[0][1])
assertEquals("234234", result_row[0][2])

sql """drop stats mvTestDup"""
result_sample = sql """show column stats mvTestDup"""
assertEquals(0, result_sample.size())
Expand Down

0 comments on commit a4853a6

Please sign in to comment.