Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
e496e0d
First step optimizing tsdb doc values codec merging.
martijnvg Mar 21, 2025
9bd2907
[CI] Auto commit changes from spotless
Mar 21, 2025
65d97e5
actually use OrdinalMap when merging sorted and sorted dv
martijnvg Mar 21, 2025
7369a22
fix test
martijnvg Mar 21, 2025
3b7822d
[CI] Auto commit changes from spotless
Mar 21, 2025
ce4b326
fix test (2)
martijnvg Mar 21, 2025
486ea20
fix lost of stuff
martijnvg Mar 21, 2025
16c0a00
Merge remote-tracking branch 'es/main' into mergeSortedNumericField_3
martijnvg Mar 21, 2025
984513a
iter
martijnvg Mar 24, 2025
5a575d6
Merge remote-tracking branch 'es/main' into mergeSortedNumericField_3
martijnvg Mar 24, 2025
3b53705
iter test
martijnvg Mar 24, 2025
9fb38b6
moving code around
martijnvg Mar 24, 2025
1e0e2f8
benchmark iter
martijnvg Mar 25, 2025
65741c4
Merge remote-tracking branch 'es/main' into mergeSortedNumericField_3
martijnvg Mar 25, 2025
1ec6308
Check for deleted docs before getting doc value instances.
martijnvg Mar 25, 2025
ccae570
Merge remote-tracking branch 'es/main' into mergeSortedNumericField_3
martijnvg Mar 25, 2025
5e7cc11
remove doc value skipper check
martijnvg Mar 25, 2025
744a665
Remove getEntryFunction lambda and delegate to doc value instance dire…
martijnvg Mar 25, 2025
176fac7
lower doc count in benchmark
martijnvg Mar 25, 2025
ec998a3
added node setting to control whether optimized merge is enabled.
martijnvg Mar 25, 2025
5425079
Merge remote-tracking branch 'es/main' into mergeSortedNumericField_3
martijnvg Mar 25, 2025
066b778
Update docs/changelog/125403.yaml
martijnvg Mar 25, 2025
722f85e
[CI] Auto commit changes from spotless
Mar 25, 2025
2bb9867
register node setting
martijnvg Mar 25, 2025
5bcc62c
fix npe
martijnvg Mar 25, 2025
27efdd2
iter
martijnvg Mar 26, 2025
646c566
Revert node setting for jvm env variable.
martijnvg Mar 26, 2025
98c0874
Merge remote-tracking branch 'es/main' into mergeSortedNumericField_3
martijnvg Mar 26, 2025
71201c8
more tests
martijnvg Mar 27, 2025
c41b1f9
Merge remote-tracking branch 'es/main' into mergeSortedNumericField_3
martijnvg Mar 27, 2025
e6fe87a
iter
martijnvg Mar 27, 2025
020bae7
remove unused field
martijnvg Mar 27, 2025
d8b3c15
fixed bug
martijnvg Mar 27, 2025
a63e853
addresses
martijnvg Mar 26, 2025
fae99b5
disi
martijnvg Mar 26, 2025
6208339
Make it really work:
martijnvg Mar 28, 2025
621ec0d
Merge remote-tracking branch 'es/main' into mergeSortedNumericField_5
martijnvg Mar 29, 2025
44e8bb4
Merge remote-tracking branch 'es/main' into mergeSortedNumericField_5
martijnvg Mar 31, 2025
e370f43
IndexedDISIBuilder
martijnvg Mar 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.benchmark.index.codec.tsdb;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.profile.AsyncProfiler;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.io.IOException;
import java.nio.file.Files;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * JMH benchmark that times a force merge down to a single segment for an index-sorted Lucene index whose
 * doc values are written with {@link ES87TSDBDocValuesFormat}. Two identical indices are built during
 * setup: one with the format's optimized-merge flag disabled and one with it enabled, and each benchmark
 * method force merges one of them.
 *
 * <p>The benchmark runs in {@link Mode#SingleShotTime}: {@code forceMerge(1)} only does real work the first
 * time it is invoked on an index, so any repeated invocation within the same trial would measure a no-op.
 */
@BenchmarkMode(Mode.SingleShotTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Fork(1)
@Threads(1)
@Warmup(iterations = 0)
@Measurement(iterations = 1)
public class TSDBDocValuesMergeBenchmark {

    /** Number of documents indexed into each of the two indices. */
    @Param("20431204")
    private int nDocs;

    /** Spacing between consecutive timestamps of one host; also the exclusive upper bound of the random jitter. */
    @Param("1000")
    private int deltaTime;

    /** Seed for the random timestamp jitter, so that both indices receive the same data. */
    @Param("42")
    private int seed;

    private static final String TIMESTAMP_FIELD = "@timestamp";
    private static final String HOSTNAME_FIELD = "host.name";
    // 2024-01-01T00:00:00Z in epoch milliseconds
    private static final long BASE_TIMESTAMP = 1704067200000L;

    private IndexWriter indexWriterWithoutOptimizedMerge;
    private IndexWriter indexWriterWithOptimizedMerge;
    // Created in setup() and shut down in tearDown(); no benchmark method in this class submits work to it.
    private ExecutorService executorService;

    /**
     * Runs this benchmark through the JMH {@link Runner} with the async profiler attached.
     *
     * @param args ignored
     * @throws RunnerException if JMH fails to run the benchmark
     */
    public static void main(String[] args) throws RunnerException {
        final Options options = new OptionsBuilder().include(TSDBDocValuesMergeBenchmark.class.getSimpleName())
            .addProfiler(AsyncProfiler.class)
            .build();

        new Runner(options).run();
    }

    /**
     * Builds both indices, each in its own freshly created temporary directory.
     *
     * @throws IOException if a temporary directory cannot be created or indexing fails
     */
    @Setup(Level.Trial)
    public void setup() throws IOException {
        executorService = Executors.newSingleThreadExecutor();

        final Directory tempDirectoryWithoutOptimizedMerge = FSDirectory.open(Files.createTempDirectory("temp1-"));
        final Directory tempDirectoryWithOptimizedMerge = FSDirectory.open(Files.createTempDirectory("temp2-"));

        indexWriterWithoutOptimizedMerge = createIndex(tempDirectoryWithoutOptimizedMerge, false);
        indexWriterWithOptimizedMerge = createIndex(tempDirectoryWithOptimizedMerge, true);
    }

    /**
     * Indexes {@link #nDocs} synthetic documents into {@code directory} and commits them.
     *
     * @param directory             the directory the index is written to
     * @param optimizedMergeEnabled passed through to the {@link ES87TSDBDocValuesFormat} constructor
     * @return the still-open writer, so that the benchmark method can force merge the index
     * @throws IOException if indexing or committing fails
     */
    private IndexWriter createIndex(final Directory directory, final boolean optimizedMergeEnabled) throws IOException {
        final IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        // NOTE: index sort config matching LogsDB's sort order (host name ascending, timestamp descending)
        config.setIndexSort(
            new Sort(
                new SortField(HOSTNAME_FIELD, SortField.Type.STRING, false),
                new SortedNumericSortField(TIMESTAMP_FIELD, SortField.Type.LONG, true)
            )
        );
        final ES87TSDBDocValuesFormat docValuesFormat = new ES87TSDBDocValuesFormat(4096, optimizedMergeEnabled);
        config.setCodec(new Lucene101Codec() {

            @Override
            public DocValuesFormat getDocValuesFormatForField(String field) {
                // every field uses the format under test
                return docValuesFormat;
            }
        });

        long counter1 = 0;
        long counter2 = 10_000_000;
        final long[] gauge1Values = new long[] { 2, 4, 6, 8, 10, 12, 14, 16 };
        final long[] gauge2Values = new long[] { -2, -4, -6, -8, -10, -12, -14, -16 };
        // Every run of this many consecutive documents shares one host name.
        final int docsPerHost = 1000;
        final String[] tags = new String[] { "tag_1", "tag_2", "tag_3", "tag_4", "tag_5", "tag_6", "tag_7", "tag_8" };

        final Random random = new Random(seed);
        final IndexWriter indexWriter = new IndexWriter(directory, config);
        for (int i = 0; i < nDocs; i++) {
            final Document doc = new Document();

            final int hostIndex = i / docsPerHost;
            final String hostName = "host-" + hostIndex;
            // Slightly vary the timestamp in each document; the product is widened to long so that a large
            // deltaTime parameter cannot overflow int arithmetic.
            final long timestamp = BASE_TIMESTAMP + ((long) (i % docsPerHost) * deltaTime) + random.nextInt(0, deltaTime);

            doc.add(new SortedDocValuesField(HOSTNAME_FIELD, new BytesRef(hostName)));
            doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, timestamp));
            doc.add(new SortedNumericDocValuesField("counter_1", counter1++));
            doc.add(new SortedNumericDocValuesField("counter_2", counter2++));
            doc.add(new SortedNumericDocValuesField("gauge_1", gauge1Values[i % gauge1Values.length]));
            doc.add(new SortedNumericDocValuesField("gauge_2", gauge2Values[i % gauge2Values.length]));
            // NOTE(review): tags.length % (i + 1) equals tags.length for every i >= tags.length, so almost all
            // documents carry every tag. If a varying tag count was intended this was probably meant to be
            // i % (tags.length + 1); kept as is so the generated data does not change.
            final int numTags = tags.length % (i + 1);
            for (int j = 0; j < numTags; j++) {
                doc.add(new SortedSetDocValuesField("tags", new BytesRef(tags[j])));
            }

            indexWriter.addDocument(doc);
        }
        indexWriter.commit();
        return indexWriter;
    }

    /** Force merges the index that was written with the optimized merge disabled. */
    @Benchmark
    public void forceMergeWithoutOptimizedMerge() throws IOException {
        forceMerge(indexWriterWithoutOptimizedMerge);
    }

    /** Force merges the index that was written with the optimized merge enabled. */
    @Benchmark
    public void forceMergeWithOptimizedMerge() throws IOException {
        forceMerge(indexWriterWithOptimizedMerge);
    }

    private void forceMerge(final IndexWriter indexWriter) throws IOException {
        indexWriter.forceMerge(1);
    }

    /**
     * Releases everything acquired in {@link #setup()}: both index writers together with their directories,
     * and the executor. The temporary directories themselves are not deleted from disk.
     *
     * @throws IOException if closing a writer or directory fails
     */
    @TearDown(Level.Trial)
    public void tearDown() throws IOException {
        // Nested try/finally so that a failure to close one resource does not prevent closing the others.
        try {
            closeWriterAndDirectory(indexWriterWithoutOptimizedMerge);
        } finally {
            try {
                closeWriterAndDirectory(indexWriterWithOptimizedMerge);
            } finally {
                shutdownExecutor();
            }
        }
    }

    /** Closes the writer and then the directory it writes to; does nothing when the writer is {@code null}. */
    private static void closeWriterAndDirectory(final IndexWriter indexWriter) throws IOException {
        if (indexWriter == null) {
            return;
        }
        // Fetch the directory before closing the writer, since closing the writer does not close the directory.
        final Directory directory = indexWriter.getDirectory();
        try {
            indexWriter.close();
        } finally {
            directory.close();
        }
    }

    /** Shuts the executor down, waiting up to 30 seconds before forcing termination. */
    private void shutdownExecutor() {
        if (executorService != null) {
            executorService.shutdown();
            try {
                if (executorService.awaitTermination(30, TimeUnit.SECONDS) == false) {
                    executorService.shutdownNow();
                }
            } catch (InterruptedException e) {
                executorService.shutdownNow();
                Thread.currentThread().interrupt();
            }
        }
    }
}
5 changes: 5 additions & 0 deletions docs/changelog/125403.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Changelog entry for pull request 125403.
pr: 125403
summary: First step optimizing tsdb doc values codec merging
area: Codec
type: enhancement
issues: []
Loading
Loading