Skip to content

Commit

Permalink
tests: Reproduce #18089 in robustness tests
Browse files Browse the repository at this point in the history
1) Use the SleepBeforeSendWatchResponse failpoint to simulate a slow watch
2) Decrease the compaction period from 200ms to 100ms to increase the probability of compacting on Delete
3) Introduce a new traffic pattern of 50/50 Put and Delete

With these three changes, the `make test-robustness-issue18089` command can reproduce issue #18089.

Signed-off-by: Jiayin Mao <[email protected]>
  • Loading branch information
jmao-dd committed Jan 16, 2025
1 parent 75f2ae1 commit 7590a7e
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 8 deletions.
2 changes: 1 addition & 1 deletion tests/robustness/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ The purpose of these tests is to rigorously validate that etcd maintains its [KV
| Duplicated watch event due to bug in TXN caching [#17247] | Jan 2024 | main branch | Robustness | Yes, prevented regression in v3.6 | |
| Watch events lost during stream starvation [#17529] | Mar 2024 | v3.4 or earlier | User | Yes, after covering of slow watch | `make test-robustness-issue17529` |
| Revision decreasing caused by crash during compaction [#17780] | Apr 2024 | v3.4 or earlier | Robustness | Yes, after covering compaction | |
| Watch dropping an event when compacting on delete [#18089] | May 2024 | v3.4 or earlier | Robustness | Yes, after covering of compaction | |
| Watch dropping an event when compacting on delete [#18089] | May 2024 | v3.4 or earlier | Robustness | Yes, after covering of compaction | `make test-robustness-issue18089` |
| Inconsistency when reading compacted revision in TXN [#18667] | Oct 2024 | v3.4 or earlier | User | | |

[#13766]: https://github.com/etcd-io/etcd/issues/13766
Expand Down
2 changes: 1 addition & 1 deletion tests/robustness/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ var testRunner = framework.E2eTestRunner

var (
WaitBeforeFailpoint = time.Second
WaitJitter = traffic.CompactionPeriod
WaitJitter = traffic.DefaultCompactionPeriod
WaitAfterFailpoint = time.Second
)

Expand Down
5 changes: 5 additions & 0 deletions tests/robustness/makefile.mk
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ test-robustness-issue17780: /tmp/etcd-v3.5.13-compactBeforeSetFinishedCompact/bi
GO_TEST_FLAGS='-v --run=TestRobustnessRegression/Issue17780 --count 200 --failfast --bin-dir=/tmp/etcd-v3.5.13-compactBeforeSetFinishedCompact/bin' make test-robustness && \
echo "Failed to reproduce" || echo "Successful reproduction"

# Attempts to reproduce issue #18089 by running the
# TestRobustnessRegression/Issue18089 scenario with the binaries under
# /tmp/etcd-v3.5.12-beforeSendWatchResponse/bin. The test is run with
# -count 100 and -failfast, so the run stops at the first failure.
# The messages are intentionally inverted: a passing test run means the bug
# was NOT reproduced, while a failing run counts as a successful reproduction.
.PHONY: test-robustness-issue18089
test-robustness-issue18089: /tmp/etcd-v3.5.12-beforeSendWatchResponse/bin
GO_TEST_FLAGS='-v -run=TestRobustnessRegression/Issue18089 -count 100 -failfast --bin-dir=/tmp/etcd-v3.5.12-beforeSendWatchResponse/bin' make test-robustness && \
echo "Failed to reproduce" || echo "Successful reproduction"

# Failpoints

GOPATH = $(shell go env GOPATH)
Expand Down
10 changes: 10 additions & 0 deletions tests/robustness/scenarios/scenarios.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,16 @@ func Regression(t *testing.T) []TestScenario {
e2e.WithGoFailEnabled(true),
),
})
scenarios = append(scenarios, TestScenario{
Name: "Issue18089",
Profile: traffic.LowTraffic.WithCompactionPeriod(100 * time.Millisecond), // Use frequent compaction for high reproduce rate
Failpoint: failpoint.SleepBeforeSendWatchResponse,
Traffic: traffic.EtcdDelete,
Cluster: *e2e.NewConfig(
e2e.WithClusterSize(1),
e2e.WithGoFailEnabled(true),
),
})
if v.Compare(version.V3_5) >= 0 {
opts := []e2e.EPClusterOption{
e2e.WithSnapshotCount(100),
Expand Down
10 changes: 10 additions & 0 deletions tests/robustness/traffic/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@ var (
{Choice: Put, Weight: 40},
},
}
EtcdDelete Traffic = etcdTraffic{
keyCount: 10,
largePutSize: 32769,
leaseTTL: DefaultLeaseTTL,
// Please keep the sum of weights equal to 100.
requests: []random.ChoiceWeight[etcdRequestType]{
{Choice: Put, Weight: 50},
{Choice: Delete, Weight: 50},
},
}
)

type etcdTraffic struct {
Expand Down
22 changes: 16 additions & 6 deletions tests/robustness/traffic/traffic.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ import (
)

var (
DefaultLeaseTTL int64 = 7200
RequestTimeout = 200 * time.Millisecond
WatchTimeout = time.Second
MultiOpTxnOpCount = 4
CompactionPeriod = 200 * time.Millisecond
DefaultLeaseTTL int64 = 7200
RequestTimeout = 200 * time.Millisecond
WatchTimeout = time.Second
MultiOpTxnOpCount = 4
DefaultCompactionPeriod = 200 * time.Millisecond

LowTraffic = Profile{
MinimalQPS: 100,
Expand Down Expand Up @@ -96,7 +96,11 @@ func SimulateTraffic(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2
defer wg.Done()
defer c.Close()

RunCompactLoop(ctx, c, CompactionPeriod, finish)
compactionPeriod := DefaultCompactionPeriod
if profile.CompactPeriod != time.Duration(0) {
compactionPeriod = profile.CompactPeriod
}
RunCompactLoop(ctx, c, compactionPeriod, finish)
mux.Lock()
reports = append(reports, c.Report())
mux.Unlock()
Expand Down Expand Up @@ -176,13 +180,19 @@ type Profile struct {
MaxNonUniqueRequestConcurrency int
ClientCount int
ForbidCompaction bool
CompactPeriod time.Duration
}

// WithoutCompaction returns a copy of the profile with ForbidCompaction set
// to true. The receiver is taken by value, so the caller's profile is left
// unchanged.
func (p Profile) WithoutCompaction() Profile {
	noCompaction := p
	noCompaction.ForbidCompaction = true
	return noCompaction
}

// WithCompactionPeriod returns a copy of the profile whose CompactPeriod is
// set to cp. The receiver is taken by value, so the caller's profile is left
// unchanged. A zero CompactPeriod makes SimulateTraffic fall back to
// DefaultCompactionPeriod.
func (p Profile) WithCompactionPeriod(cp time.Duration) Profile {
	tuned := p
	tuned.CompactPeriod = cp
	return tuned
}

type Traffic interface {
Run(ctx context.Context, c *client.RecordingClient, qpsLimiter *rate.Limiter, ids identity.Provider, lm identity.LeaseIDStorage, nonUniqueWriteLimiter ConcurrencyLimiter, finish <-chan struct{})
ExpectUniqueRevision() bool
Expand Down

0 comments on commit 7590a7e

Please sign in to comment.