|
| 1 | +// Copyright 2021 Datafuse Labs |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +use std::time::Duration; |
| 16 | + |
| 17 | +use databend_common_meta_kvapi::kvapi::KVApi; |
| 18 | +use databend_common_meta_types::raft_types::new_log_id; |
| 19 | +use databend_common_meta_types::Cmd; |
| 20 | +use databend_common_meta_types::LogEntry; |
| 21 | +use databend_common_meta_types::SeqV; |
| 22 | +use databend_common_meta_types::TxnOp; |
| 23 | +use databend_common_meta_types::TxnRequest; |
| 24 | +use databend_meta::meta_service::MetaNode; |
| 25 | +use log::info; |
| 26 | +use maplit::btreeset; |
| 27 | +use test_harness::test; |
| 28 | +use tokio::time::sleep; |
| 29 | + |
| 30 | +use crate::testing::meta_service_test_harness; |
| 31 | +use crate::tests::meta_node::start_meta_node_cluster; |
| 32 | +use crate::tests::meta_node::timeout; |
| 33 | + |
| 34 | +/// Tests timestamp-based expiration consistency across snapshot boundaries. |
| 35 | +/// |
| 36 | +/// **Scenario:** |
| 37 | +/// - Log-1 has timestamp T+180s, included in snapshot |
| 38 | +/// - Log-2 has timestamp T+60s with expiration T+120s, not in snapshot |
| 39 | +/// |
| 40 | +/// **Issue:** After restart, Log-2 uses current time instead of last-seen time (from Log-1) |
| 41 | +/// for expiration checks, potentially causing state inconsistencies between nodes. |
| 42 | +/// |
| 43 | +/// This bug raises in real world if: |
| 44 | +/// - When meta-service restarts, there is one log(`A`) to re-apply that has smaller `expire_at`(`t1`) |
| 45 | +/// than the biggest timestamp(`LogEntry.time_ms`) in the snapshot, |
| 46 | +/// - and all the log entries before `A` have smaller timestamp(`LogEntry.time_ms`) than `t1`. |
| 47 | +#[test(harness = meta_service_test_harness)] |
| 48 | +#[fastrace::trace] |
| 49 | +async fn test_meta_node_log_time_revert_cross_snapshot_boundary() -> anyhow::Result<()> { |
| 50 | + let now_ms = SeqV::<()>::now_ms(); |
| 51 | + |
| 52 | + // Log with later timestamp (T+180s) - will be included in snapshot |
| 53 | + let log_later = LogEntry { |
| 54 | + txid: None, |
| 55 | + time_ms: Some(now_ms + 180_000), |
| 56 | + cmd: Cmd::Transaction(TxnRequest::new(vec![], vec![TxnOp::put( |
| 57 | + "k1", |
| 58 | + b"v1".to_vec(), |
| 59 | + )])), |
| 60 | + }; |
| 61 | + |
| 62 | + // Log with earlier timestamp (T+60s) but expires at T+120s - not in snapshot |
| 63 | + let log_earlier = LogEntry { |
| 64 | + txid: None, |
| 65 | + time_ms: Some(now_ms + 60_000), |
| 66 | + cmd: Cmd::Transaction(TxnRequest::new(vec![], vec![TxnOp::put( |
| 67 | + "k1", |
| 68 | + b"v2".to_vec(), |
| 69 | + ) |
| 70 | + .with_expires_at_ms(Some(now_ms + 120_000))])), |
| 71 | + }; |
| 72 | + |
| 73 | + let result_with_restart = write_two_logs(log_later.clone(), log_earlier.clone(), true).await?; |
| 74 | + let result_without_restart = write_two_logs(log_later, log_earlier, false).await?; |
| 75 | + |
| 76 | + assert_eq!( |
| 77 | + result_with_restart.is_some(), |
| 78 | + result_without_restart.is_some() |
| 79 | + ); |
| 80 | + |
| 81 | + Ok(()) |
| 82 | +} |
| 83 | + |
| 84 | +/// Writes two logs with optional snapshot and restart between them. |
| 85 | +/// |
| 86 | +/// Tests how state machine behavior differs when last-seen time context is lost |
| 87 | +/// during restart. Returns the final value of key "k1". |
| 88 | +async fn write_two_logs( |
| 89 | + first_log: LogEntry, |
| 90 | + second_log: LogEntry, |
| 91 | + restart: bool, |
| 92 | +) -> anyhow::Result<Option<SeqV>> { |
| 93 | + info!("Testing log sequence with restart={}", restart); |
| 94 | + |
| 95 | + let (mut log_index, mut test_contexts) = |
| 96 | + start_meta_node_cluster(btreeset![0], btreeset![]).await?; |
| 97 | + |
| 98 | + let mut tc0 = test_contexts.remove(0); |
| 99 | + let mut meta_node = tc0.meta_node.take().unwrap(); |
| 100 | + |
| 101 | + // Apply first log |
| 102 | + meta_node.raft.client_write(first_log).await?; |
| 103 | + log_index += 1; |
| 104 | + |
| 105 | + // Restart node with snapshot if requested |
| 106 | + if restart { |
| 107 | + info!("Taking snapshot at log_index={}", log_index); |
| 108 | + meta_node.raft.trigger().snapshot().await?; |
| 109 | + meta_node |
| 110 | + .raft |
| 111 | + .wait(timeout()) |
| 112 | + .snapshot(new_log_id(1, 0, log_index), "purged") |
| 113 | + .await?; |
| 114 | + |
| 115 | + info!("Restarting meta-node"); |
| 116 | + meta_node.stop().await?; |
| 117 | + drop(meta_node); |
| 118 | + |
| 119 | + sleep(Duration::from_secs(2)).await; |
| 120 | + |
| 121 | + meta_node = MetaNode::open(&tc0.config.raft_config).await?; |
| 122 | + } |
| 123 | + |
| 124 | + // Apply second log |
| 125 | + meta_node.raft.client_write(second_log).await?; |
| 126 | + |
| 127 | + // Check final state |
| 128 | + let result = meta_node |
| 129 | + .raft_store |
| 130 | + .state_machine |
| 131 | + .read() |
| 132 | + .await |
| 133 | + .kv_api() |
| 134 | + .get_kv("k1") |
| 135 | + .await?; |
| 136 | + |
| 137 | + info!("Final state for k1 (restart={}): {:?}", restart, result); |
| 138 | + |
| 139 | + Ok(result) |
| 140 | +} |
0 commit comments