Skip to content

Commit 8d63e81

Browse files
authored
Merge pull request #583 from dora-rs/fix-575
Don't wait for non-started dynamic nodes on stop
2 parents 01c4047 + 508311e commit 8d63e81

File tree

2 files changed

+56
-4
lines changed

2 files changed

+56
-4
lines changed

binaries/daemon/src/lib.rs

+37-3
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,9 @@ impl Daemon {
325325
}
326326
Event::CtrlC => {
327327
for dataflow in self.running.values_mut() {
328-
dataflow.stop_all(&self.clock, None).await;
328+
dataflow
329+
.stop_all(&mut self.coordinator_connection, &self.clock, None)
330+
.await?;
329331
}
330332
}
331333
}
@@ -496,7 +498,13 @@ impl Daemon {
496498
.send(Some(reply))
497499
.map_err(|_| error!("could not send stop reply from daemon to coordinator"));
498500

499-
dataflow.stop_all(&self.clock, grace_duration).await;
501+
dataflow
502+
.stop_all(
503+
&mut self.coordinator_connection,
504+
&self.clock,
505+
grace_duration,
506+
)
507+
.await?;
500508
RunStatus::Continue
501509
}
502510
DaemonCoordinatorEvent::Destroy => {
@@ -640,6 +648,10 @@ impl Daemon {
640648
if local {
641649
dataflow.pending_nodes.insert(node.id.clone());
642650

651+
if node.kind.dynamic() {
652+
dataflow.dynamic_nodes.insert(node.id.clone());
653+
}
654+
643655
let node_id = node.id.clone();
644656
let node_stderr_most_recent = dataflow
645657
.node_stderr_most_recent
@@ -1464,6 +1476,12 @@ pub struct RunningDataflow {
14641476
open_inputs: BTreeMap<NodeId, BTreeSet<DataId>>,
14651477
running_nodes: BTreeMap<NodeId, RunningNode>,
14661478

1479+
/// List of all dynamic node IDs.
1480+
///
1481+
/// We want to treat dynamic nodes differently in some cases, so we need
1482+
/// to know which nodes are dynamic.
1483+
dynamic_nodes: BTreeSet<NodeId>,
1484+
14671485
open_external_mappings: HashMap<OutputId, BTreeMap<String, BTreeSet<InputId>>>,
14681486

14691487
pending_drop_tokens: HashMap<DropToken, DropTokenInformation>,
@@ -1495,6 +1513,7 @@ impl RunningDataflow {
14951513
timers: BTreeMap::new(),
14961514
open_inputs: BTreeMap::new(),
14971515
running_nodes: BTreeMap::new(),
1516+
dynamic_nodes: BTreeSet::new(),
14981517
open_external_mappings: HashMap::new(),
14991518
pending_drop_tokens: HashMap::new(),
15001519
_timer_handles: Vec::new(),
@@ -1559,7 +1578,21 @@ impl RunningDataflow {
15591578
Ok(())
15601579
}
15611580

1562-
async fn stop_all(&mut self, clock: &HLC, grace_duration: Option<Duration>) {
1581+
async fn stop_all(
1582+
&mut self,
1583+
coordinator_connection: &mut Option<TcpStream>,
1584+
clock: &HLC,
1585+
grace_duration: Option<Duration>,
1586+
) -> eyre::Result<()> {
1587+
self.pending_nodes
1588+
.handle_dataflow_stop(
1589+
coordinator_connection,
1590+
clock,
1591+
&mut self.cascading_error_causes,
1592+
&self.dynamic_nodes,
1593+
)
1594+
.await?;
1595+
15631596
for (_node_id, channel) in self.subscribe_channels.drain() {
15641597
let _ = send_with_timestamp(&channel, daemon_messages::NodeEvent::Stop, clock);
15651598
}
@@ -1586,6 +1619,7 @@ impl RunningDataflow {
15861619
}
15871620
});
15881621
self.stop_sent = true;
1622+
Ok(())
15891623
}
15901624

15911625
fn open_inputs(&self, node_id: &NodeId) -> &BTreeSet<DataId> {

binaries/daemon/src/pending.rs

+19-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::collections::{HashMap, HashSet};
1+
use std::collections::{BTreeSet, HashMap, HashSet};
22

33
use dora_core::{
44
config::NodeId,
@@ -97,6 +97,24 @@ impl PendingNodes {
9797
Ok(log)
9898
}
9999

100+
/// Removes the given dynamic nodes from `self.local_nodes` when the dataflow
/// is being stopped.
///
/// For every ID in `dynamic_nodes` that is still present in
/// `self.local_nodes`, the ID is removed and `update_dataflow_status` is
/// invoked with the supplied coordinator connection, clock, and
/// cascading-error state. IDs that are not present are ignored.
///
/// # Errors
///
/// Returns early with the first error reported by `update_dataflow_status`.
///
/// # Returns
///
/// On success this always yields an empty `Vec`; no log messages are
/// collected by this function itself.
pub async fn handle_dataflow_stop(
101+
&mut self,
102+
coordinator_connection: &mut Option<TcpStream>,
103+
clock: &HLC,
104+
cascading_errors: &mut CascadingErrorCauses,
105+
dynamic_nodes: &BTreeSet<NodeId>,
106+
) -> eyre::Result<Vec<LogMessage>> {
107+
// remove all local dynamic nodes that are not yet started
108+
for node_id in dynamic_nodes {
109+
// `remove` doubles as the membership test: the status update only runs
// for nodes that were actually still tracked in `local_nodes`.
if self.local_nodes.remove(node_id) {
110+
// NOTE(review): only the error is propagated; whatever value
// `update_dataflow_status` yields on success is discarded here.
// If it returns log messages, confirm that dropping them is intended.
self.update_dataflow_status(coordinator_connection, clock, cascading_errors)
111+
.await?;
112+
}
113+
}
114+
115+
Ok(Vec::new())
116+
}
117+
100118
pub async fn handle_external_all_nodes_ready(
101119
&mut self,
102120
exited_before_subscribe: Vec<NodeId>,

0 commit comments

Comments
 (0)