mina_node_testing/cluster/
mod.rs

1//! Cluster Management for Multi-Node Testing
2//!
3//! This module provides the core infrastructure for managing clusters of
4//! Mina nodes during testing scenarios. It supports both Rust and OCaml
5//! node implementations, enabling cross-implementation testing and complex
6//! multi-node scenarios.
7//!
8//! # Key Components
9//!
10//! - [`Cluster`] - Main cluster coordinator managing node lifecycle
11//! - Node addition methods for different node types
12//! - Port allocation and resource management
13//! - Scenario execution and state tracking
14//! - Network debugger integration
15//!
16//! # Node Addition Methods
17//!
18//! - [`Cluster::add_rust_node`] - Add Rust implementation nodes
19//! - [`Cluster::add_ocaml_node`] - Add OCaml implementation nodes
20//!
21//! # Example
22//!
23//! ```rust,no_run
24//! let mut cluster = Cluster::new(ClusterConfig::default());
25//!
26//! // Add Rust node with custom configuration
27//! let rust_node = cluster.add_rust_node(RustNodeTestingConfig::default());
28//!
29//! // Add OCaml node for cross-implementation testing
30//! let ocaml_node = cluster.add_ocaml_node(OcamlNodeTestingConfig::default());
31//! ```
32
33mod config;
34pub use config::{ClusterConfig, ProofKind};
35
36mod p2p_task_spawner;
37
38mod node_id;
39use mina_core::channels::Aborter;
40pub use node_id::{ClusterNodeId, ClusterOcamlNodeId};
41
42pub mod runner;
43
44use std::{
45    collections::{BTreeMap, VecDeque},
46    io::Read,
47    path::{Path, PathBuf},
48    sync::{Arc, Mutex as StdMutex},
49    time::Duration,
50};
51
52use libp2p::futures::{stream::FuturesUnordered, StreamExt};
53
54use ledger::proofs::provers::BlockProver;
55use mina_node_invariants::{InvariantResult, Invariants};
56use mina_node_native::{http_server, NodeServiceBuilder};
57use node::{
58    account::{AccountPublicKey, AccountSecretKey},
59    core::{
60        consensus::ConsensusConstants,
61        constants::constraint_constants,
62        invariants::InvariantsState,
63        log::{info, system_time, warn},
64        requests::RpcId,
65        thread,
66    },
67    event_source::Event,
68    p2p::{
69        channels::ChannelId, identity::SecretKey as P2pSecretKey, P2pConnectionEvent, P2pEvent,
70        P2pLimits, P2pMeshsubConfig, PeerId,
71    },
72    service::{Recorder, Service},
73    snark::{get_srs, BlockVerifier, TransactionVerifier, VerifierSRS},
74    BuildEnv, Config, GlobalConfig, LedgerConfig, P2pConfig, SnarkConfig, State,
75    TransitionFrontierConfig,
76};
77use serde::{de::DeserializeOwned, Serialize};
78use temp_dir::TempDir;
79
80use crate::{
81    network_debugger::Debugger,
82    node::{
83        DaemonJson, Node, NodeTestingConfig, NonDeterministicEvent, OcamlNode, OcamlNodeConfig,
84        OcamlNodeTestingConfig, OcamlStep, RustNodeTestingConfig, TestPeerId,
85    },
86    scenario::{ListenerNode, Scenario, ScenarioId, ScenarioStep},
87    service::{NodeTestingService, PendingEventId},
88};
89
/// Resolves `path` against the per-user Mina cache directory,
/// `$HOME/.cache/mina`.
///
/// Returns `None` when the `HOME` environment variable is not set. Because
/// the pieces are combined with [`PathBuf::push`], an absolute `path`
/// replaces the cache prefix instead of being appended to it.
#[allow(dead_code)]
fn mina_path<P: AsRef<Path>>(path: P) -> Option<PathBuf> {
    let home = std::env::var_os("HOME")?;
    let mut resolved = PathBuf::from(home);
    resolved.push(".cache/mina");
    resolved.push(path);
    Some(resolved)
}
94
95#[allow(dead_code)]
96fn read_index<T: DeserializeOwned>(name: &str) -> Option<T> {
97    mina_path(name)
98        .and_then(|path| {
99            if !path.exists() {
100                return None;
101            }
102            match std::fs::File::open(path) {
103                Ok(v) => Some(v),
104                Err(e) => {
105                    warn!(system_time(); "cannot find verifier index for {name}: {e}");
106                    None
107                }
108            }
109        })
110        .and_then(|mut file| {
111            let mut buf = Vec::new();
112            file.read_to_end(&mut buf).ok().and(Some(buf))
113        })
114        .and_then(|bytes| match postcard::from_bytes(&bytes) {
115            Ok(v) => Some(v),
116            Err(e) => {
117                warn!(system_time(); "cannot read verifier index for {name}: {e}");
118                None
119            }
120        })
121}
122
123#[allow(dead_code)]
124fn write_index<T: Serialize>(name: &str, index: &T) -> Option<()> {
125    mina_path(name)
126        .and_then(|path| {
127            let Some(parent) = path.parent() else {
128                warn!(system_time(); "cannot get parent for {path:?}");
129                return None;
130            };
131            if let Err(e) = std::fs::create_dir_all(parent) {
132                warn!(system_time(); "cannot create parent dir for {parent:?}: {e}");
133                return None;
134            }
135            match std::fs::File::create(&path) {
136                Ok(v) => Some(v),
137                Err(e) => {
138                    warn!(system_time(); "cannot create file {path:?}: {e}");
139                    None
140                }
141            }
142        })
143        .and_then(|file| match postcard::to_io(index, file) {
144            Ok(_) => Some(()),
145            Err(e) => {
146                warn!(system_time(); "cannot write verifier index for {name}: {e}");
147                None
148            }
149        })
150}
151
152lazy_static::lazy_static! {
153    static ref VERIFIER_SRS: Arc<VerifierSRS> = get_srs();
154}
155
156/// Manages a cluster of Mina nodes for testing scenarios.
157///
158/// The `Cluster` struct coordinates multiple node instances, handling
159/// resource allocation, configuration, and lifecycle management. It supports
160/// both Rust and OCaml node implementations for comprehensive testing.
161///
162/// # Default Behaviors
163///
164/// - **Port allocation**: Automatically assigns available ports from the
165///   configured range, testing availability before assignment
166/// - **Keypair management**: Uses deterministic keypairs for Rust nodes and
167///   rotates through predefined keypairs for OCaml nodes
168/// - **Resource isolation**: Each node gets isolated temporary directories
169/// - **Verifier indices**: Shared verifier SRS and indices across all nodes
170/// - **Network debugging**: Optional debugger integration for CI environments
171///
172/// # Node Addition
173///
174/// The cluster provides specialized methods for adding different node types:
175/// - Rust nodes via [`add_rust_node`](Self::add_rust_node)
176/// - OCaml nodes via [`add_ocaml_node`](Self::add_ocaml_node)
177pub struct Cluster {
178    /// Cluster-wide configuration settings
179    pub config: ClusterConfig,
180    /// Current scenario execution state
181    scenario: ClusterScenarioRun,
182    /// Iterator over available ports for node allocation
183    available_ports: Box<dyn Iterator<Item = u16> + Send>,
184    /// Registry of account secret keys for deterministic testing
185    account_sec_keys: BTreeMap<AccountPublicKey, AccountSecretKey>,
186    /// Collection of active Rust nodes
187    nodes: Vec<Node>,
188    /// Collection of active OCaml nodes (Option for lifecycle management)
189    ocaml_nodes: Vec<Option<OcamlNode>>,
190    /// Genesis timestamp for deterministic time progression
191    initial_time: Option<redux::Timestamp>,
192
193    /// Counter for generating unique RPC request IDs
194    rpc_counter: usize,
195    /// Index for rotating OCaml LibP2P keypairs
196    ocaml_libp2p_keypair_i: usize,
197
198    /// Shared verifier SRS for proof verification
199    verifier_srs: Arc<VerifierSRS>,
200    /// Block verifier index for consensus validation
201    block_verifier_index: BlockVerifier,
202    /// Transaction verifier index for transaction validation
203    work_verifier_index: TransactionVerifier,
204
205    /// Optional network traffic debugger
206    debugger: Option<Debugger>,
207    /// Shared state for invariant checking across nodes
208    invariants_state: Arc<StdMutex<InvariantsState>>,
209}
210
211/// Tracks the execution state of scenario chains within a cluster.
212///
213/// Manages the progression through scenario steps and maintains history
214/// of completed scenarios for debugging and analysis.
215#[derive(Serialize)]
216pub struct ClusterScenarioRun {
217    /// Queue of scenarios to be executed (supports scenario inheritance)
218    chain: VecDeque<Scenario>,
219    /// History of completed scenarios
220    finished: Vec<Scenario>,
221    /// Current step index within the active scenario
222    cur_step: usize,
223}
224
225impl Cluster {
226    pub fn new(config: ClusterConfig) -> Self {
227        let available_ports = config
228            .port_range()
229            .filter(|port| std::net::TcpListener::bind(("0.0.0.0", *port)).is_ok());
230        let debugger = if config.is_use_debugger() {
231            Some(Debugger::drone_ci())
232        } else {
233            None
234        };
235        Self {
236            config,
237            scenario: ClusterScenarioRun {
238                chain: Default::default(),
239                finished: Default::default(),
240                cur_step: 0,
241            },
242            available_ports: Box::new(available_ports),
243            account_sec_keys: Default::default(),
244            nodes: Vec::new(),
245            ocaml_nodes: Vec::new(),
246            initial_time: None,
247
248            rpc_counter: 0,
249            ocaml_libp2p_keypair_i: 0,
250
251            verifier_srs: VERIFIER_SRS.clone(),
252            block_verifier_index: BlockVerifier::make(),
253            work_verifier_index: TransactionVerifier::make(),
254
255            debugger,
256            invariants_state: Arc::new(StdMutex::new(Default::default())),
257        }
258    }
259
260    pub fn available_port(&mut self) -> Option<u16> {
261        self.available_ports.next()
262    }
263
264    pub fn add_account_sec_key(&mut self, sec_key: AccountSecretKey) {
265        self.account_sec_keys.insert(sec_key.public_key(), sec_key);
266    }
267
268    pub fn get_account_sec_key(&self, pub_key: &AccountPublicKey) -> Option<&AccountSecretKey> {
269        self.account_sec_keys.get(pub_key).or_else(|| {
270            AccountSecretKey::deterministic_iter().find(|sec_key| &sec_key.public_key() == pub_key)
271        })
272    }
273
274    pub fn set_initial_time(&mut self, initial_time: redux::Timestamp) {
275        self.initial_time = Some(initial_time)
276    }
277
278    pub fn get_initial_time(&self) -> Option<redux::Timestamp> {
279        self.initial_time
280    }
281
282    /// Add a new Rust implementation node to the cluster.
283    ///
284    /// Creates and configures a Rust Mina node with the specified testing
285    /// configuration. This method handles all aspects of node initialization
286    /// including port allocation, key generation, service setup, and state
287    /// initialization.
288    ///
289    /// # Default Behaviors
290    ///
291    /// - **Port allocation**: HTTP and LibP2P ports automatically assigned
292    ///   from available port range
293    /// - **Peer identity**: Deterministic LibP2P keypair based on node index
294    /// - **Work directory**: Isolated temporary directory per node
295    /// - **Invariants**: Automatic invariant checking enabled
296    /// - **HTTP server**: Spawned on separate thread for API access
297    /// - **Proof verification**: Shared verifier indices and SRS
298    ///
299    /// # Configuration Options
300    ///
301    /// - `peer_id`: Deterministic or custom LibP2P identity
302    /// - `libp2p_port`: Custom P2P port (auto-assigned if None)
303    /// - `initial_peers`: Peer connection targets (supports node references)
304    /// - `block_producer`: Optional block production configuration
305    /// - `genesis`: Genesis ledger and protocol constants
306    /// - `snark_worker`: SNARK work generation settings
307    ///
308    /// # Returns
309    ///
310    /// Returns a [`ClusterNodeId`] that can be used to reference this node
311    /// in scenarios and for inter-node connections.
312    ///
313    /// # Panics
314    ///
315    /// Panics if:
316    /// - No available ports in the configured range
317    /// - Node service initialization fails
318    /// - Invalid genesis configuration
319    pub fn add_rust_node(&mut self, testing_config: RustNodeTestingConfig) -> ClusterNodeId {
320        let rng_seed = [0; 32];
321        let node_config = testing_config.clone();
322        let node_id = ClusterNodeId::new_unchecked(self.nodes.len());
323
324        info!(
325            system_time();
326            "Adding Rust node {} with config: max_peers={}, snark_worker={:?}, \
327             block_producer={}",
328            node_id.index(),
329            testing_config.max_peers,
330            testing_config.snark_worker,
331            testing_config.block_producer.is_some()
332        );
333
334        let work_dir = TempDir::new().unwrap();
335        let shutdown_initiator = Aborter::default();
336        let shutdown_listener = shutdown_initiator.aborted();
337        let p2p_sec_key = match testing_config.peer_id {
338            TestPeerId::Derived => {
339                info!(system_time(); "Using deterministic peer ID for node {}", node_id.index());
340                P2pSecretKey::deterministic(node_id.index())
341            }
342            TestPeerId::Bytes(bytes) => {
343                info!(system_time(); "Using custom peer ID for node {}", node_id.index());
344                P2pSecretKey::from_bytes(bytes)
345            }
346        };
347
348        let http_port = self
349            .available_ports
350            .next()
351            .ok_or_else(|| {
352                anyhow::anyhow!(
353                    "couldn't find available port in port range: {:?}",
354                    self.config.port_range()
355                )
356            })
357            .unwrap();
358        let libp2p_port = testing_config.libp2p_port.unwrap_or_else(|| {
359            self.available_ports
360                .next()
361                .ok_or_else(|| {
362                    anyhow::anyhow!(
363                        "couldn't find available port in port range: {:?}",
364                        self.config.port_range()
365                    )
366                })
367                .unwrap()
368        });
369
370        info!(
371            system_time();
372            "Assigned ports for Rust node {}: HTTP={}, LibP2P={}",
373            node_id.index(),
374            http_port,
375            libp2p_port
376        );
377
378        let (block_producer_sec_key, block_producer_config) = testing_config
379            .block_producer
380            .map(|v| {
381                info!(
382                    system_time();
383                    "Configuring block producer for Rust node {} with public key: {}",
384                    node_id.index(),
385                    v.sec_key.public_key()
386                );
387                (v.sec_key, v.config)
388            })
389            .unzip();
390
391        let initial_peers: Vec<_> = testing_config
392            .initial_peers
393            .into_iter()
394            .map(|node| {
395                let addr = match &node {
396                    ListenerNode::Rust(id) => {
397                        info!(system_time(); "Adding Rust peer {} as initial peer", id.index());
398                        self.node(*id).unwrap().dial_addr()
399                    }
400                    ListenerNode::Ocaml(id) => {
401                        info!(system_time(); "Adding OCaml peer {} as initial peer", id.index());
402                        self.ocaml_node(*id).unwrap().dial_addr()
403                    }
404                    ListenerNode::Custom(addr) => {
405                        info!(system_time(); "Adding custom peer: {:?}", addr);
406                        addr.clone()
407                    }
408                };
409                addr
410            })
411            .collect();
412
413        if !initial_peers.is_empty() {
414            info!(
415                system_time();
416                "Rust node {} configured with {} initial peers",
417                node_id.index(),
418                initial_peers.len()
419            );
420        } else {
421            info!(system_time(); "Rust node {} configured as seed node (no initial peers)", node_id.index());
422        }
423
424        let protocol_constants = testing_config
425            .genesis
426            .protocol_constants()
427            .expect("wrong protocol constants");
428        let consensus_consts =
429            ConsensusConstants::create(constraint_constants(), &protocol_constants);
430
431        let config = Config {
432            ledger: LedgerConfig {},
433            snark: SnarkConfig {
434                // TODO(binier): use cache
435                block_verifier_index: self.block_verifier_index.clone(),
436                block_verifier_srs: self.verifier_srs.clone(),
437                work_verifier_index: self.work_verifier_index.clone(),
438                work_verifier_srs: self.verifier_srs.clone(),
439            },
440            global: GlobalConfig {
441                build: BuildEnv::get().into(),
442                snarker: testing_config.snark_worker,
443                consensus_constants: consensus_consts.clone(),
444                client_port: Some(http_port),
445                testing_run: true,
446            },
447            p2p: P2pConfig {
448                libp2p_port: Some(libp2p_port),
449                listen_port: Some(http_port),
450                identity_pub_key: p2p_sec_key.public_key(),
451                initial_peers,
452                external_addrs: vec![],
453                enabled_channels: ChannelId::iter_all().collect(),
454                peer_discovery: testing_config.peer_discovery,
455                timeouts: testing_config.timeouts,
456                limits: P2pLimits::default().with_max_peers(Some(testing_config.max_peers)),
457                meshsub: P2pMeshsubConfig {
458                    initial_time: testing_config
459                        .initial_time
460                        .checked_sub(redux::Timestamp::ZERO)
461                        .unwrap_or_default(),
462                    ..Default::default()
463                },
464            },
465            transition_frontier: TransitionFrontierConfig::new(testing_config.genesis),
466            block_producer: block_producer_config,
467            archive: None,
468            tx_pool: ledger::transaction_pool::Config {
469                trust_system: (),
470                pool_max_size: 3000,
471                slot_tx_end: None,
472            },
473        };
474
475        let mut service_builder = NodeServiceBuilder::new(rng_seed);
476        service_builder
477            .ledger_init()
478            .p2p_init_with_custom_task_spawner(
479                p2p_sec_key.clone(),
480                p2p_task_spawner::P2pTaskSpawner::new(shutdown_listener.clone()),
481            )
482            .gather_stats()
483            .record(match testing_config.recorder {
484                crate::node::Recorder::None => Recorder::None,
485                crate::node::Recorder::StateWithInputActions => {
486                    Recorder::only_input_actions(work_dir.path())
487                }
488            });
489
490        if let Some(keypair) = block_producer_sec_key {
491            info!(system_time(); "Initializing block producer for Rust node {}", node_id.index());
492            let provers = BlockProver::make(None, None);
493            service_builder.block_producer_init(keypair, Some(provers));
494        }
495
496        let real_service = service_builder
497            .build()
498            .map_err(|err| anyhow::anyhow!("node service build failed! error: {err}"))
499            .unwrap();
500
501        // spawn http-server
502        let runtime = tokio::runtime::Builder::new_current_thread()
503            .enable_all()
504            .build()
505            .unwrap();
506        let shutdown = shutdown_listener.clone();
507        let rpc_sender = real_service.rpc_sender();
508        thread::Builder::new()
509            .name("mina_http_server".to_owned())
510            .spawn(move || {
511                let local_set = tokio::task::LocalSet::new();
512                let task = async {
513                    tokio::select! {
514                        _ = shutdown.wait() => {}
515                        _ = http_server::run(http_port, rpc_sender) => {}
516                    }
517                };
518                local_set.block_on(&runtime, task);
519            })
520            .unwrap();
521
522        let invariants_state = self.invariants_state.clone();
523        let mut service =
524            NodeTestingService::new(real_service, node_id, invariants_state, shutdown_initiator);
525
526        service.set_proof_kind(self.config.proof_kind());
527        if self.config.all_rust_to_rust_use_webrtc() {
528            service.set_rust_to_rust_use_webrtc();
529        }
530        if self.config.is_replay() {
531            service.set_replay();
532        }
533
534        let state = node::State::new(config, &consensus_consts, testing_config.initial_time);
535        fn effects(store: &mut node::Store<NodeTestingService>, action: node::ActionWithMeta) {
536            // if action.action().kind().to_string().starts_with("BlockProducer") {
537            //     dbg!(action.action());
538            // }
539
540            store.service.dyn_effects(store.state.get(), &action);
541            let peer_id = store.state().p2p.my_id();
542            mina_core::log::trace!(action.time(); "{peer_id}: {:?}", action.action().kind());
543
544            for (invariant, res) in Invariants::check_all(store, &action) {
545                // TODO(binier): record instead of panicing.
546                match res {
547                    InvariantResult::Ignored(reason) => {
548                        unreachable!("No invariant should be ignored! ignore reason: {reason:?}");
549                    }
550                    InvariantResult::Violation(violation) => {
551                        panic!(
552                            "Invariant({}) violated! violation: {violation}",
553                            invariant.to_str()
554                        );
555                    }
556                    InvariantResult::Updated => {}
557                    InvariantResult::Ok => {}
558                }
559            }
560
561            node::effects(store, action)
562        }
563        let mut store = node::Store::new(
564            node::reducer,
565            effects,
566            service,
567            testing_config.initial_time.into(),
568            state,
569        );
570        // record initial state.
571        {
572            store
573                .service
574                .recorder()
575                .initial_state(rng_seed, p2p_sec_key, store.state.get());
576        }
577
578        let node = Node::new(work_dir, node_config, store);
579
580        info!(
581            system_time();
582            "Successfully created Rust node {} at ports HTTP={}, LibP2P={}",
583            node_id.index(),
584            http_port,
585            libp2p_port
586        );
587
588        self.nodes.push(node);
589        node_id
590    }
591
592    /// Add a new OCaml implementation node to the cluster.
593    ///
594    /// Creates and spawns an OCaml Mina daemon process with the specified
595    /// configuration. This method handles process spawning, port allocation,
596    /// directory setup, and daemon configuration.
597    ///
598    /// # Default Behaviors
599    ///
600    /// - **Executable selection**: Automatically detects local binary or
601    ///   falls back to default Docker image
602    /// - **Port allocation**: LibP2P, GraphQL, and client ports automatically
603    ///   assigned from available range
604    /// - **Keypair rotation**: Uses predefined LibP2P keypairs, rotating
605    ///   through the set for each new node
606    /// - **Process management**: Spawns daemon with proper environment
607    ///   variables and argument configuration
608    /// - **Logging**: Stdout/stderr forwarded with port-based prefixes
609    /// - **Docker support**: Automatic container management when using Docker
610    ///
611    /// # Configuration Options
612    ///
613    /// - `initial_peers`: List of peer connection targets
614    /// - `daemon_json`: Genesis configuration (file path or in-memory JSON)
615    /// - `block_producer`: Optional block production key
616    ///
617    /// # Docker vs Local Execution
618    ///
619    /// The method automatically determines execution mode:
620    /// 1. Attempts to use locally installed `mina` binary
621    /// 2. Falls back to Docker with default image if binary not found
622    /// 3. Custom Docker images supported via configuration
623    ///
624    /// # Returns
625    ///
626    /// Returns a [`ClusterOcamlNodeId`] for referencing this OCaml node
627    /// in scenarios and peer connections.
628    ///
629    /// # Panics
630    ///
631    /// Panics if:
632    /// - No available ports in the configured range
633    /// - Temporary directory creation fails
634    /// - OCaml daemon process spawn fails
635    pub fn add_ocaml_node(&mut self, testing_config: OcamlNodeTestingConfig) -> ClusterOcamlNodeId {
636        let node_i = self.ocaml_nodes.len();
637
638        info!(
639            system_time();
640            "Adding OCaml node {} with {} initial peers, block_producer={}",
641            node_i,
642            testing_config.initial_peers.len(),
643            testing_config.block_producer.is_some()
644        );
645
646        let executable = self.config.ocaml_node_executable();
647        let mut next_port = || {
648            self.available_ports.next().ok_or_else(|| {
649                anyhow::anyhow!(
650                    "couldn't find available port in port range: {:?}",
651                    self.config.port_range()
652                )
653            })
654        };
655
656        let temp_dir = temp_dir::TempDir::new().expect("failed to create tempdir");
657        let libp2p_port = next_port().unwrap();
658        let graphql_port = next_port().unwrap();
659        let client_port = next_port().unwrap();
660
661        info!(
662            system_time();
663            "Assigned ports for OCaml node {}: LibP2P={}, GraphQL={}, Client={}",
664            node_i,
665            libp2p_port,
666            graphql_port,
667            client_port
668        );
669
670        let node = OcamlNode::start(OcamlNodeConfig {
671            executable,
672            dir: temp_dir,
673            libp2p_keypair_i: self.ocaml_libp2p_keypair_i,
674            libp2p_port,
675            graphql_port,
676            client_port,
677            initial_peers: testing_config.initial_peers,
678            daemon_json: testing_config.daemon_json,
679            block_producer: testing_config.block_producer,
680        })
681        .expect("failed to start ocaml node");
682
683        info!(
684            system_time();
685            "Successfully started OCaml node {} with keypair index {}",
686            node_i,
687            self.ocaml_libp2p_keypair_i
688        );
689
690        self.ocaml_libp2p_keypair_i += 1;
691
692        self.ocaml_nodes.push(Some(node));
693        ClusterOcamlNodeId::new_unchecked(node_i)
694    }
695
696    pub async fn start(&mut self, scenario: Scenario) -> Result<(), anyhow::Error> {
697        let mut parent_id = scenario.info.parent_id.clone();
698        self.scenario.chain.push_back(scenario);
699
700        while let Some(ref id) = parent_id {
701            let scenario = Scenario::load(id).await?;
702            parent_id.clone_from(&scenario.info.parent_id);
703            self.scenario.chain.push_back(scenario);
704        }
705
706        let scenario = self.scenario.cur_scenario();
707
708        for config in scenario.info.nodes.clone() {
709            match config {
710                NodeTestingConfig::Rust(config) => {
711                    self.add_rust_node(config.clone());
712                }
713                NodeTestingConfig::Ocaml(config) => {
714                    self.add_ocaml_node(config.clone());
715                }
716            }
717        }
718
719        Ok(())
720    }
721
722    pub async fn reload_scenarios(&mut self) -> Result<(), anyhow::Error> {
723        for scenario in &mut self.scenario.chain {
724            scenario.reload().await?;
725        }
726        Ok(())
727    }
728
729    pub fn next_scenario_and_step(&self) -> Option<(&ScenarioId, usize)> {
730        self.scenario
731            .peek_i()
732            .map(|(scenario_i, step_i)| (&self.scenario.chain[scenario_i].info.id, step_i))
733    }
734
735    pub fn target_scenario(&self) -> Option<&ScenarioId> {
736        self.scenario.target_scenario().map(|v| &v.info.id)
737    }
738
739    pub fn nodes_iter(&self) -> impl Iterator<Item = (ClusterNodeId, &Node)> {
740        self.nodes
741            .iter()
742            .enumerate()
743            .map(|(i, node)| (ClusterNodeId::new_unchecked(i), node))
744    }
745
746    pub fn ocaml_nodes_iter(&self) -> impl Iterator<Item = (ClusterOcamlNodeId, &OcamlNode)> {
747        self.ocaml_nodes
748            .iter()
749            .enumerate()
750            .filter_map(|(i, node)| node.as_ref().map(|node| (i, node)))
751            .map(|(i, node)| (ClusterOcamlNodeId::new_unchecked(i), node))
752    }
753
754    pub fn node(&self, node_id: ClusterNodeId) -> Option<&Node> {
755        self.nodes.get(node_id.index())
756    }
757
758    pub fn node_by_peer_id(&self, peer_id: PeerId) -> Option<&Node> {
759        self.nodes_iter()
760            .find(|(_, node)| node.peer_id() == peer_id)
761            .map(|(_, node)| node)
762    }
763
764    pub fn node_mut(&mut self, node_id: ClusterNodeId) -> Option<&mut Node> {
765        self.nodes.get_mut(node_id.index())
766    }
767
768    pub fn ocaml_node(&self, node_id: ClusterOcamlNodeId) -> Option<&OcamlNode> {
769        self.ocaml_nodes
770            .get(node_id.index())
771            .map(|opt| opt.as_ref().expect("tried to access removed ocaml node"))
772    }
773
774    pub fn ocaml_node_by_peer_id(&self, peer_id: PeerId) -> Option<&OcamlNode> {
775        self.ocaml_nodes_iter()
776            .find(|(_, node)| node.peer_id() == peer_id)
777            .map(|(_, node)| node)
778    }
779
780    pub fn pending_events(
781        &mut self,
782        poll: bool,
783    ) -> impl Iterator<
784        Item = (
785            ClusterNodeId,
786            &State,
787            impl Iterator<Item = (PendingEventId, &Event)>,
788        ),
789    > {
790        self.nodes.iter_mut().enumerate().map(move |(i, node)| {
791            let node_id = ClusterNodeId::new_unchecked(i);
792            let (state, pending_events) = node.pending_events_with_state(poll);
793            (node_id, state, pending_events)
794        })
795    }
796
797    pub fn node_pending_events(
798        &mut self,
799        node_id: ClusterNodeId,
800        poll: bool,
801    ) -> Result<(&State, impl Iterator<Item = (PendingEventId, &Event)>), anyhow::Error> {
802        let node = self
803            .nodes
804            .get_mut(node_id.index())
805            .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
806        Ok(node.pending_events_with_state(poll))
807    }
808
809    pub async fn wait_for_pending_events(&mut self) {
810        let mut nodes = &mut self.nodes[..];
811        let mut futures = FuturesUnordered::new();
812
813        while let Some((node, nodes_rest)) = nodes.split_first_mut() {
814            nodes = nodes_rest;
815            futures.push(async { node.wait_for_next_pending_event().await.is_some() });
816        }
817
818        while let Some(has_event) = futures.next().await {
819            if has_event {
820                break;
821            }
822        }
823    }
824
825    pub async fn wait_for_pending_events_with_timeout(&mut self, timeout: Duration) -> bool {
826        let timeout = tokio::time::sleep(timeout);
827
828        tokio::select! {
829            _ = self.wait_for_pending_events() => true,
830            _ = timeout => false,
831        }
832    }
833
834    pub async fn wait_for_pending_event(
835        &mut self,
836        node_id: ClusterNodeId,
837        event_pattern: &str,
838    ) -> anyhow::Result<PendingEventId> {
839        let node = self
840            .nodes
841            .get_mut(node_id.index())
842            .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
843        let timeout = tokio::time::sleep(Duration::from_secs(300));
844        tokio::select! {
845            opt = node.wait_for_event(event_pattern) => opt.ok_or_else(|| anyhow::anyhow!("wait_for_event: None")),
846            _ = timeout => {
847                let pending_events = node.pending_events(false).map(|(_, event)| event.to_string()).collect::<Vec<_>>();
848                 Err(anyhow::anyhow!("waiting for event timed out! node {node_id:?}, event: \"{event_pattern}\"\n{pending_events:?}"))
849            }
850        }
851    }
852
853    pub async fn wait_for_event_and_dispatch(
854        &mut self,
855        node_id: ClusterNodeId,
856        event_pattern: &str,
857    ) -> anyhow::Result<bool> {
858        let event_id = self.wait_for_pending_event(node_id, event_pattern).await?;
859        let node = self.nodes.get_mut(node_id.index()).unwrap();
860        Ok(node.take_event_and_dispatch(event_id))
861    }
862
863    pub async fn add_steps_and_save(&mut self, steps: impl IntoIterator<Item = ScenarioStep>) {
864        let scenario = self.scenario.chain.back_mut().unwrap();
865        steps
866            .into_iter()
867            .for_each(|step| scenario.add_step(step).unwrap());
868        scenario.save().await.unwrap();
869    }
870
871    pub async fn exec_to_end(&mut self) -> Result<(), anyhow::Error> {
872        let mut i = 0;
873        let total = self.scenario.cur_scenario().steps.len();
874        loop {
875            info!(system_time(); "Executing step {}/{}", i + 1, total);
876            if !self.exec_next().await? {
877                break Ok(());
878            }
879            i += 1;
880        }
881    }
882
883    pub async fn exec_until(
884        &mut self,
885        target_scenario: ScenarioId,
886        step_i: Option<usize>,
887    ) -> Result<(), anyhow::Error> {
888        if self
889            .scenario
890            .finished
891            .iter()
892            .any(|v| v.info.id == target_scenario)
893        {
894            return Err(anyhow::anyhow!(
895                "cluster already finished '{target_scenario}' scenario"
896            ));
897        }
898
899        while self
900            .scenario
901            .peek()
902            .is_some_and(|(scenario, _)| scenario.info.id != target_scenario)
903        {
904            if !self.exec_next().await? {
905                break;
906            }
907        }
908
909        while self
910            .scenario
911            .peek()
912            .is_some_and(|(scenario, _)| scenario.info.id == target_scenario)
913        {
914            if let Some(step_i) = step_i {
915                if self.scenario.peek_i().unwrap().1 >= step_i {
916                    break;
917                }
918            }
919            if !self.exec_next().await? {
920                break;
921            }
922        }
923
924        Ok(())
925    }
926
927    pub async fn exec_next(&mut self) -> Result<bool, anyhow::Error> {
928        let (_scenario, step) = match self.scenario.peek() {
929            Some(v) => v,
930            None => return Ok(false),
931        };
932        let dispatched = self.exec_step(step.clone()).await?;
933
934        if dispatched {
935            self.scenario.advance();
936        }
937
938        Ok(dispatched)
939    }
940
941    pub async fn exec_step(&mut self, step: ScenarioStep) -> anyhow::Result<bool> {
942        Ok(match step {
943            ScenarioStep::Event { node_id, event } => {
944                return self.wait_for_event_and_dispatch(node_id, &event).await;
945            }
946            ScenarioStep::ManualEvent { node_id, event } => self
947                .nodes
948                .get_mut(node_id.index())
949                .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?
950                .dispatch_event(*event),
951            ScenarioStep::NonDeterministicEvent { node_id, event } => {
952                let event = match *event {
953                    NonDeterministicEvent::P2pConnectionClosed(peer_id) => {
954                        let node = self
955                            .nodes
956                            .get_mut(node_id.index())
957                            .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
958                        node.p2p_disconnect(peer_id);
959                        let event =
960                            Event::P2p(P2pEvent::Connection(P2pConnectionEvent::Closed(peer_id)));
961                        return self
962                            .wait_for_event_and_dispatch(node_id, &event.to_string())
963                            .await;
964                    }
965                    NonDeterministicEvent::P2pConnectionFinalized(peer_id, res) => {
966                        let node = self
967                            .nodes
968                            .get(node_id.index())
969                            .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
970                        let res_is_ok = res.is_ok();
971                        let event = Event::P2p(P2pEvent::Connection(
972                            P2pConnectionEvent::Finalized(peer_id, res),
973                        ));
974
975                        if res_is_ok {
976                            let is_peer_connected =
977                                node.state().p2p.get_ready_peer(&peer_id).is_some();
978                            if is_peer_connected {
979                                // we are already connected, so skip the extra event.
980                                return Ok(true);
981                            }
982                            eprintln!("non_deterministic_wait_for_event_and_dispatch({node_id:?}): {event}");
983                            return self
984                                .wait_for_event_and_dispatch(node_id, &event.to_string())
985                                .await;
986                        } else {
987                            event
988                        }
989                    }
990                    NonDeterministicEvent::RpcReadonly(id, req) => Event::Rpc(id, req),
991                };
992                eprintln!("non_deterministic_event_dispatch({node_id:?}): {event}");
993                self.nodes
994                    .get_mut(node_id.index())
995                    .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?
996                    .dispatch_event(event)
997            }
998            ScenarioStep::AddNode { config } => match *config {
999                NodeTestingConfig::Rust(config) => {
1000                    self.add_rust_node(config);
1001                    // TODO(binier): wait for node ports to be opened instead.
1002                    tokio::time::sleep(Duration::from_secs(2)).await;
1003                    true
1004                }
1005                NodeTestingConfig::Ocaml(config) => {
1006                    // before starting ocaml node, read and save secret
1007                    // keys from daemon.json.
1008                    let mut json_owned = None;
1009                    let json = match &config.daemon_json {
1010                        DaemonJson::Custom(path) => {
1011                            let bytes = tokio::fs::read(path).await.map_err(|err| {
1012                                anyhow::anyhow!(
1013                                    "error reading daemon.json from path({path}): {err}"
1014                                )
1015                            })?;
1016                            let json = serde_json::from_slice(&bytes).map_err(|err| {
1017                                anyhow::anyhow!(
1018                                    "failed to parse damon.json from path({path}): {err}"
1019                                )
1020                            })?;
1021                            json_owned.insert(json)
1022                        }
1023                        DaemonJson::InMem(json) => json,
1024                    };
1025                    let accounts = json["ledger"]["accounts"].as_array().ok_or_else(|| {
1026                        anyhow::anyhow!("daemon.json `.ledger.accounts` is not array")
1027                    })?;
1028
1029                    accounts
1030                        .iter()
1031                        .filter_map(|account| account["sk"].as_str())
1032                        .filter_map(|sk| sk.parse().ok())
1033                        .for_each(|sk| self.add_account_sec_key(sk));
1034
1035                    self.add_ocaml_node(config);
1036                    true
1037                }
1038            },
1039            ScenarioStep::ConnectNodes { dialer, listener } => {
1040                let listener_addr = match listener {
1041                    ListenerNode::Rust(listener) => {
1042                        let listener = self
1043                            .nodes
1044                            .get(listener.index())
1045                            .ok_or_else(|| anyhow::anyhow!("node {listener:?} not found"))?;
1046
1047                        listener.dial_addr()
1048                    }
1049                    ListenerNode::Ocaml(listener) => {
1050                        let listener = self
1051                            .ocaml_nodes
1052                            .get(listener.index())
1053                            .ok_or_else(|| anyhow::anyhow!("ocaml node {listener:?} not found"))?
1054                            .as_ref()
1055                            .ok_or_else(|| {
1056                                anyhow::anyhow!("tried to access removed ocaml node {listener:?}")
1057                            })?;
1058
1059                        listener.dial_addr()
1060                    }
1061                    ListenerNode::Custom(addr) => addr.clone(),
1062                };
1063
1064                self.rpc_counter += 1;
1065                let rpc_id = RpcId::new_unchecked(usize::MAX, self.rpc_counter);
1066                let dialer = self
1067                    .nodes
1068                    .get_mut(dialer.index())
1069                    .ok_or_else(|| anyhow::anyhow!("node {dialer:?} not found"))?;
1070
1071                let req = node::rpc::RpcRequest::P2pConnectionOutgoing(listener_addr);
1072                dialer.dispatch_event(Event::Rpc(rpc_id, Box::new(req)))
1073            }
1074            ScenarioStep::CheckTimeouts { node_id } => {
1075                let node = self
1076                    .nodes
1077                    .get_mut(node_id.index())
1078                    .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
1079                node.check_timeouts();
1080                true
1081            }
1082            ScenarioStep::AdvanceTime { by_nanos } => {
1083                for node in &mut self.nodes {
1084                    node.advance_time(by_nanos)
1085                }
1086                true
1087            }
1088            ScenarioStep::AdvanceNodeTime { node_id, by_nanos } => {
1089                let node = self
1090                    .nodes
1091                    .get_mut(node_id.index())
1092                    .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
1093                node.advance_time(by_nanos);
1094                true
1095            }
1096            ScenarioStep::Ocaml { node_id, step } => {
1097                let node = self.ocaml_nodes.get_mut(node_id.index());
1098                let node =
1099                    node.ok_or_else(|| anyhow::anyhow!("ocaml node {node_id:?} not found"))?;
1100                if matches!(step, OcamlStep::KillAndRemove) {
1101                    let mut node = node.take().ok_or_else(|| {
1102                        anyhow::anyhow!("tried to access removed ocaml node {node_id:?}")
1103                    })?;
1104                    node.exec(step).await?
1105                } else {
1106                    let node = node.as_mut().ok_or_else(|| {
1107                        anyhow::anyhow!("tried to access removed ocaml node {node_id:?}")
1108                    })?;
1109                    node.exec(step).await?
1110                }
1111            }
1112        })
1113    }
1114
1115    pub fn debugger(&self) -> Option<&Debugger> {
1116        self.debugger.as_ref()
1117    }
1118}
1119
1120impl ClusterScenarioRun {
1121    pub fn target_scenario(&self) -> Option<&Scenario> {
1122        self.chain.back().or_else(|| self.finished.last())
1123    }
1124
1125    pub fn cur_scenario(&self) -> &Scenario {
1126        self.chain.front().unwrap()
1127    }
1128
1129    pub fn peek_i(&self) -> Option<(usize, usize)> {
1130        self.chain
1131            .iter()
1132            .enumerate()
1133            .filter_map(|(i, scenario)| {
1134                let step_i = if i == 0 { self.cur_step } else { 0 };
1135                scenario.steps.get(step_i)?;
1136                Some((i, step_i))
1137            })
1138            .nth(0)
1139    }
1140
1141    pub fn peek(&self) -> Option<(&Scenario, &ScenarioStep)> {
1142        self.peek_i().map(|(scenario_i, step_i)| {
1143            let scenario = &self.chain[scenario_i];
1144            let step = &scenario.steps[step_i];
1145            (scenario, step)
1146        })
1147    }
1148
1149    fn advance(&mut self) {
1150        if let Some((scenario_i, step_i)) = self.peek_i() {
1151            self.finished.extend(self.chain.drain(..scenario_i));
1152            if self.cur_step == step_i {
1153                self.cur_step += 1;
1154            } else {
1155                self.cur_step = step_i;
1156            }
1157        }
1158    }
1159}