mina_node_testing/cluster/
mod.rs

1//! Cluster Management for Multi-Node Testing
2//!
3//! This module provides the core infrastructure for managing clusters of
4//! Mina nodes during testing scenarios. It supports both Rust and OCaml
5//! node implementations, enabling cross-implementation testing and complex
6//! multi-node scenarios.
7//!
8//! # Key Components
9//!
10//! - [`Cluster`] - Main cluster coordinator managing node lifecycle
11//! - Node addition methods for different node types
12//! - Port allocation and resource management
13//! - Scenario execution and state tracking
14//! - Network debugger integration
15//!
16//! # Node Addition Methods
17//!
18//! - [`Cluster::add_rust_node`] - Add Rust implementation nodes
19//! - [`Cluster::add_ocaml_node`] - Add OCaml implementation nodes
20//!
21//! # Example
22//!
23//! ```rust,no_run
24//! let mut cluster = Cluster::new(ClusterConfig::default());
25//!
26//! // Add Rust node with custom configuration
27//! let rust_node = cluster.add_rust_node(RustNodeTestingConfig::default());
28//!
29//! // Add OCaml node for cross-implementation testing
30//! let ocaml_node = cluster.add_ocaml_node(OcamlNodeTestingConfig::default());
31//! ```
32
33mod config;
34pub use config::{ClusterConfig, ProofKind};
35
36mod p2p_task_spawner;
37
38mod node_id;
39use mina_core::channels::Aborter;
40pub use node_id::{ClusterNodeId, ClusterOcamlNodeId};
41
42pub mod runner;
43
44use std::{
45    collections::{BTreeMap, VecDeque},
46    io::Read,
47    path::{Path, PathBuf},
48    sync::{Arc, Mutex as StdMutex},
49    time::Duration,
50};
51
52use libp2p::futures::{stream::FuturesUnordered, StreamExt};
53
54use ledger::proofs::provers::BlockProver;
55use mina_node::{
56    account::{AccountPublicKey, AccountSecretKey},
57    core::{
58        consensus::ConsensusConstants,
59        constants::constraint_constants,
60        invariants::InvariantsState,
61        log::{info, system_time, warn},
62        requests::RpcId,
63        thread,
64    },
65    event_source::Event,
66    p2p::{
67        channels::ChannelId, identity::SecretKey as P2pSecretKey, P2pConnectionEvent, P2pEvent,
68        P2pLimits, P2pMeshsubConfig, PeerId,
69    },
70    service::{Recorder, Service},
71    snark::{get_srs, BlockVerifier, TransactionVerifier, VerifierSRS},
72    BuildEnv, Config, GlobalConfig, LedgerConfig, P2pConfig, SnarkConfig, State,
73    TransitionFrontierConfig,
74};
75use mina_node_invariants::{InvariantResult, Invariants};
76use mina_node_native::{http_server, NodeServiceBuilder};
77use serde::{de::DeserializeOwned, Serialize};
78use temp_dir::TempDir;
79
80use crate::{
81    network_debugger::Debugger,
82    node::{
83        DaemonJson, Node, NodeTestingConfig, NonDeterministicEvent, OcamlNode, OcamlNodeConfig,
84        OcamlNodeTestingConfig, OcamlStep, RustNodeTestingConfig, TestPeerId,
85    },
86    scenario::{ListenerNode, Scenario, ScenarioId, ScenarioStep},
87    service::{NodeTestingService, PendingEventId},
88};
89
/// Resolves `path` relative to the per-user Mina cache directory
/// (`$HOME/.cache/mina`).
///
/// Returns `None` when the `HOME` environment variable is not set.
#[allow(dead_code)]
fn mina_path<P: AsRef<Path>>(path: P) -> Option<PathBuf> {
    let home = std::env::var_os("HOME")?;
    let mut full_path = PathBuf::from(home);
    full_path.push(".cache/mina");
    full_path.push(path);
    Some(full_path)
}
94
95#[allow(dead_code)]
96fn read_index<T: DeserializeOwned>(name: &str) -> Option<T> {
97    mina_path(name)
98        .and_then(|path| {
99            if !path.exists() {
100                return None;
101            }
102            match std::fs::File::open(path) {
103                Ok(v) => Some(v),
104                Err(e) => {
105                    warn!(system_time(); "cannot find verifier index for {name}: {e}");
106                    None
107                }
108            }
109        })
110        .and_then(|mut file| {
111            let mut buf = Vec::new();
112            file.read_to_end(&mut buf).ok().and(Some(buf))
113        })
114        .and_then(|bytes| match postcard::from_bytes(&bytes) {
115            Ok(v) => Some(v),
116            Err(e) => {
117                warn!(system_time(); "cannot read verifier index for {name}: {e}");
118                None
119            }
120        })
121}
122
123#[allow(dead_code)]
124fn write_index<T: Serialize>(name: &str, index: &T) -> Option<()> {
125    mina_path(name)
126        .and_then(|path| {
127            let Some(parent) = path.parent() else {
128                warn!(system_time(); "cannot get parent for {path:?}");
129                return None;
130            };
131            if let Err(e) = std::fs::create_dir_all(parent) {
132                warn!(system_time(); "cannot create parent dir for {parent:?}: {e}");
133                return None;
134            }
135            match std::fs::File::create(&path) {
136                Ok(v) => Some(v),
137                Err(e) => {
138                    warn!(system_time(); "cannot create file {path:?}: {e}");
139                    None
140                }
141            }
142        })
143        .and_then(|file| match postcard::to_io(index, file) {
144            Ok(_) => Some(()),
145            Err(e) => {
146                warn!(system_time(); "cannot write verifier index for {name}: {e}");
147                None
148            }
149        })
150}
151
152lazy_static::lazy_static! {
153    static ref VERIFIER_SRS: Arc<VerifierSRS> = get_srs();
154}
155
156/// Manages a cluster of Mina nodes for testing scenarios.
157///
158/// The `Cluster` struct coordinates multiple node instances, handling
159/// resource allocation, configuration, and lifecycle management. It supports
160/// both Rust and OCaml node implementations for comprehensive testing.
161///
162/// # Default Behaviors
163///
164/// - **Port allocation**: Automatically assigns available ports from the
165///   configured range, testing availability before assignment
166/// - **Keypair management**: Uses deterministic keypairs for Rust nodes and
167///   rotates through predefined keypairs for OCaml nodes
168/// - **Resource isolation**: Each node gets isolated temporary directories
169/// - **Verifier indices**: Shared verifier SRS and indices across all nodes
170/// - **Network debugging**: Optional debugger integration for CI environments
171///
172/// # Node Addition
173///
174/// The cluster provides specialized methods for adding different node types:
175/// - Rust nodes via [`add_rust_node`](Self::add_rust_node)
176/// - OCaml nodes via [`add_ocaml_node`](Self::add_ocaml_node)
177pub struct Cluster {
178    /// Cluster-wide configuration settings
179    pub config: ClusterConfig,
180    /// Current scenario execution state
181    scenario: ClusterScenarioRun,
182    /// Iterator over available ports for node allocation
183    available_ports: Box<dyn Iterator<Item = u16> + Send>,
184    /// Registry of account secret keys for deterministic testing
185    account_sec_keys: BTreeMap<AccountPublicKey, AccountSecretKey>,
186    /// Collection of active Rust nodes
187    nodes: Vec<Node>,
188    /// Collection of active OCaml nodes (Option for lifecycle management)
189    ocaml_nodes: Vec<Option<OcamlNode>>,
190    /// Genesis timestamp for deterministic time progression
191    initial_time: Option<redux::Timestamp>,
192
193    /// Counter for generating unique RPC request IDs
194    rpc_counter: usize,
195    /// Index for rotating OCaml LibP2P keypairs
196    ocaml_libp2p_keypair_i: usize,
197
198    /// Shared verifier SRS for proof verification
199    verifier_srs: Arc<VerifierSRS>,
200    /// Block verifier index for consensus validation
201    block_verifier_index: BlockVerifier,
202    /// Transaction verifier index for transaction validation
203    work_verifier_index: TransactionVerifier,
204
205    /// Optional network traffic debugger
206    debugger: Option<Debugger>,
207    /// Shared state for invariant checking across nodes
208    invariants_state: Arc<StdMutex<InvariantsState>>,
209}
210
211/// Tracks the execution state of scenario chains within a cluster.
212///
213/// Manages the progression through scenario steps and maintains history
214/// of completed scenarios for debugging and analysis.
215#[derive(Serialize)]
216pub struct ClusterScenarioRun {
217    /// Queue of scenarios to be executed (supports scenario inheritance)
218    chain: VecDeque<Scenario>,
219    /// History of completed scenarios
220    finished: Vec<Scenario>,
221    /// Current step index within the active scenario
222    cur_step: usize,
223}
224
225impl Cluster {
226    pub fn new(config: ClusterConfig) -> Self {
227        let available_ports = config
228            .port_range()
229            .filter(|port| std::net::TcpListener::bind(("0.0.0.0", *port)).is_ok());
230        let debugger = if config.is_use_debugger() {
231            Some(Debugger::drone_ci())
232        } else {
233            None
234        };
235        Self {
236            config,
237            scenario: ClusterScenarioRun {
238                chain: Default::default(),
239                finished: Default::default(),
240                cur_step: 0,
241            },
242            available_ports: Box::new(available_ports),
243            account_sec_keys: Default::default(),
244            nodes: Vec::new(),
245            ocaml_nodes: Vec::new(),
246            initial_time: None,
247
248            rpc_counter: 0,
249            ocaml_libp2p_keypair_i: 0,
250
251            verifier_srs: VERIFIER_SRS.clone(),
252            block_verifier_index: BlockVerifier::make(),
253            work_verifier_index: TransactionVerifier::make(),
254
255            debugger,
256            invariants_state: Arc::new(StdMutex::new(Default::default())),
257        }
258    }
259
260    pub fn available_port(&mut self) -> Option<u16> {
261        self.available_ports.next()
262    }
263
264    pub fn add_account_sec_key(&mut self, sec_key: AccountSecretKey) {
265        self.account_sec_keys.insert(sec_key.public_key(), sec_key);
266    }
267
268    pub fn get_account_sec_key(&self, pub_key: &AccountPublicKey) -> Option<&AccountSecretKey> {
269        self.account_sec_keys.get(pub_key).or_else(|| {
270            AccountSecretKey::deterministic_iter().find(|sec_key| &sec_key.public_key() == pub_key)
271        })
272    }
273
274    pub fn set_initial_time(&mut self, initial_time: redux::Timestamp) {
275        self.initial_time = Some(initial_time)
276    }
277
278    pub fn get_initial_time(&self) -> Option<redux::Timestamp> {
279        self.initial_time
280    }
281
282    /// Add a new Rust implementation node to the cluster.
283    ///
284    /// Creates and configures a Rust Mina node with the specified testing
285    /// configuration. This method handles all aspects of node initialization
286    /// including port allocation, key generation, service setup, and state
287    /// initialization.
288    ///
289    /// # Default Behaviors
290    ///
291    /// - **Port allocation**: HTTP and LibP2P ports automatically assigned
292    ///   from available port range
293    /// - **Peer identity**: Deterministic LibP2P keypair based on node index
294    /// - **Work directory**: Isolated temporary directory per node
295    /// - **Invariants**: Automatic invariant checking enabled
296    /// - **HTTP server**: Spawned on separate thread for API access
297    /// - **Proof verification**: Shared verifier indices and SRS
298    ///
299    /// # Configuration Options
300    ///
301    /// - `peer_id`: Deterministic or custom LibP2P identity
302    /// - `libp2p_port`: Custom P2P port (auto-assigned if None)
303    /// - `initial_peers`: Peer connection targets (supports node references)
304    /// - `block_producer`: Optional block production configuration
305    /// - `genesis`: Genesis ledger and protocol constants
306    /// - `snark_worker`: SNARK work generation settings
307    ///
308    /// # Returns
309    ///
310    /// Returns a [`ClusterNodeId`] that can be used to reference this node
311    /// in scenarios and for inter-node connections.
312    ///
313    /// # Panics
314    ///
315    /// Panics if:
316    /// - No available ports in the configured range
317    /// - Node service initialization fails
318    /// - Invalid genesis configuration
319    pub fn add_rust_node(&mut self, testing_config: RustNodeTestingConfig) -> ClusterNodeId {
320        let rng_seed = [0; 32];
321        let node_config = testing_config.clone();
322        let node_id = ClusterNodeId::new_unchecked(self.nodes.len());
323
324        info!(
325            system_time();
326            "Adding Rust node {} with config: max_peers={}, snark_worker={:?}, \
327             block_producer={}",
328            node_id.index(),
329            testing_config.max_peers,
330            testing_config.snark_worker,
331            testing_config.block_producer.is_some()
332        );
333
334        let work_dir = TempDir::new().unwrap();
335        let shutdown_initiator = Aborter::default();
336        let shutdown_listener = shutdown_initiator.aborted();
337        let p2p_sec_key = match testing_config.peer_id {
338            TestPeerId::Derived => {
339                info!(system_time(); "Using deterministic peer ID for node {}", node_id.index());
340                P2pSecretKey::deterministic(node_id.index())
341            }
342            TestPeerId::Bytes(bytes) => {
343                info!(system_time(); "Using custom peer ID for node {}", node_id.index());
344                P2pSecretKey::from_bytes(bytes)
345            }
346        };
347
348        let http_port = self
349            .available_ports
350            .next()
351            .ok_or_else(|| {
352                anyhow::anyhow!(
353                    "couldn't find available port in port range: {:?}",
354                    self.config.port_range()
355                )
356            })
357            .unwrap();
358        let libp2p_port = testing_config.libp2p_port.unwrap_or_else(|| {
359            self.available_ports
360                .next()
361                .ok_or_else(|| {
362                    anyhow::anyhow!(
363                        "couldn't find available port in port range: {:?}",
364                        self.config.port_range()
365                    )
366                })
367                .unwrap()
368        });
369
370        info!(
371            system_time();
372            "Assigned ports for Rust node {}: HTTP={}, LibP2P={}",
373            node_id.index(),
374            http_port,
375            libp2p_port
376        );
377
378        let (block_producer_sec_key, block_producer_config) = testing_config
379            .block_producer
380            .map(|v| {
381                info!(
382                    system_time();
383                    "Configuring block producer for Rust node {} with public key: {}",
384                    node_id.index(),
385                    v.sec_key.public_key()
386                );
387                (v.sec_key, v.config)
388            })
389            .unzip();
390
391        let initial_peers: Vec<_> = testing_config
392            .initial_peers
393            .into_iter()
394            .map(|node| {
395                let addr = match &node {
396                    ListenerNode::Rust(id) => {
397                        info!(system_time(); "Adding Rust peer {} as initial peer", id.index());
398                        self.node(*id).unwrap().dial_addr()
399                    }
400                    ListenerNode::Ocaml(id) => {
401                        info!(system_time(); "Adding OCaml peer {} as initial peer", id.index());
402                        self.ocaml_node(*id).unwrap().dial_addr()
403                    }
404                    ListenerNode::Custom(addr) => {
405                        info!(system_time(); "Adding custom peer: {:?}", addr);
406                        addr.clone()
407                    }
408                };
409                addr
410            })
411            .collect();
412
413        if !initial_peers.is_empty() {
414            info!(
415                system_time();
416                "Rust node {} configured with {} initial peers",
417                node_id.index(),
418                initial_peers.len()
419            );
420        } else {
421            info!(system_time(); "Rust node {} configured as seed node (no initial peers)", node_id.index());
422        }
423
424        let protocol_constants = testing_config
425            .genesis
426            .protocol_constants()
427            .expect("wrong protocol constants");
428        let consensus_consts =
429            ConsensusConstants::create(constraint_constants(), &protocol_constants);
430
431        let config = Config {
432            ledger: LedgerConfig {},
433            snark: SnarkConfig {
434                // TODO(binier): use cache
435                block_verifier_index: self.block_verifier_index.clone(),
436                block_verifier_srs: self.verifier_srs.clone(),
437                work_verifier_index: self.work_verifier_index.clone(),
438                work_verifier_srs: self.verifier_srs.clone(),
439            },
440            global: GlobalConfig {
441                build: BuildEnv::get().into(),
442                snarker: testing_config.snark_worker,
443                consensus_constants: consensus_consts.clone(),
444                client_port: Some(http_port),
445                testing_run: true,
446            },
447            p2p: P2pConfig {
448                libp2p_port: Some(libp2p_port),
449                listen_port: Some(http_port),
450                identity_pub_key: p2p_sec_key.public_key(),
451                initial_peers,
452                external_addrs: vec![],
453                enabled_channels: ChannelId::iter_all().collect(),
454                peer_discovery: testing_config.peer_discovery,
455                timeouts: testing_config.timeouts,
456                limits: P2pLimits::default().with_max_peers(Some(testing_config.max_peers)),
457                meshsub: P2pMeshsubConfig {
458                    initial_time: testing_config
459                        .initial_time
460                        .checked_sub(redux::Timestamp::ZERO)
461                        .unwrap_or_default(),
462                    ..Default::default()
463                },
464            },
465            transition_frontier: TransitionFrontierConfig::new(testing_config.genesis),
466            block_producer: block_producer_config,
467            archive: None,
468            tx_pool: ledger::transaction_pool::Config {
469                trust_system: (),
470                pool_max_size: 3000,
471                slot_tx_end: None,
472            },
473        };
474
475        let mut service_builder = NodeServiceBuilder::new(rng_seed);
476        service_builder
477            .ledger_init()
478            .p2p_init_with_custom_task_spawner(
479                p2p_sec_key.clone(),
480                p2p_task_spawner::P2pTaskSpawner::new(shutdown_listener.clone()),
481            )
482            .gather_stats()
483            .record(match testing_config.recorder {
484                crate::node::Recorder::None => Recorder::None,
485                crate::node::Recorder::StateWithInputActions => {
486                    Recorder::only_input_actions(work_dir.path())
487                }
488            });
489
490        if let Some(keypair) = block_producer_sec_key {
491            info!(system_time(); "Initializing block producer for Rust node {}", node_id.index());
492            let provers = BlockProver::make(None, None);
493            service_builder.block_producer_init(keypair, Some(provers));
494        }
495
496        let real_service = service_builder
497            .build()
498            .map_err(|err| anyhow::anyhow!("node service build failed! error: {err}"))
499            .unwrap();
500
501        // spawn http-server
502        let runtime = tokio::runtime::Builder::new_current_thread()
503            .enable_all()
504            .build()
505            .unwrap();
506        let shutdown = shutdown_listener.clone();
507        let rpc_sender = real_service.rpc_sender();
508        thread::Builder::new()
509            .name("mina_http_server".to_owned())
510            .spawn(move || {
511                let local_set = tokio::task::LocalSet::new();
512                let task = async {
513                    tokio::select! {
514                        _ = shutdown.wait() => {}
515                        _ = http_server::run(http_port, rpc_sender) => {}
516                    }
517                };
518                local_set.block_on(&runtime, task);
519            })
520            .unwrap();
521
522        let invariants_state = self.invariants_state.clone();
523        let mut service =
524            NodeTestingService::new(real_service, node_id, invariants_state, shutdown_initiator);
525
526        service.set_proof_kind(self.config.proof_kind());
527        if self.config.all_rust_to_rust_use_webrtc() {
528            service.set_rust_to_rust_use_webrtc();
529        }
530        if self.config.is_replay() {
531            service.set_replay();
532        }
533
534        let state = mina_node::State::new(config, &consensus_consts, testing_config.initial_time);
535        fn effects(
536            store: &mut mina_node::Store<NodeTestingService>,
537            action: mina_node::ActionWithMeta,
538        ) {
539            // if action.action().kind().to_string().starts_with("BlockProducer") {
540            //     dbg!(action.action());
541            // }
542
543            store.service.dyn_effects(store.state.get(), &action);
544            let peer_id = store.state().p2p.my_id();
545            mina_core::log::trace!(action.time(); "{peer_id}: {:?}", action.action().kind());
546
547            for (invariant, res) in Invariants::check_all(store, &action) {
548                // TODO(binier): record instead of panicing.
549                match res {
550                    InvariantResult::Ignored(reason) => {
551                        unreachable!("No invariant should be ignored! ignore reason: {reason:?}");
552                    }
553                    InvariantResult::Violation(violation) => {
554                        panic!(
555                            "Invariant({}) violated! violation: {violation}",
556                            invariant.to_str()
557                        );
558                    }
559                    InvariantResult::Updated => {}
560                    InvariantResult::Ok => {}
561                }
562            }
563
564            mina_node::effects(store, action)
565        }
566        let mut store = mina_node::Store::new(
567            mina_node::reducer,
568            effects,
569            service,
570            testing_config.initial_time.into(),
571            state,
572        );
573        // record initial state.
574        {
575            store
576                .service
577                .recorder()
578                .initial_state(rng_seed, p2p_sec_key, store.state.get());
579        }
580
581        let node = Node::new(work_dir, node_config, store);
582
583        info!(
584            system_time();
585            "Successfully created Rust node {} at ports HTTP={}, LibP2P={}",
586            node_id.index(),
587            http_port,
588            libp2p_port
589        );
590
591        self.nodes.push(node);
592        node_id
593    }
594
595    /// Add a new OCaml implementation node to the cluster.
596    ///
597    /// Creates and spawns an OCaml Mina daemon process with the specified
598    /// configuration. This method handles process spawning, port allocation,
599    /// directory setup, and daemon configuration.
600    ///
601    /// # Default Behaviors
602    ///
603    /// - **Executable selection**: Automatically detects local binary or
604    ///   falls back to default Docker image
605    /// - **Port allocation**: LibP2P, GraphQL, and client ports automatically
606    ///   assigned from available range
607    /// - **Keypair rotation**: Uses predefined LibP2P keypairs, rotating
608    ///   through the set for each new node
609    /// - **Process management**: Spawns daemon with proper environment
610    ///   variables and argument configuration
611    /// - **Logging**: Stdout/stderr forwarded with port-based prefixes
612    /// - **Docker support**: Automatic container management when using Docker
613    ///
614    /// # Configuration Options
615    ///
616    /// - `initial_peers`: List of peer connection targets
617    /// - `daemon_json`: Genesis configuration (file path or in-memory JSON)
618    /// - `block_producer`: Optional block production key
619    ///
620    /// # Docker vs Local Execution
621    ///
622    /// The method automatically determines execution mode:
623    /// 1. Attempts to use locally installed `mina` binary
624    /// 2. Falls back to Docker with default image if binary not found
625    /// 3. Custom Docker images supported via configuration
626    ///
627    /// # Returns
628    ///
629    /// Returns a [`ClusterOcamlNodeId`] for referencing this OCaml node
630    /// in scenarios and peer connections.
631    ///
632    /// # Panics
633    ///
634    /// Panics if:
635    /// - No available ports in the configured range
636    /// - Temporary directory creation fails
637    /// - OCaml daemon process spawn fails
638    pub fn add_ocaml_node(&mut self, testing_config: OcamlNodeTestingConfig) -> ClusterOcamlNodeId {
639        let node_i = self.ocaml_nodes.len();
640
641        info!(
642            system_time();
643            "Adding OCaml node {} with {} initial peers, block_producer={}",
644            node_i,
645            testing_config.initial_peers.len(),
646            testing_config.block_producer.is_some()
647        );
648
649        let executable = self.config.ocaml_node_executable();
650        let mut next_port = || {
651            self.available_ports.next().ok_or_else(|| {
652                anyhow::anyhow!(
653                    "couldn't find available port in port range: {:?}",
654                    self.config.port_range()
655                )
656            })
657        };
658
659        let temp_dir = temp_dir::TempDir::new().expect("failed to create tempdir");
660        let libp2p_port = next_port().unwrap();
661        let graphql_port = next_port().unwrap();
662        let client_port = next_port().unwrap();
663
664        info!(
665            system_time();
666            "Assigned ports for OCaml node {}: LibP2P={}, GraphQL={}, Client={}",
667            node_i,
668            libp2p_port,
669            graphql_port,
670            client_port
671        );
672
673        let node = OcamlNode::start(OcamlNodeConfig {
674            executable,
675            dir: temp_dir,
676            libp2p_keypair_i: self.ocaml_libp2p_keypair_i,
677            libp2p_port,
678            graphql_port,
679            client_port,
680            initial_peers: testing_config.initial_peers,
681            daemon_json: testing_config.daemon_json,
682            block_producer: testing_config.block_producer,
683        })
684        .expect("failed to start ocaml node");
685
686        info!(
687            system_time();
688            "Successfully started OCaml node {} with keypair index {}",
689            node_i,
690            self.ocaml_libp2p_keypair_i
691        );
692
693        self.ocaml_libp2p_keypair_i += 1;
694
695        self.ocaml_nodes.push(Some(node));
696        ClusterOcamlNodeId::new_unchecked(node_i)
697    }
698
699    pub async fn start(&mut self, scenario: Scenario) -> Result<(), anyhow::Error> {
700        let mut parent_id = scenario.info.parent_id.clone();
701        self.scenario.chain.push_back(scenario);
702
703        while let Some(ref id) = parent_id {
704            let scenario = Scenario::load(id).await?;
705            parent_id.clone_from(&scenario.info.parent_id);
706            self.scenario.chain.push_back(scenario);
707        }
708
709        let scenario = self.scenario.cur_scenario();
710
711        for config in scenario.info.nodes.clone() {
712            match config {
713                NodeTestingConfig::Rust(config) => {
714                    self.add_rust_node(config.clone());
715                }
716                NodeTestingConfig::Ocaml(config) => {
717                    self.add_ocaml_node(config.clone());
718                }
719            }
720        }
721
722        Ok(())
723    }
724
725    pub async fn reload_scenarios(&mut self) -> Result<(), anyhow::Error> {
726        for scenario in &mut self.scenario.chain {
727            scenario.reload().await?;
728        }
729        Ok(())
730    }
731
732    pub fn next_scenario_and_step(&self) -> Option<(&ScenarioId, usize)> {
733        self.scenario
734            .peek_i()
735            .map(|(scenario_i, step_i)| (&self.scenario.chain[scenario_i].info.id, step_i))
736    }
737
738    pub fn target_scenario(&self) -> Option<&ScenarioId> {
739        self.scenario.target_scenario().map(|v| &v.info.id)
740    }
741
742    pub fn nodes_iter(&self) -> impl Iterator<Item = (ClusterNodeId, &Node)> {
743        self.nodes
744            .iter()
745            .enumerate()
746            .map(|(i, node)| (ClusterNodeId::new_unchecked(i), node))
747    }
748
749    pub fn ocaml_nodes_iter(&self) -> impl Iterator<Item = (ClusterOcamlNodeId, &OcamlNode)> {
750        self.ocaml_nodes
751            .iter()
752            .enumerate()
753            .filter_map(|(i, node)| node.as_ref().map(|node| (i, node)))
754            .map(|(i, node)| (ClusterOcamlNodeId::new_unchecked(i), node))
755    }
756
757    pub fn node(&self, node_id: ClusterNodeId) -> Option<&Node> {
758        self.nodes.get(node_id.index())
759    }
760
761    pub fn node_by_peer_id(&self, peer_id: PeerId) -> Option<&Node> {
762        self.nodes_iter()
763            .find(|(_, node)| node.peer_id() == peer_id)
764            .map(|(_, node)| node)
765    }
766
767    pub fn node_mut(&mut self, node_id: ClusterNodeId) -> Option<&mut Node> {
768        self.nodes.get_mut(node_id.index())
769    }
770
771    pub fn ocaml_node(&self, node_id: ClusterOcamlNodeId) -> Option<&OcamlNode> {
772        self.ocaml_nodes
773            .get(node_id.index())
774            .map(|opt| opt.as_ref().expect("tried to access removed ocaml node"))
775    }
776
777    pub fn ocaml_node_by_peer_id(&self, peer_id: PeerId) -> Option<&OcamlNode> {
778        self.ocaml_nodes_iter()
779            .find(|(_, node)| node.peer_id() == peer_id)
780            .map(|(_, node)| node)
781    }
782
783    pub fn pending_events(
784        &mut self,
785        poll: bool,
786    ) -> impl Iterator<
787        Item = (
788            ClusterNodeId,
789            &State,
790            impl Iterator<Item = (PendingEventId, &Event)>,
791        ),
792    > {
793        self.nodes.iter_mut().enumerate().map(move |(i, node)| {
794            let node_id = ClusterNodeId::new_unchecked(i);
795            let (state, pending_events) = node.pending_events_with_state(poll);
796            (node_id, state, pending_events)
797        })
798    }
799
800    pub fn node_pending_events(
801        &mut self,
802        node_id: ClusterNodeId,
803        poll: bool,
804    ) -> Result<(&State, impl Iterator<Item = (PendingEventId, &Event)>), anyhow::Error> {
805        let node = self
806            .nodes
807            .get_mut(node_id.index())
808            .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
809        Ok(node.pending_events_with_state(poll))
810    }
811
812    pub async fn wait_for_pending_events(&mut self) {
813        let mut nodes = &mut self.nodes[..];
814        let mut futures = FuturesUnordered::new();
815
816        while let Some((node, nodes_rest)) = nodes.split_first_mut() {
817            nodes = nodes_rest;
818            futures.push(async { node.wait_for_next_pending_event().await.is_some() });
819        }
820
821        while let Some(has_event) = futures.next().await {
822            if has_event {
823                break;
824            }
825        }
826    }
827
828    pub async fn wait_for_pending_events_with_timeout(&mut self, timeout: Duration) -> bool {
829        let timeout = tokio::time::sleep(timeout);
830
831        tokio::select! {
832            _ = self.wait_for_pending_events() => true,
833            _ = timeout => false,
834        }
835    }
836
837    pub async fn wait_for_pending_event(
838        &mut self,
839        node_id: ClusterNodeId,
840        event_pattern: &str,
841    ) -> anyhow::Result<PendingEventId> {
842        let node = self
843            .nodes
844            .get_mut(node_id.index())
845            .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
846        let timeout = tokio::time::sleep(Duration::from_secs(300));
847        tokio::select! {
848            opt = node.wait_for_event(event_pattern) => opt.ok_or_else(|| anyhow::anyhow!("wait_for_event: None")),
849            _ = timeout => {
850                let pending_events = node.pending_events(false).map(|(_, event)| event.to_string()).collect::<Vec<_>>();
851                 Err(anyhow::anyhow!("waiting for event timed out! node {node_id:?}, event: \"{event_pattern}\"\n{pending_events:?}"))
852            }
853        }
854    }
855
856    pub async fn wait_for_event_and_dispatch(
857        &mut self,
858        node_id: ClusterNodeId,
859        event_pattern: &str,
860    ) -> anyhow::Result<bool> {
861        let event_id = self.wait_for_pending_event(node_id, event_pattern).await?;
862        let node = self.nodes.get_mut(node_id.index()).unwrap();
863        Ok(node.take_event_and_dispatch(event_id))
864    }
865
866    pub async fn add_steps_and_save(&mut self, steps: impl IntoIterator<Item = ScenarioStep>) {
867        let scenario = self.scenario.chain.back_mut().unwrap();
868        steps
869            .into_iter()
870            .for_each(|step| scenario.add_step(step).unwrap());
871        scenario.save().await.unwrap();
872    }
873
874    pub async fn exec_to_end(&mut self) -> Result<(), anyhow::Error> {
875        let mut i = 0;
876        let total = self.scenario.cur_scenario().steps.len();
877        loop {
878            info!(system_time(); "Executing step {}/{}", i + 1, total);
879            if !self.exec_next().await? {
880                break Ok(());
881            }
882            i += 1;
883        }
884    }
885
886    pub async fn exec_until(
887        &mut self,
888        target_scenario: ScenarioId,
889        step_i: Option<usize>,
890    ) -> Result<(), anyhow::Error> {
891        if self
892            .scenario
893            .finished
894            .iter()
895            .any(|v| v.info.id == target_scenario)
896        {
897            return Err(anyhow::anyhow!(
898                "cluster already finished '{target_scenario}' scenario"
899            ));
900        }
901
902        while self
903            .scenario
904            .peek()
905            .is_some_and(|(scenario, _)| scenario.info.id != target_scenario)
906        {
907            if !self.exec_next().await? {
908                break;
909            }
910        }
911
912        while self
913            .scenario
914            .peek()
915            .is_some_and(|(scenario, _)| scenario.info.id == target_scenario)
916        {
917            if let Some(step_i) = step_i {
918                if self.scenario.peek_i().unwrap().1 >= step_i {
919                    break;
920                }
921            }
922            if !self.exec_next().await? {
923                break;
924            }
925        }
926
927        Ok(())
928    }
929
930    pub async fn exec_next(&mut self) -> Result<bool, anyhow::Error> {
931        let (_scenario, step) = match self.scenario.peek() {
932            Some(v) => v,
933            None => return Ok(false),
934        };
935        let dispatched = self.exec_step(step.clone()).await?;
936
937        if dispatched {
938            self.scenario.advance();
939        }
940
941        Ok(dispatched)
942    }
943
944    pub async fn exec_step(&mut self, step: ScenarioStep) -> anyhow::Result<bool> {
945        Ok(match step {
946            ScenarioStep::Event { node_id, event } => {
947                return self.wait_for_event_and_dispatch(node_id, &event).await;
948            }
949            ScenarioStep::ManualEvent { node_id, event } => self
950                .nodes
951                .get_mut(node_id.index())
952                .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?
953                .dispatch_event(*event),
954            ScenarioStep::NonDeterministicEvent { node_id, event } => {
955                let event = match *event {
956                    NonDeterministicEvent::P2pConnectionClosed(peer_id) => {
957                        let node = self
958                            .nodes
959                            .get_mut(node_id.index())
960                            .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
961                        node.p2p_disconnect(peer_id);
962                        let event =
963                            Event::P2p(P2pEvent::Connection(P2pConnectionEvent::Closed(peer_id)));
964                        return self
965                            .wait_for_event_and_dispatch(node_id, &event.to_string())
966                            .await;
967                    }
968                    NonDeterministicEvent::P2pConnectionFinalized(peer_id, res) => {
969                        let node = self
970                            .nodes
971                            .get(node_id.index())
972                            .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
973                        let res_is_ok = res.is_ok();
974                        let event = Event::P2p(P2pEvent::Connection(
975                            P2pConnectionEvent::Finalized(peer_id, res),
976                        ));
977
978                        if res_is_ok {
979                            let is_peer_connected =
980                                node.state().p2p.get_ready_peer(&peer_id).is_some();
981                            if is_peer_connected {
982                                // we are already connected, so skip the extra event.
983                                return Ok(true);
984                            }
985                            eprintln!("non_deterministic_wait_for_event_and_dispatch({node_id:?}): {event}");
986                            return self
987                                .wait_for_event_and_dispatch(node_id, &event.to_string())
988                                .await;
989                        } else {
990                            event
991                        }
992                    }
993                    NonDeterministicEvent::RpcReadonly(id, req) => Event::Rpc(id, req),
994                };
995                eprintln!("non_deterministic_event_dispatch({node_id:?}): {event}");
996                self.nodes
997                    .get_mut(node_id.index())
998                    .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?
999                    .dispatch_event(event)
1000            }
1001            ScenarioStep::AddNode { config } => match *config {
1002                NodeTestingConfig::Rust(config) => {
1003                    self.add_rust_node(config);
1004                    // TODO(binier): wait for node ports to be opened instead.
1005                    tokio::time::sleep(Duration::from_secs(2)).await;
1006                    true
1007                }
1008                NodeTestingConfig::Ocaml(config) => {
1009                    // before starting ocaml node, read and save secret
1010                    // keys from daemon.json.
1011                    let mut json_owned = None;
1012                    let json = match &config.daemon_json {
1013                        DaemonJson::Custom(path) => {
1014                            let bytes = tokio::fs::read(path).await.map_err(|err| {
1015                                anyhow::anyhow!(
1016                                    "error reading daemon.json from path({path}): {err}"
1017                                )
1018                            })?;
1019                            let json = serde_json::from_slice(&bytes).map_err(|err| {
1020                                anyhow::anyhow!(
1021                                    "failed to parse damon.json from path({path}): {err}"
1022                                )
1023                            })?;
1024                            json_owned.insert(json)
1025                        }
1026                        DaemonJson::InMem(json) => json,
1027                    };
1028                    let accounts = json["ledger"]["accounts"].as_array().ok_or_else(|| {
1029                        anyhow::anyhow!("daemon.json `.ledger.accounts` is not array")
1030                    })?;
1031
1032                    accounts
1033                        .iter()
1034                        .filter_map(|account| account["sk"].as_str())
1035                        .filter_map(|sk| sk.parse().ok())
1036                        .for_each(|sk| self.add_account_sec_key(sk));
1037
1038                    self.add_ocaml_node(config);
1039                    true
1040                }
1041            },
1042            ScenarioStep::ConnectNodes { dialer, listener } => {
1043                let listener_addr = match listener {
1044                    ListenerNode::Rust(listener) => {
1045                        let listener = self
1046                            .nodes
1047                            .get(listener.index())
1048                            .ok_or_else(|| anyhow::anyhow!("node {listener:?} not found"))?;
1049
1050                        listener.dial_addr()
1051                    }
1052                    ListenerNode::Ocaml(listener) => {
1053                        let listener = self
1054                            .ocaml_nodes
1055                            .get(listener.index())
1056                            .ok_or_else(|| anyhow::anyhow!("ocaml node {listener:?} not found"))?
1057                            .as_ref()
1058                            .ok_or_else(|| {
1059                                anyhow::anyhow!("tried to access removed ocaml node {listener:?}")
1060                            })?;
1061
1062                        listener.dial_addr()
1063                    }
1064                    ListenerNode::Custom(addr) => addr.clone(),
1065                };
1066
1067                self.rpc_counter += 1;
1068                let rpc_id = RpcId::new_unchecked(usize::MAX, self.rpc_counter);
1069                let dialer = self
1070                    .nodes
1071                    .get_mut(dialer.index())
1072                    .ok_or_else(|| anyhow::anyhow!("node {dialer:?} not found"))?;
1073
1074                let req = mina_node::rpc::RpcRequest::P2pConnectionOutgoing(listener_addr);
1075                dialer.dispatch_event(Event::Rpc(rpc_id, Box::new(req)))
1076            }
1077            ScenarioStep::CheckTimeouts { node_id } => {
1078                let node = self
1079                    .nodes
1080                    .get_mut(node_id.index())
1081                    .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
1082                node.check_timeouts();
1083                true
1084            }
1085            ScenarioStep::AdvanceTime { by_nanos } => {
1086                for node in &mut self.nodes {
1087                    node.advance_time(by_nanos)
1088                }
1089                true
1090            }
1091            ScenarioStep::AdvanceNodeTime { node_id, by_nanos } => {
1092                let node = self
1093                    .nodes
1094                    .get_mut(node_id.index())
1095                    .ok_or_else(|| anyhow::anyhow!("node {node_id:?} not found"))?;
1096                node.advance_time(by_nanos);
1097                true
1098            }
1099            ScenarioStep::Ocaml { node_id, step } => {
1100                let node = self.ocaml_nodes.get_mut(node_id.index());
1101                let node =
1102                    node.ok_or_else(|| anyhow::anyhow!("ocaml node {node_id:?} not found"))?;
1103                if matches!(step, OcamlStep::KillAndRemove) {
1104                    let mut node = node.take().ok_or_else(|| {
1105                        anyhow::anyhow!("tried to access removed ocaml node {node_id:?}")
1106                    })?;
1107                    node.exec(step).await?
1108                } else {
1109                    let node = node.as_mut().ok_or_else(|| {
1110                        anyhow::anyhow!("tried to access removed ocaml node {node_id:?}")
1111                    })?;
1112                    node.exec(step).await?
1113                }
1114            }
1115        })
1116    }
1117
1118    pub fn debugger(&self) -> Option<&Debugger> {
1119        self.debugger.as_ref()
1120    }
1121}
1122
1123impl ClusterScenarioRun {
1124    pub fn target_scenario(&self) -> Option<&Scenario> {
1125        self.chain.back().or_else(|| self.finished.last())
1126    }
1127
1128    pub fn cur_scenario(&self) -> &Scenario {
1129        self.chain.front().unwrap()
1130    }
1131
1132    pub fn peek_i(&self) -> Option<(usize, usize)> {
1133        self.chain
1134            .iter()
1135            .enumerate()
1136            .filter_map(|(i, scenario)| {
1137                let step_i = if i == 0 { self.cur_step } else { 0 };
1138                scenario.steps.get(step_i)?;
1139                Some((i, step_i))
1140            })
1141            .nth(0)
1142    }
1143
1144    pub fn peek(&self) -> Option<(&Scenario, &ScenarioStep)> {
1145        self.peek_i().map(|(scenario_i, step_i)| {
1146            let scenario = &self.chain[scenario_i];
1147            let step = &scenario.steps[step_i];
1148            (scenario, step)
1149        })
1150    }
1151
1152    fn advance(&mut self) {
1153        if let Some((scenario_i, step_i)) = self.peek_i() {
1154            self.finished.extend(self.chain.drain(..scenario_i));
1155            if self.cur_step == step_i {
1156                self.cur_step += 1;
1157            } else {
1158                self.cur_step = step_i;
1159            }
1160        }
1161    }
1162}