use std::net::SocketAddr;
use std::sync::Arc;
use std::time::{Duration, Instant};

use sq_cluster::membership::{Membership, MembershipConfig};
use sq_grpc_interface::{
    cluster_service_server::ClusterServiceServer,
    control_plane_service_server::ControlPlaneServiceServer,
    data_plane_service_server::DataPlaneServiceServer,
    status_service_client::StatusServiceClient, status_service_server::StatusServiceServer,
    GetStatusRequest, SubscribeRequest,
};
use sq_grpc_interface::data_plane_service_client::DataPlaneServiceClient;
use sq_sdk::{
    Consumer, ConsumerConfig, Producer, ProducerConfig, GrpcProducer, GrpcProducerConfig,
    ProducerMessage,
};
use sq_server::capnp::CapnpServer;
use sq_server::grpc::{cluster, control_plane, data_plane, health};
use sq_server::state::{Config, State};
use tempfile::TempDir;
use tokio_stream::StreamExt;
use tokio_util::sync::CancellationToken;

// ---------------------------------------------------------------------------
// Test harness: extends the TestCluster pattern to run a capnp server
// alongside the gRPC server on every node.
// ---------------------------------------------------------------------------

struct TestNode {
    grpc_addr: SocketAddr,
    capnp_addr: SocketAddr,
    cancel: CancellationToken,
    pipeline_cancel: CancellationToken,
    _temp_dir: TempDir,
    _server_handle: tokio::task::JoinHandle<()>,
    _capnp_handle: tokio::task::JoinHandle<()>,
}

impl TestNode {
    fn grpc_endpoint(&self) -> String {
        format!("http://{}", self.grpc_addr)
    }

    fn capnp_endpoint(&self) -> String {
        self.capnp_addr.to_string()
    }
}

struct TestCluster {
    nodes: Vec<TestNode>,
}

impl TestCluster {
    async fn start(n: usize) -> Self {
        // Bind all listeners up front so every node can list its peers as seeds.
        let mut grpc_listeners = Vec::new();
        let mut capnp_listeners = Vec::new();
        let mut grpc_addrs = Vec::new();
        let mut capnp_addrs = Vec::new();
        for _ in 0..n {
            let grpc_listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
            let capnp_listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
            grpc_addrs.push(grpc_listener.local_addr().unwrap());
            capnp_addrs.push(capnp_listener.local_addr().unwrap());
            grpc_listeners.push(grpc_listener);
            capnp_listeners.push(capnp_listener);
        }

        let mut nodes = Vec::new();
        for (i, (grpc_listener, capnp_listener)) in
            grpc_listeners.into_iter().zip(capnp_listeners).enumerate()
        {
            let grpc_addr = grpc_addrs[i];
            let capnp_addr = capnp_addrs[i];
            let node_id = format!("capnp-stress-node-{}", i + 1);
            let temp_dir = TempDir::new().unwrap();
            let seeds: Vec<String> = grpc_addrs
                .iter()
                .enumerate()
                .filter(|(j, _)| *j != i)
                .map(|(_, a)| a.to_string())
                .collect();

            let config = Config {
                node_id: node_id.clone(),
                data_dir: temp_dir.path().to_path_buf(),
                seeds: seeds.clone(),
                grpc_address: grpc_addr.to_string(),
                cluster_id: "test-cluster".to_string(),
                s3_bucket: None,
                s3_endpoint: None,
                s3_region: None,
                sync_policy: sq_models::SyncPolicy::EveryBatch,
            };

            let (state, mut pipeline) = State::new(config).unwrap();

            let pipeline_cancel = CancellationToken::new();
            let pipeline_cancel_clone = pipeline_cancel.clone();
            tokio::spawn(async move {
                tokio::select! {
                    () = pipeline.run() => {}
                    () = pipeline_cancel_clone.cancelled() => {}
                }
            });

            let membership = Arc::new(Membership::new(MembershipConfig {
                node_id: node_id.clone(),
                address: grpc_addr.to_string(),
                seeds,
                ..Default::default()
            }));

            let cancel = CancellationToken::new();

            // Spawn gRPC server.
            let cancel_clone = cancel.clone();
            let state_clone = state.clone();
            let membership_clone = membership.clone();
            let incoming = tokio_stream::wrappers::TcpListenerStream::new(grpc_listener);
            let server_handle = tokio::spawn(async move {
                tonic::transport::Server::builder()
                    .add_service(StatusServiceServer::new(health::HealthServer {
                        state: state_clone.clone(),
                    }))
                    .add_service(DataPlaneServiceServer::new(data_plane::DataPlaneServer {
                        state: state_clone.clone(),
                    }))
                    .add_service(ControlPlaneServiceServer::new(
                        control_plane::ControlPlaneServer {
                            state: state_clone.clone(),
                        },
                    ))
                    .add_service(ClusterServiceServer::new(cluster::ClusterServer {
                        state: state_clone,
                        membership: membership_clone,
                    }))
                    .serve_with_incoming_shutdown(incoming, async move {
                        cancel_clone.cancelled().await;
                    })
                    .await
                    .unwrap();
            });

            // Spawn the capnp server using the CapnpServer Component's run method directly.
            let cancel_clone = cancel.clone();
            let capnp_state = state.clone();
            let capnp_handle = tokio::spawn(async move {
                let server = CapnpServer {
                    host: capnp_addr,
                    state: capnp_state,
                };
                // We can't hand CapnpServer the TcpListener we already bound because it
                // binds its own socket. Drop the listener and let CapnpServer rebind the
                // same address.
                drop(capnp_listener);
                let _ = notmad::Component::run(&server, cancel_clone).await;
            });

            nodes.push(TestNode {
                grpc_addr,
                capnp_addr,
                cancel,
                pipeline_cancel,
                _temp_dir: temp_dir,
                _server_handle: server_handle,
                _capnp_handle: capnp_handle,
            });
        }

        // Wait for gRPC to be ready.
        for node in &nodes {
            wait_for_ready(&node.grpc_endpoint()).await;
        }
        // Give the capnp server a moment to bind.
        tokio::time::sleep(Duration::from_millis(50)).await;

        TestCluster { nodes }
    }

    fn node(&self, index: usize) -> &TestNode {
        &self.nodes[index]
    }
}

impl Drop for TestCluster {
    fn drop(&mut self) {
        for node in &self.nodes {
            node.pipeline_cancel.cancel();
            node.cancel.cancel();
        }
    }
}

async fn wait_for_ready(endpoint: &str) {
    let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_secs(5);
    loop {
        if tokio::time::Instant::now() > deadline {
            panic!("Server at {} did not become ready in time", endpoint);
        }
        if let Ok(mut client) = StatusServiceClient::connect(endpoint.to_string()).await {
            if client
                .status(tonic::Request::new(GetStatusRequest {}))
                .await
                .is_ok()
            {
                return;
            }
        }
        tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
    }
}

// ---------------------------------------------------------------------------
// Capnp stress test 1: single producer, 100K messages via capnp
// ---------------------------------------------------------------------------

#[tokio::test]
async fn capnp_stress_single_producer_100k() {
    let cluster = TestCluster::start(1).await;
    let capnp_ep = cluster.node(0).capnp_endpoint();

    let mut producer = Producer::connect(ProducerConfig {
        address: capnp_ep,
        ..Default::default()
    })
    .await
    .unwrap();

    let total = 100_000u64;
    let batch_size = 500;
    let payload = vec![0u8; 128];

    let start = Instant::now();
    for batch_start in (0..total).step_by(batch_size) {
        let batch_end = (batch_start + batch_size as u64).min(total);
        let batch: Vec<ProducerMessage> = (batch_start..batch_end)
            .map(|_| ProducerMessage::new("capnp-stress-topic", payload.clone()))
            .collect();
        producer.send_batch(batch).await.unwrap();
    }
    let publish_duration = start.elapsed();
    let msgs_per_sec = total as f64 / publish_duration.as_secs_f64();
    eprintln!(
        "capnp_stress_single_producer_100k: published {} messages in {:.2}s ({:.0} msg/s, {:.1} MB/s)",
        total,
        publish_duration.as_secs_f64(),
        msgs_per_sec,
        (total as f64 * 128.0) / (1024.0 * 1024.0) / publish_duration.as_secs_f64()
    );

    // Verify: read back via gRPC subscribe (capnp subscribe is streaming-only).
    let grpc_ep = cluster.node(0).grpc_endpoint();
    let mut client = DataPlaneServiceClient::connect(grpc_ep).await.unwrap();
    let response = client
        .subscribe(tonic::Request::new(SubscribeRequest {
            topic: "capnp-stress-topic".to_string(),
            partition: 0,
            consumer_group: String::new(),
            start_offset: Some(0),
            max_batch_size: 1000,
        }))
        .await
        .unwrap();
    let mut stream = response.into_inner();
    let mut consumed = 0u64;
    while consumed < total {
        match tokio::time::timeout(Duration::from_secs(10), stream.next()).await {
            Ok(Some(Ok(batch))) => consumed += batch.messages.len() as u64,
            _ => break,
        }
    }
    assert_eq!(consumed, total, "expected all messages to be consumed");
}

// ---------------------------------------------------------------------------
// Capnp stress test 2: concurrent producers (10 producers, 10K messages each)
// ---------------------------------------------------------------------------

#[tokio::test]
async fn capnp_stress_concurrent_producers() {
    let cluster = TestCluster::start(1).await;
    let capnp_ep = cluster.node(0).capnp_endpoint();

    let num_producers = 10;
    let msgs_per_producer = 10_000u64;
    let payload = vec![0u8; 64];

    let start = Instant::now();
    let mut handles = Vec::new();
    for p in 0..num_producers {
        let ep = capnp_ep.clone();
        let pl = payload.clone();
        handles.push(tokio::spawn(async move {
            let mut producer = Producer::connect(ProducerConfig {
                address: ep,
                producer_id: format!("capnp-producer-{p}"),
                ..Default::default()
            })
            .await
            .unwrap();

            // Each producer writes to its own topic.
            let topic = format!("capnp-concurrent-{p}");
            for batch_start in (0..msgs_per_producer).step_by(100) {
                let batch_end = (batch_start + 100).min(msgs_per_producer);
                let batch: Vec<ProducerMessage> = (batch_start..batch_end)
                    .map(|_| ProducerMessage::new(topic.clone(), pl.clone()))
                    .collect();
                producer.send_batch(batch).await.unwrap();
            }
        }));
    }

    for handle in handles {
        handle.await.unwrap();
    }
    let duration = start.elapsed();
    let total = num_producers as u64 * msgs_per_producer;
    let msgs_per_sec = total as f64 / duration.as_secs_f64();
    eprintln!(
        "capnp_stress_concurrent_producers: {} producers x {} msgs = {} total in {:.2}s ({:.0} msg/s)",
        num_producers,
        msgs_per_producer,
        total,
        duration.as_secs_f64(),
        msgs_per_sec
    );
}

// ---------------------------------------------------------------------------
// Capnp stress test 3: subscribe via capnp (publish then consume)
// ---------------------------------------------------------------------------

#[tokio::test]
async fn capnp_stress_subscribe() {
    let cluster = TestCluster::start(1).await;
    let capnp_ep = cluster.node(0).capnp_endpoint();

    let total = 10_000u64;
    let payload = vec![0u8; 64];

    // Publish via capnp.
    let mut producer = Producer::connect(ProducerConfig {
        address: capnp_ep.clone(),
        ..Default::default()
    })
    .await
    .unwrap();

    for batch_start in (0..total).step_by(500) {
        let batch_end = (batch_start + 500).min(total);
        let batch: Vec<ProducerMessage> = (batch_start..batch_end)
            .map(|_| ProducerMessage::new("capnp-sub-topic", payload.clone()))
            .collect();
        producer.send_batch(batch).await.unwrap();
    }

    // Consume via capnp.
    let mut consumer = Consumer::connect(ConsumerConfig {
        address: capnp_ep,
        topic: "capnp-sub-topic".to_string(),
        consumer_group: String::new(),
        auto_commit: false,
        start_offset: Some(0),
        max_poll_records: 1000,
        ..Default::default()
    })
    .await
    .unwrap();

    let mut consumed = 0u64;
    let start = Instant::now();
    while consumed < total {
        match tokio::time::timeout(Duration::from_secs(10), consumer.poll()).await {
            Ok(Ok(msgs)) => consumed += msgs.len() as u64,
            _ => break,
        }
    }
    let consume_duration = start.elapsed();
    eprintln!(
        "capnp_stress_subscribe: consumed {} messages in {:.2}s ({:.0} msg/s)",
        consumed,
        consume_duration.as_secs_f64(),
        consumed as f64 / consume_duration.as_secs_f64()
    );
    assert_eq!(consumed, total, "expected all messages to be consumed");
}

// ---------------------------------------------------------------------------
// Throughput comparison: gRPC vs capnp
// ---------------------------------------------------------------------------

async fn bench_grpc_publish(cluster: &TestCluster, total: u64, batch_size: usize) -> f64 {
    let endpoint = cluster.node(0).grpc_endpoint();
    let mut producer = GrpcProducer::connect(GrpcProducerConfig {
        address: endpoint,
        ..Default::default()
    })
    .await
    .unwrap();

    let payload = vec![0u8; 128];
    let start = Instant::now();
    for batch_start in (0..total).step_by(batch_size) {
        let batch_end = (batch_start + batch_size as u64).min(total);
        let batch: Vec<ProducerMessage> = (batch_start..batch_end)
            .map(|_| ProducerMessage::new("bench-grpc", payload.clone()))
            .collect();
        producer.send_batch(batch).await.unwrap();
    }
    total as f64 / start.elapsed().as_secs_f64()
}

async fn bench_capnp_publish(cluster: &TestCluster, total: u64, batch_size: usize) -> f64 {
    let endpoint = cluster.node(0).capnp_endpoint();
    let mut producer = Producer::connect(ProducerConfig {
        address: endpoint,
        ..Default::default()
    })
    .await
    .unwrap();

    let payload = vec![0u8; 128];
    let start = Instant::now();
    for batch_start in (0..total).step_by(batch_size) {
        let batch_end = (batch_start + batch_size as u64).min(total);
        let batch: Vec<ProducerMessage> = (batch_start..batch_end)
            .map(|_| ProducerMessage::new("bench-capnp", payload.clone()))
            .collect();
        producer.send_batch(batch).await.unwrap();
    }
    total as f64 / start.elapsed().as_secs_f64()
}

#[tokio::test]
async fn capnp_vs_grpc_throughput() {
    let cluster = TestCluster::start(1).await;

    // Run the same publish workload over both transports and compare rates.
    let grpc_rate = bench_grpc_publish(&cluster, 100_000, 500).await;
    let capnp_rate = bench_capnp_publish(&cluster, 100_000, 500).await;

    eprintln!("=== THROUGHPUT COMPARISON (single producer, 100K msgs x 128B) ===");
    eprintln!("gRPC:  {:.0} msg/s", grpc_rate);
    eprintln!("capnp: {:.0} msg/s", capnp_rate);
    eprintln!("ratio: {:.2}x", capnp_rate / grpc_rate);
}