462
crates/sq-server/tests/capnp_stress_test.rs
Normal file
462
crates/sq-server/tests/capnp_stress_test.rs
Normal file
@@ -0,0 +1,462 @@
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use sq_cluster::membership::{Membership, MembershipConfig};
|
||||
use sq_grpc_interface::{
|
||||
cluster_service_server::ClusterServiceServer,
|
||||
control_plane_service_server::ControlPlaneServiceServer,
|
||||
data_plane_service_server::DataPlaneServiceServer,
|
||||
status_service_client::StatusServiceClient,
|
||||
status_service_server::StatusServiceServer,
|
||||
GetStatusRequest, SubscribeRequest,
|
||||
};
|
||||
use sq_grpc_interface::data_plane_service_client::DataPlaneServiceClient;
|
||||
use sq_sdk::{
|
||||
Consumer, ConsumerConfig, Producer, ProducerConfig,
|
||||
GrpcProducer, GrpcProducerConfig, ProducerMessage,
|
||||
};
|
||||
use sq_server::capnp::CapnpServer;
|
||||
use sq_server::grpc::{cluster, control_plane, data_plane, health};
|
||||
use sq_server::state::{Config, State};
|
||||
use tempfile::TempDir;
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test harness — extends TestCluster to include capnp server alongside gRPC
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// One node of the in-process test cluster: a tonic gRPC server and a
/// Cap'n Proto server sharing one `State`, plus the handles needed to
/// shut everything down and keep its resources alive.
struct TestNode {
    // Address the tonic gRPC server listens on.
    grpc_addr: SocketAddr,
    // Address the CapnpServer is expected to (re)bind to.
    capnp_addr: SocketAddr,
    // Cancelling this token shuts down both the gRPC and capnp server tasks.
    cancel: CancellationToken,
    // Cancelling this token stops the storage pipeline task.
    pipeline_cancel: CancellationToken,
    // Held so the node's data directory outlives the test.
    _temp_dir: TempDir,
    // Held so the spawned server tasks are not silently forgotten.
    _server_handle: tokio::task::JoinHandle<()>,
    _capnp_handle: tokio::task::JoinHandle<()>,
}
|
||||
|
||||
impl TestNode {
|
||||
fn grpc_endpoint(&self) -> String {
|
||||
format!("http://{}", self.grpc_addr)
|
||||
}
|
||||
|
||||
fn capnp_endpoint(&self) -> String {
|
||||
self.capnp_addr.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// A set of in-process nodes; shut down via the `Drop` impl below.
struct TestCluster {
    // Index order matches the order the nodes were started in.
    nodes: Vec<TestNode>,
}
|
||||
|
||||
impl TestCluster {
|
||||
async fn start(n: usize) -> Self {
|
||||
let mut grpc_listeners = Vec::new();
|
||||
let mut capnp_listeners = Vec::new();
|
||||
let mut grpc_addrs = Vec::new();
|
||||
let mut capnp_addrs = Vec::new();
|
||||
|
||||
for _ in 0..n {
|
||||
let grpc_listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let capnp_listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
grpc_addrs.push(grpc_listener.local_addr().unwrap());
|
||||
capnp_addrs.push(capnp_listener.local_addr().unwrap());
|
||||
grpc_listeners.push(grpc_listener);
|
||||
capnp_listeners.push(capnp_listener);
|
||||
}
|
||||
|
||||
let mut nodes = Vec::new();
|
||||
for (i, (grpc_listener, capnp_listener)) in
|
||||
grpc_listeners.into_iter().zip(capnp_listeners).enumerate()
|
||||
{
|
||||
let grpc_addr = grpc_addrs[i];
|
||||
let capnp_addr = capnp_addrs[i];
|
||||
let node_id = format!("capnp-stress-node-{}", i + 1);
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
|
||||
let seeds: Vec<String> = grpc_addrs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(j, _)| *j != i)
|
||||
.map(|(_, a)| a.to_string())
|
||||
.collect();
|
||||
|
||||
let config = Config {
|
||||
node_id: node_id.clone(),
|
||||
data_dir: temp_dir.path().to_path_buf(),
|
||||
seeds: seeds.clone(),
|
||||
grpc_address: grpc_addr.to_string(),
|
||||
cluster_id: "test-cluster".to_string(),
|
||||
s3_bucket: None,
|
||||
s3_endpoint: None,
|
||||
s3_region: None,
|
||||
sync_policy: sq_models::SyncPolicy::EveryBatch,
|
||||
};
|
||||
|
||||
let (state, mut pipeline) = State::new(config).unwrap();
|
||||
|
||||
let pipeline_cancel = CancellationToken::new();
|
||||
let pipeline_cancel_clone = pipeline_cancel.clone();
|
||||
tokio::spawn(async move {
|
||||
tokio::select! {
|
||||
() = pipeline.run() => {}
|
||||
() = pipeline_cancel_clone.cancelled() => {}
|
||||
}
|
||||
});
|
||||
|
||||
let membership = Arc::new(Membership::new(MembershipConfig {
|
||||
node_id: node_id.clone(),
|
||||
address: grpc_addr.to_string(),
|
||||
seeds,
|
||||
..Default::default()
|
||||
}));
|
||||
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
// Spawn gRPC server.
|
||||
let cancel_clone = cancel.clone();
|
||||
let state_clone = state.clone();
|
||||
let membership_clone = membership.clone();
|
||||
let incoming = tokio_stream::wrappers::TcpListenerStream::new(grpc_listener);
|
||||
let server_handle = tokio::spawn(async move {
|
||||
tonic::transport::Server::builder()
|
||||
.add_service(StatusServiceServer::new(health::HealthServer {
|
||||
state: state_clone.clone(),
|
||||
}))
|
||||
.add_service(DataPlaneServiceServer::new(data_plane::DataPlaneServer {
|
||||
state: state_clone.clone(),
|
||||
}))
|
||||
.add_service(ControlPlaneServiceServer::new(
|
||||
control_plane::ControlPlaneServer {
|
||||
state: state_clone.clone(),
|
||||
},
|
||||
))
|
||||
.add_service(ClusterServiceServer::new(cluster::ClusterServer {
|
||||
state: state_clone,
|
||||
membership: membership_clone,
|
||||
}))
|
||||
.serve_with_incoming_shutdown(incoming, async move {
|
||||
cancel_clone.cancelled().await;
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
});
|
||||
|
||||
// Spawn capnp server — use the CapnpServer Component's run method directly.
|
||||
let cancel_clone = cancel.clone();
|
||||
let capnp_state = state.clone();
|
||||
let capnp_handle = tokio::spawn(async move {
|
||||
let server = CapnpServer {
|
||||
host: capnp_addr,
|
||||
state: capnp_state,
|
||||
};
|
||||
// We can't use the TcpListener we already bound because CapnpServer binds its own.
|
||||
// Instead, drop the listener and let CapnpServer rebind.
|
||||
drop(capnp_listener);
|
||||
let _ = notmad::Component::run(&server, cancel_clone).await;
|
||||
});
|
||||
|
||||
nodes.push(TestNode {
|
||||
grpc_addr,
|
||||
capnp_addr,
|
||||
cancel,
|
||||
pipeline_cancel,
|
||||
_temp_dir: temp_dir,
|
||||
_server_handle: server_handle,
|
||||
_capnp_handle: capnp_handle,
|
||||
});
|
||||
}
|
||||
|
||||
// Wait for gRPC to be ready.
|
||||
for node in &nodes {
|
||||
wait_for_ready(&node.grpc_endpoint()).await;
|
||||
}
|
||||
|
||||
// Give capnp server a moment to bind.
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
|
||||
TestCluster { nodes }
|
||||
}
|
||||
|
||||
fn node(&self, index: usize) -> &TestNode {
|
||||
&self.nodes[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for TestCluster {
|
||||
fn drop(&mut self) {
|
||||
for node in &self.nodes {
|
||||
node.pipeline_cancel.cancel();
|
||||
node.cancel.cancel();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_ready(endpoint: &str) {
|
||||
let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_secs(5);
|
||||
loop {
|
||||
if tokio::time::Instant::now() > deadline {
|
||||
panic!("Server at {} did not become ready in time", endpoint);
|
||||
}
|
||||
if let Ok(mut client) = StatusServiceClient::connect(endpoint.to_string()).await {
|
||||
if client
|
||||
.status(tonic::Request::new(GetStatusRequest {}))
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Capnp stress test 1: Single producer — 100K messages via capnp
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::test]
|
||||
async fn capnp_stress_single_producer_100k() {
|
||||
let cluster = TestCluster::start(1).await;
|
||||
let capnp_ep = cluster.node(0).capnp_endpoint();
|
||||
|
||||
let mut producer = Producer::connect(ProducerConfig {
|
||||
address: capnp_ep,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let total = 100_000u64;
|
||||
let batch_size = 500;
|
||||
let payload = vec![0u8; 128];
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
for batch_start in (0..total).step_by(batch_size) {
|
||||
let batch_end = (batch_start + batch_size as u64).min(total);
|
||||
let batch: Vec<ProducerMessage> = (batch_start..batch_end)
|
||||
.map(|_| ProducerMessage::new("capnp-stress-topic", payload.clone()))
|
||||
.collect();
|
||||
producer.send_batch(batch).await.unwrap();
|
||||
}
|
||||
|
||||
let publish_duration = start.elapsed();
|
||||
let msgs_per_sec = total as f64 / publish_duration.as_secs_f64();
|
||||
|
||||
eprintln!(
|
||||
"capnp_stress_single_producer_100k: published {} messages in {:.2}s ({:.0} msg/s, {:.1} MB/s)",
|
||||
total,
|
||||
publish_duration.as_secs_f64(),
|
||||
msgs_per_sec,
|
||||
(total as f64 * 128.0) / (1024.0 * 1024.0) / publish_duration.as_secs_f64()
|
||||
);
|
||||
|
||||
// Verify: read back via gRPC subscribe (capnp subscribe is streaming-only).
|
||||
let grpc_ep = cluster.node(0).grpc_endpoint();
|
||||
let mut client = DataPlaneServiceClient::connect(grpc_ep).await.unwrap();
|
||||
let response = client
|
||||
.subscribe(tonic::Request::new(SubscribeRequest {
|
||||
topic: "capnp-stress-topic".to_string(),
|
||||
partition: 0,
|
||||
consumer_group: String::new(),
|
||||
start_offset: Some(0),
|
||||
max_batch_size: 1000,
|
||||
}))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut stream = response.into_inner();
|
||||
let mut consumed = 0u64;
|
||||
while consumed < total {
|
||||
match tokio::time::timeout(Duration::from_secs(10), stream.next()).await {
|
||||
Ok(Some(Ok(batch))) => consumed += batch.messages.len() as u64,
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(consumed, total, "expected all messages to be consumed");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Capnp stress test 2: Concurrent producers — 10 producers, 10K messages each
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::test]
|
||||
async fn capnp_stress_concurrent_producers() {
|
||||
let cluster = TestCluster::start(1).await;
|
||||
let capnp_ep = cluster.node(0).capnp_endpoint();
|
||||
|
||||
let num_producers = 10;
|
||||
let msgs_per_producer = 10_000u64;
|
||||
let payload = vec![0u8; 64];
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
let mut handles = Vec::new();
|
||||
for p in 0..num_producers {
|
||||
let ep = capnp_ep.clone();
|
||||
let pl = payload.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
let mut producer = Producer::connect(ProducerConfig {
|
||||
address: ep,
|
||||
producer_id: format!("capnp-producer-{p}"),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let topic = format!("capnp-concurrent-{p}");
|
||||
for batch_start in (0..msgs_per_producer).step_by(100) {
|
||||
let batch_end = (batch_start + 100).min(msgs_per_producer);
|
||||
let batch: Vec<ProducerMessage> = (batch_start..batch_end)
|
||||
.map(|_| ProducerMessage::new(topic.clone(), pl.clone()))
|
||||
.collect();
|
||||
producer.send_batch(batch).await.unwrap();
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
for handle in handles {
|
||||
handle.await.unwrap();
|
||||
}
|
||||
|
||||
let duration = start.elapsed();
|
||||
let total = num_producers as u64 * msgs_per_producer;
|
||||
let msgs_per_sec = total as f64 / duration.as_secs_f64();
|
||||
|
||||
eprintln!(
|
||||
"capnp_stress_concurrent_producers: {} producers x {} msgs = {} total in {:.2}s ({:.0} msg/s)",
|
||||
num_producers,
|
||||
msgs_per_producer,
|
||||
total,
|
||||
duration.as_secs_f64(),
|
||||
msgs_per_sec
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Capnp stress test 3: Subscribe via capnp — publish then consume
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[tokio::test]
|
||||
async fn capnp_stress_subscribe() {
|
||||
let cluster = TestCluster::start(1).await;
|
||||
let capnp_ep = cluster.node(0).capnp_endpoint();
|
||||
let total = 10_000u64;
|
||||
let payload = vec![0u8; 64];
|
||||
|
||||
// Publish via capnp.
|
||||
let mut producer = Producer::connect(ProducerConfig {
|
||||
address: capnp_ep.clone(),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
for batch_start in (0..total).step_by(500) {
|
||||
let batch_end = (batch_start + 500).min(total);
|
||||
let batch: Vec<ProducerMessage> = (batch_start..batch_end)
|
||||
.map(|_| ProducerMessage::new("capnp-sub-topic", payload.clone()))
|
||||
.collect();
|
||||
producer.send_batch(batch).await.unwrap();
|
||||
}
|
||||
|
||||
// Consume via capnp.
|
||||
let mut consumer = Consumer::connect(ConsumerConfig {
|
||||
address: capnp_ep,
|
||||
topic: "capnp-sub-topic".to_string(),
|
||||
consumer_group: String::new(),
|
||||
auto_commit: false,
|
||||
start_offset: Some(0),
|
||||
max_poll_records: 1000,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut consumed = 0u64;
|
||||
let start = Instant::now();
|
||||
|
||||
while consumed < total {
|
||||
match tokio::time::timeout(Duration::from_secs(10), consumer.poll()).await {
|
||||
Ok(Ok(msgs)) => consumed += msgs.len() as u64,
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
let consume_duration = start.elapsed();
|
||||
eprintln!(
|
||||
"capnp_stress_subscribe: consumed {} messages in {:.2}s ({:.0} msg/s)",
|
||||
consumed,
|
||||
consume_duration.as_secs_f64(),
|
||||
consumed as f64 / consume_duration.as_secs_f64()
|
||||
);
|
||||
|
||||
assert_eq!(consumed, total, "expected all messages to be consumed");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Throughput comparison: gRPC vs capnp
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
async fn bench_grpc_publish(cluster: &TestCluster, total: u64, batch_size: usize) -> f64 {
|
||||
let endpoint = cluster.node(0).grpc_endpoint();
|
||||
let mut producer = GrpcProducer::connect(GrpcProducerConfig {
|
||||
address: endpoint,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let payload = vec![0u8; 128];
|
||||
let start = Instant::now();
|
||||
|
||||
for batch_start in (0..total).step_by(batch_size) {
|
||||
let batch_end = (batch_start + batch_size as u64).min(total);
|
||||
let batch: Vec<ProducerMessage> = (batch_start..batch_end)
|
||||
.map(|_| ProducerMessage::new("bench-grpc", payload.clone()))
|
||||
.collect();
|
||||
producer.send_batch(batch).await.unwrap();
|
||||
}
|
||||
|
||||
total as f64 / start.elapsed().as_secs_f64()
|
||||
}
|
||||
|
||||
async fn bench_capnp_publish(cluster: &TestCluster, total: u64, batch_size: usize) -> f64 {
|
||||
let endpoint = cluster.node(0).capnp_endpoint();
|
||||
let mut producer = Producer::connect(ProducerConfig {
|
||||
address: endpoint,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let payload = vec![0u8; 128];
|
||||
let start = Instant::now();
|
||||
|
||||
for batch_start in (0..total).step_by(batch_size) {
|
||||
let batch_end = (batch_start + batch_size as u64).min(total);
|
||||
let batch: Vec<ProducerMessage> = (batch_start..batch_end)
|
||||
.map(|_| ProducerMessage::new("bench-capnp", payload.clone()))
|
||||
.collect();
|
||||
producer.send_batch(batch).await.unwrap();
|
||||
}
|
||||
|
||||
total as f64 / start.elapsed().as_secs_f64()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn capnp_vs_grpc_throughput() {
|
||||
let cluster = TestCluster::start(1).await;
|
||||
|
||||
let grpc_rate = bench_grpc_publish(&cluster, 100_000, 500).await;
|
||||
let capnp_rate = bench_capnp_publish(&cluster, 100_000, 500).await;
|
||||
|
||||
eprintln!("=== THROUGHPUT COMPARISON (single producer, 100K msgs x 128B) ===");
|
||||
eprintln!("gRPC: {:.0} msg/s", grpc_rate);
|
||||
eprintln!("capnp: {:.0} msg/s", capnp_rate);
|
||||
eprintln!("ratio: {:.2}x", capnp_rate / grpc_rate);
|
||||
}
|
||||
Reference in New Issue
Block a user