feat: add capnp

Signed-off-by: kjuulh <contact@kjuulh.io>
2026-02-27 12:15:35 +01:00
parent 3162971c89
commit 749ae245c7
115 changed files with 16596 additions and 31 deletions

@@ -0,0 +1,763 @@
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use sq_cluster::membership::{Membership, MembershipConfig};
use sq_grpc_interface::{
cluster_service_client::ClusterServiceClient,
cluster_service_server::ClusterServiceServer,
control_plane_service_client::ControlPlaneServiceClient,
control_plane_service_server::ControlPlaneServiceServer,
data_plane_service_client::DataPlaneServiceClient,
data_plane_service_server::DataPlaneServiceServer,
status_service_client::StatusServiceClient,
status_service_server::StatusServiceServer,
ClusterNodeInfo, CreateTopicRequest, DeleteTopicRequest, DescribeTopicRequest,
FetchSegmentRequest, GetStatusRequest, HeartbeatRequest, JoinRequest, ListTopicsRequest,
ReplicateEntriesRequest, SubscribeRequest,
};
use sq_sdk::{GrpcConsumer, GrpcConsumerConfig, GrpcProducer, GrpcProducerConfig};
use sq_server::grpc::{cluster, control_plane, data_plane, health};
use sq_server::state::{Config, State};
use tempfile::TempDir;
use tokio_stream::StreamExt;
use tokio_util::sync::CancellationToken;

// ---------------------------------------------------------------------------
// Test harness
// ---------------------------------------------------------------------------
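/// A single running node: its bound address, shared state, membership handle, and
/// the cancellation tokens that stop its gRPC server and write pipeline.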
struct TestNode {
addr: SocketAddr,
#[allow(dead_code)]
node_id: String,
#[allow(dead_code)]
state: State,
membership: Arc<Membership>,
cancel: CancellationToken,
pipeline_cancel: CancellationToken,
_temp_dir: TempDir,
_server_handle: tokio::task::JoinHandle<()>,
}
impl TestNode {
fn endpoint(&self) -> String {
format!("http://{}", self.addr)
}
}
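/// An in-process cluster of nodes listening on random loopback ports; all nodes
/// are shut down when the cluster is dropped.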
struct TestCluster {
nodes: Vec<TestNode>,
}
impl TestCluster {
/// Start a cluster of `n` real SQ server nodes on random ports.
async fn start(n: usize) -> Self {
        // Phase 1: Bind every listener up front so each node can be seeded with
        // all of its peers' addresses before any server starts.
let mut listeners = Vec::new();
let mut addrs = Vec::new();
for _ in 0..n {
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap();
addrs.push(addr);
listeners.push(listener);
}
// Phase 2: Start each node.
let mut nodes = Vec::new();
for (i, listener) in listeners.into_iter().enumerate() {
let addr = addrs[i];
let node_id = format!("node-{}", i + 1);
let temp_dir = TempDir::new().unwrap();
// Seeds: all addresses except our own.
let seeds: Vec<String> = addrs
.iter()
.enumerate()
.filter(|(j, _)| *j != i)
.map(|(_, a)| a.to_string())
.collect();
let config = Config {
node_id: node_id.clone(),
data_dir: temp_dir.path().to_path_buf(),
seeds: seeds.clone(),
grpc_address: addr.to_string(),
cluster_id: "test-cluster".to_string(),
s3_bucket: None,
s3_endpoint: None,
s3_region: None,
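                // Flush after every write batch; the most conservative sync policy.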
sync_policy: sq_models::SyncPolicy::EveryBatch,
};
let (state, mut pipeline) = State::new(config).unwrap();
// Spawn the write pipeline for this node.
let pipeline_cancel = CancellationToken::new();
let pipeline_cancel_clone = pipeline_cancel.clone();
tokio::spawn(async move {
tokio::select! {
() = pipeline.run() => {}
() = pipeline_cancel_clone.cancelled() => {}
}
});
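            // Gossip membership for this node, seeded with every peer's address.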
let membership = Arc::new(Membership::new(MembershipConfig {
node_id: node_id.clone(),
address: addr.to_string(),
seeds,
..Default::default()
}));
let cancel = CancellationToken::new();
let cancel_clone = cancel.clone();
let state_clone = state.clone();
let membership_clone = membership.clone();
let incoming = tokio_stream::wrappers::TcpListenerStream::new(listener);
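            // Serve all four gRPC services on the pre-bound listener until the
            // cancellation token fires.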
let server_handle = tokio::spawn(async move {
tonic::transport::Server::builder()
.add_service(StatusServiceServer::new(health::HealthServer {
state: state_clone.clone(),
}))
.add_service(DataPlaneServiceServer::new(data_plane::DataPlaneServer {
state: state_clone.clone(),
}))
.add_service(ControlPlaneServiceServer::new(
control_plane::ControlPlaneServer {
state: state_clone.clone(),
},
))
.add_service(ClusterServiceServer::new(cluster::ClusterServer {
state: state_clone,
membership: membership_clone,
}))
.serve_with_incoming_shutdown(incoming, async move {
cancel_clone.cancelled().await;
})
.await
.unwrap();
});
nodes.push(TestNode {
addr,
node_id,
state,
membership,
cancel,
pipeline_cancel,
_temp_dir: temp_dir,
_server_handle: server_handle,
});
}
// Phase 3: Wait for all servers to be ready.
for node in &nodes {
wait_for_ready(&node.endpoint()).await;
}
TestCluster { nodes }
}
fn node(&self, index: usize) -> &TestNode {
&self.nodes[index]
}
}
impl Drop for TestCluster {
fn drop(&mut self) {
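        // Stop each node's write pipeline and gRPC server; temp dirs are removed
        // when the nodes are dropped.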
for node in &self.nodes {
node.pipeline_cancel.cancel();
node.cancel.cancel();
}
}
}
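
// A possible explicit teardown, sketched here but not used by the tests below:
// unlike `Drop`, awaiting the server task guarantees the port is fully released
// before the test returns. The `shutdown` name is ours, not an API of the
// harness or the server crates.
impl TestNode {
    #[allow(dead_code)]
    async fn shutdown(self) {
        self.pipeline_cancel.cancel();
        self.cancel.cancel();
        // The tonic server exits once its cancellation token fires.
        let _ = self._server_handle.await;
    }
}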
/// Poll the Status RPC until the server responds, panicking if it is not ready within 5s.
async fn wait_for_ready(endpoint: &str) {
    let deadline = tokio::time::Instant::now() + Duration::from_secs(5);
loop {
if tokio::time::Instant::now() > deadline {
panic!("Server at {} did not become ready in time", endpoint);
}
        if let Ok(mut client) = StatusServiceClient::connect(endpoint.to_string()).await {
            if client
                .status(tonic::Request::new(GetStatusRequest {}))
                .await
                .is_ok()
            {
                return;
            }
        }
        tokio::time::sleep(Duration::from_millis(10)).await;
}
}
/// Collect at least `expected_count` messages from a raw subscribe stream, or
/// fewer if the stream ends, errors, or stalls for 5s.
async fn collect_messages(
endpoint: &str,
topic: &str,
start_offset: u64,
expected_count: usize,
) -> Vec<sq_grpc_interface::ConsumedMessage> {
let mut client = DataPlaneServiceClient::connect(endpoint.to_string())
.await
.unwrap();
let response = client
.subscribe(tonic::Request::new(SubscribeRequest {
topic: topic.to_string(),
partition: 0,
consumer_group: String::new(),
start_offset: Some(start_offset),
max_batch_size: 200,
}))
.await
.unwrap();
let mut stream = response.into_inner();
let mut messages = Vec::new();
while messages.len() < expected_count {
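        // Give up on stream end, error, or a 5s stall; callers assert on the count.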
match tokio::time::timeout(Duration::from_secs(5), stream.next()).await {
Ok(Some(Ok(batch))) => messages.extend(batch.messages),
_ => break,
}
}
messages
}

// ---------------------------------------------------------------------------
// Test 1: Single node, 1000 messages via SDK
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_single_node_publish_consume_1000() {
let cluster = TestCluster::start(1).await;
let endpoint = cluster.node(0).endpoint();
// Publish 1000 messages via SDK Producer.
let mut producer = GrpcProducer::connect(GrpcProducerConfig {
address: endpoint.clone(),
..Default::default()
})
.await
.unwrap();
for i in 0..1000u64 {
let result = producer
.send("orders", None, format!("msg-{i}").as_bytes())
.await
.unwrap();
assert_eq!(result.offset, i);
assert_eq!(result.topic, "orders");
}
// Consume all 1000 via raw subscribe.
let messages = collect_messages(&endpoint, "orders", 0, 1000).await;
assert_eq!(messages.len(), 1000);
for (i, msg) in messages.iter().enumerate() {
assert_eq!(msg.offset, i as u64);
assert_eq!(msg.value, format!("msg-{i}").as_bytes());
}
}

// ---------------------------------------------------------------------------
// Test 2: Multi-topic isolation
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_multi_topic_isolation() {
let cluster = TestCluster::start(1).await;
let endpoint = cluster.node(0).endpoint();
let mut producer = GrpcProducer::connect(GrpcProducerConfig {
address: endpoint.clone(),
..Default::default()
})
.await
.unwrap();
let topics = ["alpha", "beta", "gamma"];
let counts: [usize; 3] = [50, 100, 25];
// Publish to each topic.
for (topic, count) in topics.iter().zip(counts.iter()) {
for i in 0..*count {
producer
.send(topic, None, format!("{topic}-{i}").as_bytes())
.await
.unwrap();
}
}
// Consume from each topic and verify isolation.
for (topic, expected_count) in topics.iter().zip(counts.iter()) {
let messages = collect_messages(&endpoint, topic, 0, *expected_count).await;
assert_eq!(
messages.len(),
*expected_count,
"topic {topic} expected {expected_count} messages, got {}",
messages.len()
);
for (i, msg) in messages.iter().enumerate() {
assert_eq!(msg.offset, i as u64);
assert_eq!(msg.value, format!("{topic}-{i}").as_bytes());
}
}
}

// ---------------------------------------------------------------------------
// Test 3: Consumer group offset resume
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_consumer_group_offset_resume() {
let cluster = TestCluster::start(1).await;
let endpoint = cluster.node(0).endpoint();
// Publish 20 messages.
let mut producer = GrpcProducer::connect(GrpcProducerConfig {
address: endpoint.clone(),
..Default::default()
})
.await
.unwrap();
for i in 0..20u64 {
producer
.send("events", None, format!("msg-{i}").as_bytes())
.await
.unwrap();
}
// Consumer 1: consume with auto_commit, collect at least 10 messages.
{
let mut consumer = GrpcConsumer::connect(GrpcConsumerConfig {
address: endpoint.clone(),
consumer_group: "test-group".to_string(),
topic: "events".to_string(),
auto_commit: true,
..Default::default()
})
.await
.unwrap();
let mut received = Vec::new();
let deadline = tokio::time::Instant::now() + Duration::from_secs(5);
while received.len() < 10 && tokio::time::Instant::now() < deadline {
let msgs = consumer.poll().await.unwrap();
if msgs.is_empty() {
tokio::time::sleep(Duration::from_millis(50)).await;
continue;
}
received.extend(msgs);
}
assert!(
received.len() >= 10,
"expected at least 10 messages, got {}",
received.len()
);
}
// Consumer 2: reconnect with same group, should resume from committed offset.
{
let mut consumer = GrpcConsumer::connect(GrpcConsumerConfig {
address: endpoint.clone(),
consumer_group: "test-group".to_string(),
topic: "events".to_string(),
auto_commit: false,
..Default::default()
})
.await
.unwrap();
let deadline = tokio::time::Instant::now() + Duration::from_secs(5);
let mut msgs = Vec::new();
while msgs.is_empty() && tokio::time::Instant::now() < deadline {
msgs = consumer.poll().await.unwrap();
if msgs.is_empty() {
tokio::time::sleep(Duration::from_millis(50)).await;
}
}
assert!(
!msgs.is_empty(),
"expected messages from resumed consumer"
);
        // auto_commit should have committed through the last polled message,
        // so the resumed consumer starts at offset >= 9.
assert!(
msgs[0].offset >= 9,
"expected resume from offset >= 9, got {}",
msgs[0].offset
);
}
}

// ---------------------------------------------------------------------------
// Test 4: Topic management CRUD
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_topic_management_crud() {
let cluster = TestCluster::start(1).await;
let endpoint = cluster.node(0).endpoint();
let mut client = ControlPlaneServiceClient::connect(endpoint.clone())
.await
.unwrap();
// Create topic.
let resp = client
.create_topic(tonic::Request::new(CreateTopicRequest {
name: "orders".to_string(),
partitions: 4,
replication_factor: 3,
}))
.await
.unwrap();
assert_eq!(resp.into_inner().name, "orders");
// Duplicate should fail.
let err = client
.create_topic(tonic::Request::new(CreateTopicRequest {
name: "orders".to_string(),
partitions: 4,
replication_factor: 3,
}))
.await
.unwrap_err();
assert_eq!(err.code(), tonic::Code::AlreadyExists);
// Create another.
client
.create_topic(tonic::Request::new(CreateTopicRequest {
name: "events".to_string(),
partitions: 1,
replication_factor: 1,
}))
.await
.unwrap();
// List topics.
let resp = client
.list_topics(tonic::Request::new(ListTopicsRequest {}))
.await
.unwrap();
let topics = resp.into_inner().topics;
assert_eq!(topics.len(), 2);
let names: Vec<&str> = topics.iter().map(|t| t.name.as_str()).collect();
assert!(names.contains(&"orders"));
assert!(names.contains(&"events"));
// Describe topic.
let resp = client
.describe_topic(tonic::Request::new(DescribeTopicRequest {
name: "orders".to_string(),
}))
.await
.unwrap()
.into_inner();
let topic = resp.topic.unwrap();
assert_eq!(topic.name, "orders");
assert_eq!(topic.partitions, 4);
assert_eq!(topic.replication_factor, 3);
assert_eq!(resp.partition_info.len(), 4);
// Describe non-existent topic.
let err = client
.describe_topic(tonic::Request::new(DescribeTopicRequest {
name: "nonexistent".to_string(),
}))
.await
.unwrap_err();
assert_eq!(err.code(), tonic::Code::NotFound);
// Delete topic.
client
.delete_topic(tonic::Request::new(DeleteTopicRequest {
name: "orders".to_string(),
}))
.await
.unwrap();
// Verify deleted.
let resp = client
.list_topics(tonic::Request::new(ListTopicsRequest {}))
.await
.unwrap();
assert_eq!(resp.into_inner().topics.len(), 1);
// Delete non-existent should fail.
let err = client
.delete_topic(tonic::Request::new(DeleteTopicRequest {
name: "orders".to_string(),
}))
.await
.unwrap_err();
assert_eq!(err.code(), tonic::Code::NotFound);
}

// ---------------------------------------------------------------------------
// Test 5: Three-node join discovery
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_three_node_join_discovery() {
let cluster = TestCluster::start(3).await;
// Node-2 joins node-1.
let mut client = ClusterServiceClient::connect(cluster.node(0).endpoint())
.await
.unwrap();
let resp = client
.join(tonic::Request::new(JoinRequest {
node_id: "node-2".to_string(),
address: cluster.nodes[1].addr.to_string(),
}))
.await
.unwrap();
let members = resp.into_inner().members;
assert!(
members.len() >= 2,
"after node-2 join, node-1 should know >= 2 members, got {}",
members.len()
);
let ids: Vec<&str> = members.iter().map(|m| m.node_id.as_str()).collect();
assert!(ids.contains(&"node-1"));
assert!(ids.contains(&"node-2"));
// Node-3 joins node-1.
let resp = client
.join(tonic::Request::new(JoinRequest {
node_id: "node-3".to_string(),
address: cluster.nodes[2].addr.to_string(),
}))
.await
.unwrap();
let members = resp.into_inner().members;
assert!(
members.len() >= 3,
"after node-3 join, node-1 should know >= 3 members, got {}",
members.len()
);
let ids: Vec<&str> = members.iter().map(|m| m.node_id.as_str()).collect();
assert!(ids.contains(&"node-1"));
assert!(ids.contains(&"node-2"));
assert!(ids.contains(&"node-3"));
// Verify via membership handle.
let all = cluster.node(0).membership.all_members().await;
assert_eq!(all.len(), 3);
}

// ---------------------------------------------------------------------------
// Test 6: Cross-node heartbeat gossip
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_cross_node_heartbeat_gossip() {
let cluster = TestCluster::start(3).await;
// Node-2 and node-3 join node-1.
let mut client1 = ClusterServiceClient::connect(cluster.node(0).endpoint())
.await
.unwrap();
client1
.join(tonic::Request::new(JoinRequest {
node_id: "node-2".to_string(),
address: cluster.nodes[1].addr.to_string(),
}))
.await
.unwrap();
client1
.join(tonic::Request::new(JoinRequest {
node_id: "node-3".to_string(),
address: cluster.nodes[2].addr.to_string(),
}))
.await
.unwrap();
// Node-1 now knows about all 3. Send heartbeat to node-2 carrying this info.
let all_members = cluster.node(0).membership.all_members().await;
let known: Vec<ClusterNodeInfo> = all_members
.iter()
.map(|m| ClusterNodeInfo {
node_id: m.node_id.clone(),
address: m.address.clone(),
status: m.status.to_string(),
})
.collect();
let mut client2 = ClusterServiceClient::connect(cluster.node(1).endpoint())
.await
.unwrap();
let resp = client2
.heartbeat(tonic::Request::new(HeartbeatRequest {
node_id: "node-1".to_string(),
known_members: known,
}))
.await
.unwrap();
// Node-2 should now know about all 3 nodes via gossip.
let node2_members = resp.into_inner().members;
assert!(
node2_members.len() >= 3,
"node-2 should know >= 3 members after gossip, got {}",
node2_members.len()
);
}

// ---------------------------------------------------------------------------
// Test 7: Cross-node replication via RPC
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_cross_node_replication_via_rpc() {
let cluster = TestCluster::start(2).await;
// Publish 10 messages to node-1 via SDK.
let mut producer = GrpcProducer::connect(GrpcProducerConfig {
address: cluster.node(0).endpoint(),
..Default::default()
})
.await
.unwrap();
let mut entry_data = Vec::new();
for i in 0..10u64 {
let value = format!("replicated-{i}");
producer
.send("repl-topic", None, value.as_bytes())
.await
.unwrap();
entry_data.push(value.into_bytes());
}
// Replicate the same data to node-2 via ClusterService RPC.
let mut cluster_client = ClusterServiceClient::connect(cluster.node(1).endpoint())
.await
.unwrap();
let resp = cluster_client
.replicate_entries(tonic::Request::new(ReplicateEntriesRequest {
topic: "repl-topic".to_string(),
partition: 0,
entries: entry_data,
}))
.await
.unwrap();
let last_offset = resp.into_inner().last_replicated_offset;
assert_eq!(last_offset, 9);
// Read from node-2 to verify the data is there.
let messages = collect_messages(&cluster.node(1).endpoint(), "repl-topic", 0, 10).await;
assert_eq!(messages.len(), 10);
for (i, msg) in messages.iter().enumerate() {
assert_eq!(msg.offset, i as u64);
assert_eq!(msg.value, format!("replicated-{i}").as_bytes());
}
}

// ---------------------------------------------------------------------------
// Test 8: FetchSegment recovery
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_fetch_segment_recovery() {
let cluster = TestCluster::start(1).await;
let endpoint = cluster.node(0).endpoint();
// Write 50 messages.
let mut producer = GrpcProducer::connect(GrpcProducerConfig {
address: endpoint.clone(),
..Default::default()
})
.await
.unwrap();
for i in 0..50u64 {
producer
.send("recovery-topic", None, format!("data-{i}").as_bytes())
.await
.unwrap();
}
// Fetch via FetchSegment stream.
let mut client = ClusterServiceClient::connect(endpoint)
.await
.unwrap();
let response = client
.fetch_segment(tonic::Request::new(FetchSegmentRequest {
topic: "recovery-topic".to_string(),
partition: 0,
from_offset: 0,
}))
.await
.unwrap();
let mut stream = response.into_inner();
let mut all_chunks = Vec::new();
while let Ok(Some(Ok(resp))) =
tokio::time::timeout(Duration::from_secs(5), stream.next()).await
{
all_chunks.extend(resp.chunk);
}
// Decode the wire format: offset(8 LE) + value_len(4 LE) + value
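    // e.g. offset 3 with the 6-byte value "data-3" occupies 8 + 4 + 6 = 18 bytes.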
let mut cursor = 0;
let mut decoded = Vec::new();
while cursor + 12 <= all_chunks.len() {
let offset = u64::from_le_bytes(all_chunks[cursor..cursor + 8].try_into().unwrap());
let value_len =
u32::from_le_bytes(all_chunks[cursor + 8..cursor + 12].try_into().unwrap()) as usize;
cursor += 12;
assert!(cursor + value_len <= all_chunks.len());
let value = all_chunks[cursor..cursor + value_len].to_vec();
cursor += value_len;
decoded.push((offset, value));
}
assert_eq!(decoded.len(), 50);
for (i, (offset, value)) in decoded.iter().enumerate() {
assert_eq!(*offset, i as u64);
assert_eq!(value, format!("data-{i}").as_bytes());
}
}
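
// The inverse of the decode loop above, sketched for illustration: how one entry
// would be laid out in a FetchSegment chunk under the format documented in the
// test. `encode_entry` is a hypothetical helper, not a server API.
#[allow(dead_code)]
fn encode_entry(offset: u64, value: &[u8]) -> Vec<u8> {
    let mut buf = Vec::with_capacity(12 + value.len());
    buf.extend_from_slice(&offset.to_le_bytes()); // 8-byte little-endian offset
    buf.extend_from_slice(&(value.len() as u32).to_le_bytes()); // 4-byte LE length
    buf.extend_from_slice(value); // raw value bytes
    buf
}
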
// ---------------------------------------------------------------------------
// Test 9: Node status returns correct id
// ---------------------------------------------------------------------------
#[tokio::test]
async fn test_node_status_returns_correct_id() {
let cluster = TestCluster::start(3).await;
for (i, node) in cluster.nodes.iter().enumerate() {
let mut client = StatusServiceClient::connect(node.endpoint()).await.unwrap();
let resp = client
.status(tonic::Request::new(GetStatusRequest {}))
.await
.unwrap();
let expected = format!("node-{}", i + 1);
assert_eq!(
resp.into_inner().node_id,
expected,
"node at index {} should have id '{}'",
i,
expected
);
}
}