feat: add capnp

Signed-off-by: kjuulh <contact@kjuulh.io>
This commit is contained in:
2026-02-27 12:15:35 +01:00
parent 3162971c89
commit 749ae245c7
115 changed files with 16596 additions and 31 deletions

View File

@@ -7,6 +7,9 @@ edition.workspace = true
anyhow = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
thiserror = { workspace = true }
[dev-dependencies]
tokio = { workspace = true, features = ["full", "test-util"] }
sq-storage = { workspace = true }
sq-models = { workspace = true }

131
crates/sq-sim/src/clock.rs Normal file
View File

@@ -0,0 +1,131 @@
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
/// Trait abstracting time for deterministic simulation.
///
/// Production code uses [`RealClock`]; simulation tests use [`SimClock`],
/// whose time only advances when explicitly ticked.
pub trait Clock: Send + Sync {
    /// Current instant according to this clock.
    fn now(&self) -> Instant;

    /// Duration elapsed between `earlier` and `self.now()`.
    ///
    /// Panics (via `Instant::duration_since`) if `earlier` is later than
    /// this clock's current instant.
    fn elapsed_since(&self, earlier: Instant) -> Duration {
        self.now().duration_since(earlier)
    }
}

/// Real clock delegating to `std::time::Instant`.
#[derive(Clone, Copy, Debug, Default)]
pub struct RealClock;

impl Clock for RealClock {
    fn now(&self) -> Instant {
        Instant::now()
    }
}

/// Deterministic clock for simulation testing.
/// Time only advances when explicitly ticked via [`SimClock::advance`].
///
/// Clones share state: advancing one clone is observed by all others.
#[derive(Clone)]
pub struct SimClock {
    inner: Arc<SimClockInner>,
}

struct SimClockInner {
    /// Real instant captured at construction; `now()` returns `base + offset`.
    base: Instant,
    /// Total simulated time elapsed since `base`.
    ///
    /// Stored as a `Duration` rather than raw nanoseconds so no lossy
    /// `u128 -> u64` cast is needed when reporting time (the previous
    /// representation silently truncated offsets beyond `u64::MAX` ns).
    offset: Mutex<Duration>,
}

impl SimClock {
    /// Create a simulated clock starting "now" with zero elapsed time.
    pub fn new() -> Self {
        Self {
            inner: Arc::new(SimClockInner {
                base: Instant::now(),
                offset: Mutex::new(Duration::ZERO),
            }),
        }
    }

    /// Advance simulated time by the given duration.
    pub fn advance(&self, duration: Duration) {
        let mut offset = self.inner.offset.lock().unwrap();
        // Saturate instead of panicking on an absurdly large total.
        *offset = offset.checked_add(duration).unwrap_or(Duration::MAX);
    }

    /// Get the total simulated time elapsed since construction.
    pub fn elapsed(&self) -> Duration {
        *self.inner.offset.lock().unwrap()
    }
}

impl Default for SimClock {
    fn default() -> Self {
        Self::new()
    }
}

impl Clock for SimClock {
    fn now(&self) -> Instant {
        self.inner.base + self.elapsed()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_real_clock_advances() {
        let clock = RealClock;
        let t1 = clock.now();
        // Busy-wait a tiny bit
        std::thread::sleep(Duration::from_millis(1));
        let t2 = clock.now();
        // Real time must move forward across the sleep.
        assert!(t2 > t1);
    }

    #[test]
    fn test_sim_clock_starts_at_base() {
        let clock = SimClock::new();
        // A fresh simulated clock has seen no ticks.
        assert_eq!(clock.elapsed(), Duration::ZERO);
    }

    #[test]
    fn test_sim_clock_advance() {
        let clock = SimClock::new();
        let t1 = clock.now();
        clock.advance(Duration::from_secs(10));
        let t2 = clock.now();
        // `now()` and `elapsed()` must both reflect the tick exactly.
        assert_eq!(t2.duration_since(t1), Duration::from_secs(10));
        assert_eq!(clock.elapsed(), Duration::from_secs(10));
    }

    #[test]
    fn test_sim_clock_multiple_advances() {
        let clock = SimClock::new();
        clock.advance(Duration::from_millis(100));
        clock.advance(Duration::from_millis(200));
        clock.advance(Duration::from_millis(300));
        // Successive ticks accumulate.
        assert_eq!(clock.elapsed(), Duration::from_millis(600));
    }

    #[test]
    fn test_sim_clock_clone_shares_state() {
        let clock1 = SimClock::new();
        let clock2 = clock1.clone();
        clock1.advance(Duration::from_secs(5));
        // Clones share the same Arc'd inner state.
        assert_eq!(clock2.elapsed(), Duration::from_secs(5));
    }

    #[test]
    fn test_sim_clock_elapsed_since() {
        let clock = SimClock::new();
        let t1 = clock.now();
        clock.advance(Duration::from_secs(42));
        // The default trait method works against simulated time too.
        assert_eq!(clock.elapsed_since(t1), Duration::from_secs(42));
    }
}

666
crates/sq-sim/src/fs.rs Normal file
View File

@@ -0,0 +1,666 @@
use std::collections::BTreeMap;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
// ---------------------------------------------------------------------------
// Traits
// ---------------------------------------------------------------------------
/// Trait abstracting filesystem operations for deterministic simulation.
///
/// Implemented by `RealFileSystem` (delegating to `std::fs`) and
/// `InMemoryFileSystem` (deterministic, with fault injection).
pub trait FileSystem: Send + Sync {
    /// Create a directory and all missing ancestors.
    fn create_dir_all(&self, path: &Path) -> io::Result<()>;
    /// Open `path` for writing, creating it and truncating existing content.
    fn open_write(&self, path: &Path) -> io::Result<Box<dyn FileHandle>>;
    /// Open `path` for appending, creating it if missing; the handle's
    /// position starts at the current end of file.
    fn open_append(&self, path: &Path) -> io::Result<Box<dyn FileHandle>>;
    /// Open an existing file for reading; position starts at 0.
    fn open_read(&self, path: &Path) -> io::Result<Box<dyn FileHandle>>;
    /// Remove a file; errors with `NotFound` if it does not exist.
    fn remove_file(&self, path: &Path) -> io::Result<()>;
    /// List a directory's entries in sorted order.
    fn list_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>>;
    /// Whether a file or directory exists at `path`.
    fn exists(&self, path: &Path) -> bool;
    /// Size of the file at `path` in bytes.
    fn file_size(&self, path: &Path) -> io::Result<u64>;
}
/// Trait abstracting a file handle for reads/writes/fsync.
///
/// A handle carries its own cursor; `position()` reports it and `seek`
/// moves it.
pub trait FileHandle: Send + Sync {
    /// Write all of `buf` at the current position, advancing the cursor.
    fn write_all(&mut self, buf: &[u8]) -> io::Result<()>;
    /// Fill `buf` exactly from the current position, advancing the cursor.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()>;
    /// Read from the current position to end of file; returns bytes read.
    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize>;
    /// Flush and durably persist written data.
    fn fsync(&mut self) -> io::Result<()>;
    /// Current cursor position in bytes.
    fn position(&self) -> u64;
    /// Move the cursor to an absolute byte offset.
    fn seek(&mut self, pos: u64) -> io::Result<()>;
}
// ---------------------------------------------------------------------------
// RealFileSystem
// ---------------------------------------------------------------------------
/// Real filesystem delegating to `std::fs`.
pub struct RealFileSystem;

impl FileSystem for RealFileSystem {
    fn create_dir_all(&self, path: &Path) -> io::Result<()> {
        std::fs::create_dir_all(path)
    }

    fn open_write(&self, path: &Path) -> io::Result<Box<dyn FileHandle>> {
        let file = std::fs::File::create(path)?;
        Ok(Box::new(RealFileHandle { file, position: 0 }))
    }

    fn open_append(&self, path: &Path) -> io::Result<Box<dyn FileHandle>> {
        let file = std::fs::OpenOptions::new()
            .create(true)
            .append(true)
            .open(path)?;
        // Appended writes land at end-of-file, so the tracked cursor
        // starts at the current file length.
        let position = file.metadata()?.len();
        Ok(Box::new(RealFileHandle { file, position }))
    }

    fn open_read(&self, path: &Path) -> io::Result<Box<dyn FileHandle>> {
        let file = std::fs::File::open(path)?;
        Ok(Box::new(RealFileHandle { file, position: 0 }))
    }

    fn remove_file(&self, path: &Path) -> io::Result<()> {
        std::fs::remove_file(path)
    }

    fn list_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>> {
        // Collect fallibly, then sort for a deterministic order.
        let mut entries = std::fs::read_dir(path)?
            .map(|entry| entry.map(|e| e.path()))
            .collect::<io::Result<Vec<PathBuf>>>()?;
        entries.sort();
        Ok(entries)
    }

    fn exists(&self, path: &Path) -> bool {
        path.exists()
    }

    fn file_size(&self, path: &Path) -> io::Result<u64> {
        std::fs::metadata(path).map(|meta| meta.len())
    }
}
/// Handle over a real `std::fs::File`.
///
/// `position` mirrors the kernel file offset so `position()` needs no
/// syscall.
struct RealFileHandle {
    file: std::fs::File,
    // Cursor mirror, updated on every read/write/seek.
    position: u64,
}

impl FileHandle for RealFileHandle {
    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
        use std::io::Write;
        self.file.write_all(buf)?;
        // NOTE(review): for append-mode handles the OS writes at EOF
        // regardless of a prior `seek`, so this mirror can drift —
        // confirm append handles are never seeked before writing.
        self.position += buf.len() as u64;
        Ok(())
    }

    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        use std::io::Read;
        self.file.read_exact(buf)?;
        self.position += buf.len() as u64;
        Ok(())
    }

    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        use std::io::Read;
        let n = self.file.read_to_end(buf)?;
        self.position += n as u64;
        Ok(n)
    }

    /// Flush userspace buffers, then fsync data and metadata to disk.
    fn fsync(&mut self) -> io::Result<()> {
        use std::io::Write;
        self.file.flush()?;
        self.file.sync_all()
    }

    fn position(&self) -> u64 {
        self.position
    }

    fn seek(&mut self, pos: u64) -> io::Result<()> {
        use std::io::Seek;
        self.file.seek(io::SeekFrom::Start(pos))?;
        self.position = pos;
        Ok(())
    }
}
// ---------------------------------------------------------------------------
// InMemoryFileSystem
// ---------------------------------------------------------------------------
/// In-memory filesystem for deterministic testing with fault injection.
///
/// Clones share state (`Arc`), so a test can keep one handle for fault
/// injection while the code under test owns another.
#[derive(Clone)]
pub struct InMemoryFileSystem {
    inner: Arc<Mutex<InMemoryFsInner>>,
}

struct InMemoryFsInner {
    /// File contents keyed by canonical path.
    files: BTreeMap<PathBuf, Vec<u8>>,
    /// Directories that have been created.
    dirs: std::collections::BTreeSet<PathBuf>,
    /// Fault injection state.
    faults: FaultState,
}

#[derive(Default)]
struct FaultState {
    /// Error returned (once) by the next fsync on any handle, then cleared.
    fail_next_fsync: Option<io::Error>,
    /// While set, every write fails until `clear_faults` is called.
    disk_full: bool,
}

impl InMemoryFileSystem {
    /// Create an empty filesystem with no faults armed.
    pub fn new() -> Self {
        Self {
            inner: Arc::new(Mutex::new(InMemoryFsInner {
                files: BTreeMap::new(),
                dirs: std::collections::BTreeSet::new(),
                faults: FaultState::default(),
            })),
        }
    }

    /// Make the next fsync call fail with the given error.
    ///
    /// The fault is global (not per file): the first fsync on any handle
    /// consumes it.
    pub fn fail_next_fsync(&self, error: io::Error) {
        let mut inner = self.inner.lock().unwrap();
        inner.faults.fail_next_fsync = Some(error);
    }

    /// Simulate disk full: all writes will fail until faults are cleared.
    pub fn simulate_disk_full(&self) {
        let mut inner = self.inner.lock().unwrap();
        inner.faults.disk_full = true;
    }

    /// Clear all fault injection state.
    pub fn clear_faults(&self) {
        let mut inner = self.inner.lock().unwrap();
        inner.faults = FaultState::default();
    }

    /// Corrupt up to `len` bytes at `offset` in a file by flipping bits.
    ///
    /// The corrupted range is clamped to the file length, so an offset at
    /// or past the end is a no-op (previously `data[start..end]` with
    /// `start > end` would panic). Unknown paths are silently ignored.
    pub fn corrupt_bytes(&self, path: &Path, offset: u64, len: usize) {
        let mut inner = self.inner.lock().unwrap();
        if let Some(data) = inner.files.get_mut(path) {
            // Clamp both ends; saturate so `start + len` cannot overflow.
            let start = (offset as usize).min(data.len());
            let end = start.saturating_add(len).min(data.len());
            for b in &mut data[start..end] {
                *b ^= 0xFF;
            }
        }
    }

    /// Get a snapshot of file contents (for test assertions).
    pub fn read_file_bytes(&self, path: &Path) -> Option<Vec<u8>> {
        let inner = self.inner.lock().unwrap();
        inner.files.get(path).cloned()
    }
}

impl Default for InMemoryFileSystem {
    fn default() -> Self {
        Self::new()
    }
}
impl FileSystem for InMemoryFileSystem {
    /// Record `path` and every ancestor as an existing directory.
    fn create_dir_all(&self, path: &Path) -> io::Result<()> {
        let mut inner = self.inner.lock().unwrap();
        // Add this dir and all ancestors.
        let mut current = path.to_path_buf();
        loop {
            inner.dirs.insert(current.clone());
            // `pop` returns false once the root (or an empty path) is
            // reached; for relative paths this also records "".
            if !current.pop() {
                break;
            }
        }
        Ok(())
    }

    /// Create-or-truncate `path`, returning a handle positioned at 0.
    ///
    /// NOTE(review): parent directories are not required to exist here,
    /// unlike on a real filesystem — confirm callers don't rely on that.
    fn open_write(&self, path: &Path) -> io::Result<Box<dyn FileHandle>> {
        let inner_ref = self.inner.clone();
        // Truncate/create
        {
            let mut inner = inner_ref.lock().unwrap();
            inner.files.insert(path.to_path_buf(), Vec::new());
        }
        Ok(Box::new(InMemoryFileHandle {
            fs: inner_ref,
            path: path.to_path_buf(),
            position: 0,
        }))
    }

    /// Open (creating if missing) and return a handle positioned at EOF.
    fn open_append(&self, path: &Path) -> io::Result<Box<dyn FileHandle>> {
        let inner_ref = self.inner.clone();
        let position = {
            let mut inner = inner_ref.lock().unwrap();
            let entry = inner
                .files
                .entry(path.to_path_buf())
                .or_insert_with(Vec::new);
            entry.len() as u64
        };
        Ok(Box::new(InMemoryFileHandle {
            fs: inner_ref,
            path: path.to_path_buf(),
            position,
        }))
    }

    /// Open an existing file for reading; `NotFound` if absent.
    fn open_read(&self, path: &Path) -> io::Result<Box<dyn FileHandle>> {
        let inner_ref = self.inner.clone();
        // Existence is checked at open time only; the handle re-resolves
        // the path on every read, so later removal surfaces as NotFound.
        {
            let inner = inner_ref.lock().unwrap();
            if !inner.files.contains_key(path) {
                return Err(io::Error::new(
                    io::ErrorKind::NotFound,
                    format!("file not found: {}", path.display()),
                ));
            }
        }
        Ok(Box::new(InMemoryFileHandle {
            fs: inner_ref,
            path: path.to_path_buf(),
            position: 0,
        }))
    }

    /// Remove a file; `NotFound` if it does not exist.
    fn remove_file(&self, path: &Path) -> io::Result<()> {
        let mut inner = self.inner.lock().unwrap();
        if inner.files.remove(path).is_none() {
            return Err(io::Error::new(
                io::ErrorKind::NotFound,
                format!("file not found: {}", path.display()),
            ));
        }
        Ok(())
    }

    /// List direct children (files and subdirectories) in sorted order.
    fn list_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>> {
        let inner = self.inner.lock().unwrap();
        // BTreeSet gives de-duplication plus sorted iteration order.
        let mut entries = std::collections::BTreeSet::new();
        // Find files that are direct children of this directory.
        for file_path in inner.files.keys() {
            if let Some(parent) = file_path.parent() {
                if parent == path {
                    entries.insert(file_path.clone());
                }
            }
        }
        // Find subdirectories that are direct children of this directory.
        for dir_path in &inner.dirs {
            if let Some(parent) = dir_path.parent() {
                if parent == path && dir_path != path {
                    entries.insert(dir_path.clone());
                }
            }
        }
        Ok(entries.into_iter().collect())
    }

    /// Whether `path` is a known file or a created directory.
    fn exists(&self, path: &Path) -> bool {
        let inner = self.inner.lock().unwrap();
        inner.files.contains_key(path) || inner.dirs.contains(path)
    }

    /// Size in bytes of the file at `path`; `NotFound` if absent.
    fn file_size(&self, path: &Path) -> io::Result<u64> {
        let inner = self.inner.lock().unwrap();
        inner
            .files
            .get(path)
            .map(|data| data.len() as u64)
            .ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::NotFound,
                    format!("file not found: {}", path.display()),
                )
            })
    }
}
/// Handle into an `InMemoryFileSystem` file.
///
/// Holds the shared filesystem state plus a per-handle cursor; every
/// operation re-locks the filesystem and re-resolves `path`.
struct InMemoryFileHandle {
    fs: Arc<Mutex<InMemoryFsInner>>,
    path: PathBuf,
    // Byte offset of this handle's cursor within the file.
    position: u64,
}

impl FileHandle for InMemoryFileHandle {
    /// Write `buf` at the cursor, growing the file if needed.
    /// Fails with `Other` while the simulated disk-full fault is active.
    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
        let mut inner = self.fs.lock().unwrap();
        if inner.faults.disk_full {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                "disk full (simulated)",
            ));
        }
        let data = inner
            .files
            .get_mut(&self.path)
            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "file not found"))?;
        let pos = self.position as usize;
        // Grow the file when writing at or past the current end.
        if pos + buf.len() > data.len() {
            data.resize(pos + buf.len(), 0);
        }
        data[pos..pos + buf.len()].copy_from_slice(buf);
        self.position += buf.len() as u64;
        Ok(())
    }

    /// Fill `buf` exactly from the cursor; `UnexpectedEof` (cursor and
    /// buffer untouched) if fewer bytes remain.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        let inner = self.fs.lock().unwrap();
        let data = inner
            .files
            .get(&self.path)
            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "file not found"))?;
        let pos = self.position as usize;
        if pos + buf.len() > data.len() {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "unexpected eof",
            ));
        }
        buf.copy_from_slice(&data[pos..pos + buf.len()]);
        self.position += buf.len() as u64;
        Ok(())
    }

    /// Append everything from the cursor to end-of-file onto `buf`.
    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        let inner = self.fs.lock().unwrap();
        let data = inner
            .files
            .get(&self.path)
            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "file not found"))?;
        let pos = self.position as usize;
        let remaining = &data[pos..];
        buf.extend_from_slice(remaining);
        self.position += remaining.len() as u64;
        Ok(remaining.len())
    }

    /// No-op persistence point, except that it consumes a pending
    /// `fail_next_fsync` fault (the fault is global, not per-file).
    fn fsync(&mut self) -> io::Result<()> {
        let mut inner = self.fs.lock().unwrap();
        if let Some(err) = inner.faults.fail_next_fsync.take() {
            return Err(err);
        }
        Ok(())
    }

    fn position(&self) -> u64 {
        self.position
    }

    /// Move the cursor. Seeking past the end is rejected with
    /// `InvalidInput` (a real file would permit it — NOTE(review)).
    fn seek(&mut self, pos: u64) -> io::Result<()> {
        let inner = self.fs.lock().unwrap();
        let data = inner
            .files
            .get(&self.path)
            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "file not found"))?;
        if pos > data.len() as u64 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "seek past end of file",
            ));
        }
        drop(inner);
        self.position = pos;
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_inmemory_write_read() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/test.dat");
        {
            let mut fh = fs.open_write(path).unwrap();
            fh.write_all(b"hello world").unwrap();
            fh.fsync().unwrap();
        }
        {
            let mut fh = fs.open_read(path).unwrap();
            let mut buf = Vec::new();
            fh.read_to_end(&mut buf).unwrap();
            // Round-trip: bytes read back equal bytes written.
            assert_eq!(buf, b"hello world");
        }
    }

    #[test]
    fn test_inmemory_read_exact() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/exact.dat");
        {
            let mut fh = fs.open_write(path).unwrap();
            fh.write_all(b"0123456789").unwrap();
        }
        {
            // Two back-to-back exact reads advance the cursor correctly.
            let mut fh = fs.open_read(path).unwrap();
            let mut buf = [0u8; 5];
            fh.read_exact(&mut buf).unwrap();
            assert_eq!(&buf, b"01234");
            assert_eq!(fh.position(), 5);
            fh.read_exact(&mut buf).unwrap();
            assert_eq!(&buf, b"56789");
            assert_eq!(fh.position(), 10);
        }
    }

    #[test]
    fn test_inmemory_read_exact_eof() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/short.dat");
        {
            let mut fh = fs.open_write(path).unwrap();
            fh.write_all(b"hi").unwrap();
        }
        {
            // Asking for more bytes than the file holds is an error.
            let mut fh = fs.open_read(path).unwrap();
            let mut buf = [0u8; 10];
            let err = fh.read_exact(&mut buf).unwrap_err();
            assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
        }
    }

    #[test]
    fn test_inmemory_append() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/append.dat");
        {
            let mut fh = fs.open_write(path).unwrap();
            fh.write_all(b"first").unwrap();
        }
        {
            // Append handle starts positioned at the existing EOF.
            let mut fh = fs.open_append(path).unwrap();
            assert_eq!(fh.position(), 5);
            fh.write_all(b"second").unwrap();
        }
        assert_eq!(fs.read_file_bytes(path).unwrap(), b"firstsecond");
    }

    #[test]
    fn test_inmemory_seek() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/seek.dat");
        {
            let mut fh = fs.open_write(path).unwrap();
            fh.write_all(b"abcdefghij").unwrap();
        }
        {
            // Reads after a seek start from the new cursor position.
            let mut fh = fs.open_read(path).unwrap();
            fh.seek(5).unwrap();
            assert_eq!(fh.position(), 5);
            let mut buf = [0u8; 5];
            fh.read_exact(&mut buf).unwrap();
            assert_eq!(&buf, b"fghij");
        }
    }

    #[test]
    fn test_inmemory_create_dir_and_list() {
        let fs = InMemoryFileSystem::new();
        // create_dir_all records the directory and all of its ancestors.
        fs.create_dir_all(Path::new("/data/topic/0")).unwrap();
        assert!(fs.exists(Path::new("/data/topic/0")));
        assert!(fs.exists(Path::new("/data/topic")));
        assert!(fs.exists(Path::new("/data")));
        // Create files in the directory
        {
            let mut fh = fs.open_write(Path::new("/data/topic/0/seg1.wal")).unwrap();
            fh.write_all(b"data1").unwrap();
        }
        {
            let mut fh = fs.open_write(Path::new("/data/topic/0/seg2.wal")).unwrap();
            fh.write_all(b"data2").unwrap();
        }
        let entries = fs.list_dir(Path::new("/data/topic/0")).unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains(&PathBuf::from("/data/topic/0/seg1.wal")));
        assert!(entries.contains(&PathBuf::from("/data/topic/0/seg2.wal")));
    }

    #[test]
    fn test_inmemory_remove_file() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/remove.dat");
        fs.open_write(path).unwrap();
        assert!(fs.exists(path));
        fs.remove_file(path).unwrap();
        assert!(!fs.exists(path));
    }

    #[test]
    fn test_inmemory_remove_nonexistent() {
        let fs = InMemoryFileSystem::new();
        let err = fs.remove_file(Path::new("/no/such/file")).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }

    #[test]
    fn test_inmemory_open_read_nonexistent() {
        let fs = InMemoryFileSystem::new();
        match fs.open_read(Path::new("/no/such/file")) {
            Err(e) => assert_eq!(e.kind(), io::ErrorKind::NotFound),
            Ok(_) => panic!("expected NotFound error"),
        }
    }

    #[test]
    fn test_inmemory_file_size() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/size.dat");
        {
            let mut fh = fs.open_write(path).unwrap();
            fh.write_all(b"twelve chars").unwrap();
        }
        assert_eq!(fs.file_size(path).unwrap(), 12);
    }

    // --- Fault injection tests ---

    #[test]
    fn test_fault_fsync_failure() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/fsync.dat");
        fs.fail_next_fsync(io::Error::new(io::ErrorKind::Other, "disk error"));
        let mut fh = fs.open_write(path).unwrap();
        fh.write_all(b"data").unwrap();
        let err = fh.fsync().unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::Other);
        // Second fsync should succeed (fault was consumed)
        fh.fsync().unwrap();
    }

    #[test]
    fn test_fault_disk_full() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/full.dat");
        let mut fh = fs.open_write(path).unwrap();
        fh.write_all(b"before").unwrap();
        fs.simulate_disk_full();
        let err = fh.write_all(b"after").unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::Other);
        // Clear fault, writes should work again
        fs.clear_faults();
        fh.write_all(b"recovered").unwrap();
    }

    #[test]
    fn test_fault_corrupt_bytes() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/corrupt.dat");
        {
            let mut fh = fs.open_write(path).unwrap();
            fh.write_all(&[0x00, 0x00, 0x00, 0x00]).unwrap();
        }
        // Bits in the targeted range are flipped; neighbors untouched.
        fs.corrupt_bytes(path, 1, 2);
        let data = fs.read_file_bytes(path).unwrap();
        assert_eq!(data, vec![0x00, 0xFF, 0xFF, 0x00]);
    }

    #[test]
    fn test_inmemory_write_position_tracking() {
        let fs = InMemoryFileSystem::new();
        let path = Path::new("/tmp/pos.dat");
        let mut fh = fs.open_write(path).unwrap();
        assert_eq!(fh.position(), 0);
        fh.write_all(b"hello").unwrap();
        assert_eq!(fh.position(), 5);
        fh.write_all(b" world").unwrap();
        assert_eq!(fh.position(), 11);
    }
}

View File

@@ -0,0 +1,6 @@
//! Deterministic simulation primitives: clock, filesystem, and network.
pub mod clock;
pub mod fs;
pub mod network;

// Convenience re-exports of the clock and fs APIs at the crate root.
// NOTE(review): `network` is not glob re-exported — confirm whether that
// omission is intentional.
pub use clock::*;
pub use fs::*;

View File

@@ -0,0 +1,316 @@
use std::collections::{HashMap, HashSet, VecDeque};
use std::sync::{Arc, Mutex};
/// Identifier for a node in the virtual network.
pub type NodeId = String;

/// A pending message in the virtual network.
///
/// Queued by `send` and moved into the destination inbox by
/// `deliver_pending`.
#[derive(Debug, Clone)]
struct PendingMessage {
    from: NodeId,
    to: NodeId,
    data: Vec<u8>,
}
/// Virtual network for simulation testing.
/// Supports partition, latency injection, and random packet drop.
///
/// Each field carries its own lock; no method holds two locks at once,
/// so there is no lock-ordering hazard.
pub struct VirtualNetwork {
    /// Delivered message queues: node_id -> received messages.
    inbox: Arc<Mutex<HashMap<NodeId, VecDeque<(NodeId, Vec<u8>)>>>>,
    /// Pending messages not yet delivered (used for latency simulation).
    pending: Arc<Mutex<VecDeque<PendingMessage>>>,
    /// Partitioned links: (from, to) pairs that are blocked.
    partitions: Arc<Mutex<HashSet<(NodeId, NodeId)>>>,
    /// Drop probability (0.0 to 1.0).
    drop_probability: Arc<Mutex<f64>>,
}
impl VirtualNetwork {
    /// Create an empty network: no partitions, no packet loss.
    pub fn new() -> Self {
        Self {
            inbox: Arc::new(Mutex::new(HashMap::new())),
            pending: Arc::new(Mutex::new(VecDeque::new())),
            partitions: Arc::new(Mutex::new(HashSet::new())),
            drop_probability: Arc::new(Mutex::new(0.0)),
        }
    }

    /// Partition the network between two nodes (bidirectional).
    pub fn partition(&self, a: &str, b: &str) {
        let mut blocked = self.partitions.lock().unwrap();
        blocked.insert((a.to_owned(), b.to_owned()));
        blocked.insert((b.to_owned(), a.to_owned()));
    }

    /// Heal the partition between two nodes (bidirectional).
    pub fn heal(&self, a: &str, b: &str) {
        let mut blocked = self.partitions.lock().unwrap();
        blocked.remove(&(a.to_owned(), b.to_owned()));
        blocked.remove(&(b.to_owned(), a.to_owned()));
    }

    /// Heal all partitions.
    pub fn heal_all(&self) {
        self.partitions.lock().unwrap().clear();
    }

    /// Set the probability that a message is dropped, clamped to [0, 1].
    pub fn set_drop_probability(&self, prob: f64) {
        let mut p = self.drop_probability.lock().unwrap();
        *p = prob.clamp(0.0, 1.0);
    }

    /// Send a message from one node to another.
    ///
    /// Partitioned links and random drops discard the message silently
    /// (still returning `Ok`), mimicking a lossy physical network.
    pub fn send(&self, from: &str, to: &str, data: Vec<u8>) -> Result<(), NetworkError> {
        let link = (from.to_owned(), to.to_owned());
        // Guard 1: blocked by a partition.
        if self.partitions.lock().unwrap().contains(&link) {
            return Ok(());
        }
        // Guard 2: randomly dropped (only roll the RNG when loss is on).
        let drop_prob = *self.drop_probability.lock().unwrap();
        if drop_prob > 0.0 && simple_random() < drop_prob {
            return Ok(());
        }
        // Queue for delivery on the next `deliver_pending`.
        self.pending.lock().unwrap().push_back(PendingMessage {
            from: link.0,
            to: link.1,
            data,
        });
        Ok(())
    }

    /// Deliver all pending messages to their inboxes.
    /// Call this to simulate message delivery (controls arrival timing).
    pub fn deliver_pending(&self) {
        // Drain under the pending lock, then deliver under the inbox lock;
        // the two locks are never held at the same time.
        let drained: Vec<PendingMessage> =
            self.pending.lock().unwrap().drain(..).collect();
        let mut inbox = self.inbox.lock().unwrap();
        for PendingMessage { from, to, data } in drained {
            inbox.entry(to).or_default().push_back((from, data));
        }
    }

    /// Receive the next message for `node`, or `None` if the inbox is empty.
    pub fn recv(&self, node: &str) -> Option<(NodeId, Vec<u8>)> {
        self.inbox.lock().unwrap().get_mut(node)?.pop_front()
    }

    /// Number of sent-but-undelivered messages.
    pub fn pending_count(&self) -> usize {
        self.pending.lock().unwrap().len()
    }

    /// Number of delivered messages waiting in `node`'s inbox.
    pub fn inbox_count(&self, node: &str) -> usize {
        self.inbox
            .lock()
            .unwrap()
            .get(node)
            .map_or(0, VecDeque::len)
    }
}

impl Default for VirtualNetwork {
    fn default() -> Self {
        Self::new()
    }
}
/// Simple deterministic pseudo-random generator in [0, 1).
///
/// Uses a thread-local xorshift64 state seeded with a fixed constant, so
/// each thread observes the same reproducible sequence.
fn simple_random() -> f64 {
    use std::cell::Cell;
    thread_local! {
        static STATE: Cell<u64> = const { Cell::new(12345) };
    }
    STATE.with(|cell| {
        // xorshift64 step.
        let mut x = cell.get();
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        cell.set(x);
        // Map onto [0, 1) with 1/10000 resolution; the remainder always
        // fits in u32, so the cast is lossless.
        f64::from((x % 10_000) as u32) / 10_000.0
    })
}
/// Errors produced by [`VirtualNetwork`] operations.
///
/// NOTE(review): `send` currently never returns `Unreachable` — drops are
/// silent by design; confirm whether this variant is reserved for later.
#[derive(Debug, thiserror::Error)]
pub enum NetworkError {
    /// The destination node could not be reached.
    #[error("node '{0}' not reachable")]
    Unreachable(String),
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_send_and_receive() {
        let net = VirtualNetwork::new();
        net.send("node-1", "node-2", b"hello".to_vec()).unwrap();
        net.deliver_pending();
        // Sender identity travels with the payload.
        let (from, data) = net.recv("node-2").unwrap();
        assert_eq!(from, "node-1");
        assert_eq!(data, b"hello");
    }

    #[test]
    fn test_no_messages_returns_none() {
        let net = VirtualNetwork::new();
        assert!(net.recv("node-1").is_none());
    }

    #[test]
    fn test_partition_drops_messages() {
        let net = VirtualNetwork::new();
        net.partition("node-1", "node-2");
        net.send("node-1", "node-2", b"hello".to_vec()).unwrap();
        net.deliver_pending();
        // Partitioned link: message silently lost.
        assert!(net.recv("node-2").is_none());
    }

    #[test]
    fn test_partition_is_bidirectional() {
        let net = VirtualNetwork::new();
        net.partition("node-1", "node-2");
        net.send("node-1", "node-2", b"a->b".to_vec()).unwrap();
        net.send("node-2", "node-1", b"b->a".to_vec()).unwrap();
        net.deliver_pending();
        // Neither direction gets through.
        assert!(net.recv("node-2").is_none());
        assert!(net.recv("node-1").is_none());
    }

    #[test]
    fn test_heal_restores_communication() {
        let net = VirtualNetwork::new();
        net.partition("node-1", "node-2");
        net.send("node-1", "node-2", b"before".to_vec()).unwrap();
        net.deliver_pending();
        assert!(net.recv("node-2").is_none());
        // Messages sent after healing flow normally; the dropped message
        // is not retroactively delivered.
        net.heal("node-1", "node-2");
        net.send("node-1", "node-2", b"after".to_vec()).unwrap();
        net.deliver_pending();
        let (_, data) = net.recv("node-2").unwrap();
        assert_eq!(data, b"after");
    }

    #[test]
    fn test_heal_all() {
        let net = VirtualNetwork::new();
        net.partition("a", "b");
        net.partition("a", "c");
        net.heal_all();
        net.send("a", "b", b"msg".to_vec()).unwrap();
        net.send("a", "c", b"msg".to_vec()).unwrap();
        net.deliver_pending();
        assert!(net.recv("b").is_some());
        assert!(net.recv("c").is_some());
    }

    #[test]
    fn test_multiple_messages_ordered() {
        let net = VirtualNetwork::new();
        for i in 0..5 {
            net.send("a", "b", format!("msg-{i}").into_bytes())
                .unwrap();
        }
        net.deliver_pending();
        // FIFO delivery order per inbox.
        for i in 0..5 {
            let (_, data) = net.recv("b").unwrap();
            assert_eq!(data, format!("msg-{i}").as_bytes());
        }
        assert!(net.recv("b").is_none());
    }

    #[test]
    fn test_pending_and_inbox_counts() {
        let net = VirtualNetwork::new();
        net.send("a", "b", b"1".to_vec()).unwrap();
        net.send("a", "b", b"2".to_vec()).unwrap();
        // Messages sit in `pending` until deliver_pending moves them.
        assert_eq!(net.pending_count(), 2);
        assert_eq!(net.inbox_count("b"), 0);
        net.deliver_pending();
        assert_eq!(net.pending_count(), 0);
        assert_eq!(net.inbox_count("b"), 2);
    }

    #[test]
    fn test_partition_does_not_affect_other_links() {
        let net = VirtualNetwork::new();
        net.partition("a", "b");
        // a -> c should still work.
        net.send("a", "c", b"hello".to_vec()).unwrap();
        net.deliver_pending();
        assert!(net.recv("c").is_some());
    }

    #[test]
    fn test_drop_probability_all() {
        let net = VirtualNetwork::new();
        net.set_drop_probability(1.0);
        for _ in 0..10 {
            net.send("a", "b", b"msg".to_vec()).unwrap();
        }
        net.deliver_pending();
        // All messages should be dropped.
        assert_eq!(net.inbox_count("b"), 0);
    }

    #[test]
    fn test_drop_probability_none() {
        let net = VirtualNetwork::new();
        net.set_drop_probability(0.0);
        for _ in 0..10 {
            net.send("a", "b", b"msg".to_vec()).unwrap();
        }
        net.deliver_pending();
        // No messages should be dropped.
        assert_eq!(net.inbox_count("b"), 10);
    }
}

View File

@@ -0,0 +1 @@
pub mod single_node;

View File

@@ -0,0 +1,268 @@
use std::path::PathBuf;
use std::sync::Arc;
use sq_models::WalConfig;
use sq_sim::fs::InMemoryFileSystem;
use sq_sim::SimClock;
use sq_storage::engine::StorageEngine;
/// Build a `StorageEngine` backed by the simulated filesystem and clock.
///
/// Returns the engine plus handles to the fs/clock so a test can inject
/// faults or advance time while the engine runs.
fn test_engine() -> (
    StorageEngine<InMemoryFileSystem, SimClock>,
    Arc<InMemoryFileSystem>,
    Arc<SimClock>,
) {
    let fs = Arc::new(InMemoryFileSystem::new());
    let clock = Arc::new(SimClock::new());
    // Large segments so the baseline scenarios never trigger rotation.
    let config = WalConfig {
        max_segment_bytes: 1024 * 1024,
        max_segment_age_secs: 3600,
        data_dir: PathBuf::from("/data"),
        ..Default::default()
    };
    let engine = StorageEngine::new(fs.clone(), clock.clone(), config).unwrap();
    (engine, fs, clock)
}
/// S01: Single node, single producer, single consumer - baseline correctness
#[test]
fn s01_single_producer_consumer() {
    let (engine, _fs, _clock) = test_engine();
    // Produce 1000 messages.
    for i in 0..1000u64 {
        let offset = engine
            .append("orders", 0, Some(format!("key-{i}").as_bytes()), format!("value-{i}").as_bytes(), &[], i)
            .unwrap();
        // Offsets are assigned sequentially starting at 0.
        assert_eq!(offset, i, "offset must match sequence");
    }
    // Consume all messages (limit above the total to read everything).
    let messages = engine.read("orders", 0, 0, 2000).unwrap();
    // Invariant 1: No message loss.
    assert_eq!(messages.len(), 1000);
    // Invariant 2: Offsets strictly monotonic, no gaps.
    for (i, msg) in messages.iter().enumerate() {
        assert_eq!(msg.offset, i as u64, "offset gap detected at index {i}");
    }
    // Invariant: Content integrity.
    for msg in &messages {
        let expected_key = format!("key-{}", msg.offset);
        let expected_value = format!("value-{}", msg.offset);
        assert_eq!(msg.key.as_ref().unwrap(), expected_key.as_bytes());
        assert_eq!(msg.value, expected_value.as_bytes());
    }
}
/// S02: Single node, concurrent producers to different topics - offset ordering
#[test]
fn s02_multi_topic_producers() {
    let (engine, _fs, _clock) = test_engine();
    let topics = ["events", "orders", "logs"];
    // Write 100 messages to each topic.
    for topic in &topics {
        for i in 0..100u64 {
            let offset = engine.append(topic, 0, None, b"data", &[], i).unwrap();
            // Each topic starts its own offset sequence at 0.
            assert_eq!(offset, i);
        }
    }
    // Verify each topic has its own offset space.
    for topic in &topics {
        let messages = engine.read(topic, 0, 0, 200).unwrap();
        assert_eq!(messages.len(), 100, "topic {topic} should have 100 messages");
        // Offsets are monotonic per topic.
        for (i, msg) in messages.iter().enumerate() {
            assert_eq!(msg.offset, i as u64);
        }
    }
    // Cross-topic isolation: reading one topic doesn't return messages from another.
    let events = engine.read("events", 0, 0, 200).unwrap();
    for msg in &events {
        assert_eq!(msg.topic.as_str(), "events");
    }
}
/// S03: Single node, disk full during write - graceful error handling
#[test]
fn s03_disk_full() {
    // Built inline (not via test_engine) because the test needs the fs
    // handle for fault injection.
    let fs = Arc::new(InMemoryFileSystem::new());
    let clock = Arc::new(SimClock::new());
    let config = WalConfig {
        max_segment_bytes: 1024 * 1024,
        max_segment_age_secs: 3600,
        data_dir: PathBuf::from("/data"),
        ..Default::default()
    };
    let engine = StorageEngine::new(fs.clone(), clock, config).unwrap();
    // Write some messages successfully.
    for i in 0..10 {
        engine.append("t", 0, None, b"data", &[], i).unwrap();
    }
    // Simulate disk full.
    fs.simulate_disk_full();
    // Next write should fail.
    let result = engine.append("t", 0, None, b"data", &[], 0);
    assert!(result.is_err(), "write should fail when disk is full");
    // Clear fault - subsequent writes should work.
    fs.clear_faults();
    let _offset = engine.append("t", 0, None, b"after-recovery", &[], 0).unwrap();
    // Verify earlier messages are still readable.
    let messages = engine.read("t", 0, 0, 100).unwrap();
    assert!(messages.len() >= 10, "original messages should survive disk full");
}
/// S04: Single node, crash and restart - WAL recovery
#[test]
fn s04_crash_recovery() {
    // The fs outlives both engine instances, playing the role of the
    // durable disk across the simulated crash.
    let fs = Arc::new(InMemoryFileSystem::new());
    let clock = Arc::new(SimClock::new());
    let config = WalConfig {
        max_segment_bytes: 1024 * 1024,
        max_segment_age_secs: 3600,
        data_dir: PathBuf::from("/data"),
        ..Default::default()
    };
    // Phase 1: Write messages and "crash" (drop engine).
    {
        let engine = StorageEngine::new(fs.clone(), clock.clone(), config.clone()).unwrap();
        for i in 0..500u64 {
            engine
                .append("orders", 0, None, format!("msg-{i}").as_bytes(), &[], i)
                .unwrap();
        }
        // Engine dropped here - simulates crash.
    }
    // Phase 2: "Restart" - create new engine and recover.
    {
        let engine = StorageEngine::new(fs.clone(), clock.clone(), config.clone()).unwrap();
        engine.recover().unwrap();
        // Invariant 1: All acked messages survive recovery.
        let messages = engine.read("orders", 0, 0, 1000).unwrap();
        assert_eq!(messages.len(), 500, "all messages must survive crash");
        // Invariant 2: Offsets are intact.
        for (i, msg) in messages.iter().enumerate() {
            assert_eq!(msg.offset, i as u64);
            assert_eq!(msg.value, format!("msg-{i}").as_bytes());
        }
        // Can continue writing after recovery; offsets resume at 500.
        let offset = engine.append("orders", 0, None, b"post-crash", &[], 0).unwrap();
        assert_eq!(offset, 500);
    }
}
/// S09: Consumer group offset preservation across restarts
#[test]
fn s09_consumer_group_offset_persistence() {
    let fs = Arc::new(InMemoryFileSystem::new());
    let clock = Arc::new(SimClock::new());
    let config = WalConfig {
        max_segment_bytes: 1024 * 1024,
        max_segment_age_secs: 3600,
        data_dir: PathBuf::from("/data"),
        ..Default::default()
    };
    // Write messages and commit an offset.
    {
        let engine = StorageEngine::new(fs.clone(), clock.clone(), config.clone()).unwrap();
        for i in 0..100 {
            engine.append("t", 0, None, b"data", &[], i).unwrap();
        }
        engine.commit_offset("group-1", "t", 0, 50).unwrap();
    }
    // Restart and verify committed offset survives.
    {
        let engine = StorageEngine::new(fs.clone(), clock.clone(), config.clone()).unwrap();
        engine.recover().unwrap();
        // Invariant 4: Consumer group offsets never regress.
        let committed = engine.get_committed_offset("group-1", "t", 0);
        assert_eq!(committed, Some(50));
        // Can resume consuming from committed offset.
        let messages = engine.read("t", 0, 51, 100).unwrap();
        assert_eq!(messages.len(), 49); // offsets 51-99
    }
}
/// S10: High throughput burst - no message loss
#[test]
fn s10_high_throughput() {
    let (engine, _fs, _clock) = test_engine();
    let msg_count = 10_000u64;
    // Burst write.
    for i in 0..msg_count {
        engine
            .append("burst", 0, None, format!("msg-{i}").as_bytes(), &[], i)
            .unwrap();
    }
    // Verify no loss (read limit exceeds the total written).
    let messages = engine.read("burst", 0, 0, (msg_count + 1) as usize).unwrap();
    assert_eq!(messages.len(), msg_count as usize);
    // Verify ordering.
    for (i, msg) in messages.iter().enumerate() {
        assert_eq!(msg.offset, i as u64);
    }
}
/// S06: Segment rotation and recovery - multiple segments survive crash
#[test]
fn s06_segment_rotation_recovery() {
    let fs = Arc::new(InMemoryFileSystem::new());
    let clock = Arc::new(SimClock::new());
    let config = WalConfig {
        max_segment_bytes: 512, // Very small segments to force rotation.
        max_segment_age_secs: 3600,
        data_dir: PathBuf::from("/data"),
        ..Default::default()
    };
    // Write enough messages to cause multiple segment rotations.
    {
        let engine = StorageEngine::new(fs.clone(), clock.clone(), config.clone()).unwrap();
        for i in 0..200u64 {
            engine
                .append("t", 0, None, format!("msg-{i}").as_bytes(), &[], i)
                .unwrap();
        }
    }
    // Recover with a fresh engine over the same simulated disk.
    {
        let engine = StorageEngine::new(fs.clone(), clock.clone(), config.clone()).unwrap();
        engine.recover().unwrap();
        let messages = engine.read("t", 0, 0, 300).unwrap();
        assert_eq!(messages.len(), 200, "all messages across segments must survive");
        for (i, msg) in messages.iter().enumerate() {
            assert_eq!(msg.offset, i as u64);
        }
        // Continue writing; the offset sequence resumes where it left off.
        let offset = engine.append("t", 0, None, b"new", &[], 0).unwrap();
        assert_eq!(offset, 200);
    }
}

View File

@@ -0,0 +1 @@
mod scenarios;