feat: add weight
This commit is contained in:
@@ -29,7 +29,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
EnvFilter::from_default_env()
|
||||
.add_directive("nocontrol=trace".parse().unwrap())
|
||||
.add_directive("postgres_backend=trace".parse().unwrap())
|
||||
|
||||
.add_directive("debug".parse().unwrap()),
|
||||
)
|
||||
.with_file(false)
|
||||
|
||||
16
examples/rebalancing-stress/Cargo.toml
Normal file
16
examples/rebalancing-stress/Cargo.toml
Normal file
@@ -0,0 +1,16 @@
|
||||
[package]
|
||||
name = "rebalancing-stress"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
nocontrol.workspace = true
|
||||
|
||||
anyhow.workspace = true
|
||||
tokio.workspace = true
|
||||
serde.workspace = true
|
||||
tracing-subscriber.workspace = true
|
||||
tracing.workspace = true
|
||||
uuid.workspace = true
|
||||
tokio-util = { version = "0.7", features = ["rt"] }
|
||||
268
examples/rebalancing-stress/src/main.rs
Normal file
268
examples/rebalancing-stress/src/main.rs
Normal file
@@ -0,0 +1,268 @@
|
||||
//! Stress test for weight-based rebalancing.
|
||||
//!
|
||||
//! Simulates multiple workers sharing an in-process backing store.
|
||||
//! Manifests have varying weights. Workers have capacity limits and
|
||||
//! use FairShare rebalancing to redistribute work as nodes join/leave.
|
||||
//!
|
||||
//! Run with: RUST_LOG=info cargo run -p rebalancing-stress
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use nocontrol::{
|
||||
ControlPlane, Operator, OperatorConfig, OperatorState, RebalancePolicy, Specification,
|
||||
manifests::{Action, Manifest, ManifestMetadata, ManifestState},
|
||||
stores::{BackingStore, BackingStoreEdge},
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
|
||||
)
|
||||
.with_target(false)
|
||||
.without_time()
|
||||
.init();
|
||||
|
||||
let store = BackingStore::in_process();
|
||||
|
||||
// Create 20 manifests with varying weights (total weight = 110)
|
||||
let manifests = vec![
|
||||
("heavy-job-1", 10),
|
||||
("heavy-job-2", 10),
|
||||
("heavy-job-3", 10),
|
||||
("medium-job-1", 5),
|
||||
("medium-job-2", 5),
|
||||
("medium-job-3", 5),
|
||||
("medium-job-4", 5),
|
||||
("medium-job-5", 5),
|
||||
("medium-job-6", 5),
|
||||
("light-job-1", 1),
|
||||
("light-job-2", 1),
|
||||
("light-job-3", 1),
|
||||
("light-job-4", 1),
|
||||
("light-job-5", 1),
|
||||
("light-job-6", 1),
|
||||
("light-job-7", 1),
|
||||
("light-job-8", 1),
|
||||
("light-job-9", 1),
|
||||
("light-job-10", 1),
|
||||
("tiny-job-1", 0),
|
||||
];
|
||||
|
||||
let total_weight: u64 = manifests.iter().map(|(_, w)| *w).sum();
|
||||
tracing::info!(
|
||||
manifest_count = manifests.len(),
|
||||
total_weight,
|
||||
"creating manifests"
|
||||
);
|
||||
|
||||
// Insert all manifests into the shared store using a temporary control plane
|
||||
let seed_operator = OperatorState::new(StressOperator);
|
||||
let seed_cp = ControlPlane::new(seed_operator, store.clone());
|
||||
for (name, weight) in &manifests {
|
||||
seed_cp
|
||||
.add_manifest(Manifest {
|
||||
name: name.to_string(),
|
||||
metadata: ManifestMetadata {},
|
||||
spec: WeightedJob {
|
||||
weight: *weight,
|
||||
name: name.to_string(),
|
||||
},
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
|
||||
let cancellation = CancellationToken::new();
|
||||
|
||||
// --- Phase 1: Start 2 workers ---
|
||||
tracing::info!("=== PHASE 1: Starting 2 workers (capacity=60 each, headroom=5) ===");
|
||||
|
||||
let worker1 = spawn_worker("worker-1", store.clone(), 60, 5, cancellation.child_token());
|
||||
let worker2 = spawn_worker("worker-2", store.clone(), 60, 5, cancellation.child_token());
|
||||
|
||||
// Let them stabilize
|
||||
tokio::time::sleep(Duration::from_secs(15)).await;
|
||||
print_distribution(&seed_cp).await;
|
||||
|
||||
// --- Phase 2: Add a 3rd worker ---
|
||||
tracing::info!("=== PHASE 2: Adding worker-3 ===");
|
||||
let worker3 = spawn_worker("worker-3", store.clone(), 60, 5, cancellation.child_token());
|
||||
|
||||
// Let rebalancing happen
|
||||
tokio::time::sleep(Duration::from_secs(25)).await;
|
||||
print_distribution(&seed_cp).await;
|
||||
|
||||
// --- Phase 3: Add a 4th worker with low capacity ---
|
||||
tracing::info!("=== PHASE 3: Adding worker-4 (capacity=15) ===");
|
||||
let worker4 = spawn_worker("worker-4", store.clone(), 15, 2, cancellation.child_token());
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(25)).await;
|
||||
print_distribution(&seed_cp).await;
|
||||
|
||||
// --- Phase 4: Kill worker-1, observe redistribution ---
|
||||
tracing::info!("=== PHASE 4: Killing worker-1, observing redistribution ===");
|
||||
worker1.cancel();
|
||||
|
||||
// Wait for lease expiry (10s in-process) + sync cycles
|
||||
tokio::time::sleep(Duration::from_secs(25)).await;
|
||||
print_distribution(&seed_cp).await;
|
||||
|
||||
// Cleanup
|
||||
tracing::info!("=== DONE: Shutting down all workers ===");
|
||||
cancellation.cancel();
|
||||
worker2.cancel();
|
||||
worker3.cancel();
|
||||
worker4.cancel();
|
||||
|
||||
// Give workers time to shut down gracefully
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn spawn_worker<TStore: BackingStoreEdge<WeightedJob> + 'static>(
|
||||
name: &'static str,
|
||||
store: BackingStore<WeightedJob, TStore>,
|
||||
max_capacity: u64,
|
||||
headroom: u64,
|
||||
cancellation: CancellationToken,
|
||||
) -> CancellationToken {
|
||||
let worker_cancel = CancellationToken::new();
|
||||
|
||||
tokio::spawn({
|
||||
let cancel = worker_cancel.clone();
|
||||
async move {
|
||||
let config = OperatorConfig {
|
||||
max_capacity: Some(max_capacity),
|
||||
rebalance_policy: RebalancePolicy::FairShare { headroom },
|
||||
resync_interval: Duration::from_secs(60),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let operator = OperatorState::new_with_config(StressOperator, config);
|
||||
let cp = ControlPlane::new(operator, store);
|
||||
|
||||
tracing::info!(%name, max_capacity, headroom, "worker started");
|
||||
|
||||
let combined = CancellationToken::new();
|
||||
let combined_child = combined.child_token();
|
||||
|
||||
tokio::spawn({
|
||||
let combined = combined.clone();
|
||||
async move {
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => {}
|
||||
_ = cancellation.cancelled() => {}
|
||||
}
|
||||
combined.cancel();
|
||||
}
|
||||
});
|
||||
|
||||
if let Err(e) = cp.execute_with_cancellation(combined_child).await {
|
||||
tracing::error!(%name, error = %e, "worker failed");
|
||||
}
|
||||
|
||||
tracing::info!(%name, "worker stopped");
|
||||
}
|
||||
});
|
||||
|
||||
worker_cancel
|
||||
}
|
||||
|
||||
async fn print_distribution<TOperator, TStore>(cp: &ControlPlane<TOperator, TStore>)
|
||||
where
|
||||
TOperator: Operator<Specifications = WeightedJob>,
|
||||
TStore: BackingStoreEdge<WeightedJob>,
|
||||
{
|
||||
let manifests = cp.get_manifests().await.unwrap_or_default();
|
||||
|
||||
let mut by_worker: std::collections::HashMap<String, (usize, u64)> =
|
||||
std::collections::HashMap::new();
|
||||
let mut unowned = Vec::new();
|
||||
|
||||
for m in &manifests {
|
||||
let w = m.manifest.spec.weight;
|
||||
match &m.lease {
|
||||
Some(lease) => {
|
||||
let entry = by_worker
|
||||
.entry(format!("{}", lease.owner))
|
||||
.or_insert((0, 0));
|
||||
entry.0 += 1;
|
||||
entry.1 += w;
|
||||
}
|
||||
None => {
|
||||
unowned.push(m.manifest.name.as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!("--- Distribution ---");
|
||||
let mut workers: Vec<_> = by_worker.into_iter().collect();
|
||||
workers.sort_by_key(|(id, _)| id.clone());
|
||||
for (worker_id, (count, weight)) in &workers {
|
||||
tracing::info!(worker = %worker_id, count, weight, "");
|
||||
}
|
||||
if !unowned.is_empty() {
|
||||
tracing::info!(count = unowned.len(), "unowned manifests");
|
||||
}
|
||||
let total_owned_weight: u64 = workers.iter().map(|(_, (_, w))| w).sum();
|
||||
tracing::info!(
|
||||
total_owned_weight,
|
||||
total_manifests = manifests.len(),
|
||||
workers = workers.len(),
|
||||
"summary"
|
||||
);
|
||||
tracing::info!("--------------------");
|
||||
}
|
||||
|
||||
// --- Operator and Specification ---
|
||||
|
||||
/// Stateless operator used by the stress test.
///
/// It carries no data; every worker (and the seeding control plane) gets its
/// own copy.
#[derive(Clone, Copy, Debug, Default)]
struct StressOperator;
|
||||
|
||||
impl Operator for StressOperator {
|
||||
type Specifications = WeightedJob;
|
||||
type Error = anyhow::Error;
|
||||
|
||||
async fn reconcile(
|
||||
&self,
|
||||
manifest: &mut ManifestState<WeightedJob>,
|
||||
) -> Result<Action, Self::Error> {
|
||||
// Simulate work proportional to weight
|
||||
let work_ms = manifest.manifest.spec.weight * 10;
|
||||
tokio::time::sleep(Duration::from_millis(work_ms)).await;
|
||||
|
||||
Ok(Action::Requeue(Duration::from_secs(5)))
|
||||
}
|
||||
|
||||
async fn on_lease_lost(
|
||||
&self,
|
||||
manifest: &ManifestState<WeightedJob>,
|
||||
) -> Result<(), Self::Error> {
|
||||
tracing::debug!(
|
||||
manifest = %manifest.manifest.name,
|
||||
"lease lost, cleaning up"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct WeightedJob {
|
||||
pub name: String,
|
||||
pub weight: u64,
|
||||
}
|
||||
|
||||
impl Specification for WeightedJob {
    /// Kind identifier shared by every `WeightedJob` manifest.
    fn kind(&self) -> &'static str {
        const KIND: &str = "weighted-job";
        KIND
    }

    /// Returns the job's configured weight.
    fn weight(&self) -> u64 {
        let Self { weight, .. } = self;
        *weight
    }
}
|
||||
Reference in New Issue
Block a user