updated docs for release

This commit is contained in:
2026-02-26 17:17:57 +01:00
parent 667d94af7a
commit f4656619be
10 changed files with 335 additions and 81 deletions

View File

@@ -1,3 +1,8 @@
//! The central state machine responsible for coordinating the thermal benchmark.
//!
//! It manages hardware interactions through the [PlatformSal], generates stress
//! using a [Workload], and feeds telemetry to the frontend via MPSC channels.
use anyhow::{Result, Context};
use std::sync::mpsc;
use std::time::{Duration, Instant};
@@ -14,31 +19,48 @@ use crate::load::Workload;
use crate::mediator::{TelemetryState, UiCommand, BenchmarkPhase};
use crate::engine::{OptimizerEngine, ThermalProfile, ThermalPoint, OptimizationResult};
/// The central state machine responsible for coordinating the thermal benchmark.
///
/// It manages hardware interactions through the [`PlatformSal`], generates stress
/// using a [`Workload`], and feeds telemetry to the frontend via MPSC channels.
///
/// All collaborators are handed in through the constructor; the orchestrator itself
/// only tracks the current [`BenchmarkPhase`], the measurements gathered so far and
/// the shared safety flags.
pub struct BenchmarkOrchestrator {
/// Injected hardware abstraction layer, held as a shared trait object.
///
/// Used for sensor reads (`get_temp`, `get_power_w`, `get_freq_mhz`), fan mode
/// changes, service suppression and restoring hardware defaults.
sal: Arc<dyn PlatformSal>,
/// Discovered system facts and paths.
///
/// Supplies the benchmark configuration (`bench_config`) and the config file
/// locations (`paths.configs`) that results are written to.
facts: SystemFactSheet,
/// Heat generation workload; started for each stress step and stopped during cleanup.
workload: Box<dyn Workload>,
/// Channel for sending telemetry updates (including log messages) to the UI.
telemetry_tx: mpsc::Sender<TelemetryState>,
/// Channel for receiving commands from the UI.
command_rx: mpsc::Receiver<UiCommand>,
/// Current phase of the benchmark; updated as the sequence advances.
phase: BenchmarkPhase,
/// Accumulated thermal data points, plus the idle baseline stored in `ambient_temp`.
profile: ThermalProfile,
/// Mathematics engine for data smoothing and optimization
/// (thermal resistance and silicon knee calculations operate on `profile`).
engine: OptimizerEngine,
// --- History Buffers (120 points for 60s @ 500ms) ---
/// Sliding window of power readings (Watts).
history_watts: VecDeque<f32>,
/// Sliding window of temperature readings (Celsius).
history_temp: VecDeque<f32>,
/// Sliding window of CPU frequency (MHz).
history_mhz: VecDeque<f32>,
// --- Static Info ---
/// Detected CPU model string; copied into every outgoing telemetry state.
cpu_model: String,
/// Total system RAM in Gigabytes.
total_ram_gb: u64,
// --- Safety ---
/// Atomic flag indicating a safety-triggered abort.
///
/// A clone of this handle is given to the watchdog monitor thread; the
/// orchestrator reads it when checking for an abort.
emergency_abort: Arc<AtomicBool>,
/// Human-readable reason for the emergency abort.
///
/// Shared with the watchdog monitor thread. `None` when no reason was recorded,
/// in which case the abort check reports "Unknown safety trigger".
emergency_reason: Arc<Mutex<Option<String>>>,
}
impl BenchmarkOrchestrator {
/// Creates a new orchestrator instance with injected dependencies.
pub fn new(
sal: Arc<dyn PlatformSal>,
facts: SystemFactSheet,
@@ -73,16 +95,17 @@ impl BenchmarkOrchestrator {
}
}
/// Executes the full benchmark sequence.
///
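/// Once the benchmark phases return, whether they succeeded or failed, this method calls
/// [Workload::stop] and [crate::sal::traits::EnvironmentGuard::restore] before handing back the result.
///
/// NOTE(review): the log call that precedes cleanup propagates its error with `?`, so a
/// closed telemetry channel would return before cleanup runs. Confirm whether that is acceptable.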
pub fn run(&mut self) -> Result<OptimizationResult> {
self.log("Starting ember-tune Benchmark Sequence.")?;
// Start Watchdog Monitor
let _watchdog_handle = self.spawn_watchdog_monitor();
// Run the phases in a separate method so the cleanup below executes on both success and failure
let result = self.execute_benchmark();
// --- MANDATORY CLEANUP ---
self.log("Benchmark sequence finished. Restoring hardware defaults...")?;
let _ = self.workload.stop();
if let Err(e) = self.sal.restore() {
@@ -93,10 +116,10 @@ impl BenchmarkOrchestrator {
result
}
/// Internal execution logic for the benchmark phases.
fn execute_benchmark(&mut self) -> Result<OptimizationResult> {
let bench_cfg = self.facts.bench_config.clone().context("Benchmarking config missing in facts")?;
// Phase 1: Audit & Baseline
self.phase = BenchmarkPhase::Auditing;
for step in self.sal.audit() {
if let Err(e) = step.outcome {
@@ -107,10 +130,9 @@ impl BenchmarkOrchestrator {
self.log("Suppressing background services (tlp, thermald)...")?;
self.sal.suppress().context("Failed to suppress background services")?;
// Baseline (Idle Calibration)
self.phase = BenchmarkPhase::IdleCalibration;
self.log(&format!("Phase 1: Recording Idle Baseline ({}s)...", bench_cfg.idle_duration_s))?;
self.sal.set_fan_mode("auto")?; // Use auto for idle
self.sal.set_fan_mode("auto")?;
let mut idle_temps = Vec::new();
let start = Instant::now();
@@ -125,10 +147,9 @@ impl BenchmarkOrchestrator {
self.profile.ambient_temp = self.engine.smooth(&idle_temps).last().cloned().unwrap_or(0.0);
self.log(&format!("✓ Idle Baseline: {:.1}°C", self.profile.ambient_temp))?;
// Phase 2: Stress Stepping
self.phase = BenchmarkPhase::StressTesting;
self.log("Phase 2: Starting Synthetic Stress Matrix.")?;
self.sal.set_fan_mode("max")?; // Lock fans for consistent resistance
self.sal.set_fan_mode("max")?;
let steps = bench_cfg.power_steps_watts.clone();
for &pl in &steps {
@@ -138,7 +159,6 @@ impl BenchmarkOrchestrator {
self.workload.start(num_cpus::get(), 100)?;
// Wait for equilibrium
let step_start = Instant::now();
let mut step_temps = VecDeque::with_capacity(30);
@@ -152,7 +172,6 @@ impl BenchmarkOrchestrator {
self.send_telemetry(tick)?;
tick += 1;
// Check for stability: Range < 0.5C over last 5s (10 ticks)
if step_start.elapsed() > Duration::from_secs(bench_cfg.stress_duration_min_s) && step_temps.len() == 10 {
let min = step_temps.iter().fold(f32::MAX, |a, &b| a.min(b));
let max = step_temps.iter().fold(f32::MIN, |a, &b| a.max(b));
@@ -164,7 +183,6 @@ impl BenchmarkOrchestrator {
thread::sleep(Duration::from_millis(500));
}
// Record data point
let avg_p = self.sal.get_power_w().unwrap_or(0.0);
let avg_t = self.sal.get_temp().unwrap_or(0.0);
let avg_f = self.sal.get_freq_mhz().unwrap_or(0.0);
@@ -185,7 +203,6 @@ impl BenchmarkOrchestrator {
thread::sleep(Duration::from_secs(bench_cfg.cool_down_s));
}
// Phase 4: Physical Modeling
self.phase = BenchmarkPhase::PhysicalModeling;
self.log("Phase 3: Calculating Silicon Physical Sweet Spot...")?;
@@ -196,7 +213,6 @@ impl BenchmarkOrchestrator {
thread::sleep(Duration::from_secs(3));
// Phase 5: Finalizing
self.phase = BenchmarkPhase::Finalizing;
self.log("Benchmark sequence complete. Generating configurations...")?;
@@ -206,14 +222,12 @@ impl BenchmarkOrchestrator {
trip_temp: res.max_temp_c.max(95.0),
};
// 1. Throttled (Merged if exists)
if let Some(throttled_path) = self.facts.paths.configs.get("throttled") {
crate::engine::formatters::throttled::ThrottledTranslator::save(throttled_path, &config)?;
self.log(&format!("✓ Saved '{}' (merged).", throttled_path.display()))?;
res.config_paths.insert("throttled".to_string(), throttled_path.clone());
}
// 2. i8kmon
if let Some(i8k_path) = self.facts.paths.configs.get("i8kmon") {
let i8k_config = crate::engine::formatters::i8kmon::I8kmonConfig {
t_ambient: self.profile.ambient_temp,
@@ -228,6 +242,7 @@ impl BenchmarkOrchestrator {
Ok(res)
}
/// Spawns a concurrent monitor that polls safety sensors every 100ms.
fn spawn_watchdog_monitor(&self) -> thread::JoinHandle<()> {
let abort = self.emergency_abort.clone();
let reason_store = self.emergency_reason.clone();
@@ -279,6 +294,7 @@ impl BenchmarkOrchestrator {
})
}
/// Generates the final [OptimizationResult] based on current measurements.
pub fn generate_result(&self, is_partial: bool) -> OptimizationResult {
let r_theta = self.engine.calculate_thermal_resistance(&self.profile);
let knee = self.engine.find_silicon_knee(&self.profile);
@@ -296,6 +312,7 @@ impl BenchmarkOrchestrator {
}
}
/// Checks if the benchmark has been aborted by the user or the watchdog.
fn check_abort(&self) -> Result<()> {
if self.emergency_abort.load(Ordering::SeqCst) {
let reason = self.emergency_reason.lock().unwrap().clone().unwrap_or_else(|| "Unknown safety trigger".to_string());
@@ -312,6 +329,7 @@ impl BenchmarkOrchestrator {
Ok(())
}
/// Helper to send log messages to the frontend.
fn log(&self, msg: &str) -> Result<()> {
let state = TelemetryState {
cpu_model: self.cpu_model.clone(),
@@ -337,6 +355,7 @@ impl BenchmarkOrchestrator {
self.telemetry_tx.send(state).map_err(|_| anyhow::anyhow!("Telemetry channel closed"))
}
/// Collects current sensors and sends a complete [TelemetryState] to the frontend.
fn send_telemetry(&mut self, tick: u64) -> Result<()> {
let temp = self.sal.get_temp().unwrap_or(0.0);
let pwr = self.sal.get_power_w().unwrap_or(0.0);