updated docs for release
This commit is contained in:
@@ -1,3 +1,8 @@
|
||||
//! The central state machine responsible for coordinating the thermal benchmark.
|
||||
//!
|
||||
//! It manages hardware interactions through the [PlatformSal], generates stress
|
||||
//! using a [Workload], and feeds telemetry to the frontend via MPSC channels.
|
||||
|
||||
use anyhow::{Result, Context};
|
||||
use std::sync::mpsc;
|
||||
use std::time::{Duration, Instant};
|
||||
@@ -14,31 +19,48 @@ use crate::load::Workload;
|
||||
use crate::mediator::{TelemetryState, UiCommand, BenchmarkPhase};
|
||||
use crate::engine::{OptimizerEngine, ThermalProfile, ThermalPoint, OptimizationResult};
|
||||
|
||||
/// The central state machine responsible for coordinating the thermal benchmark.
|
||||
///
|
||||
/// It manages hardware interactions through the [PlatformSal], generates stress
|
||||
/// using a [Workload], and feeds telemetry to the frontend via MPSC channels.
|
||||
pub struct BenchmarkOrchestrator {
|
||||
/// Injected hardware abstraction layer.
///
/// Used by the benchmark for the audit steps, suppressing background
/// services, switching fan modes, reading temperature / power / frequency
/// sensors, and restoring the hardware state during cleanup.
|
||||
sal: Arc<dyn PlatformSal>,
|
||||
/// Discovered system facts and paths.
///
/// Supplies the benchmark configuration (`bench_config`) and the output
/// locations of generated configuration files (`paths.configs`).
|
||||
facts: SystemFactSheet,
|
||||
/// Heat generation workload.
///
/// Started once per power step and always stopped again during cleanup.
|
||||
workload: Box<dyn Workload>,
|
||||
/// Channel for sending telemetry updates to the UI.
///
/// A failed send is reported to callers as a "Telemetry channel closed" error.
|
||||
telemetry_tx: mpsc::Sender<TelemetryState>,
|
||||
/// Channel for receiving commands from the UI.
// NOTE(review): no consumer of this receiver is visible in this excerpt;
// presumably it is drained by the abort check -- confirm.
|
||||
command_rx: mpsc::Receiver<UiCommand>,
|
||||
/// Current phase of the benchmark.
///
/// Updated as the run advances (auditing, idle calibration, stress testing,
/// physical modeling, finalizing).
|
||||
phase: BenchmarkPhase,
|
||||
/// Accumulated thermal data points.
///
/// Also carries the ambient temperature derived from the idle baseline.
|
||||
profile: ThermalProfile,
|
||||
/// Mathematics engine for data smoothing and optimization.
///
/// Provides smoothing of raw samples, the thermal-resistance calculation
/// and the silicon-knee search used when generating the result.
|
||||
engine: OptimizerEngine,
|
||||
|
||||
// --- History Buffers (120 points for 60s @ 500ms) ---
|
||||
/// Sliding window of power readings (Watts).
|
||||
history_watts: VecDeque<f32>,
|
||||
/// Sliding window of temperature readings (Celsius).
|
||||
history_temp: VecDeque<f32>,
|
||||
/// Sliding window of CPU frequency (MHz).
|
||||
history_mhz: VecDeque<f32>,
|
||||
|
||||
// --- Static Info ---
|
||||
/// Detected CPU model string.
///
/// Cloned into the [TelemetryState] built for log messages.
|
||||
cpu_model: String,
|
||||
/// Total system RAM in Gigabytes.
|
||||
total_ram_gb: u64,
|
||||
|
||||
// --- Safety ---
|
||||
/// Atomic flag indicating a safety-triggered abort.
///
/// Shared with the watchdog monitor thread and read with `Ordering::SeqCst`
/// when the benchmark checks for an abort.
|
||||
emergency_abort: Arc<AtomicBool>,
|
||||
/// Human-readable reason for the emergency abort.
///
/// Shared with the watchdog monitor thread; when the flag is set but no
/// reason was stored, the abort check falls back to "Unknown safety trigger".
|
||||
emergency_reason: Arc<Mutex<Option<String>>>,
|
||||
}
|
||||
|
||||
impl BenchmarkOrchestrator {
|
||||
/// Creates a new orchestrator instance with injected dependencies.
|
||||
pub fn new(
|
||||
sal: Arc<dyn PlatformSal>,
|
||||
facts: SystemFactSheet,
|
||||
@@ -73,16 +95,17 @@ impl BenchmarkOrchestrator {
|
||||
}
|
||||
}
|
||||
|
||||
/// Executes the full benchmark sequence.
|
||||
///
|
||||
/// This method guarantees that [crate::sal::traits::EnvironmentGuard::restore] and [Workload::stop]
|
||||
/// are called regardless of whether the benchmark succeeds or fails.
|
||||
pub fn run(&mut self) -> Result<OptimizationResult> {
|
||||
self.log("Starting ember-tune Benchmark Sequence.")?;
|
||||
|
||||
// Start Watchdog Monitor
|
||||
let _watchdog_handle = self.spawn_watchdog_monitor();
|
||||
|
||||
// Run the phases in a separate method so any error is captured in `result` and the cleanup below still runs
|
||||
let result = self.execute_benchmark();
|
||||
|
||||
// --- MANDATORY CLEANUP ---
|
||||
self.log("Benchmark sequence finished. Restoring hardware defaults...")?;
|
||||
let _ = self.workload.stop();
|
||||
if let Err(e) = self.sal.restore() {
|
||||
@@ -93,10 +116,10 @@ impl BenchmarkOrchestrator {
|
||||
result
|
||||
}
|
||||
|
||||
/// Internal execution logic for the benchmark phases.
|
||||
fn execute_benchmark(&mut self) -> Result<OptimizationResult> {
|
||||
let bench_cfg = self.facts.bench_config.clone().context("Benchmarking config missing in facts")?;
|
||||
|
||||
// Phase 1: Audit & Baseline
|
||||
self.phase = BenchmarkPhase::Auditing;
|
||||
for step in self.sal.audit() {
|
||||
if let Err(e) = step.outcome {
|
||||
@@ -107,10 +130,9 @@ impl BenchmarkOrchestrator {
|
||||
self.log("Suppressing background services (tlp, thermald)...")?;
|
||||
self.sal.suppress().context("Failed to suppress background services")?;
|
||||
|
||||
// Baseline (Idle Calibration)
|
||||
self.phase = BenchmarkPhase::IdleCalibration;
|
||||
self.log(&format!("Phase 1: Recording Idle Baseline ({}s)...", bench_cfg.idle_duration_s))?;
|
||||
self.sal.set_fan_mode("auto")?; // Use auto for idle
|
||||
self.sal.set_fan_mode("auto")?;
|
||||
|
||||
let mut idle_temps = Vec::new();
|
||||
let start = Instant::now();
|
||||
@@ -125,10 +147,9 @@ impl BenchmarkOrchestrator {
|
||||
self.profile.ambient_temp = self.engine.smooth(&idle_temps).last().cloned().unwrap_or(0.0);
|
||||
self.log(&format!("✓ Idle Baseline: {:.1}°C", self.profile.ambient_temp))?;
|
||||
|
||||
// Phase 2: Stress Stepping
|
||||
self.phase = BenchmarkPhase::StressTesting;
|
||||
self.log("Phase 2: Starting Synthetic Stress Matrix.")?;
|
||||
self.sal.set_fan_mode("max")?; // Lock fans for consistent resistance
|
||||
self.sal.set_fan_mode("max")?;
|
||||
|
||||
let steps = bench_cfg.power_steps_watts.clone();
|
||||
for &pl in &steps {
|
||||
@@ -138,7 +159,6 @@ impl BenchmarkOrchestrator {
|
||||
|
||||
self.workload.start(num_cpus::get(), 100)?;
|
||||
|
||||
// Wait for equilibrium
|
||||
let step_start = Instant::now();
|
||||
let mut step_temps = VecDeque::with_capacity(30);
|
||||
|
||||
@@ -152,7 +172,6 @@ impl BenchmarkOrchestrator {
|
||||
self.send_telemetry(tick)?;
|
||||
tick += 1;
|
||||
|
||||
// Check for stability: Range < 0.5C over last 5s (10 ticks)
|
||||
if step_start.elapsed() > Duration::from_secs(bench_cfg.stress_duration_min_s) && step_temps.len() == 10 {
|
||||
let min = step_temps.iter().fold(f32::MAX, |a, &b| a.min(b));
|
||||
let max = step_temps.iter().fold(f32::MIN, |a, &b| a.max(b));
|
||||
@@ -164,7 +183,6 @@ impl BenchmarkOrchestrator {
|
||||
thread::sleep(Duration::from_millis(500));
|
||||
}
|
||||
|
||||
// Record data point
|
||||
let avg_p = self.sal.get_power_w().unwrap_or(0.0);
|
||||
let avg_t = self.sal.get_temp().unwrap_or(0.0);
|
||||
let avg_f = self.sal.get_freq_mhz().unwrap_or(0.0);
|
||||
@@ -185,7 +203,6 @@ impl BenchmarkOrchestrator {
|
||||
thread::sleep(Duration::from_secs(bench_cfg.cool_down_s));
|
||||
}
|
||||
|
||||
// Phase 4: Physical Modeling
|
||||
self.phase = BenchmarkPhase::PhysicalModeling;
|
||||
self.log("Phase 3: Calculating Silicon Physical Sweet Spot...")?;
|
||||
|
||||
@@ -196,7 +213,6 @@ impl BenchmarkOrchestrator {
|
||||
|
||||
thread::sleep(Duration::from_secs(3));
|
||||
|
||||
// Phase 5: Finalizing
|
||||
self.phase = BenchmarkPhase::Finalizing;
|
||||
self.log("Benchmark sequence complete. Generating configurations...")?;
|
||||
|
||||
@@ -206,14 +222,12 @@ impl BenchmarkOrchestrator {
|
||||
trip_temp: res.max_temp_c.max(95.0),
|
||||
};
|
||||
|
||||
// 1. Throttled (Merged if exists)
|
||||
if let Some(throttled_path) = self.facts.paths.configs.get("throttled") {
|
||||
crate::engine::formatters::throttled::ThrottledTranslator::save(throttled_path, &config)?;
|
||||
self.log(&format!("✓ Saved '{}' (merged).", throttled_path.display()))?;
|
||||
res.config_paths.insert("throttled".to_string(), throttled_path.clone());
|
||||
}
|
||||
|
||||
// 2. i8kmon
|
||||
if let Some(i8k_path) = self.facts.paths.configs.get("i8kmon") {
|
||||
let i8k_config = crate::engine::formatters::i8kmon::I8kmonConfig {
|
||||
t_ambient: self.profile.ambient_temp,
|
||||
@@ -228,6 +242,7 @@ impl BenchmarkOrchestrator {
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
/// Spawns a concurrent monitor that polls safety sensors every 100ms.
|
||||
fn spawn_watchdog_monitor(&self) -> thread::JoinHandle<()> {
|
||||
let abort = self.emergency_abort.clone();
|
||||
let reason_store = self.emergency_reason.clone();
|
||||
@@ -279,6 +294,7 @@ impl BenchmarkOrchestrator {
|
||||
})
|
||||
}
|
||||
|
||||
/// Generates the final [OptimizationResult] based on current measurements.
|
||||
pub fn generate_result(&self, is_partial: bool) -> OptimizationResult {
|
||||
let r_theta = self.engine.calculate_thermal_resistance(&self.profile);
|
||||
let knee = self.engine.find_silicon_knee(&self.profile);
|
||||
@@ -296,6 +312,7 @@ impl BenchmarkOrchestrator {
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if the benchmark has been aborted by the user or the watchdog.
|
||||
fn check_abort(&self) -> Result<()> {
|
||||
if self.emergency_abort.load(Ordering::SeqCst) {
|
||||
let reason = self.emergency_reason.lock().unwrap().clone().unwrap_or_else(|| "Unknown safety trigger".to_string());
|
||||
@@ -312,6 +329,7 @@ impl BenchmarkOrchestrator {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Helper to send log messages to the frontend.
|
||||
fn log(&self, msg: &str) -> Result<()> {
|
||||
let state = TelemetryState {
|
||||
cpu_model: self.cpu_model.clone(),
|
||||
@@ -337,6 +355,7 @@ impl BenchmarkOrchestrator {
|
||||
self.telemetry_tx.send(state).map_err(|_| anyhow::anyhow!("Telemetry channel closed"))
|
||||
}
|
||||
|
||||
/// Collects current sensors and sends a complete [TelemetryState] to the frontend.
|
||||
fn send_telemetry(&mut self, tick: u64) -> Result<()> {
|
||||
let temp = self.sal.get_temp().unwrap_or(0.0);
|
||||
let pwr = self.sal.get_power_w().unwrap_or(0.0);
|
||||
|
||||
Reference in New Issue
Block a user