xps 13 3980

This commit is contained in:
2026-02-26 13:37:04 +01:00
commit d6ac8e5931
21 changed files with 4562 additions and 0 deletions

288
src/orchestrator/mod.rs Normal file
View File

@@ -0,0 +1,288 @@
use anyhow::{Result, Context};
use std::sync::mpsc;
use std::time::{Duration, Instant};
use std::thread;
use std::collections::VecDeque;
use sysinfo::System;
use crate::sal::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog};
use crate::load::Workload;
use crate::mediator::{TelemetryState, UiCommand, BenchmarkPhase};
use crate::engine::{OptimizerEngine, ThermalProfile, ThermalPoint, OptimizationResult};
pub struct BenchmarkOrchestrator {
auditor: Box<dyn PreflightAuditor>,
guard: Box<dyn EnvironmentGuard>,
sensors: Box<dyn SensorBus>,
actuators: Box<dyn ActuatorBus>,
watchdog: Box<dyn HardwareWatchdog>,
workload: Box<dyn Workload>,
telemetry_tx: mpsc::Sender<TelemetryState>,
command_rx: mpsc::Receiver<UiCommand>,
phase: BenchmarkPhase,
profile: ThermalProfile,
engine: OptimizerEngine,
// --- History Buffers (120 points for 60s @ 500ms) ---
history_watts: VecDeque<f32>,
history_temp: VecDeque<f32>,
history_mhz: VecDeque<f32>,
// --- Static Info ---
cpu_model: String,
total_ram_gb: u64,
}
impl BenchmarkOrchestrator {
pub fn new(
auditor: Box<dyn PreflightAuditor>,
guard: Box<dyn EnvironmentGuard>,
sensors: Box<dyn SensorBus>,
actuators: Box<dyn ActuatorBus>,
watchdog: Box<dyn HardwareWatchdog>,
workload: Box<dyn Workload>,
telemetry_tx: mpsc::Sender<TelemetryState>,
command_rx: mpsc::Receiver<UiCommand>,
) -> Self {
let mut sys = System::new_all();
sys.refresh_all();
let cpu_model = sys.cpus().first()
.map(|c| c.brand().to_string())
.unwrap_or_else(|| "Unknown CPU".to_string());
let total_ram_gb = sys.total_memory() / 1024 / 1024 / 1024;
Self {
auditor,
guard,
sensors,
actuators,
watchdog,
workload,
telemetry_tx,
command_rx,
phase: BenchmarkPhase::Auditing,
profile: ThermalProfile::default(),
engine: OptimizerEngine::new(5),
history_watts: VecDeque::with_capacity(120),
history_temp: VecDeque::with_capacity(120),
history_mhz: VecDeque::with_capacity(120),
cpu_model,
total_ram_gb,
}
}
pub fn run(&mut self) -> Result<OptimizationResult> {
self.log("Starting ember-tune Benchmark Sequence.")?;
// Phase 1: Audit & Baseline
self.phase = BenchmarkPhase::Auditing;
for step in self.auditor.audit() {
if let Err(e) = step.outcome {
return Err(anyhow::anyhow!("Audit failed ({}): {:?}", step.description, e));
}
}
self.log("Suppressing background services (tlp, thermald)...")?;
self.guard.suppress().context("Failed to suppress background services")?;
// Baseline (Idle Calibration)
self.phase = BenchmarkPhase::IdleCalibration;
self.log("Phase 1: Recording Idle Baseline (10s)...")?;
self.actuators.set_fan_mode("auto")?; // Use auto for idle
let mut idle_temps = Vec::new();
let start = Instant::now();
let mut tick = 0;
while start.elapsed() < Duration::from_secs(10) {
self.check_abort()?;
self.send_telemetry(tick)?;
idle_temps.push(self.sensors.get_temp().unwrap_or(0.0));
tick += 1;
thread::sleep(Duration::from_millis(500));
}
self.profile.ambient_temp = self.engine.smooth(&idle_temps).last().cloned().unwrap_or(0.0);
self.log(&format!("✓ Idle Baseline: {:.1}°C", self.profile.ambient_temp))?;
// Phase 2: Stress Stepping
self.phase = BenchmarkPhase::StressTesting;
self.log("Phase 2: Starting Synthetic Stress Matrix.")?;
self.actuators.set_fan_mode("max")?; // Lock fans for consistent resistance
let power_steps = [15.0, 20.0, 25.0, 30.0, 35.0];
for &pl in &power_steps {
self.log(&format!("Testing PL1 = {:.0}W...", pl))?;
self.actuators.set_sustained_power_limit(pl)?;
self.actuators.set_burst_power_limit(pl + 5.0)?;
self.workload.start(num_cpus::get(), 100)?;
// Wait for equilibrium: Hybrid approach (15s min, 45s max)
let step_start = Instant::now();
let mut step_temps = VecDeque::with_capacity(30); // Last 15s @ 500ms
while step_start.elapsed() < Duration::from_secs(45) {
self.check_abort()?;
if self.watchdog.check_emergency()? {
self.log("⚠ EMERGENCY ABORT: Watchdog triggered!")?;
self.workload.stop()?;
return Err(anyhow::anyhow!("Hardware Watchdog Triggered"));
}
let t = self.sensors.get_temp().unwrap_or(0.0);
step_temps.push_back(t);
if step_temps.len() > 10 { step_temps.pop_front(); }
self.send_telemetry(tick)?;
tick += 1;
// Check for stability: Range < 0.5C over last 5s (10 ticks)
if step_start.elapsed() > Duration::from_secs(15) && step_temps.len() == 10 {
let min = step_temps.iter().fold(f32::MAX, |a, &b| a.min(b));
let max = step_temps.iter().fold(f32::MIN, |a, &b| a.max(b));
if (max - min) < 0.5 {
self.log(&format!(" Equilibrium reached at {:.1}°C", t))?;
break;
}
}
thread::sleep(Duration::from_millis(500));
}
// Record data point
let avg_p = self.sensors.get_power_w().unwrap_or(0.0);
let avg_t = self.sensors.get_temp().unwrap_or(0.0);
let avg_f = 2500.0; // Mock frequency until SensorBus expanded
let fan = self.sensors.get_fan_rpm().unwrap_or(0);
let tp = self.workload.get_throughput().unwrap_or(0.0);
self.profile.points.push(ThermalPoint {
power_w: avg_p,
temp_c: avg_t,
freq_mhz: avg_f,
fan_rpm: fan,
throughput: tp,
});
self.workload.stop()?;
self.log(" Step complete. Cooling down for 5s...")?;
thread::sleep(Duration::from_secs(5));
}
// Phase 4: Physical Modeling
self.phase = BenchmarkPhase::PhysicalModeling;
self.log("Phase 3: Calculating Silicon Physical Sweet Spot...")?;
let res = self.generate_result(false);
self.log(&format!("✓ Thermal Resistance (Rθ): {:.3} K/W", res.thermal_resistance_kw))?;
self.log(&format!("✓ Silicon Knee Found: {:.1} W", res.silicon_knee_watts))?;
thread::sleep(Duration::from_secs(3));
// Phase 5: Finalizing
self.phase = BenchmarkPhase::Finalizing;
self.log("Benchmark sequence complete. Generating configuration...")?;
let config = crate::engine::formatters::throttled::ThrottledConfig {
pl1_limit: res.silicon_knee_watts,
pl2_limit: res.recommended_pl2,
trip_temp: res.max_temp_c.max(95.0),
};
let conf_content = crate::engine::formatters::throttled::ThrottledTranslator::generate_conf(&config);
std::fs::write("throttled.conf", conf_content)?;
self.log("✓ Saved 'throttled.conf'.")?;
self.guard.restore()?;
self.log("✓ Environment restored.")?;
Ok(res)
}
pub fn generate_result(&self, is_partial: bool) -> OptimizationResult {
let r_theta = self.engine.calculate_thermal_resistance(&self.profile);
let knee = self.engine.find_silicon_knee(&self.profile);
let max_t = self.engine.get_max_temp(&self.profile);
OptimizationResult {
profile: self.profile.clone(),
silicon_knee_watts: knee,
thermal_resistance_kw: r_theta,
recommended_pl1: knee,
recommended_pl2: knee * 1.25,
max_temp_c: max_t,
is_partial,
}
}
fn check_abort(&self) -> Result<()> {
if let Ok(cmd) = self.command_rx.try_recv() {
match cmd {
UiCommand::Abort => {
return Err(anyhow::anyhow!("ABORTED"));
}
}
}
Ok(())
}
fn log(&self, msg: &str) -> Result<()> {
let state = TelemetryState {
cpu_model: self.cpu_model.clone(),
total_ram_gb: self.total_ram_gb,
tick: 0,
cpu_temp: self.sensors.get_temp().unwrap_or(0.0),
power_w: self.sensors.get_power_w().unwrap_or(0.0),
current_freq: 0.0,
fan_rpm: self.sensors.get_fan_rpm().unwrap_or(0),
governor: "unknown".to_string(),
pl1_limit: 0.0,
pl2_limit: 0.0,
fan_tier: "auto".to_string(),
phase: self.phase,
history_watts: Vec::new(),
history_temp: Vec::new(),
history_mhz: Vec::new(),
log_event: Some(msg.to_string()),
metadata: std::collections::HashMap::new(),
};
self.telemetry_tx.send(state).map_err(|_| anyhow::anyhow!("Telemetry channel closed"))
}
fn send_telemetry(&mut self, tick: u64) -> Result<()> {
let temp = self.sensors.get_temp().unwrap_or(0.0);
let pwr = self.sensors.get_power_w().unwrap_or(0.0);
let freq = 0.0;
self.history_temp.push_back(temp);
self.history_watts.push_back(pwr);
self.history_mhz.push_back(freq);
if self.history_temp.len() > 120 {
self.history_temp.pop_front();
self.history_watts.pop_front();
self.history_mhz.pop_front();
}
let state = TelemetryState {
cpu_model: self.cpu_model.clone(),
total_ram_gb: self.total_ram_gb,
tick,
cpu_temp: temp,
power_w: pwr,
current_freq: freq,
fan_rpm: self.sensors.get_fan_rpm().unwrap_or(0),
governor: "performance".to_string(),
pl1_limit: 15.0,
pl2_limit: 25.0,
fan_tier: "max".to_string(),
phase: self.phase,
history_watts: self.history_watts.iter().cloned().collect(),
history_temp: self.history_temp.iter().cloned().collect(),
history_mhz: self.history_mhz.iter().cloned().collect(),
log_event: None,
metadata: std::collections::HashMap::new(),
};
self.telemetry_tx.send(state).map_err(|_| anyhow::anyhow!("Telemetry channel closed"))
}
}