Implemented generic Linux SAL with heuristics

This commit is contained in:
2026-02-26 15:16:37 +01:00
parent 48c3b46a0c
commit f87efa1d24
13 changed files with 686 additions and 125 deletions

View File

@@ -5,17 +5,13 @@ use std::thread;
use std::collections::VecDeque;
use sysinfo::System;
use crate::sal::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog};
use crate::sal::traits::{PlatformSal};
use crate::load::Workload;
use crate::mediator::{TelemetryState, UiCommand, BenchmarkPhase};
use crate::engine::{OptimizerEngine, ThermalProfile, ThermalPoint, OptimizationResult};
pub struct BenchmarkOrchestrator {
auditor: Box<dyn PreflightAuditor>,
guard: Box<dyn EnvironmentGuard>,
sensors: Box<dyn SensorBus>,
actuators: Box<dyn ActuatorBus>,
watchdog: Box<dyn HardwareWatchdog>,
sal: Box<dyn PlatformSal>,
workload: Box<dyn Workload>,
telemetry_tx: mpsc::Sender<TelemetryState>,
command_rx: mpsc::Receiver<UiCommand>,
@@ -35,11 +31,7 @@ pub struct BenchmarkOrchestrator {
impl BenchmarkOrchestrator {
pub fn new(
auditor: Box<dyn PreflightAuditor>,
guard: Box<dyn EnvironmentGuard>,
sensors: Box<dyn SensorBus>,
actuators: Box<dyn ActuatorBus>,
watchdog: Box<dyn HardwareWatchdog>,
sal: Box<dyn PlatformSal>,
workload: Box<dyn Workload>,
telemetry_tx: mpsc::Sender<TelemetryState>,
command_rx: mpsc::Receiver<UiCommand>,
@@ -53,11 +45,7 @@ impl BenchmarkOrchestrator {
let total_ram_gb = sys.total_memory() / 1024 / 1024 / 1024;
Self {
auditor,
guard,
sensors,
actuators,
watchdog,
sal,
workload,
telemetry_tx,
command_rx,
@@ -77,19 +65,19 @@ impl BenchmarkOrchestrator {
// Phase 1: Audit & Baseline
self.phase = BenchmarkPhase::Auditing;
for step in self.auditor.audit() {
for step in self.sal.audit() {
if let Err(e) = step.outcome {
return Err(anyhow::anyhow!("Audit failed ({}): {:?}", step.description, e));
}
}
self.log("Suppressing background services (tlp, thermald)...")?;
self.guard.suppress().context("Failed to suppress background services")?;
self.sal.suppress().context("Failed to suppress background services")?;
// Baseline (Idle Calibration)
self.phase = BenchmarkPhase::IdleCalibration;
self.log("Phase 1: Recording Idle Baseline (10s)...")?;
self.actuators.set_fan_mode("auto")?; // Use auto for idle
self.sal.set_fan_mode("auto")?; // Use auto for idle
let mut idle_temps = Vec::new();
let start = Instant::now();
@@ -97,7 +85,7 @@ impl BenchmarkOrchestrator {
while start.elapsed() < Duration::from_secs(10) {
self.check_abort()?;
self.send_telemetry(tick)?;
idle_temps.push(self.sensors.get_temp().unwrap_or(0.0));
idle_temps.push(self.sal.get_temp().unwrap_or(0.0));
tick += 1;
thread::sleep(Duration::from_millis(500));
}
@@ -107,13 +95,13 @@ impl BenchmarkOrchestrator {
// Phase 2: Stress Stepping
self.phase = BenchmarkPhase::StressTesting;
self.log("Phase 2: Starting Synthetic Stress Matrix.")?;
self.actuators.set_fan_mode("max")?; // Lock fans for consistent resistance
self.sal.set_fan_mode("max")?; // Lock fans for consistent resistance
let power_steps = [15.0, 20.0, 25.0, 30.0, 35.0];
for &pl in &power_steps {
self.log(&format!("Testing PL1 = {:.0}W...", pl))?;
self.actuators.set_sustained_power_limit(pl)?;
self.actuators.set_burst_power_limit(pl + 5.0)?;
self.sal.set_sustained_power_limit(pl)?;
self.sal.set_burst_power_limit(pl + 5.0)?;
self.workload.start(num_cpus::get(), 100)?;
@@ -123,13 +111,13 @@ impl BenchmarkOrchestrator {
while step_start.elapsed() < Duration::from_secs(45) {
self.check_abort()?;
if self.watchdog.check_emergency()? {
if self.sal.check_emergency()? {
self.log("⚠ EMERGENCY ABORT: Watchdog triggered!")?;
self.workload.stop()?;
return Err(anyhow::anyhow!("Hardware Watchdog Triggered"));
}
let t = self.sensors.get_temp().unwrap_or(0.0);
let t = self.sal.get_temp().unwrap_or(0.0);
step_temps.push_back(t);
if step_temps.len() > 10 { step_temps.pop_front(); }
@@ -149,10 +137,10 @@ impl BenchmarkOrchestrator {
}
// Record data point
let avg_p = self.sensors.get_power_w().unwrap_or(0.0);
let avg_t = self.sensors.get_temp().unwrap_or(0.0);
let avg_f = self.sensors.get_freq_mhz().unwrap_or(0.0);
let fans = self.sensors.get_fan_rpms().unwrap_or_default();
let avg_p = self.sal.get_power_w().unwrap_or(0.0);
let avg_t = self.sal.get_temp().unwrap_or(0.0);
let avg_f = self.sal.get_freq_mhz().unwrap_or(0.0);
let fans = self.sal.get_fan_rpms().unwrap_or_default();
let primary_fan = fans.first().cloned().unwrap_or(0);
let tp = self.workload.get_throughput().unwrap_or(0.0);
@@ -210,7 +198,7 @@ impl BenchmarkOrchestrator {
std::fs::write("i8kmon.conf", i8k_content)?;
self.log("✓ Saved 'i8kmon.conf'.")?;
self.guard.restore()?;
self.sal.restore()?;
self.log("✓ Environment restored.")?;
Ok(res)
@@ -248,10 +236,10 @@ impl BenchmarkOrchestrator {
cpu_model: self.cpu_model.clone(),
total_ram_gb: self.total_ram_gb,
tick: 0,
cpu_temp: self.sensors.get_temp().unwrap_or(0.0),
power_w: self.sensors.get_power_w().unwrap_or(0.0),
current_freq: self.sensors.get_freq_mhz().unwrap_or(0.0),
fans: self.sensors.get_fan_rpms().unwrap_or_default(),
cpu_temp: self.sal.get_temp().unwrap_or(0.0),
power_w: self.sal.get_power_w().unwrap_or(0.0),
current_freq: self.sal.get_freq_mhz().unwrap_or(0.0),
fans: self.sal.get_fan_rpms().unwrap_or_default(),
governor: "unknown".to_string(),
pl1_limit: 0.0,
pl2_limit: 0.0,
@@ -267,9 +255,9 @@ impl BenchmarkOrchestrator {
}
fn send_telemetry(&mut self, tick: u64) -> Result<()> {
let temp = self.sensors.get_temp().unwrap_or(0.0);
let pwr = self.sensors.get_power_w().unwrap_or(0.0);
let freq = self.sensors.get_freq_mhz().unwrap_or(0.0);
let temp = self.sal.get_temp().unwrap_or(0.0);
let pwr = self.sal.get_power_w().unwrap_or(0.0);
let freq = self.sal.get_freq_mhz().unwrap_or(0.0);
self.history_temp.push_back(temp);
self.history_watts.push_back(pwr);
@@ -288,7 +276,7 @@ impl BenchmarkOrchestrator {
cpu_temp: temp,
power_w: pwr,
current_freq: freq,
fans: self.sensors.get_fan_rpms().unwrap_or_default(),
fans: self.sal.get_fan_rpms().unwrap_or_default(),
governor: "performance".to_string(),
pl1_limit: 15.0,
pl2_limit: 25.0,