implemented generic linux sal with heuristics

This commit is contained in:
2026-02-26 15:16:37 +01:00
parent 48c3b46a0c
commit f87efa1d24
13 changed files with 686 additions and 125 deletions

56
Cargo.lock generated
View File

@@ -526,10 +526,12 @@ dependencies = [
"num_cpus",
"owo-colors",
"ratatui",
"regex",
"serde",
"serde_json",
"sysinfo",
"thiserror 2.0.18",
"toml",
"tracing",
"tracing-appender",
"tracing-subscriber",
@@ -1534,6 +1536,15 @@ dependencies = [
"zmij",
]
[[package]]
name = "serde_spanned"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
dependencies = [
"serde_core",
]
[[package]]
name = "sha2"
version = "0.10.9"
@@ -1852,6 +1863,45 @@ dependencies = [
"time-core",
]
[[package]]
name = "toml"
version = "1.0.3+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7614eaf19ad818347db24addfa201729cf2a9b6fdfd9eb0ab870fcacc606c0c"
dependencies = [
"indexmap",
"serde_core",
"serde_spanned",
"toml_datetime",
"toml_parser",
"toml_writer",
"winnow",
]
[[package]]
name = "toml_datetime"
version = "1.0.0+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e"
dependencies = [
"serde_core",
]
[[package]]
name = "toml_parser"
version = "1.0.9+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4"
dependencies = [
"winnow",
]
[[package]]
name = "toml_writer"
version = "1.0.6+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"
[[package]]
name = "tracing"
version = "0.1.44"
@@ -2492,6 +2542,12 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
[[package]]
name = "winnow"
version = "0.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
[[package]]
name = "wit-bindgen"
version = "0.51.0"

View File

@@ -28,3 +28,5 @@ tracing-appender = "0.2"
sysinfo = "0.38"
libc = "0.2"
num_cpus = "1.17"
toml = "1.0.3"
regex = "1.12.3"

View File

@@ -27,9 +27,9 @@ use ratatui::{backend::CrosstermBackend, Terminal};
use cli::Cli;
use mediator::{TelemetryState, UiCommand, BenchmarkPhase};
use sal::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError};
use sal::mock::{MockAuditor, MockGuard, MockSensorBus, MockActuatorBus, MockWatchdog};
use sal::dell_xps_9380::DellXps9380Sal;
use sal::traits::{AuditError, PlatformSal};
use sal::mock::MockSal;
use sal::heuristic::engine::HeuristicEngine;
use load::StressNg;
use orchestrator::BenchmarkOrchestrator;
use ui::dashboard::{draw_dashboard, DashboardState};
@@ -107,20 +107,17 @@ fn main() -> Result<()> {
info!("ember-tune starting with args: {:?}", args);
// 2. Pre-flight Audit (Before TUI)
let auditor: Arc<dyn PreflightAuditor> = if args.mock {
Arc::new(MockAuditor)
// 2. Platform Detection & Audit
let sal: Box<dyn PlatformSal> = if args.mock {
Box::new(MockSal::new())
} else {
match DellXps9380Sal::init() {
Ok(sal) => Arc::new(sal),
Err(e) => return Err(miette::miette!("Failed to initialize Dell SAL: {}", e)),
}
HeuristicEngine::detect_and_build()?
};
println!("{}", console::style("─── Pre-flight System Audit ───").bold().cyan());
let mut audit_failures = Vec::new();
for step in auditor.audit() {
for step in sal.audit() {
print!(" Checking {:<40} ", step.description);
io::Write::flush(&mut io::stdout()).into_diagnostic()?;
@@ -151,8 +148,9 @@ fn main() -> Result<()> {
enable_raw_mode().into_diagnostic()?;
let mut stdout = io::stdout();
execute!(stdout, EnterAlternateScreen).into_diagnostic()?;
let backend = CrosstermBackend::new(stdout);
let mut terminal = Terminal::new(backend).into_diagnostic()?;
let backend_stdout = io::stdout();
let backend_term = CrosstermBackend::new(backend_stdout);
let mut terminal = Terminal::new(backend_term).into_diagnostic()?;
// 4. State & Communication Setup
let running = Arc::new(AtomicBool::new(true));
@@ -166,40 +164,11 @@ fn main() -> Result<()> {
}).expect("Error setting Ctrl-C handler");
// 5. Spawn Backend Orchestrator
let is_mock = args.mock;
let b_auditor = auditor.clone();
let backend_handle = thread::spawn(move || {
let (guard, sensors, actuators, watchdog): (
Box<dyn EnvironmentGuard>,
Box<dyn SensorBus>,
Box<dyn ActuatorBus>,
Box<dyn HardwareWatchdog>,
) = if is_mock {
(
Box::new(MockGuard::new()),
Box::new(MockSensorBus),
Box::new(MockActuatorBus),
Box::new(MockWatchdog),
)
} else {
// Re-init or share the SAL
let sal = Arc::new(DellXps9380Sal::init().expect("Failed to init Dell SAL in backend"));
(
Box::new(sal::dell_xps_9380::DellXps9380Guard::new()),
Box::new(sal.clone() as Arc<dyn SensorBus>),
Box::new(sal.clone() as Arc<dyn ActuatorBus>),
Box::new(sal as Arc<dyn HardwareWatchdog>),
)
};
let workload = Box::new(StressNg::new());
let mut orchestrator = BenchmarkOrchestrator::new(
Box::new(b_auditor),
guard,
sensors,
actuators,
watchdog,
sal,
workload,
telemetry_tx,
command_rx,
@@ -286,7 +255,7 @@ fn main() -> Result<()> {
}
Ok(Err(e)) => {
if e.to_string() == "ABORTED" {
println!("{}", "Benchmark aborted by user. No summary available.".yellow());
println!("{}", "Benchmark aborted by user.".yellow());
} else {
error!("Orchestrator encountered error: {}", e);
eprintln!("{} {}", "Error:".red().bold(), e);

View File

@@ -5,17 +5,13 @@ use std::thread;
use std::collections::VecDeque;
use sysinfo::System;
use crate::sal::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog};
use crate::sal::traits::{PlatformSal};
use crate::load::Workload;
use crate::mediator::{TelemetryState, UiCommand, BenchmarkPhase};
use crate::engine::{OptimizerEngine, ThermalProfile, ThermalPoint, OptimizationResult};
pub struct BenchmarkOrchestrator {
auditor: Box<dyn PreflightAuditor>,
guard: Box<dyn EnvironmentGuard>,
sensors: Box<dyn SensorBus>,
actuators: Box<dyn ActuatorBus>,
watchdog: Box<dyn HardwareWatchdog>,
sal: Box<dyn PlatformSal>,
workload: Box<dyn Workload>,
telemetry_tx: mpsc::Sender<TelemetryState>,
command_rx: mpsc::Receiver<UiCommand>,
@@ -35,11 +31,7 @@ pub struct BenchmarkOrchestrator {
impl BenchmarkOrchestrator {
pub fn new(
auditor: Box<dyn PreflightAuditor>,
guard: Box<dyn EnvironmentGuard>,
sensors: Box<dyn SensorBus>,
actuators: Box<dyn ActuatorBus>,
watchdog: Box<dyn HardwareWatchdog>,
sal: Box<dyn PlatformSal>,
workload: Box<dyn Workload>,
telemetry_tx: mpsc::Sender<TelemetryState>,
command_rx: mpsc::Receiver<UiCommand>,
@@ -53,11 +45,7 @@ impl BenchmarkOrchestrator {
let total_ram_gb = sys.total_memory() / 1024 / 1024 / 1024;
Self {
auditor,
guard,
sensors,
actuators,
watchdog,
sal,
workload,
telemetry_tx,
command_rx,
@@ -77,19 +65,19 @@ impl BenchmarkOrchestrator {
// Phase 1: Audit & Baseline
self.phase = BenchmarkPhase::Auditing;
for step in self.auditor.audit() {
for step in self.sal.audit() {
if let Err(e) = step.outcome {
return Err(anyhow::anyhow!("Audit failed ({}): {:?}", step.description, e));
}
}
self.log("Suppressing background services (tlp, thermald)...")?;
self.guard.suppress().context("Failed to suppress background services")?;
self.sal.suppress().context("Failed to suppress background services")?;
// Baseline (Idle Calibration)
self.phase = BenchmarkPhase::IdleCalibration;
self.log("Phase 1: Recording Idle Baseline (10s)...")?;
self.actuators.set_fan_mode("auto")?; // Use auto for idle
self.sal.set_fan_mode("auto")?; // Use auto for idle
let mut idle_temps = Vec::new();
let start = Instant::now();
@@ -97,7 +85,7 @@ impl BenchmarkOrchestrator {
while start.elapsed() < Duration::from_secs(10) {
self.check_abort()?;
self.send_telemetry(tick)?;
idle_temps.push(self.sensors.get_temp().unwrap_or(0.0));
idle_temps.push(self.sal.get_temp().unwrap_or(0.0));
tick += 1;
thread::sleep(Duration::from_millis(500));
}
@@ -107,13 +95,13 @@ impl BenchmarkOrchestrator {
// Phase 2: Stress Stepping
self.phase = BenchmarkPhase::StressTesting;
self.log("Phase 2: Starting Synthetic Stress Matrix.")?;
self.actuators.set_fan_mode("max")?; // Lock fans for consistent resistance
self.sal.set_fan_mode("max")?; // Lock fans for consistent resistance
let power_steps = [15.0, 20.0, 25.0, 30.0, 35.0];
for &pl in &power_steps {
self.log(&format!("Testing PL1 = {:.0}W...", pl))?;
self.actuators.set_sustained_power_limit(pl)?;
self.actuators.set_burst_power_limit(pl + 5.0)?;
self.sal.set_sustained_power_limit(pl)?;
self.sal.set_burst_power_limit(pl + 5.0)?;
self.workload.start(num_cpus::get(), 100)?;
@@ -123,13 +111,13 @@ impl BenchmarkOrchestrator {
while step_start.elapsed() < Duration::from_secs(45) {
self.check_abort()?;
if self.watchdog.check_emergency()? {
if self.sal.check_emergency()? {
self.log("⚠ EMERGENCY ABORT: Watchdog triggered!")?;
self.workload.stop()?;
return Err(anyhow::anyhow!("Hardware Watchdog Triggered"));
}
let t = self.sensors.get_temp().unwrap_or(0.0);
let t = self.sal.get_temp().unwrap_or(0.0);
step_temps.push_back(t);
if step_temps.len() > 10 { step_temps.pop_front(); }
@@ -149,10 +137,10 @@ impl BenchmarkOrchestrator {
}
// Record data point
let avg_p = self.sensors.get_power_w().unwrap_or(0.0);
let avg_t = self.sensors.get_temp().unwrap_or(0.0);
let avg_f = self.sensors.get_freq_mhz().unwrap_or(0.0);
let fans = self.sensors.get_fan_rpms().unwrap_or_default();
let avg_p = self.sal.get_power_w().unwrap_or(0.0);
let avg_t = self.sal.get_temp().unwrap_or(0.0);
let avg_f = self.sal.get_freq_mhz().unwrap_or(0.0);
let fans = self.sal.get_fan_rpms().unwrap_or_default();
let primary_fan = fans.first().cloned().unwrap_or(0);
let tp = self.workload.get_throughput().unwrap_or(0.0);
@@ -210,7 +198,7 @@ impl BenchmarkOrchestrator {
std::fs::write("i8kmon.conf", i8k_content)?;
self.log("✓ Saved 'i8kmon.conf'.")?;
self.guard.restore()?;
self.sal.restore()?;
self.log("✓ Environment restored.")?;
Ok(res)
@@ -248,10 +236,10 @@ impl BenchmarkOrchestrator {
cpu_model: self.cpu_model.clone(),
total_ram_gb: self.total_ram_gb,
tick: 0,
cpu_temp: self.sensors.get_temp().unwrap_or(0.0),
power_w: self.sensors.get_power_w().unwrap_or(0.0),
current_freq: self.sensors.get_freq_mhz().unwrap_or(0.0),
fans: self.sensors.get_fan_rpms().unwrap_or_default(),
cpu_temp: self.sal.get_temp().unwrap_or(0.0),
power_w: self.sal.get_power_w().unwrap_or(0.0),
current_freq: self.sal.get_freq_mhz().unwrap_or(0.0),
fans: self.sal.get_fan_rpms().unwrap_or_default(),
governor: "unknown".to_string(),
pl1_limit: 0.0,
pl2_limit: 0.0,
@@ -267,9 +255,9 @@ impl BenchmarkOrchestrator {
}
fn send_telemetry(&mut self, tick: u64) -> Result<()> {
let temp = self.sensors.get_temp().unwrap_or(0.0);
let pwr = self.sensors.get_power_w().unwrap_or(0.0);
let freq = self.sensors.get_freq_mhz().unwrap_or(0.0);
let temp = self.sal.get_temp().unwrap_or(0.0);
let pwr = self.sal.get_power_w().unwrap_or(0.0);
let freq = self.sal.get_freq_mhz().unwrap_or(0.0);
self.history_temp.push_back(temp);
self.history_watts.push_back(pwr);
@@ -288,7 +276,7 @@ impl BenchmarkOrchestrator {
cpu_temp: temp,
power_w: pwr,
current_freq: freq,
fans: self.sensors.get_fan_rpms().unwrap_or_default(),
fans: self.sal.get_fan_rpms().unwrap_or_default(),
governor: "performance".to_string(),
pl1_limit: 15.0,
pl2_limit: 25.0,

View File

@@ -17,6 +17,7 @@ pub struct DellXps9380Sal {
last_poll: Mutex<Instant>,
last_temp: Mutex<f32>,
last_fans: Mutex<Vec<u32>>,
suppressed_services: Mutex<Vec<String>>,
}
impl DellXps9380Sal {
@@ -82,6 +83,7 @@ impl DellXps9380Sal {
last_poll: Mutex::new(Instant::now() - Duration::from_secs(2)),
last_temp: Mutex::new(0.0),
last_fans: Mutex::new(Vec::new()),
suppressed_services: Mutex::new(Vec::new()),
})
}
}
@@ -151,44 +153,36 @@ impl PreflightAuditor for DellXps9380Sal {
}
}
pub struct DellXps9380Guard {
stopped_services: Vec<String>,
}
impl DellXps9380Guard {
pub fn new() -> Self {
Self { stopped_services: Vec::new() }
}
}
impl EnvironmentGuard for DellXps9380Guard {
impl EnvironmentGuard for DellXps9380Sal {
fn suppress(&mut self) -> Result<()> {
let services = ["tlp", "thermald", "i8kmon"];
let mut suppressed = self.suppressed_services.lock().unwrap();
for s in services {
if Command::new("systemctl").args(["is-active", "--quiet", s]).status()?.success() {
debug!("Suppressing service: {}", s);
Command::new("systemctl").args(["stop", s]).status()?;
self.stopped_services.push(s.to_string());
suppressed.push(s.to_string());
}
}
Ok(())
}
fn restore(&mut self) -> Result<()> {
for s in &self.stopped_services {
let _ = Command::new("systemctl").args(["start", s]).status();
let mut suppressed = self.suppressed_services.lock().unwrap();
for s in suppressed.drain(..) {
let _ = Command::new("systemctl").args(["start", &s]).status();
}
self.stopped_services.clear();
Ok(())
}
}
impl Drop for DellXps9380Guard {
impl Drop for DellXps9380Sal {
fn drop(&mut self) {
let _ = self.restore();
}
}
impl SensorBus for DellXps9380Sal {
fn get_temp(&self) -> Result<f32> {
// Enforce 1000ms rate limit for Dell SMM as per GEMINI.md

218
src/sal/generic_linux.rs Normal file
View File

@@ -0,0 +1,218 @@
use anyhow::{Result, anyhow};
use std::path::Path;
use std::fs;
use std::time::{Duration, Instant};
use std::thread;
use std::process::Command;
use tracing::{debug};
use std::sync::mpsc;
use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError};
use crate::sal::heuristic::discovery::SystemFactSheet;
use crate::sal::heuristic::schema::HardwareDb;
/// Platform SAL backend driven by discovered system facts and the hardware database.
///
/// Holds everything the trait implementations in this file need: the paths found
/// at discovery time, the database entries, and the list of services it stopped.
pub struct GenericLinuxSal {
// DMI identity, sensor/RAPL paths and active conflict IDs found during discovery.
fact_sheet: SystemFactSheet,
// Hardware database: preflight checks, conflict definitions, ecosystem commands.
db: HardwareDb,
// Services stopped by `suppress()`; drained and restarted by `restore()`.
suppressed_services: Vec<String>,
}
impl GenericLinuxSal {
    /// Builds a SAL from the discovered system facts and the loaded hardware database.
    ///
    /// The list of suppressed services starts out empty.
    pub fn new(fact_sheet: SystemFactSheet, db: HardwareDb) -> Self {
        let suppressed_services = Vec::new();
        Self { fact_sheet, db, suppressed_services }
    }

    /// Returns `true` when the DMI vendor string contains "dell" (case-insensitive).
    fn is_dell(&self) -> bool {
        let vendor = self.fact_sheet.vendor.to_lowercase();
        vendor.contains("dell")
    }

    /// Reads `path` on a helper thread and returns its trimmed contents.
    ///
    /// # Errors
    /// Fails when the read itself fails, or when no result arrives within
    /// `timeout`. On timeout the helper thread is not joined; it is left to
    /// finish on its own.
    fn read_sysfs_timeout(&self, path: &Path, timeout: Duration) -> Result<String> {
        let target = path.to_path_buf();
        let (sender, receiver) = mpsc::channel();

        thread::spawn(move || {
            let outcome = fs::read_to_string(target).map(|raw| raw.trim().to_string());
            // The receiver may already be gone after a timeout; that is fine.
            let _ = sender.send(outcome);
        });

        receiver
            .recv_timeout(timeout)
            .map_err(|_| anyhow!("Timeout reading sysfs path: {:?}", path))?
            .map_err(|e| anyhow!("Failed to read sysfs: {}", e))
    }
}
impl PreflightAuditor for GenericLinuxSal {
    /// Runs every preflight check eagerly and returns the collected steps.
    ///
    /// Two groups of steps are produced, in this order:
    /// 1. one step per database preflight check, passing when `sh -c <check_cmd>`
    ///    exits successfully;
    /// 2. one failing step per active conflict whose severity is `"Critical"`.
    fn audit(&self) -> Box<dyn Iterator<Item = AuditStep> + '_> {
        // 1. Static DB checks. Collected right away so all commands have run
        //    before the iterator is handed to the caller.
        let mut steps: Vec<AuditStep> = self
            .db
            .preflight_checks
            .iter()
            .map(|check| {
                let passed = Command::new("sh")
                    .arg("-c")
                    .arg(&check.check_cmd)
                    .status()
                    .map(|exit| exit.success())
                    .unwrap_or(false);
                let outcome = if passed {
                    Ok(())
                } else {
                    Err(AuditError::KernelIncompatible(check.fail_help.clone()))
                };
                AuditStep { description: check.name.clone(), outcome }
            })
            .collect();

        // 2. Conflict checks (Critical only). Conflict IDs unknown to the DB are skipped.
        let critical_conflicts = self
            .fact_sheet
            .active_conflicts
            .iter()
            .filter_map(|conflict_id| self.db.conflicts.iter().find(|c| &c.id == conflict_id))
            .filter(|conflict| conflict.severity == "Critical")
            .map(|conflict| AuditStep {
                description: format!("Conflict: {}", conflict.id),
                outcome: Err(AuditError::ToolMissing(conflict.help_text.clone())),
            });
        steps.extend(critical_conflicts);

        Box::new(steps.into_iter())
    }
}
impl SensorBus for GenericLinuxSal {
    /// Reads the discovered temperature input and converts it from
    /// millidegrees to degrees Celsius.
    ///
    /// # Errors
    /// Fails when no sensor path was discovered, the read fails or times out,
    /// or the contents are not a number.
    fn get_temp(&self) -> Result<f32> {
        let path = self.fact_sheet.temp_path.as_ref()
            .ok_or_else(|| anyhow!("No temperature sensor path found"))?;

        let content = self.read_sysfs_timeout(path, Duration::from_millis(200))?;
        let milli_celsius: f32 = content.parse()?;
        Ok(milli_celsius / 1000.0)
    }

    /// Estimates power draw in watts from two `energy_uj` samples of the first
    /// discovered RAPL zone, taken roughly 100 ms apart (this call blocks for
    /// that long).
    ///
    /// # Errors
    /// Fails when no RAPL zone was discovered or a counter read/parse fails.
    fn get_power_w(&self) -> Result<f32> {
        let rapl_path = self.fact_sheet.rapl_paths.first()
            .ok_or_else(|| anyhow!("No RAPL path found"))?;

        let energy_path = rapl_path.join("energy_uj");

        let e1: u64 = self.read_sysfs_timeout(&energy_path, Duration::from_millis(200))?.parse()?;
        let t1 = Instant::now();
        thread::sleep(Duration::from_millis(100));
        let e2: u64 = self.read_sysfs_timeout(&energy_path, Duration::from_millis(200))?.parse()?;
        let t2 = Instant::now();

        let delta_e = if e2 >= e1 {
            e2 - e1
        } else {
            // The counter wrapped between the two samples. It wraps at the
            // zone's `max_energy_range_uj`, not at u64::MAX, so a plain
            // wrapping subtraction would report an absurdly large delta.
            let range_path = rapl_path.join("max_energy_range_uj");
            let max_range: u64 = self
                .read_sysfs_timeout(&range_path, Duration::from_millis(200))?
                .parse()?;
            max_range.saturating_sub(e1).saturating_add(e2)
        };
        let delta_t = t2.duration_since(t1).as_secs_f32();

        Ok((delta_e as f32 / 1_000_000.0) / delta_t)
    }

    /// Reads every discovered fan input; fans whose read or parse fails are
    /// skipped rather than reported as an error.
    fn get_fan_rpms(&self) -> Result<Vec<u32>> {
        let mut rpms = Vec::new();
        for path in &self.fact_sheet.fan_paths {
            if let Ok(content) = self.read_sysfs_timeout(path, Duration::from_millis(200)) {
                if let Ok(rpm) = content.parse() {
                    rpms.push(rpm);
                }
            }
        }
        Ok(rpms)
    }

    /// Returns the current frequency of CPU 0 in MHz.
    ///
    /// Uses cpufreq's `scaling_cur_freq` (kHz) when present, otherwise the
    /// first `cpu MHz` line of `/proc/cpuinfo`.
    fn get_freq_mhz(&self) -> Result<f32> {
        let path = Path::new("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq");
        if path.exists() {
            let khz: f32 = self.read_sysfs_timeout(path, Duration::from_millis(200))?.parse()?;
            Ok(khz / 1000.0)
        } else {
            // Fallback: parse /proc/cpuinfo
            let cpuinfo = fs::read_to_string("/proc/cpuinfo")?;
            for line in cpuinfo.lines() {
                if line.starts_with("cpu MHz") {
                    if let Some((_, mhz)) = line.split_once(':') {
                        return Ok(mhz.trim().parse()?);
                    }
                }
            }
            Err(anyhow!("Could not determine CPU frequency"))
        }
    }
}
impl ActuatorBus for GenericLinuxSal {
fn set_fan_mode(&self, mode: &str) -> Result<()> {
if self.is_dell() {
let cmd = match mode {
"manual" | "max" => self.db.ecosystems.get("dell").and_then(|e| e.fan_manual_mode_cmd.as_ref()),
"auto" => self.db.ecosystems.get("dell").and_then(|e| e.fan_auto_mode_cmd.as_ref()),
_ => return Err(anyhow!("Unsupported fan mode: {}", mode)),
};
if let Some(cmd_str) = cmd {
let parts: Vec<&str> = cmd_str.split_whitespace().collect();
Command::new(parts[0]).args(&parts[1..]).status()?;
Ok(())
} else { Err(anyhow!("Dell fan command missing in DB")) }
} else {
debug!("Fan control not implemented for non-Dell systems yet");
Ok(())
}
}
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
let rapl_path = self.fact_sheet.rapl_paths.first()
.ok_or_else(|| anyhow!("No RAPL path found for PL1"))?;
let path = rapl_path.join("constraint_0_power_limit_uw");
fs::write(path, ((watts * 1_000_000.0) as u64).to_string())?;
Ok(())
}
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
let rapl_path = self.fact_sheet.rapl_paths.first()
.ok_or_else(|| anyhow!("No RAPL path found for PL2"))?;
let path = rapl_path.join("constraint_1_power_limit_uw");
fs::write(path, ((watts * 1_000_000.0) as u64).to_string())?;
Ok(())
}
}
impl EnvironmentGuard for GenericLinuxSal {
    /// Stops the services belonging to every active conflict and records the
    /// ones that were stopped so `restore` can bring them back.
    ///
    /// Only services that are running right now are stopped. A conflict counts
    /// as active when any one of its services is active, and `systemctl stop`
    /// also succeeds on units that are not running; without the `is-active`
    /// check, `restore` would later start services that were never running.
    ///
    /// # Errors
    /// Fails when `systemctl` cannot be executed.
    fn suppress(&mut self) -> Result<()> {
        for conflict_id in &self.fact_sheet.active_conflicts {
            if let Some(conflict) = self.db.conflicts.iter().find(|c| &c.id == conflict_id) {
                for service in &conflict.services {
                    let running = Command::new("systemctl")
                        .arg("is-active")
                        .arg("--quiet")
                        .arg(service)
                        .status()?
                        .success();
                    if !running {
                        continue;
                    }

                    debug!("Stopping service: {}", service);
                    if Command::new("systemctl").arg("stop").arg(service).status()?.success() {
                        self.suppressed_services.push(service.clone());
                    }
                }
            }
        }
        Ok(())
    }

    /// Restarts every service recorded by `suppress` and, on Dell systems,
    /// switches the fans back to automatic mode.
    ///
    /// This is best-effort: individual failures are ignored and the call
    /// always returns `Ok`. The recorded list is emptied, so calling it again
    /// restarts nothing.
    fn restore(&mut self) -> Result<()> {
        for service in self.suppressed_services.drain(..) {
            debug!("Starting service: {}", service);
            let _ = Command::new("systemctl").arg("start").arg(service).status();
        }
        if self.is_dell() {
            let _ = self.set_fan_mode("auto");
        }
        Ok(())
    }
}
impl HardwareWatchdog for GenericLinuxSal {
    /// Reports an emergency when the temperature sensor reads above 100 °C.
    ///
    /// A failed temperature read is treated as "no emergency"; this method
    /// never returns `Err`.
    fn check_emergency(&self) -> Result<bool> {
        let overheated = match self.get_temp() {
            Ok(celsius) => celsius > 100.0,
            Err(_) => false,
        };
        Ok(overheated)
    }
}
impl Drop for GenericLinuxSal {
    /// Runs `restore` when the SAL goes out of scope.
    fn drop(&mut self) {
        // `drop` cannot propagate errors, so the outcome is deliberately discarded.
        self.restore().ok();
    }
}

View File

@@ -0,0 +1,185 @@
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::Duration;
use std::thread;
use std::sync::mpsc;
use crate::sal::heuristic::schema::{SensorDiscovery, ActuatorDiscovery, Conflict};
use tracing::{debug, warn};
/// Strongly-typed findings about the current system.
///
/// Produced by `discover_facts`; `Default` yields empty strings, no temperature
/// path and empty lists.
#[derive(Debug, Clone, Default)]
pub struct SystemFactSheet {
// DMI system vendor, or "Unknown" when it could not be read.
pub vendor: String,
// DMI product name, or "Unknown" when it could not be read.
pub model: String,
// Best-ranked hwmon `temp*_input` file, if any label matched.
pub temp_path: Option<PathBuf>,
// hwmon `fan*_input` files whose labels matched, ordered by driver priority.
pub fan_paths: Vec<PathBuf>,
// Matching directories under /sys/class/powercap.
pub rapl_paths: Vec<PathBuf>,
pub active_conflicts: Vec<String>, // List of conflict IDs found active
}
/// Probes the machine and assembles a [`SystemFactSheet`].
///
/// Collects the DMI identity, hwmon temperature/fan inputs and RAPL zones, then
/// records the ID of every conflict that has at least one active service.
pub fn discover_facts(
    sensors: &SensorDiscovery,
    actuators: &ActuatorDiscovery,
    conflicts: &[Conflict]
) -> SystemFactSheet {
    let (vendor, model) = read_dmi_info();
    debug!("DMI Identity: Vendor='{}', Model='{}'", vendor, model);

    let (temp_path, fan_paths) = discover_hwmon(sensors);
    let rapl_paths = discover_rapl(actuators);

    // `find` stops at the first active service, so each conflict is probed only
    // until one hit is found and is recorded at most once.
    let active_conflicts: Vec<String> = conflicts
        .iter()
        .filter_map(|conflict| {
            conflict
                .services
                .iter()
                .find(|service| is_service_active(service.as_str()))
                .map(|service| {
                    debug!("Detected active conflict: {} (Service: {})", conflict.id, service);
                    conflict.id.clone()
                })
        })
        .collect();

    SystemFactSheet { vendor, model, temp_path, fan_paths, rapl_paths, active_conflicts }
}
/// Reads DMI information from sysfs with a safety timeout.
fn read_dmi_info() -> (String, String) {
let vendor = read_sysfs_with_timeout(Path::new("/sys/class/dmi/id/sys_vendor"), Duration::from_millis(100))
.unwrap_or_else(|| "Unknown".to_string());
let model = read_sysfs_with_timeout(Path::new("/sys/class/dmi/id/product_name"), Duration::from_millis(100))
.unwrap_or_else(|| "Unknown".to_string());
(vendor, model)
}
/// Resolves a hwmon `*_label` file to its sibling `*_input` file.
///
/// Returns `Some(input_path)` only when `file_name` ends in `_label`, the label
/// can be read within 100 ms, it contains one of the `wanted` substrings, and
/// the input file exists.
fn labelled_input_path(
    hwmon_path: &Path,
    label_path: &Path,
    file_name: &str,
    wanted: &[String],
) -> Option<PathBuf> {
    let stem = file_name.strip_suffix("_label")?;
    let label = read_sysfs_with_timeout(label_path, Duration::from_millis(100))?;
    if !wanted.iter().any(|l| label.contains(l.as_str())) {
        return None;
    }
    let input_path = hwmon_path.join(format!("{}_input", stem));
    if input_path.exists() {
        Some(input_path)
    } else {
        None
    }
}

/// Discovers hwmon sensors by matching labels and prioritizing drivers.
///
/// Returns the best temperature input (if any) and all matching fan inputs.
/// Candidates are ranked by the position of their driver name in
/// `cfg.hwmon_priority` (unlisted drivers rank last) and then by path, so the
/// result does not depend on directory iteration order.
fn discover_hwmon(cfg: &SensorDiscovery) -> (Option<PathBuf>, Vec<PathBuf>) {
    let mut temp_candidates: Vec<(usize, PathBuf)> = Vec::new();
    let mut fan_candidates: Vec<(usize, PathBuf)> = Vec::new();

    let hwmon_base = Path::new("/sys/class/hwmon");
    let entries = match fs::read_dir(hwmon_base) {
        Ok(e) => e,
        Err(e) => {
            warn!("Could not read /sys/class/hwmon: {}", e);
            return (None, Vec::new());
        }
    };

    for entry in entries.flatten() {
        let hwmon_path = entry.path();

        let driver_name = read_sysfs_with_timeout(&hwmon_path.join("name"), Duration::from_millis(100))
            .unwrap_or_default();

        let priority = cfg.hwmon_priority
            .iter()
            .position(|p| p == &driver_name)
            .unwrap_or(usize::MAX);

        if let Ok(hw_entries) = fs::read_dir(&hwmon_path) {
            for hw_entry in hw_entries.flatten() {
                let file_name = hw_entry.file_name().into_string().unwrap_or_default();

                if file_name.starts_with("temp") {
                    // Temperature Sensors
                    if let Some(input) =
                        labelled_input_path(&hwmon_path, &hw_entry.path(), &file_name, &cfg.temp_labels)
                    {
                        temp_candidates.push((priority, input));
                    }
                } else if file_name.starts_with("fan") {
                    // Fan Sensors
                    if let Some(input) =
                        labelled_input_path(&hwmon_path, &hw_entry.path(), &file_name, &cfg.fan_labels)
                    {
                        fan_candidates.push((priority, input));
                    }
                }
            }
        }
    }

    // Tuples order by priority first and path second. The path tie-break makes
    // the outcome reproducible: `read_dir` order is unspecified, so sorting on
    // priority alone left equal-priority candidates in arbitrary order.
    temp_candidates.sort();
    fan_candidates.sort();

    let best_temp = temp_candidates.into_iter().next().map(|(_, p)| p);
    let best_fans = fan_candidates.into_iter().map(|(_, p)| p).collect();

    (best_temp, best_fans)
}
/// Discovers RAPL powercap paths.
///
/// An entry of `/sys/class/powercap` is selected when its directory name, or
/// failing that the contents of its `name` file, appears in `cfg.rapl_paths`.
/// Returns an empty list when the powercap directory cannot be read.
///
/// The result is sorted by path. Callers treat the first element as the
/// primary zone, and `read_dir` order is unspecified, so without sorting the
/// zone chosen could differ between runs.
fn discover_rapl(cfg: &ActuatorDiscovery) -> Vec<PathBuf> {
    let mut paths = Vec::new();
    let powercap_base = Path::new("/sys/class/powercap");

    let entries = match fs::read_dir(powercap_base) {
        Ok(e) => e,
        Err(_) => return Vec::new(),
    };

    for entry in entries.flatten() {
        let path = entry.path();
        let dir_name = entry.file_name().into_string().unwrap_or_default();

        // The `name` file is only consulted when the directory name did not match.
        let matched = cfg.rapl_paths.contains(&dir_name)
            || read_sysfs_with_timeout(&path.join("name"), Duration::from_millis(100))
                .map_or(false, |name| cfg.rapl_paths.iter().any(|p| p == &name));

        if matched {
            paths.push(path);
        }
    }

    paths.sort();
    paths
}
/// Reports whether `systemctl is-active --quiet <service>` exits successfully.
///
/// Returns `false` both when the unit is not active and when `systemctl`
/// itself cannot be run.
pub fn is_service_active(service: &str) -> bool {
    Command::new("systemctl")
        .args(["is-active", "--quiet", service])
        .status()
        .map(|exit| exit.success())
        .unwrap_or(false)
}
/// Reads a file on a helper thread, giving up after `timeout`.
///
/// Returns the trimmed contents, or `None` when the read fails or does not
/// finish in time. On timeout the helper thread is not joined.
fn read_sysfs_with_timeout(path: &Path, timeout: Duration) -> Option<String> {
    let target = path.to_path_buf();
    let (sender, receiver) = mpsc::channel();

    thread::spawn(move || {
        let outcome = fs::read_to_string(target).map(|raw| raw.trim().to_string());
        // The receiver may have given up already; a failed send is harmless.
        let _ = sender.send(outcome);
    });

    // Outer `ok()` drops a timeout/disconnect, inner `ok()` drops an I/O error.
    receiver.recv_timeout(timeout).ok()?.ok()
}

View File

@@ -0,0 +1,60 @@
use miette::{Result, IntoDiagnostic, Context};
use std::fs;
use regex::Regex;
use tracing::{info, debug};
use crate::sal::traits::PlatformSal;
use crate::sal::dell_xps_9380::DellXps9380Sal;
use crate::sal::generic_linux::GenericLinuxSal;
use crate::sal::heuristic::schema::HardwareDb;
use crate::sal::heuristic::discovery::{discover_facts};
/// Entry point for platform detection; carries no state of its own.
pub struct HeuristicEngine;

impl HeuristicEngine {
    /// Loads the hardware database, probes the system, and builds the appropriate SAL.
    ///
    /// A machine whose DMI vendor and model match the Dell XPS 13 9380 patterns
    /// gets the specialized SAL; everything else falls back to
    /// `GenericLinuxSal`, which needs a temperature sensor and at least one
    /// RAPL zone.
    ///
    /// # Errors
    /// Fails when the database file cannot be read or parsed, when the Dell SAL
    /// fails to initialize, or when the generic fallback lacks a temperature
    /// sensor or RAPL interface.
    pub fn detect_and_build() -> Result<Box<dyn PlatformSal>> {
        // 1. Load Hardware DB (path is relative to the working directory)
        let db_path = "assets/hardware_db.toml";
        let raw_db = fs::read_to_string(db_path)
            .into_diagnostic()
            .with_context(|| format!("Failed to read hardware database at {}", db_path))?;
        let db: HardwareDb = toml::from_str(&raw_db)
            .into_diagnostic()
            .context("Failed to parse hardware_db.toml")?;

        // 2. Discover Facts
        let facts = discover_facts(&db.discovery.sensors, &db.discovery.actuators, &db.conflicts);
        info!("System Identity: {} {}", facts.vendor, facts.model);

        // 3. Routing Logic
        // --- Special Case: Dell XPS 13 9380 ---
        let is_xps_9380 = is_match(&facts.vendor, "(?i)Dell.*")
            && is_match(&facts.model, "(?i)XPS.*13.*9380.*");
        if is_xps_9380 {
            info!("Specialized SAL Match Found: Dell XPS 13 9380");
            let dell_sal = DellXps9380Sal::init().map_err(|e| miette::miette!(e))?;
            return Ok(Box::new(dell_sal));
        }

        // --- Fallback: Generic Linux SAL ---
        debug!("No specialized SAL match. Falling back to GenericLinuxSal with DB quirks.");

        // The temperature sensor is checked before RAPL, so a machine missing
        // both reports the missing temperature sensor.
        match (facts.temp_path.is_some(), facts.rapl_paths.is_empty()) {
            (false, _) => Err(miette::miette!("No temperature sensor discovered. Generic fallback impossible.")),
            (true, true) => Err(miette::miette!("No RAPL power interface discovered. Generic fallback impossible.")),
            (true, false) => Ok(Box::new(GenericLinuxSal::new(facts, db))),
        }
    }
}
/// Tests `input` against the regular expression `pattern`.
///
/// A pattern that fails to compile counts as "no match" rather than an error.
/// The expression is compiled on every call.
fn is_match(input: &str, pattern: &str) -> bool {
    Regex::new(pattern)
        .map(|re| re.is_match(input))
        .unwrap_or(false)
}

3
src/sal/heuristic/mod.rs Normal file
View File

@@ -0,0 +1,3 @@
pub mod schema;
pub mod discovery;
pub mod engine;

View File

@@ -0,0 +1,90 @@
use serde::Deserialize;
use std::collections::HashMap;
/// Root of the hardware database, deserialized with serde.
///
/// No field is an `Option` or carries a serde default.
#[derive(Debug, Deserialize, Clone)]
pub struct HardwareDb {
// Descriptive information about the database itself.
pub metadata: Metadata,
// Definitions of conflicts between services and this tool.
pub conflicts: Vec<Conflict>,
// Per-ecosystem settings keyed by a string name.
pub ecosystems: HashMap<String, Ecosystem>,
// Model-specific quirk entries.
pub quirks: Vec<Quirk>,
// Rules for locating sensors and actuators on the running system.
pub discovery: Discovery,
// Checks to evaluate before a run.
pub preflight_checks: Vec<PreflightCheck>,
}
/// Free-form descriptive header of the hardware database.
// NOTE(review): the expected formats of `version` and `updated` are not visible here — confirm against the DB file.
#[derive(Debug, Deserialize, Clone)]
pub struct Metadata {
pub version: String,
pub updated: String,
pub description: String,
}
/// A group of services that conflict with this tool.
#[derive(Debug, Deserialize, Clone)]
pub struct Conflict {
// Identifier used to refer to this conflict.
pub id: String,
// Names of the services that make up this conflict.
pub services: Vec<String>,
// NOTE(review): presumably names the contended resource — confirm against the DB file.
pub contention: String,
// Severity label, stored as a plain string.
pub severity: String,
// NOTE(review): presumably the remediation to apply — confirm against the DB file.
pub fix_action: String,
// Human-readable explanation of the conflict.
pub help_text: String,
}
/// Settings for one vendor ecosystem.
///
/// Only `vendor_regex` is mandatory; every other field is an `Option`.
// NOTE(review): the meaning of most optional fields cannot be established from
// this declaration alone — document each against its consumer once confirmed.
#[derive(Debug, Deserialize, Clone)]
pub struct Ecosystem {
// Pattern for the vendor string (stored as text, not compiled here).
pub vendor_regex: String,
pub polling_cap_ms: Option<u64>,
pub drivers: Option<Vec<String>>,
// Command line that switches the fans to manual control.
pub fan_manual_mode_cmd: Option<String>,
// Command line that returns the fans to automatic control.
pub fan_auto_mode_cmd: Option<String>,
pub safety_register: Option<String>,
pub lap_mode_path: Option<String>,
pub profiles_path: Option<String>,
pub ec_write_required: Option<bool>,
pub thermal_policy_path: Option<String>,
pub policy_map: Option<HashMap<String, i32>>,
pub msr_lock_register: Option<String>,
pub msr_lock_bit: Option<u32>,
pub fan_boost_path: Option<String>,
pub ec_tool: Option<String>,
pub optimization: Option<String>,
}
/// A model-specific quirk entry.
///
/// `model_regex`, `id`, `issue` and `action` are mandatory; the remaining
/// fields are optional.
// NOTE(review): how the optional fields relate to `action` is not visible from
// this declaration — confirm against the code that applies quirks.
#[derive(Debug, Deserialize, Clone)]
pub struct Quirk {
// Pattern for the model string (stored as text, not compiled here).
pub model_regex: String,
pub id: String,
pub issue: String,
pub action: String,
pub monitor_msr: Option<String>,
pub reset_bit: Option<u32>,
pub trigger_path: Option<String>,
pub trigger_value: Option<String>,
pub target_path: Option<String>,
pub format: Option<String>,
}
/// Groups the sensor and actuator discovery rules.
#[derive(Debug, Deserialize, Clone)]
pub struct Discovery {
// Rules for finding temperature and fan sensors.
pub sensors: SensorDiscovery,
// Rules for finding power/frequency actuators.
pub actuators: ActuatorDiscovery,
}
/// Rules for locating hwmon temperature and fan sensors.
#[derive(Debug, Deserialize, Clone)]
pub struct SensorDiscovery {
// Label strings identifying temperature sensors of interest.
pub temp_labels: Vec<String>,
// Label strings identifying fan sensors of interest.
pub fan_labels: Vec<String>,
// hwmon driver names, listed in order of preference.
pub hwmon_priority: Vec<String>,
}
/// Rules for locating power and frequency actuators.
#[derive(Debug, Deserialize, Clone)]
pub struct ActuatorDiscovery {
// Names identifying RAPL powercap zones.
pub rapl_paths: Vec<String>,
// NOTE(review): presumably the AMD equivalent of `rapl_paths` — confirm against its consumer.
pub amd_energy_paths: Vec<String>,
// NOTE(review): presumably cpufreq governor file names or paths — confirm against its consumer.
pub governor_files: Vec<String>,
}
/// One check to evaluate before a run.
#[derive(Debug, Deserialize, Clone)]
pub struct PreflightCheck {
// Short description of the check.
pub name: String,
// Command line whose outcome decides whether the check passes.
pub check_cmd: String,
// Help text to present when the check fails.
pub fail_help: String,
}

View File

@@ -1,8 +1,15 @@
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditStep};
use anyhow::Result;
pub struct MockAuditor;
impl PreflightAuditor for MockAuditor {
pub struct MockSal;
impl MockSal {
pub fn new() -> Self {
Self
}
}
impl PreflightAuditor for MockSal {
fn audit(&self) -> Box<dyn Iterator<Item = AuditStep> + '_> {
let steps = vec![
AuditStep {
@@ -18,32 +25,16 @@ impl PreflightAuditor for MockAuditor {
}
}
pub struct MockGuard {
pub suppressed: bool,
}
impl MockGuard {
pub fn new() -> Self {
Self { suppressed: false }
}
}
impl EnvironmentGuard for MockGuard {
impl EnvironmentGuard for MockSal {
fn suppress(&mut self) -> Result<()> {
self.suppressed = true;
Ok(())
}
fn restore(&mut self) -> Result<()> {
self.suppressed = false;
Ok(())
}
}
impl Drop for MockGuard {
fn drop(&mut self) {
let _ = self.restore();
}
}
pub struct MockSensorBus;
impl SensorBus for MockSensorBus {
impl SensorBus for MockSal {
fn get_temp(&self) -> Result<f32> {
Ok(42.0)
}
@@ -58,8 +49,7 @@ impl SensorBus for MockSensorBus {
}
}
pub struct MockActuatorBus;
impl ActuatorBus for MockActuatorBus {
impl ActuatorBus for MockSal {
fn set_fan_mode(&self, _mode: &str) -> Result<()> {
Ok(())
}
@@ -71,8 +61,7 @@ impl ActuatorBus for MockActuatorBus {
}
}
pub struct MockWatchdog;
impl HardwareWatchdog for MockWatchdog {
impl HardwareWatchdog for MockSal {
fn check_emergency(&self) -> Result<bool> {
Ok(false)
}

View File

@@ -1,3 +1,5 @@
pub mod traits;
pub mod mock;
pub mod dell_xps_9380;
pub mod generic_linux;
pub mod heuristic;

View File

@@ -48,7 +48,7 @@ impl<T: PreflightAuditor + ?Sized> PreflightAuditor for Arc<T> {
}
/// Suppresses conflicting daemons (tlp, thermald).
pub trait EnvironmentGuard {
pub trait EnvironmentGuard: Send + Sync {
fn suppress(&mut self) -> Result<()>;
fn restore(&mut self) -> Result<()>;
}
@@ -77,7 +77,7 @@ impl<T: SensorBus + ?Sized> SensorBus for Arc<T> {
}
/// Write-only interface for hardware commands.
pub trait ActuatorBus {
pub trait ActuatorBus: Send + Sync {
fn set_fan_mode(&self, mode: &str) -> Result<()>;
fn set_sustained_power_limit(&self, watts: f32) -> Result<()>;
fn set_burst_power_limit(&self, watts: f32) -> Result<()>;
@@ -96,7 +96,7 @@ impl<T: ActuatorBus + ?Sized> ActuatorBus for Arc<T> {
}
/// Concurrent monitor for catastrophic states.
pub trait HardwareWatchdog {
pub trait HardwareWatchdog: Send + Sync {
fn check_emergency(&self) -> Result<bool>;
}
@@ -105,3 +105,8 @@ impl<T: HardwareWatchdog + ?Sized> HardwareWatchdog for Arc<T> {
(**self).check_emergency()
}
}
/// Aggregate trait for a complete platform implementation.
///
/// Bundles the five SAL capability traits (`PreflightAuditor`, `SensorBus`,
/// `ActuatorBus`, `EnvironmentGuard`, `HardwareWatchdog`) under one name. It
/// declares no methods of its own.
pub trait PlatformSal: PreflightAuditor + SensorBus + ActuatorBus + EnvironmentGuard + HardwareWatchdog {}
// Blanket impl: any type, sized or not, that implements all five capability
// traits is automatically a `PlatformSal`.
impl<T: PreflightAuditor + SensorBus + ActuatorBus + EnvironmentGuard + HardwareWatchdog + ?Sized> PlatformSal for T {}