diff --git a/Cargo.lock b/Cargo.lock index 5f2e97d..aca5993 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -526,10 +526,12 @@ dependencies = [ "num_cpus", "owo-colors", "ratatui", + "regex", "serde", "serde_json", "sysinfo", "thiserror 2.0.18", + "toml", "tracing", "tracing-appender", "tracing-subscriber", @@ -1534,6 +1536,15 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_spanned" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776" +dependencies = [ + "serde_core", +] + [[package]] name = "sha2" version = "0.10.9" @@ -1852,6 +1863,45 @@ dependencies = [ "time-core", ] +[[package]] +name = "toml" +version = "1.0.3+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7614eaf19ad818347db24addfa201729cf2a9b6fdfd9eb0ab870fcacc606c0c" +dependencies = [ + "indexmap", + "serde_core", + "serde_spanned", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", +] + +[[package]] +name = "toml_datetime" +version = "1.0.0+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_parser" +version = "1.0.9+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" +dependencies = [ + "winnow", +] + +[[package]] +name = "toml_writer" +version = "1.0.6+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" + [[package]] name = "tracing" version = "0.1.44" @@ -2492,6 +2542,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index 85794ca..40147dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,3 +28,5 @@ tracing-appender = "0.2" sysinfo = "0.38" libc = "0.2" num_cpus = "1.17" +toml = "1.0.3" +regex = "1.12.3" diff --git a/src/main.rs b/src/main.rs index c7dd833..ab30b7b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,9 +27,9 @@ use ratatui::{backend::CrosstermBackend, Terminal}; use cli::Cli; use mediator::{TelemetryState, UiCommand, BenchmarkPhase}; -use sal::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError}; -use sal::mock::{MockAuditor, MockGuard, MockSensorBus, MockActuatorBus, MockWatchdog}; -use sal::dell_xps_9380::DellXps9380Sal; +use sal::traits::{AuditError, PlatformSal}; +use sal::mock::MockSal; +use sal::heuristic::engine::HeuristicEngine; use load::StressNg; use orchestrator::BenchmarkOrchestrator; use ui::dashboard::{draw_dashboard, DashboardState}; @@ -107,20 +107,17 @@ fn main() -> Result<()> { info!("ember-tune starting with args: {:?}", args); - // 2. Pre-flight Audit (Before TUI) - let auditor: Arc = if args.mock { - Arc::new(MockAuditor) + // 2. Platform Detection & Audit + let sal: Box = if args.mock { + Box::new(MockSal::new()) } else { - match DellXps9380Sal::init() { - Ok(sal) => Arc::new(sal), - Err(e) => return Err(miette::miette!("Failed to initialize Dell SAL: {}", e)), - } + HeuristicEngine::detect_and_build()? }; println!("{}", console::style("─── Pre-flight System Audit ───").bold().cyan()); let mut audit_failures = Vec::new(); - for step in auditor.audit() { + for step in sal.audit() { print!(" Checking {:<40} ", step.description); io::Write::flush(&mut io::stdout()).into_diagnostic()?; @@ -151,8 +148,9 @@ fn main() -> Result<()> { enable_raw_mode().into_diagnostic()?; let mut stdout = io::stdout(); execute!(stdout, EnterAlternateScreen).into_diagnostic()?; - let backend = CrosstermBackend::new(stdout); - let mut terminal = Terminal::new(backend).into_diagnostic()?; + let backend_stdout = io::stdout(); + let backend_term = CrosstermBackend::new(backend_stdout); + let mut terminal = Terminal::new(backend_term).into_diagnostic()?; // 4. State & Communication Setup let running = Arc::new(AtomicBool::new(true)); @@ -166,40 +164,11 @@ fn main() -> Result<()> { }).expect("Error setting Ctrl-C handler"); // 5. Spawn Backend Orchestrator - let is_mock = args.mock; - let b_auditor = auditor.clone(); let backend_handle = thread::spawn(move || { - let (guard, sensors, actuators, watchdog): ( - Box, - Box, - Box, - Box, - ) = if is_mock { - ( - Box::new(MockGuard::new()), - Box::new(MockSensorBus), - Box::new(MockActuatorBus), - Box::new(MockWatchdog), - ) - } else { - // Re-init or share the SAL - let sal = Arc::new(DellXps9380Sal::init().expect("Failed to init Dell SAL in backend")); - ( - Box::new(sal::dell_xps_9380::DellXps9380Guard::new()), - Box::new(sal.clone() as Arc), - Box::new(sal.clone() as Arc), - Box::new(sal as Arc), - ) - }; - let workload = Box::new(StressNg::new()); let mut orchestrator = BenchmarkOrchestrator::new( - Box::new(b_auditor), - guard, - sensors, - actuators, - watchdog, + sal, workload, telemetry_tx, command_rx, @@ -286,7 +255,7 @@ fn main() -> Result<()> { } Ok(Err(e)) => { if e.to_string() == "ABORTED" { - println!("{}", "Benchmark aborted by user. No summary available.".yellow()); + println!("{}", "Benchmark aborted by user.".yellow()); } else { error!("Orchestrator encountered error: {}", e); eprintln!("{} {}", "Error:".red().bold(), e); diff --git a/src/orchestrator/mod.rs b/src/orchestrator/mod.rs index 61bead3..b4b7b73 100644 --- a/src/orchestrator/mod.rs +++ b/src/orchestrator/mod.rs @@ -5,17 +5,13 @@ use std::thread; use std::collections::VecDeque; use sysinfo::System; -use crate::sal::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog}; +use crate::sal::traits::{PlatformSal}; use crate::load::Workload; use crate::mediator::{TelemetryState, UiCommand, BenchmarkPhase}; use crate::engine::{OptimizerEngine, ThermalProfile, ThermalPoint, OptimizationResult}; pub struct BenchmarkOrchestrator { - auditor: Box, - guard: Box, - sensors: Box, - actuators: Box, - watchdog: Box, + sal: Box, workload: Box, telemetry_tx: mpsc::Sender, command_rx: mpsc::Receiver, @@ -35,11 +31,7 @@ pub struct BenchmarkOrchestrator { impl BenchmarkOrchestrator { pub fn new( - auditor: Box, - guard: Box, - sensors: Box, - actuators: Box, - watchdog: Box, + sal: Box, workload: Box, telemetry_tx: mpsc::Sender, command_rx: mpsc::Receiver, @@ -53,11 +45,7 @@ impl BenchmarkOrchestrator { let total_ram_gb = sys.total_memory() / 1024 / 1024 / 1024; Self { - auditor, - guard, - sensors, - actuators, - watchdog, + sal, workload, telemetry_tx, command_rx, @@ -77,19 +65,19 @@ impl BenchmarkOrchestrator { // Phase 1: Audit & Baseline self.phase = BenchmarkPhase::Auditing; - for step in self.auditor.audit() { + for step in self.sal.audit() { if let Err(e) = step.outcome { return Err(anyhow::anyhow!("Audit failed ({}): {:?}", step.description, e)); } } self.log("Suppressing background services (tlp, thermald)...")?; - self.guard.suppress().context("Failed to suppress background services")?; + self.sal.suppress().context("Failed to suppress background services")?; // Baseline (Idle Calibration) self.phase = BenchmarkPhase::IdleCalibration; self.log("Phase 1: Recording Idle Baseline (10s)...")?; - self.actuators.set_fan_mode("auto")?; // Use auto for idle + self.sal.set_fan_mode("auto")?; // Use auto for idle let mut idle_temps = Vec::new(); let start = Instant::now(); @@ -97,7 +85,7 @@ impl BenchmarkOrchestrator { while start.elapsed() < Duration::from_secs(10) { self.check_abort()?; self.send_telemetry(tick)?; - idle_temps.push(self.sensors.get_temp().unwrap_or(0.0)); + idle_temps.push(self.sal.get_temp().unwrap_or(0.0)); tick += 1; thread::sleep(Duration::from_millis(500)); } @@ -107,13 +95,13 @@ impl BenchmarkOrchestrator { // Phase 2: Stress Stepping self.phase = BenchmarkPhase::StressTesting; self.log("Phase 2: Starting Synthetic Stress Matrix.")?; - self.actuators.set_fan_mode("max")?; // Lock fans for consistent resistance + self.sal.set_fan_mode("max")?; // Lock fans for consistent resistance let power_steps = [15.0, 20.0, 25.0, 30.0, 35.0]; for &pl in &power_steps { self.log(&format!("Testing PL1 = {:.0}W...", pl))?; - self.actuators.set_sustained_power_limit(pl)?; - self.actuators.set_burst_power_limit(pl + 5.0)?; + self.sal.set_sustained_power_limit(pl)?; + self.sal.set_burst_power_limit(pl + 5.0)?; self.workload.start(num_cpus::get(), 100)?; @@ -123,13 +111,13 @@ impl BenchmarkOrchestrator { while step_start.elapsed() < Duration::from_secs(45) { self.check_abort()?; - if self.watchdog.check_emergency()? { + if self.sal.check_emergency()? { self.log("⚠ EMERGENCY ABORT: Watchdog triggered!")?; self.workload.stop()?; return Err(anyhow::anyhow!("Hardware Watchdog Triggered")); } - let t = self.sensors.get_temp().unwrap_or(0.0); + let t = self.sal.get_temp().unwrap_or(0.0); step_temps.push_back(t); if step_temps.len() > 10 { step_temps.pop_front(); } @@ -149,10 +137,10 @@ impl BenchmarkOrchestrator { } // Record data point - let avg_p = self.sensors.get_power_w().unwrap_or(0.0); - let avg_t = self.sensors.get_temp().unwrap_or(0.0); - let avg_f = self.sensors.get_freq_mhz().unwrap_or(0.0); - let fans = self.sensors.get_fan_rpms().unwrap_or_default(); + let avg_p = self.sal.get_power_w().unwrap_or(0.0); + let avg_t = self.sal.get_temp().unwrap_or(0.0); + let avg_f = self.sal.get_freq_mhz().unwrap_or(0.0); + let fans = self.sal.get_fan_rpms().unwrap_or_default(); let primary_fan = fans.first().cloned().unwrap_or(0); let tp = self.workload.get_throughput().unwrap_or(0.0); @@ -210,7 +198,7 @@ impl BenchmarkOrchestrator { std::fs::write("i8kmon.conf", i8k_content)?; self.log("✓ Saved 'i8kmon.conf'.")?; - self.guard.restore()?; + self.sal.restore()?; self.log("✓ Environment restored.")?; Ok(res) @@ -248,10 +236,10 @@ impl BenchmarkOrchestrator { cpu_model: self.cpu_model.clone(), total_ram_gb: self.total_ram_gb, tick: 0, - cpu_temp: self.sensors.get_temp().unwrap_or(0.0), - power_w: self.sensors.get_power_w().unwrap_or(0.0), - current_freq: self.sensors.get_freq_mhz().unwrap_or(0.0), - fans: self.sensors.get_fan_rpms().unwrap_or_default(), + cpu_temp: self.sal.get_temp().unwrap_or(0.0), + power_w: self.sal.get_power_w().unwrap_or(0.0), + current_freq: self.sal.get_freq_mhz().unwrap_or(0.0), + fans: self.sal.get_fan_rpms().unwrap_or_default(), governor: "unknown".to_string(), pl1_limit: 0.0, pl2_limit: 0.0, @@ -267,9 +255,9 @@ impl BenchmarkOrchestrator { } fn send_telemetry(&mut self, tick: u64) -> Result<()> { - let temp = self.sensors.get_temp().unwrap_or(0.0); - let pwr = self.sensors.get_power_w().unwrap_or(0.0); - let freq = self.sensors.get_freq_mhz().unwrap_or(0.0); + let temp = self.sal.get_temp().unwrap_or(0.0); + let pwr = self.sal.get_power_w().unwrap_or(0.0); + let freq = self.sal.get_freq_mhz().unwrap_or(0.0); self.history_temp.push_back(temp); self.history_watts.push_back(pwr); @@ -288,7 +276,7 @@ impl BenchmarkOrchestrator { cpu_temp: temp, power_w: pwr, current_freq: freq, - fans: self.sensors.get_fan_rpms().unwrap_or_default(), + fans: self.sal.get_fan_rpms().unwrap_or_default(), governor: "performance".to_string(), pl1_limit: 15.0, pl2_limit: 25.0, diff --git a/src/sal/dell_xps_9380.rs b/src/sal/dell_xps_9380.rs index 75d747e..e8f7fc6 100644 --- a/src/sal/dell_xps_9380.rs +++ b/src/sal/dell_xps_9380.rs @@ -17,6 +17,7 @@ pub struct DellXps9380Sal { last_poll: Mutex, last_temp: Mutex, last_fans: Mutex>, + suppressed_services: Mutex>, } impl DellXps9380Sal { @@ -82,6 +83,7 @@ impl DellXps9380Sal { last_poll: Mutex::new(Instant::now() - Duration::from_secs(2)), last_temp: Mutex::new(0.0), last_fans: Mutex::new(Vec::new()), + suppressed_services: Mutex::new(Vec::new()), }) } } @@ -151,44 +153,36 @@ impl PreflightAuditor for DellXps9380Sal { } } -pub struct DellXps9380Guard { - stopped_services: Vec, -} - -impl DellXps9380Guard { - pub fn new() -> Self { - Self { stopped_services: Vec::new() } - } -} - -impl EnvironmentGuard for DellXps9380Guard { +impl EnvironmentGuard for DellXps9380Sal { fn suppress(&mut self) -> Result<()> { let services = ["tlp", "thermald", "i8kmon"]; + let mut suppressed = self.suppressed_services.lock().unwrap(); for s in services { if Command::new("systemctl").args(["is-active", "--quiet", s]).status()?.success() { debug!("Suppressing service: {}", s); Command::new("systemctl").args(["stop", s]).status()?; - self.stopped_services.push(s.to_string()); + suppressed.push(s.to_string()); } } Ok(()) } fn restore(&mut self) -> Result<()> { - for s in &self.stopped_services { - let _ = Command::new("systemctl").args(["start", s]).status(); + let mut suppressed = self.suppressed_services.lock().unwrap(); + for s in suppressed.drain(..) { + let _ = Command::new("systemctl").args(["start", &s]).status(); } - self.stopped_services.clear(); Ok(()) } } -impl Drop for DellXps9380Guard { +impl Drop for DellXps9380Sal { fn drop(&mut self) { let _ = self.restore(); } } + impl SensorBus for DellXps9380Sal { fn get_temp(&self) -> Result { // Enforce 1000ms rate limit for Dell SMM as per GEMINI.md diff --git a/src/sal/generic_linux.rs b/src/sal/generic_linux.rs new file mode 100644 index 0000000..a9527be --- /dev/null +++ b/src/sal/generic_linux.rs @@ -0,0 +1,218 @@ +use anyhow::{Result, anyhow}; +use std::path::Path; +use std::fs; +use std::time::{Duration, Instant}; +use std::thread; +use std::process::Command; +use tracing::{debug}; +use std::sync::mpsc; + +use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError}; +use crate::sal::heuristic::discovery::SystemFactSheet; +use crate::sal::heuristic::schema::HardwareDb; + +pub struct GenericLinuxSal { + fact_sheet: SystemFactSheet, + db: HardwareDb, + suppressed_services: Vec, +} + +impl GenericLinuxSal { + pub fn new(fact_sheet: SystemFactSheet, db: HardwareDb) -> Self { + Self { + fact_sheet, + db, + suppressed_services: Vec::new(), + } + } + + fn is_dell(&self) -> bool { + self.fact_sheet.vendor.to_lowercase().contains("dell") + } + + fn read_sysfs_timeout(&self, path: &Path, timeout: Duration) -> Result { + let (tx, rx) = mpsc::channel(); + let path_buf = path.to_path_buf(); + + thread::spawn(move || { + let res = fs::read_to_string(path_buf).map(|s| s.trim().to_string()); + let _ = tx.send(res); + }); + + match rx.recv_timeout(timeout) { + Ok(res) => res.map_err(|e| anyhow!("Failed to read sysfs: {}", e)), + Err(_) => Err(anyhow!("Timeout reading sysfs path: {:?}", path)), + } + } +} + +impl PreflightAuditor for GenericLinuxSal { + fn audit(&self) -> Box + '_> { + let mut steps = Vec::new(); + + // 1. Static DB checks + for check in &self.db.preflight_checks { + let status = Command::new("sh") + .arg("-c") + .arg(&check.check_cmd) + .status(); + + steps.push(AuditStep { + description: check.name.clone(), + outcome: match status { + Ok(s) if s.success() => Ok(()), + _ => Err(AuditError::KernelIncompatible(check.fail_help.clone())), + } + }); + } + + // 2. Conflict checks (Critical only) + for conflict_id in &self.fact_sheet.active_conflicts { + if let Some(conflict) = self.db.conflicts.iter().find(|c| &c.id == conflict_id) { + if conflict.severity == "Critical" { + steps.push(AuditStep { + description: format!("Conflict: {}", conflict.id), + outcome: Err(AuditError::ToolMissing(conflict.help_text.clone())), + }); + } + } + } + + Box::new(steps.into_iter()) + } +} + +impl SensorBus for GenericLinuxSal { + fn get_temp(&self) -> Result { + let path = self.fact_sheet.temp_path.as_ref() + .ok_or_else(|| anyhow!("No temperature sensor path found"))?; + let content = self.read_sysfs_timeout(path, Duration::from_millis(200))?; + let milli_celsius: f32 = content.parse()?; + Ok(milli_celsius / 1000.0) + } + + fn get_power_w(&self) -> Result { + let rapl_path = self.fact_sheet.rapl_paths.first() + .ok_or_else(|| anyhow!("No RAPL path found"))?; + let energy_path = rapl_path.join("energy_uj"); + + let e1: u64 = self.read_sysfs_timeout(&energy_path, Duration::from_millis(200))?.parse()?; + let t1 = Instant::now(); + thread::sleep(Duration::from_millis(100)); + let e2: u64 = self.read_sysfs_timeout(&energy_path, Duration::from_millis(200))?.parse()?; + let t2 = Instant::now(); + + let delta_e = e2.wrapping_sub(e1); + let delta_t = t2.duration_since(t1).as_secs_f32(); + Ok((delta_e as f32 / 1_000_000.0) / delta_t) + } + + fn get_fan_rpms(&self) -> Result> { + let mut rpms = Vec::new(); + for path in &self.fact_sheet.fan_paths { + if let Ok(content) = self.read_sysfs_timeout(path, Duration::from_millis(200)) { + if let Ok(rpm) = content.parse() { rpms.push(rpm); } + } + } + Ok(rpms) + } + + fn get_freq_mhz(&self) -> Result { + let path = Path::new("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq"); + if path.exists() { + let khz: f32 = self.read_sysfs_timeout(path, Duration::from_millis(200))?.parse()?; + Ok(khz / 1000.0) + } else { + // Fallback: parse /proc/cpuinfo + let cpuinfo = fs::read_to_string("/proc/cpuinfo")?; + for line in cpuinfo.lines() { + if line.starts_with("cpu MHz") { + if let Some((_, mhz)) = line.split_once(':') { + return Ok(mhz.trim().parse()?); + } + } + } + Err(anyhow!("Could not determine CPU frequency")) + } + } +} + +impl ActuatorBus for GenericLinuxSal { + fn set_fan_mode(&self, mode: &str) -> Result<()> { + if self.is_dell() { + let cmd = match mode { + "manual" | "max" => self.db.ecosystems.get("dell").and_then(|e| e.fan_manual_mode_cmd.as_ref()), + "auto" => self.db.ecosystems.get("dell").and_then(|e| e.fan_auto_mode_cmd.as_ref()), + _ => return Err(anyhow!("Unsupported fan mode: {}", mode)), + }; + if let Some(cmd_str) = cmd { + let parts: Vec<&str> = cmd_str.split_whitespace().collect(); + Command::new(parts[0]).args(&parts[1..]).status()?; + Ok(()) + } else { Err(anyhow!("Dell fan command missing in DB")) } + } else { + debug!("Fan control not implemented for non-Dell systems yet"); + Ok(()) + } + } + + fn set_sustained_power_limit(&self, watts: f32) -> Result<()> { + let rapl_path = self.fact_sheet.rapl_paths.first() + .ok_or_else(|| anyhow!("No RAPL path found for PL1"))?; + let path = rapl_path.join("constraint_0_power_limit_uw"); + fs::write(path, ((watts * 1_000_000.0) as u64).to_string())?; + Ok(()) + } + + fn set_burst_power_limit(&self, watts: f32) -> Result<()> { + let rapl_path = self.fact_sheet.rapl_paths.first() + .ok_or_else(|| anyhow!("No RAPL path found for PL2"))?; + let path = rapl_path.join("constraint_1_power_limit_uw"); + fs::write(path, ((watts * 1_000_000.0) as u64).to_string())?; + Ok(()) + } +} + +impl EnvironmentGuard for GenericLinuxSal { + fn suppress(&mut self) -> Result<()> { + for conflict_id in &self.fact_sheet.active_conflicts { + if let Some(conflict) = self.db.conflicts.iter().find(|c| &c.id == conflict_id) { + for service in &conflict.services { + debug!("Stopping service: {}", service); + if Command::new("systemctl").arg("stop").arg(service).status()?.success() { + self.suppressed_services.push(service.clone()); + } + } + } + } + Ok(()) + } + + fn restore(&mut self) -> Result<()> { + for service in self.suppressed_services.drain(..) { + debug!("Starting service: {}", service); + let _ = Command::new("systemctl").arg("start").arg(service).status(); + } + if self.is_dell() { + let _ = self.set_fan_mode("auto"); + } + Ok(()) + } +} + +impl HardwareWatchdog for GenericLinuxSal { + fn check_emergency(&self) -> Result { + if let Ok(temp) = self.get_temp() { + if temp > 100.0 { + return Ok(true); + } + } + Ok(false) + } +} + +impl Drop for GenericLinuxSal { + fn drop(&mut self) { + let _ = self.restore(); + } +} diff --git a/src/sal/heuristic/discovery.rs b/src/sal/heuristic/discovery.rs new file mode 100644 index 0000000..a4f894a --- /dev/null +++ b/src/sal/heuristic/discovery.rs @@ -0,0 +1,185 @@ +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::time::Duration; +use std::thread; +use std::sync::mpsc; +use crate::sal::heuristic::schema::{SensorDiscovery, ActuatorDiscovery, Conflict}; +use tracing::{debug, warn}; + +/// Strongly-typed findings about the current system. +#[derive(Debug, Clone, Default)] +pub struct SystemFactSheet { + pub vendor: String, + pub model: String, + pub temp_path: Option, + pub fan_paths: Vec, + pub rapl_paths: Vec, + pub active_conflicts: Vec, // List of conflict IDs found active +} + +/// Probes the system for hardware sensors, actuators, and service conflicts. +pub fn discover_facts( + sensors: &SensorDiscovery, + actuators: &ActuatorDiscovery, + conflicts: &[Conflict] +) -> SystemFactSheet { + let (vendor, model) = read_dmi_info(); + + debug!("DMI Identity: Vendor='{}', Model='{}'", vendor, model); + + let (temp_path, fan_paths) = discover_hwmon(sensors); + let rapl_paths = discover_rapl(actuators); + + let mut active_conflicts = Vec::new(); + for conflict in conflicts { + for service in &conflict.services { + if is_service_active(service) { + debug!("Detected active conflict: {} (Service: {})", conflict.id, service); + active_conflicts.push(conflict.id.clone()); + break; // Found one service in this conflict, move to next conflict + } + } + } + + SystemFactSheet { + vendor, + model, + temp_path, + fan_paths, + rapl_paths, + active_conflicts, + } +} + +/// Reads DMI information from sysfs with a safety timeout. +fn read_dmi_info() -> (String, String) { + let vendor = read_sysfs_with_timeout(Path::new("/sys/class/dmi/id/sys_vendor"), Duration::from_millis(100)) + .unwrap_or_else(|| "Unknown".to_string()); + let model = read_sysfs_with_timeout(Path::new("/sys/class/dmi/id/product_name"), Duration::from_millis(100)) + .unwrap_or_else(|| "Unknown".to_string()); + (vendor, model) +} + +/// Discovers hwmon sensors by matching labels and prioritizing drivers. +fn discover_hwmon(cfg: &SensorDiscovery) -> (Option, Vec) { + let mut temp_candidates = Vec::new(); + let mut fan_candidates = Vec::new(); + + let hwmon_base = Path::new("/sys/class/hwmon"); + let entries = match fs::read_dir(hwmon_base) { + Ok(e) => e, + Err(e) => { + warn!("Could not read /sys/class/hwmon: {}", e); + return (None, Vec::new()); + } + }; + + for entry in entries.flatten() { + let hwmon_path = entry.path(); + + let driver_name = read_sysfs_with_timeout(&hwmon_path.join("name"), Duration::from_millis(100)) + .unwrap_or_default(); + + let priority = cfg.hwmon_priority + .iter() + .position(|p| p == &driver_name) + .unwrap_or(usize::MAX); + + if let Ok(hw_entries) = fs::read_dir(&hwmon_path) { + for hw_entry in hw_entries.flatten() { + let file_name = hw_entry.file_name().into_string().unwrap_or_default(); + + // Temperature Sensors + if file_name.starts_with("temp") && file_name.ends_with("_label") { + if let Some(label) = read_sysfs_with_timeout(&hw_entry.path(), Duration::from_millis(100)) { + if cfg.temp_labels.iter().any(|l| label.contains(l)) { + let input_path = hwmon_path.join(file_name.replace("_label", "_input")); + if input_path.exists() { + temp_candidates.push((priority, input_path)); + } + } + } + } + + // Fan Sensors + if file_name.starts_with("fan") && file_name.ends_with("_label") { + if let Some(label) = read_sysfs_with_timeout(&hw_entry.path(), Duration::from_millis(100)) { + if cfg.fan_labels.iter().any(|l| label.contains(l)) { + let input_path = hwmon_path.join(file_name.replace("_label", "_input")); + if input_path.exists() { + fan_candidates.push((priority, input_path)); + } + } + } + } + } + } + } + + temp_candidates.sort_by_key(|(p, _)| *p); + fan_candidates.sort_by_key(|(p, _)| *p); + + let best_temp = temp_candidates.first().map(|(_, p)| p.clone()); + let best_fans = fan_candidates.into_iter().map(|(_, p)| p).collect(); + + (best_temp, best_fans) +} + +/// Discovers RAPL powercap paths. +fn discover_rapl(cfg: &ActuatorDiscovery) -> Vec { + let mut paths = Vec::new(); + let powercap_base = Path::new("/sys/class/powercap"); + + let entries = match fs::read_dir(powercap_base) { + Ok(e) => e, + Err(_) => return Vec::new(), + }; + + for entry in entries.flatten() { + let path = entry.path(); + let dir_name = entry.file_name().into_string().unwrap_or_default(); + + if cfg.rapl_paths.contains(&dir_name) { + paths.push(path); + continue; + } + + if let Some(name) = read_sysfs_with_timeout(&path.join("name"), Duration::from_millis(100)) { + if cfg.rapl_paths.iter().any(|p| p == &name) { + paths.push(path); + } + } + } + paths +} + +/// Checks if a systemd service is currently active. +pub fn is_service_active(service: &str) -> bool { + let status = Command::new("systemctl") + .arg("is-active") + .arg("--quiet") + .arg(service) + .status(); + + match status { + Ok(s) => s.success(), + Err(_) => false, + } +} + +/// Helper to read a sysfs file with a timeout. +fn read_sysfs_with_timeout(path: &Path, timeout: Duration) -> Option { + let (tx, rx) = mpsc::channel(); + let path_buf = path.to_path_buf(); + + thread::spawn(move || { + let res = fs::read_to_string(path_buf).map(|s| s.trim().to_string()); + let _ = tx.send(res); + }); + + match rx.recv_timeout(timeout) { + Ok(Ok(content)) => Some(content), + _ => None, + } +} diff --git a/src/sal/heuristic/engine.rs b/src/sal/heuristic/engine.rs new file mode 100644 index 0000000..fce728c --- /dev/null +++ b/src/sal/heuristic/engine.rs @@ -0,0 +1,60 @@ +use miette::{Result, IntoDiagnostic, Context}; +use std::fs; +use regex::Regex; +use tracing::{info, debug}; + +use crate::sal::traits::PlatformSal; +use crate::sal::dell_xps_9380::DellXps9380Sal; +use crate::sal::generic_linux::GenericLinuxSal; +use crate::sal::heuristic::schema::HardwareDb; +use crate::sal::heuristic::discovery::{discover_facts}; + +pub struct HeuristicEngine; + +impl HeuristicEngine { + /// Loads the hardware database, probes the system, and builds the appropriate SAL. + pub fn detect_and_build() -> Result> { + // 1. Load Hardware DB + let db_path = "assets/hardware_db.toml"; + let db_content = fs::read_to_string(db_path) + .into_diagnostic() + .with_context(|| format!("Failed to read hardware database at {}", db_path))?; + let db: HardwareDb = toml::from_str(&db_content) + .into_diagnostic() + .context("Failed to parse hardware_db.toml")?; + + // 2. Discover Facts + let facts = discover_facts(&db.discovery.sensors, &db.discovery.actuators, &db.conflicts); + info!("System Identity: {} {}", facts.vendor, facts.model); + + // 3. Routing Logic + + // --- Special Case: Dell XPS 13 9380 --- + if is_match(&facts.vendor, "(?i)Dell.*") && is_match(&facts.model, "(?i)XPS.*13.*9380.*") { + info!("Specialized SAL Match Found: Dell XPS 13 9380"); + let sal = DellXps9380Sal::init().map_err(|e| miette::miette!(e))?; + return Ok(Box::new(sal)); + } + + // --- Fallback: Generic Linux SAL --- + debug!("No specialized SAL match. Falling back to GenericLinuxSal with DB quirks."); + + // Validation: Ensure we found at least a temperature sensor if required + if facts.temp_path.is_none() { + return Err(miette::miette!("No temperature sensor discovered. Generic fallback impossible.")); + } + if facts.rapl_paths.is_empty() { + return Err(miette::miette!("No RAPL power interface discovered. Generic fallback impossible.")); + } + + Ok(Box::new(GenericLinuxSal::new(facts, db))) + } +} + +fn is_match(input: &str, pattern: &str) -> bool { + if let Ok(re) = Regex::new(pattern) { + re.is_match(input) + } else { + false + } +} diff --git a/src/sal/heuristic/mod.rs b/src/sal/heuristic/mod.rs new file mode 100644 index 0000000..75942f8 --- /dev/null +++ b/src/sal/heuristic/mod.rs @@ -0,0 +1,3 @@ +pub mod schema; +pub mod discovery; +pub mod engine; diff --git a/src/sal/heuristic/schema.rs b/src/sal/heuristic/schema.rs new file mode 100644 index 0000000..316e701 --- /dev/null +++ b/src/sal/heuristic/schema.rs @@ -0,0 +1,90 @@ +use serde::Deserialize; +use std::collections::HashMap; + +#[derive(Debug, Deserialize, Clone)] +pub struct HardwareDb { + pub metadata: Metadata, + pub conflicts: Vec, + pub ecosystems: HashMap, + pub quirks: Vec, + pub discovery: Discovery, + pub preflight_checks: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct Metadata { + pub version: String, + pub updated: String, + pub description: String, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct Conflict { + pub id: String, + pub services: Vec, + pub contention: String, + pub severity: String, + pub fix_action: String, + pub help_text: String, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct Ecosystem { + pub vendor_regex: String, + pub polling_cap_ms: Option, + pub drivers: Option>, + pub fan_manual_mode_cmd: Option, + pub fan_auto_mode_cmd: Option, + pub safety_register: Option, + pub lap_mode_path: Option, + pub profiles_path: Option, + pub ec_write_required: Option, + pub thermal_policy_path: Option, + pub policy_map: Option>, + pub msr_lock_register: Option, + pub msr_lock_bit: Option, + pub fan_boost_path: Option, + pub ec_tool: Option, + pub optimization: Option, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct Quirk { + pub model_regex: String, + pub id: String, + pub issue: String, + pub action: String, + pub monitor_msr: Option, + pub reset_bit: Option, + pub trigger_path: Option, + pub trigger_value: Option, + pub target_path: Option, + pub format: Option, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct Discovery { + pub sensors: SensorDiscovery, + pub actuators: ActuatorDiscovery, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct SensorDiscovery { + pub temp_labels: Vec, + pub fan_labels: Vec, + pub hwmon_priority: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct ActuatorDiscovery { + pub rapl_paths: Vec, + pub amd_energy_paths: Vec, + pub governor_files: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct PreflightCheck { + pub name: String, + pub check_cmd: String, + pub fail_help: String, +} diff --git a/src/sal/mock.rs b/src/sal/mock.rs index 097f049..dabe27a 100644 --- a/src/sal/mock.rs +++ b/src/sal/mock.rs @@ -1,8 +1,15 @@ use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditStep}; use anyhow::Result; -pub struct MockAuditor; -impl PreflightAuditor for MockAuditor { +pub struct MockSal; + +impl MockSal { + pub fn new() -> Self { + Self + } +} + +impl PreflightAuditor for MockSal { fn audit(&self) -> Box + '_> { let steps = vec![ AuditStep { @@ -18,32 +25,16 @@ impl PreflightAuditor for MockAuditor { } } -pub struct MockGuard { - pub suppressed: bool, -} -impl MockGuard { - pub fn new() -> Self { - Self { suppressed: false } - } -} -impl EnvironmentGuard for MockGuard { +impl EnvironmentGuard for MockSal { fn suppress(&mut self) -> Result<()> { - self.suppressed = true; Ok(()) } fn restore(&mut self) -> Result<()> { - self.suppressed = false; Ok(()) } } -impl Drop for MockGuard { - fn drop(&mut self) { - let _ = self.restore(); - } -} -pub struct MockSensorBus; -impl SensorBus for MockSensorBus { +impl SensorBus for MockSal { fn get_temp(&self) -> Result { Ok(42.0) } @@ -58,8 +49,7 @@ impl SensorBus for MockSensorBus { } } -pub struct MockActuatorBus; -impl ActuatorBus for MockActuatorBus { +impl ActuatorBus for MockSal { fn set_fan_mode(&self, _mode: &str) -> Result<()> { Ok(()) } @@ -71,8 +61,7 @@ impl ActuatorBus for MockActuatorBus { } } -pub struct MockWatchdog; -impl HardwareWatchdog for MockWatchdog { +impl HardwareWatchdog for MockSal { fn check_emergency(&self) -> Result { Ok(false) } diff --git a/src/sal/mod.rs b/src/sal/mod.rs index 53e7712..16526ac 100644 --- a/src/sal/mod.rs +++ b/src/sal/mod.rs @@ -1,3 +1,5 @@ pub mod traits; pub mod mock; pub mod dell_xps_9380; +pub mod generic_linux; +pub mod heuristic; diff --git a/src/sal/traits.rs b/src/sal/traits.rs index 906e1b4..3aabf75 100644 --- a/src/sal/traits.rs +++ b/src/sal/traits.rs @@ -48,7 +48,7 @@ impl PreflightAuditor for Arc { } /// Suppresses conflicting daemons (tlp, thermald). -pub trait EnvironmentGuard { +pub trait EnvironmentGuard: Send + Sync { fn suppress(&mut self) -> Result<()>; fn restore(&mut self) -> Result<()>; } @@ -77,7 +77,7 @@ impl SensorBus for Arc { } /// Write-only interface for hardware commands. -pub trait ActuatorBus { +pub trait ActuatorBus: Send + Sync { fn set_fan_mode(&self, mode: &str) -> Result<()>; fn set_sustained_power_limit(&self, watts: f32) -> Result<()>; fn set_burst_power_limit(&self, watts: f32) -> Result<()>; @@ -96,7 +96,7 @@ impl ActuatorBus for Arc { } /// Concurrent monitor for catastrophic states. -pub trait HardwareWatchdog { +pub trait HardwareWatchdog: Send + Sync { fn check_emergency(&self) -> Result; } @@ -105,3 +105,8 @@ impl HardwareWatchdog for Arc { (**self).check_emergency() } } + +/// Aggregate trait for a complete platform implementation. +pub trait PlatformSal: PreflightAuditor + SensorBus + ActuatorBus + EnvironmentGuard + HardwareWatchdog {} + +impl PlatformSal for T {}