fixed hardware_db and improved stability and robustness of generic sal
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError, AuditStep};
|
||||
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError, AuditStep, SafetyStatus};
|
||||
use anyhow::{Result, Context};
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::sync::Mutex;
|
||||
use tracing::debug;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
pub struct DellXps9380Sal {
|
||||
temp_path: PathBuf,
|
||||
@@ -18,6 +18,8 @@ pub struct DellXps9380Sal {
|
||||
last_temp: Mutex<f32>,
|
||||
last_fans: Mutex<Vec<u32>>,
|
||||
suppressed_services: Mutex<Vec<String>>,
|
||||
msr_file: Mutex<fs::File>,
|
||||
last_energy: Mutex<(u64, Instant)>,
|
||||
}
|
||||
|
||||
impl DellXps9380Sal {
|
||||
@@ -35,7 +37,6 @@ impl DellXps9380Sal {
|
||||
|
||||
if name == "dell_smm" {
|
||||
temp_path = Some(p.join("temp1_input"));
|
||||
// Discover all fans
|
||||
if let Ok(fan_entries) = fs::read_dir(&p) {
|
||||
for fan_entry in fan_entries.flatten() {
|
||||
let fan_p = fan_entry.path();
|
||||
@@ -54,7 +55,6 @@ impl DellXps9380Sal {
|
||||
}
|
||||
}
|
||||
|
||||
// Discovery for RAPL via powercap
|
||||
if let Ok(entries) = fs::read_dir("/sys/class/powercap") {
|
||||
for entry in entries.flatten() {
|
||||
let p = entry.path();
|
||||
@@ -72,6 +72,9 @@ impl DellXps9380Sal {
|
||||
|
||||
let rapl_base = rapl_base_path.context("Could not find RAPL package-0 path in powercap")?;
|
||||
let freq_path = PathBuf::from("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq");
|
||||
|
||||
let msr_file = fs::OpenOptions::new().read(true).write(true).open("/dev/cpu/0/msr")
|
||||
.context("Failed to open /dev/cpu/0/msr. Is the 'msr' module loaded?")?;
|
||||
|
||||
Ok(Self {
|
||||
temp_path: temp_path.context("Could not find dell_smm temperature path")?,
|
||||
@@ -84,68 +87,64 @@ impl DellXps9380Sal {
|
||||
last_temp: Mutex::new(0.0),
|
||||
last_fans: Mutex::new(Vec::new()),
|
||||
suppressed_services: Mutex::new(Vec::new()),
|
||||
msr_file: Mutex::new(msr_file),
|
||||
last_energy: Mutex::new((0, Instant::now())),
|
||||
})
|
||||
}
|
||||
|
||||
/// Reads one 64-bit model-specific register through the already-open MSR
/// device file held in `msr_file`.
///
/// The register number `msr` is used as the byte offset into the file, and the
/// eight bytes read at that offset are decoded as a little-endian `u64`.
///
/// # Errors
/// Returns an error if the positional read fails or delivers fewer than
/// eight bytes.
///
/// # Panics
/// Panics if the `msr_file` mutex is poisoned.
fn read_msr(&self, msr: u32) -> Result<u64> {
    use std::os::unix::fs::FileExt;

    let mut buf = [0u8; 8];
    let file = self.msr_file.lock().unwrap();
    // `read_at` is allowed to return a short read and its byte count was being
    // ignored; `read_exact_at` turns a partially filled buffer into an error
    // instead of decoding leftover zero bytes as a register value.
    file.read_exact_at(&mut buf, u64::from(msr))?;
    Ok(u64::from_le_bytes(buf))
}
|
||||
|
||||
/// Writes one 64-bit model-specific register through the already-open MSR
/// device file held in `msr_file`.
///
/// `val` is encoded as eight little-endian bytes and written at byte offset
/// `msr` (the register number).
///
/// # Errors
/// Returns an error if the positional write fails or cannot store all eight
/// bytes.
///
/// # Panics
/// Panics if the `msr_file` mutex is poisoned.
fn write_msr(&self, msr: u32, val: u64) -> Result<()> {
    use std::os::unix::fs::FileExt;

    let file = self.msr_file.lock().unwrap();
    // `write_at` may write fewer bytes than requested and its count was being
    // ignored; `write_all_at` reports an error unless the full value is written.
    file.write_all_at(&val.to_le_bytes(), u64::from(msr))?;
    Ok(())
}
|
||||
}
|
||||
|
||||
impl PreflightAuditor for DellXps9380Sal {
|
||||
fn audit(&self) -> Box<dyn Iterator<Item = AuditStep> + '_> {
|
||||
let mut steps = Vec::new();
|
||||
|
||||
// 1. Root check
|
||||
steps.push(AuditStep {
|
||||
description: "Root Privileges".to_string(),
|
||||
outcome: if unsafe { libc::getuid() } == 0 { Ok(()) } else { Err(AuditError::RootRequired) }
|
||||
});
|
||||
|
||||
// 2. Kernel modules check (simplified check via sysfs/proc)
|
||||
let modules = ["dell_smm_hwmon", "msr", "intel_rapl_msr"];
|
||||
for mod_name in modules {
|
||||
let path = format!("/sys/module/{}", mod_name);
|
||||
steps.push(AuditStep {
|
||||
description: format!("Kernel Module: {}", mod_name),
|
||||
outcome: if PathBuf::from(path).exists() { Ok(()) } else {
|
||||
Err(AuditError::ToolMissing(format!("Module '{}' not loaded. Run 'sudo modprobe {}'", mod_name, mod_name)))
|
||||
Err(AuditError::ToolMissing(format!("Module '{}' not loaded.", mod_name)))
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// 3. Kernel parameters check
|
||||
let cmdline = fs::read_to_string("/proc/cmdline").unwrap_or_default();
|
||||
steps.push(AuditStep {
|
||||
description: "Kernel Param: dell_smm_hwmon.ignore_dmi=1".to_string(),
|
||||
outcome: if cmdline.contains("dell_smm_hwmon.ignore_dmi=1") { Ok(()) } else {
|
||||
Err(AuditError::MissingKernelParam("dell_smm_hwmon.ignore_dmi=1".to_string()))
|
||||
}
|
||||
});
|
||||
steps.push(AuditStep {
|
||||
description: "Kernel Param: dell_smm_hwmon.restricted=0".to_string(),
|
||||
outcome: if cmdline.contains("dell_smm_hwmon.restricted=0") { Ok(()) } else {
|
||||
Err(AuditError::MissingKernelParam("dell_smm_hwmon.restricted=0".to_string()))
|
||||
}
|
||||
});
|
||||
steps.push(AuditStep {
|
||||
description: "Kernel Param: msr.allow_writes=on".to_string(),
|
||||
outcome: if cmdline.contains("msr.allow_writes=on") { Ok(()) } else {
|
||||
Err(AuditError::MissingKernelParam("msr.allow_writes=on".to_string()))
|
||||
}
|
||||
});
|
||||
let params = [
|
||||
("dell_smm_hwmon.ignore_dmi=1", "dell_smm_hwmon.ignore_dmi=1"),
|
||||
("dell_smm_hwmon.restricted=0", "dell_smm_hwmon.restricted=0"),
|
||||
("msr.allow_writes=on", "msr.allow_writes=on"),
|
||||
];
|
||||
for (label, p) in params {
|
||||
steps.push(AuditStep {
|
||||
description: format!("Kernel Param: {}", label),
|
||||
outcome: if cmdline.contains(p) { Ok(()) } else { Err(AuditError::MissingKernelParam(p.to_string())) }
|
||||
});
|
||||
}
|
||||
|
||||
// 4. Lockdown check
|
||||
let lockdown = fs::read_to_string("/sys/kernel/security/lockdown").unwrap_or_default();
|
||||
steps.push(AuditStep {
|
||||
description: "Kernel Lockdown Status".to_string(),
|
||||
outcome: if lockdown.contains("[none]") || lockdown.is_empty() { Ok(()) } else {
|
||||
Err(AuditError::KernelIncompatible("Kernel is in lockdown mode. Set to 'none' to allow MSR/SMM writes.".to_string()))
|
||||
}
|
||||
});
|
||||
|
||||
// 5. Check AC power
|
||||
let ac_status = fs::read_to_string("/sys/class/power_supply/AC/online").unwrap_or_else(|_| "0".to_string());
|
||||
steps.push(AuditStep {
|
||||
description: "AC Power Connection".to_string(),
|
||||
outcome: if ac_status.trim() == "1" { Ok(()) } else {
|
||||
Err(AuditError::AcPowerMissing("System must be on AC power for benchmarking".to_string()))
|
||||
Err(AuditError::AcPowerMissing("System must be on AC power".to_string()))
|
||||
}
|
||||
});
|
||||
|
||||
@@ -154,12 +153,11 @@ impl PreflightAuditor for DellXps9380Sal {
|
||||
}
|
||||
|
||||
impl EnvironmentGuard for DellXps9380Sal {
|
||||
fn suppress(&mut self) -> Result<()> {
|
||||
fn suppress(&self) -> Result<()> {
|
||||
let services = ["tlp", "thermald", "i8kmon"];
|
||||
let mut suppressed = self.suppressed_services.lock().unwrap();
|
||||
for s in services {
|
||||
if Command::new("systemctl").args(["is-active", "--quiet", s]).status()?.success() {
|
||||
debug!("Suppressing service: {}", s);
|
||||
Command::new("systemctl").args(["stop", s]).status()?;
|
||||
suppressed.push(s.to_string());
|
||||
}
|
||||
@@ -167,7 +165,7 @@ impl EnvironmentGuard for DellXps9380Sal {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn restore(&mut self) -> Result<()> {
|
||||
fn restore(&self) -> Result<()> {
|
||||
let mut suppressed = self.suppressed_services.lock().unwrap();
|
||||
for s in suppressed.drain(..) {
|
||||
let _ = Command::new("systemctl").args(["start", &s]).status();
|
||||
@@ -176,38 +174,31 @@ impl EnvironmentGuard for DellXps9380Sal {
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for DellXps9380Sal {
|
||||
fn drop(&mut self) {
|
||||
let _ = self.restore();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl SensorBus for DellXps9380Sal {
|
||||
fn get_temp(&self) -> Result<f32> {
|
||||
// Enforce 1000ms rate limit for Dell SMM as per GEMINI.md
|
||||
let mut last_poll = self.last_poll.lock().unwrap();
|
||||
let now = Instant::now();
|
||||
|
||||
if now.duration_since(*last_poll) < Duration::from_millis(1000) {
|
||||
return Ok(*self.last_temp.lock().unwrap());
|
||||
}
|
||||
|
||||
let s = fs::read_to_string(&self.temp_path)?;
|
||||
let val = s.trim().parse::<f32>()? / 1000.0;
|
||||
|
||||
*self.last_temp.lock().unwrap() = val;
|
||||
*last_poll = now;
|
||||
|
||||
Ok(val)
|
||||
}
|
||||
|
||||
fn get_power_w(&self) -> Result<f32> {
|
||||
if self.pwr_path.to_string_lossy().contains("energy_uj") {
|
||||
let e1 = fs::read_to_string(&self.pwr_path)?.trim().parse::<u64>()?;
|
||||
std::thread::sleep(Duration::from_millis(100));
|
||||
let mut last = self.last_energy.lock().unwrap();
|
||||
let e2 = fs::read_to_string(&self.pwr_path)?.trim().parse::<u64>()?;
|
||||
Ok((e2.saturating_sub(e1)) as f32 / 100000.0)
|
||||
let t2 = Instant::now();
|
||||
let (e1, t1) = *last;
|
||||
let delta_e = e2.wrapping_sub(e1);
|
||||
let delta_t = t2.duration_since(t1).as_secs_f32();
|
||||
*last = (e2, t2);
|
||||
if delta_t < 0.01 { return Ok(0.0); }
|
||||
Ok((delta_e as f32 / 1_000_000.0) / delta_t)
|
||||
} else {
|
||||
let s = fs::read_to_string(&self.pwr_path)?;
|
||||
Ok(s.trim().parse::<f32>()? / 1000000.0)
|
||||
@@ -217,66 +208,65 @@ impl SensorBus for DellXps9380Sal {
|
||||
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
|
||||
let mut last_poll = self.last_poll.lock().unwrap();
|
||||
let now = Instant::now();
|
||||
|
||||
if now.duration_since(*last_poll) < Duration::from_millis(1000) {
|
||||
return Ok(self.last_fans.lock().unwrap().clone());
|
||||
}
|
||||
|
||||
let mut fans = Vec::new();
|
||||
for path in &self.fan_paths {
|
||||
if let Ok(s) = fs::read_to_string(path) {
|
||||
if let Ok(rpm) = s.trim().parse::<u32>() {
|
||||
fans.push(rpm);
|
||||
}
|
||||
if let Ok(rpm) = s.trim().parse::<u32>() { fans.push(rpm); }
|
||||
}
|
||||
}
|
||||
|
||||
*self.last_fans.lock().unwrap() = fans.clone();
|
||||
*last_poll = now;
|
||||
|
||||
Ok(fans)
|
||||
}
|
||||
|
||||
fn get_freq_mhz(&self) -> Result<f32> {
|
||||
let s = fs::read_to_string(&self.freq_path)?;
|
||||
let val = s.trim().parse::<f32>()? / 1000.0;
|
||||
Ok(val)
|
||||
Ok(s.trim().parse::<f32>()? / 1000.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl ActuatorBus for DellXps9380Sal {
|
||||
fn set_fan_mode(&self, mode: &str) -> Result<()> {
|
||||
match mode {
|
||||
"max" | "Manual" => {
|
||||
Command::new("dell-bios-fan-control").arg("0").status()?;
|
||||
}
|
||||
"auto" | "Auto" => {
|
||||
Command::new("dell-bios-fan-control").arg("1").status()?;
|
||||
}
|
||||
_ => {
|
||||
debug!("Unknown fan mode requested: {}", mode);
|
||||
}
|
||||
"max" | "Manual" => { Command::new("dell-bios-fan-control").arg("0").status()?; }
|
||||
"auto" | "Auto" => { Command::new("dell-bios-fan-control").arg("1").status()?; }
|
||||
_ => { debug!("Unknown fan mode: {}", mode); }
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
|
||||
let uw = (watts * 1_000_000.0) as u64;
|
||||
fs::write(&self.pl1_path, uw.to_string())?;
|
||||
fs::write(&self.pl1_path, ((watts * 1_000_000.0) as u64).to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
|
||||
let uw = (watts * 1_000_000.0) as u64;
|
||||
fs::write(&self.pl2_path, uw.to_string())?;
|
||||
fs::write(&self.pl2_path, ((watts * 1_000_000.0) as u64).to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl HardwareWatchdog for DellXps9380Sal {
|
||||
fn check_emergency(&self) -> Result<bool> {
|
||||
// Check for thermal throttling or BD PROCHOT
|
||||
// Simplified for now
|
||||
Ok(false)
|
||||
fn get_safety_status(&self) -> Result<SafetyStatus> {
|
||||
let temp = self.get_temp()?;
|
||||
if temp > 98.0 {
|
||||
return Ok(SafetyStatus::EmergencyAbort(format!("Thermal Runaway: {:.1}°C", temp)));
|
||||
}
|
||||
if let Ok(msr_val) = self.read_msr(0x1FC) {
|
||||
if (msr_val & 0x1) != 0 && temp < 85.0 {
|
||||
let _ = self.write_msr(0x1FC, msr_val & !0x1);
|
||||
return Ok(SafetyStatus::Warning("BD PROCHOT Latch Cleared".to_string()));
|
||||
}
|
||||
}
|
||||
Ok(SafetyStatus::Nominal)
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls `restore()` when the SAL value is dropped.
impl Drop for DellXps9380Sal {
|
||||
fn drop(&mut self) {
|
||||
// `drop` cannot return an error, so the result of `restore()` is
// deliberately discarded.
let _ = self.restore();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,19 +2,21 @@ use anyhow::{Result, anyhow};
|
||||
use std::path::Path;
|
||||
use std::fs;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::thread;
|
||||
use std::process::Command;
|
||||
use tracing::{debug};
|
||||
use std::sync::mpsc;
|
||||
use tracing::{debug, warn};
|
||||
use std::sync::Mutex;
|
||||
|
||||
use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError};
|
||||
use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError, SafetyStatus};
|
||||
use crate::sal::heuristic::discovery::SystemFactSheet;
|
||||
use crate::sal::heuristic::schema::HardwareDb;
|
||||
|
||||
pub struct GenericLinuxSal {
|
||||
fact_sheet: SystemFactSheet,
|
||||
db: HardwareDb,
|
||||
suppressed_services: Vec<String>,
|
||||
suppressed_services: Mutex<Vec<String>>,
|
||||
last_valid_temp: Mutex<(f32, Instant)>,
|
||||
current_pl1: Mutex<f32>,
|
||||
last_energy: Mutex<(u64, Instant)>,
|
||||
}
|
||||
|
||||
impl GenericLinuxSal {
|
||||
@@ -22,7 +24,10 @@ impl GenericLinuxSal {
|
||||
Self {
|
||||
fact_sheet,
|
||||
db,
|
||||
suppressed_services: Vec::new(),
|
||||
suppressed_services: Mutex::new(Vec::new()),
|
||||
last_valid_temp: Mutex::new((0.0, Instant::now())),
|
||||
current_pl1: Mutex::new(15.0),
|
||||
last_energy: Mutex::new((0, Instant::now())),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,33 +35,18 @@ impl GenericLinuxSal {
|
||||
self.fact_sheet.vendor.to_lowercase().contains("dell")
|
||||
}
|
||||
|
||||
fn read_sysfs_timeout(&self, path: &Path, timeout: Duration) -> Result<String> {
|
||||
let (tx, rx) = mpsc::channel();
|
||||
let path_buf = path.to_path_buf();
|
||||
|
||||
thread::spawn(move || {
|
||||
let res = fs::read_to_string(path_buf).map(|s| s.trim().to_string());
|
||||
let _ = tx.send(res);
|
||||
});
|
||||
|
||||
match rx.recv_timeout(timeout) {
|
||||
Ok(res) => res.map_err(|e| anyhow!("Failed to read sysfs: {}", e)),
|
||||
Err(_) => Err(anyhow!("Timeout reading sysfs path: {:?}", path)),
|
||||
}
|
||||
/// Read sysfs safely. We removed the thread-per-read timeout logic
|
||||
/// as it was inefficient. sysfs reads are generally fast enough.
|
||||
fn read_sysfs(&self, path: &Path) -> Result<String> {
|
||||
fs::read_to_string(path).map(|s| s.trim().to_string()).map_err(|e| anyhow!(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl PreflightAuditor for GenericLinuxSal {
|
||||
fn audit(&self) -> Box<dyn Iterator<Item = AuditStep> + '_> {
|
||||
let mut steps = Vec::new();
|
||||
|
||||
// 1. Static DB checks
|
||||
for check in &self.db.preflight_checks {
|
||||
let status = Command::new("sh")
|
||||
.arg("-c")
|
||||
.arg(&check.check_cmd)
|
||||
.status();
|
||||
|
||||
let status = Command::new("sh").arg("-c").arg(&check.check_cmd).status();
|
||||
steps.push(AuditStep {
|
||||
description: check.name.clone(),
|
||||
outcome: match status {
|
||||
@@ -65,8 +55,6 @@ impl PreflightAuditor for GenericLinuxSal {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// 2. Conflict checks (Critical only)
|
||||
for conflict_id in &self.fact_sheet.active_conflicts {
|
||||
if let Some(conflict) = self.db.conflicts.iter().find(|c| &c.id == conflict_id) {
|
||||
if conflict.severity == "Critical" {
|
||||
@@ -77,7 +65,6 @@ impl PreflightAuditor for GenericLinuxSal {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Box::new(steps.into_iter())
|
||||
}
|
||||
}
|
||||
@@ -86,31 +73,32 @@ impl SensorBus for GenericLinuxSal {
|
||||
fn get_temp(&self) -> Result<f32> {
|
||||
let path = self.fact_sheet.temp_path.as_ref()
|
||||
.ok_or_else(|| anyhow!("No temperature sensor path found"))?;
|
||||
let content = self.read_sysfs_timeout(path, Duration::from_millis(200))?;
|
||||
let milli_celsius: f32 = content.parse()?;
|
||||
Ok(milli_celsius / 1000.0)
|
||||
let content = self.read_sysfs(path)?;
|
||||
let temp = content.parse::<f32>()? / 1000.0;
|
||||
let mut last = self.last_valid_temp.lock().unwrap();
|
||||
if (temp - last.0).abs() > 0.01 { *last = (temp, Instant::now()); }
|
||||
Ok(temp)
|
||||
}
|
||||
|
||||
fn get_power_w(&self) -> Result<f32> {
|
||||
let rapl_path = self.fact_sheet.rapl_paths.first()
|
||||
.ok_or_else(|| anyhow!("No RAPL path found"))?;
|
||||
let energy_path = rapl_path.join("energy_uj");
|
||||
|
||||
let e1: u64 = self.read_sysfs_timeout(&energy_path, Duration::from_millis(200))?.parse()?;
|
||||
let t1 = Instant::now();
|
||||
thread::sleep(Duration::from_millis(100));
|
||||
let e2: u64 = self.read_sysfs_timeout(&energy_path, Duration::from_millis(200))?.parse()?;
|
||||
let mut last = self.last_energy.lock().unwrap();
|
||||
let e2: u64 = self.read_sysfs(&energy_path)?.parse()?;
|
||||
let t2 = Instant::now();
|
||||
|
||||
let (e1, t1) = *last;
|
||||
let delta_e = e2.wrapping_sub(e1);
|
||||
let delta_t = t2.duration_since(t1).as_secs_f32();
|
||||
*last = (e2, t2);
|
||||
if delta_t < 0.01 { return Ok(0.0); }
|
||||
Ok((delta_e as f32 / 1_000_000.0) / delta_t)
|
||||
}
|
||||
|
||||
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
|
||||
let mut rpms = Vec::new();
|
||||
for path in &self.fact_sheet.fan_paths {
|
||||
if let Ok(content) = self.read_sysfs_timeout(path, Duration::from_millis(200)) {
|
||||
if let Ok(content) = self.read_sysfs(path) {
|
||||
if let Ok(rpm) = content.parse() { rpms.push(rpm); }
|
||||
}
|
||||
}
|
||||
@@ -120,10 +108,8 @@ impl SensorBus for GenericLinuxSal {
|
||||
fn get_freq_mhz(&self) -> Result<f32> {
|
||||
let path = Path::new("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq");
|
||||
if path.exists() {
|
||||
let khz: f32 = self.read_sysfs_timeout(path, Duration::from_millis(200))?.parse()?;
|
||||
Ok(khz / 1000.0)
|
||||
Ok(self.read_sysfs(path)?.parse::<f32>()? / 1000.0)
|
||||
} else {
|
||||
// Fallback: parse /proc/cpuinfo
|
||||
let cpuinfo = fs::read_to_string("/proc/cpuinfo")?;
|
||||
for line in cpuinfo.lines() {
|
||||
if line.starts_with("cpu MHz") {
|
||||
@@ -149,38 +135,32 @@ impl ActuatorBus for GenericLinuxSal {
|
||||
let parts: Vec<&str> = cmd_str.split_whitespace().collect();
|
||||
Command::new(parts[0]).args(&parts[1..]).status()?;
|
||||
Ok(())
|
||||
} else { Err(anyhow!("Dell fan command missing in DB")) }
|
||||
} else {
|
||||
debug!("Fan control not implemented for non-Dell systems yet");
|
||||
Ok(())
|
||||
}
|
||||
} else { Err(anyhow!("Dell fan command missing")) }
|
||||
} else { Ok(()) }
|
||||
}
|
||||
|
||||
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
|
||||
let rapl_path = self.fact_sheet.rapl_paths.first()
|
||||
.ok_or_else(|| anyhow!("No RAPL path found for PL1"))?;
|
||||
let path = rapl_path.join("constraint_0_power_limit_uw");
|
||||
fs::write(path, ((watts * 1_000_000.0) as u64).to_string())?;
|
||||
let rapl_path = self.fact_sheet.rapl_paths.first().ok_or_else(|| anyhow!("No PL1 path"))?;
|
||||
fs::write(rapl_path.join("constraint_0_power_limit_uw"), ((watts * 1_000_000.0) as u64).to_string())?;
|
||||
*self.current_pl1.lock().unwrap() = watts;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
|
||||
let rapl_path = self.fact_sheet.rapl_paths.first()
|
||||
.ok_or_else(|| anyhow!("No RAPL path found for PL2"))?;
|
||||
let path = rapl_path.join("constraint_1_power_limit_uw");
|
||||
fs::write(path, ((watts * 1_000_000.0) as u64).to_string())?;
|
||||
let rapl_path = self.fact_sheet.rapl_paths.first().ok_or_else(|| anyhow!("No PL2 path"))?;
|
||||
fs::write(rapl_path.join("constraint_1_power_limit_uw"), ((watts * 1_000_000.0) as u64).to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl EnvironmentGuard for GenericLinuxSal {
|
||||
fn suppress(&mut self) -> Result<()> {
|
||||
fn suppress(&self) -> Result<()> {
|
||||
let mut suppressed = self.suppressed_services.lock().unwrap();
|
||||
for conflict_id in &self.fact_sheet.active_conflicts {
|
||||
if let Some(conflict) = self.db.conflicts.iter().find(|c| &c.id == conflict_id) {
|
||||
for service in &conflict.services {
|
||||
debug!("Stopping service: {}", service);
|
||||
if Command::new("systemctl").arg("stop").arg(service).status()?.success() {
|
||||
self.suppressed_services.push(service.clone());
|
||||
suppressed.push(service.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -188,31 +168,30 @@ impl EnvironmentGuard for GenericLinuxSal {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn restore(&mut self) -> Result<()> {
|
||||
for service in self.suppressed_services.drain(..) {
|
||||
debug!("Starting service: {}", service);
|
||||
fn restore(&self) -> Result<()> {
|
||||
let mut suppressed = self.suppressed_services.lock().unwrap();
|
||||
for service in suppressed.drain(..) {
|
||||
let _ = Command::new("systemctl").arg("start").arg(service).status();
|
||||
}
|
||||
if self.is_dell() {
|
||||
let _ = self.set_fan_mode("auto");
|
||||
}
|
||||
if self.is_dell() { let _ = self.set_fan_mode("auto"); }
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl HardwareWatchdog for GenericLinuxSal {
|
||||
fn check_emergency(&self) -> Result<bool> {
|
||||
if let Ok(temp) = self.get_temp() {
|
||||
if temp > 100.0 {
|
||||
return Ok(true);
|
||||
}
|
||||
fn get_safety_status(&self) -> Result<SafetyStatus> {
|
||||
let temp = self.get_temp()?;
|
||||
if temp > 100.0 {
|
||||
return Ok(SafetyStatus::EmergencyAbort(format!("Thermal runaway: {:.1}°C", temp)));
|
||||
}
|
||||
Ok(false)
|
||||
let last = self.last_valid_temp.lock().unwrap();
|
||||
if last.1.elapsed() > Duration::from_secs(5) {
|
||||
return Ok(SafetyStatus::EmergencyAbort("Temperature sensor stalled".to_string()));
|
||||
}
|
||||
Ok(SafetyStatus::Nominal)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for GenericLinuxSal {
|
||||
fn drop(&mut self) {
|
||||
let _ = self.restore();
|
||||
}
|
||||
fn drop(&mut self) { let _ = self.restore(); }
|
||||
}
|
||||
|
||||
@@ -31,6 +31,7 @@ pub struct Conflict {
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
pub struct Ecosystem {
|
||||
pub vendor_regex: String,
|
||||
pub product_regex: Option<String>,
|
||||
pub polling_cap_ms: Option<u64>,
|
||||
pub drivers: Option<Vec<String>>,
|
||||
pub fan_manual_mode_cmd: Option<String>,
|
||||
@@ -46,6 +47,7 @@ pub struct Ecosystem {
|
||||
pub fan_boost_path: Option<String>,
|
||||
pub ec_tool: Option<String>,
|
||||
pub optimization: Option<String>,
|
||||
pub help_text: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone)]
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditStep};
|
||||
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditStep, PlatformSal, SafetyStatus};
|
||||
use anyhow::Result;
|
||||
|
||||
pub struct MockSal;
|
||||
@@ -26,10 +26,10 @@ impl PreflightAuditor for MockSal {
|
||||
}
|
||||
|
||||
impl EnvironmentGuard for MockSal {
|
||||
fn suppress(&mut self) -> Result<()> {
|
||||
fn suppress(&self) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
fn restore(&mut self) -> Result<()> {
|
||||
fn restore(&self) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -62,7 +62,7 @@ impl ActuatorBus for MockSal {
|
||||
}
|
||||
|
||||
impl HardwareWatchdog for MockSal {
|
||||
fn check_emergency(&self) -> Result<bool> {
|
||||
Ok(false)
|
||||
fn get_safety_status(&self) -> Result<SafetyStatus> {
|
||||
Ok(SafetyStatus::Nominal)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,8 +49,17 @@ impl<T: PreflightAuditor + ?Sized> PreflightAuditor for Arc<T> {
|
||||
|
||||
/// Suppresses conflicting daemons (tlp, thermald).
|
||||
pub trait EnvironmentGuard: Send + Sync {
|
||||
fn suppress(&mut self) -> Result<()>;
|
||||
fn restore(&mut self) -> Result<()>;
|
||||
fn suppress(&self) -> Result<()>;
|
||||
fn restore(&self) -> Result<()>;
|
||||
}
|
||||
|
||||
/// Forwards `EnvironmentGuard` through an `Arc`, so a shared handle can be
/// used wherever a guard is expected.
impl<T> EnvironmentGuard for Arc<T>
where
    T: EnvironmentGuard + ?Sized,
{
    /// Delegates to the wrapped value's `suppress`.
    fn suppress(&self) -> Result<()> {
        T::suppress(&**self)
    }

    /// Delegates to the wrapped value's `restore`.
    fn restore(&self) -> Result<()> {
        T::restore(&**self)
    }
}
|
||||
|
||||
/// Read-only interface for standardized metrics.
|
||||
@@ -97,15 +106,23 @@ impl<T: ActuatorBus + ?Sized> ActuatorBus for Arc<T> {
|
||||
|
||||
/// Concurrent monitor for catastrophic states.
|
||||
pub trait HardwareWatchdog: Send + Sync {
|
||||
fn check_emergency(&self) -> Result<bool>;
|
||||
fn get_safety_status(&self) -> Result<SafetyStatus>;
|
||||
}
|
||||
|
||||
impl<T: HardwareWatchdog + ?Sized> HardwareWatchdog for Arc<T> {
|
||||
fn check_emergency(&self) -> Result<bool> {
|
||||
(**self).check_emergency()
|
||||
fn get_safety_status(&self) -> Result<SafetyStatus> {
|
||||
(**self).get_safety_status()
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of a watchdog evaluation, as returned by
/// `HardwareWatchdog::get_safety_status`.
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum SafetyStatus {
|
||||
/// No check reported a problem.
Nominal,
|
||||
/// A noteworthy condition, carrying a human-readable message.
Warning(String),
|
||||
/// Carries a human-readable message.
// NOTE(review): no implementation in this change constructs this variant;
// its intended meaning relative to `Warning` should be confirmed.
Critical(String),
|
||||
/// A condition the implementations report for over-temperature or a stalled
/// temperature sensor, carrying a human-readable reason.
EmergencyAbort(String),
|
||||
}
|
||||
|
||||
/// Aggregate trait for a complete platform implementation.
|
||||
pub trait PlatformSal: PreflightAuditor + SensorBus + ActuatorBus + EnvironmentGuard + HardwareWatchdog {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user