fixed hardware_db and improved stability and robustness of generic sal

This commit is contained in:
2026-02-26 15:52:44 +01:00
parent f87efa1d24
commit 073414a25e
13 changed files with 488 additions and 225 deletions

View File

@@ -1,11 +1,11 @@
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError, AuditStep};
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError, AuditStep, SafetyStatus};
use anyhow::{Result, Context};
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use std::time::{Duration, Instant};
use std::sync::Mutex;
use tracing::debug;
use tracing::{debug, warn};
pub struct DellXps9380Sal {
temp_path: PathBuf,
@@ -18,6 +18,8 @@ pub struct DellXps9380Sal {
last_temp: Mutex<f32>,
last_fans: Mutex<Vec<u32>>,
suppressed_services: Mutex<Vec<String>>,
msr_file: Mutex<fs::File>,
last_energy: Mutex<(u64, Instant)>,
}
impl DellXps9380Sal {
@@ -35,7 +37,6 @@ impl DellXps9380Sal {
if name == "dell_smm" {
temp_path = Some(p.join("temp1_input"));
// Discover all fans
if let Ok(fan_entries) = fs::read_dir(&p) {
for fan_entry in fan_entries.flatten() {
let fan_p = fan_entry.path();
@@ -54,7 +55,6 @@ impl DellXps9380Sal {
}
}
// Discovery for RAPL via powercap
if let Ok(entries) = fs::read_dir("/sys/class/powercap") {
for entry in entries.flatten() {
let p = entry.path();
@@ -72,6 +72,9 @@ impl DellXps9380Sal {
let rapl_base = rapl_base_path.context("Could not find RAPL package-0 path in powercap")?;
let freq_path = PathBuf::from("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq");
let msr_file = fs::OpenOptions::new().read(true).write(true).open("/dev/cpu/0/msr")
.context("Failed to open /dev/cpu/0/msr. Is the 'msr' module loaded?")?;
Ok(Self {
temp_path: temp_path.context("Could not find dell_smm temperature path")?,
@@ -84,68 +87,64 @@ impl DellXps9380Sal {
last_temp: Mutex::new(0.0),
last_fans: Mutex::new(Vec::new()),
suppressed_services: Mutex::new(Vec::new()),
msr_file: Mutex::new(msr_file),
last_energy: Mutex::new((0, Instant::now())),
})
}
/// Reads the 64-bit model-specific register `msr` through the MSR device file
/// held in `self.msr_file` (opened from `/dev/cpu/0/msr` by the constructor).
///
/// The register number is used as the byte offset into the device file and the
/// eight bytes read are decoded as a little-endian `u64`.
///
/// # Errors
/// Fails if the device cannot supply all eight bytes at that offset.
///
/// # Panics
/// Panics if the `msr_file` mutex is poisoned.
fn read_msr(&self, msr: u32) -> Result<u64> {
    use std::os::unix::fs::FileExt;

    let mut buf = [0u8; 8];
    let file = self.msr_file.lock().unwrap();
    // `read_exact_at` rejects short reads; plain `read_at` would silently
    // leave the tail of `buf` zeroed and yield a bogus register value.
    file.read_exact_at(&mut buf, u64::from(msr))
        .with_context(|| format!("Failed to read MSR {:#x}", msr))?;
    Ok(u64::from_le_bytes(buf))
}
/// Writes `val` to the model-specific register `msr` through the MSR device
/// file held in `self.msr_file`.
///
/// The register number is used as the byte offset into the device file and the
/// value is encoded as eight little-endian bytes.
///
/// # Errors
/// Fails if the device does not accept all eight bytes.
///
/// # Panics
/// Panics if the `msr_file` mutex is poisoned.
fn write_msr(&self, msr: u32, val: u64) -> Result<()> {
    use std::os::unix::fs::FileExt;

    let file = self.msr_file.lock().unwrap();
    // `write_all_at` turns a partial write into an error instead of ignoring
    // the byte count that `write_at` returns.
    file.write_all_at(&val.to_le_bytes(), u64::from(msr))
        .with_context(|| format!("Failed to write MSR {:#x}", msr))?;
    Ok(())
}
}
impl PreflightAuditor for DellXps9380Sal {
fn audit(&self) -> Box<dyn Iterator<Item = AuditStep> + '_> {
let mut steps = Vec::new();
// 1. Root check
steps.push(AuditStep {
description: "Root Privileges".to_string(),
outcome: if unsafe { libc::getuid() } == 0 { Ok(()) } else { Err(AuditError::RootRequired) }
});
// 2. Kernel modules check (simplified check via sysfs/proc)
let modules = ["dell_smm_hwmon", "msr", "intel_rapl_msr"];
for mod_name in modules {
let path = format!("/sys/module/{}", mod_name);
steps.push(AuditStep {
description: format!("Kernel Module: {}", mod_name),
outcome: if PathBuf::from(path).exists() { Ok(()) } else {
Err(AuditError::ToolMissing(format!("Module '{}' not loaded. Run 'sudo modprobe {}'", mod_name, mod_name)))
Err(AuditError::ToolMissing(format!("Module '{}' not loaded.", mod_name)))
}
});
}
// 3. Kernel parameters check
let cmdline = fs::read_to_string("/proc/cmdline").unwrap_or_default();
steps.push(AuditStep {
description: "Kernel Param: dell_smm_hwmon.ignore_dmi=1".to_string(),
outcome: if cmdline.contains("dell_smm_hwmon.ignore_dmi=1") { Ok(()) } else {
Err(AuditError::MissingKernelParam("dell_smm_hwmon.ignore_dmi=1".to_string()))
}
});
steps.push(AuditStep {
description: "Kernel Param: dell_smm_hwmon.restricted=0".to_string(),
outcome: if cmdline.contains("dell_smm_hwmon.restricted=0") { Ok(()) } else {
Err(AuditError::MissingKernelParam("dell_smm_hwmon.restricted=0".to_string()))
}
});
steps.push(AuditStep {
description: "Kernel Param: msr.allow_writes=on".to_string(),
outcome: if cmdline.contains("msr.allow_writes=on") { Ok(()) } else {
Err(AuditError::MissingKernelParam("msr.allow_writes=on".to_string()))
}
});
let params = [
("dell_smm_hwmon.ignore_dmi=1", "dell_smm_hwmon.ignore_dmi=1"),
("dell_smm_hwmon.restricted=0", "dell_smm_hwmon.restricted=0"),
("msr.allow_writes=on", "msr.allow_writes=on"),
];
for (label, p) in params {
steps.push(AuditStep {
description: format!("Kernel Param: {}", label),
outcome: if cmdline.contains(p) { Ok(()) } else { Err(AuditError::MissingKernelParam(p.to_string())) }
});
}
// 4. Lockdown check
let lockdown = fs::read_to_string("/sys/kernel/security/lockdown").unwrap_or_default();
steps.push(AuditStep {
description: "Kernel Lockdown Status".to_string(),
outcome: if lockdown.contains("[none]") || lockdown.is_empty() { Ok(()) } else {
Err(AuditError::KernelIncompatible("Kernel is in lockdown mode. Set to 'none' to allow MSR/SMM writes.".to_string()))
}
});
// 5. Check AC power
let ac_status = fs::read_to_string("/sys/class/power_supply/AC/online").unwrap_or_else(|_| "0".to_string());
steps.push(AuditStep {
description: "AC Power Connection".to_string(),
outcome: if ac_status.trim() == "1" { Ok(()) } else {
Err(AuditError::AcPowerMissing("System must be on AC power for benchmarking".to_string()))
Err(AuditError::AcPowerMissing("System must be on AC power".to_string()))
}
});
@@ -154,12 +153,11 @@ impl PreflightAuditor for DellXps9380Sal {
}
impl EnvironmentGuard for DellXps9380Sal {
fn suppress(&mut self) -> Result<()> {
fn suppress(&self) -> Result<()> {
let services = ["tlp", "thermald", "i8kmon"];
let mut suppressed = self.suppressed_services.lock().unwrap();
for s in services {
if Command::new("systemctl").args(["is-active", "--quiet", s]).status()?.success() {
debug!("Suppressing service: {}", s);
Command::new("systemctl").args(["stop", s]).status()?;
suppressed.push(s.to_string());
}
@@ -167,7 +165,7 @@ impl EnvironmentGuard for DellXps9380Sal {
Ok(())
}
fn restore(&mut self) -> Result<()> {
fn restore(&self) -> Result<()> {
let mut suppressed = self.suppressed_services.lock().unwrap();
for s in suppressed.drain(..) {
let _ = Command::new("systemctl").args(["start", &s]).status();
@@ -176,38 +174,31 @@ impl EnvironmentGuard for DellXps9380Sal {
}
}
// Runs `restore` when the SAL value is dropped, so services that `suppress`
// stopped get a `systemctl start` attempt even if the caller never calls
// `restore` itself.
impl Drop for DellXps9380Sal {
fn drop(&mut self) {
// Best effort: `drop` cannot return an error, so the result is discarded.
let _ = self.restore();
}
}
impl SensorBus for DellXps9380Sal {
/// Returns the temperature from the `dell_smm` hwmon sensor in degrees Celsius.
///
/// The sensor file is read at most once per 1000 ms; inside that window the
/// value cached in `last_temp` is returned instead. The `last_poll` timestamp
/// is the same one `get_fan_rpms` updates, so a recent fan poll also causes
/// the cached temperature to be returned.
///
/// # Errors
/// Fails if the sensor file cannot be read or does not parse as a number.
fn get_temp(&self) -> Result<f32> {
    const MIN_POLL_INTERVAL: Duration = Duration::from_millis(1000);

    let mut poll_stamp = self.last_poll.lock().unwrap();
    let now = Instant::now();
    let poll_is_due = now.duration_since(*poll_stamp) >= MIN_POLL_INTERVAL;
    if !poll_is_due {
        let cached = *self.last_temp.lock().unwrap();
        return Ok(cached);
    }

    // The raw reading is divided by 1000 (presumably millidegrees, per the
    // hwmon sysfs convention).
    let raw = fs::read_to_string(&self.temp_path)?;
    let celsius = raw.trim().parse::<f32>()? / 1000.0;

    *self.last_temp.lock().unwrap() = celsius;
    *poll_stamp = now;
    Ok(celsius)
}
fn get_power_w(&self) -> Result<f32> {
if self.pwr_path.to_string_lossy().contains("energy_uj") {
let e1 = fs::read_to_string(&self.pwr_path)?.trim().parse::<u64>()?;
std::thread::sleep(Duration::from_millis(100));
let mut last = self.last_energy.lock().unwrap();
let e2 = fs::read_to_string(&self.pwr_path)?.trim().parse::<u64>()?;
Ok((e2.saturating_sub(e1)) as f32 / 100000.0)
let t2 = Instant::now();
let (e1, t1) = *last;
let delta_e = e2.wrapping_sub(e1);
let delta_t = t2.duration_since(t1).as_secs_f32();
*last = (e2, t2);
if delta_t < 0.01 { return Ok(0.0); }
Ok((delta_e as f32 / 1_000_000.0) / delta_t)
} else {
let s = fs::read_to_string(&self.pwr_path)?;
Ok(s.trim().parse::<f32>()? / 1000000.0)
@@ -217,66 +208,65 @@ impl SensorBus for DellXps9380Sal {
/// Returns the current speed of every discovered fan, in the order of
/// `self.fan_paths`.
///
/// Fan files are read at most once per 1000 ms; inside that window the list
/// cached in `last_fans` is returned. A fan whose file cannot be read or
/// parsed is left out of the result rather than failing the whole call.
///
/// # Panics
/// Panics if the `last_poll` or `last_fans` mutex is poisoned.
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
    let mut last_poll = self.last_poll.lock().unwrap();
    let now = Instant::now();
    if now.duration_since(*last_poll) < Duration::from_millis(1000) {
        return Ok(self.last_fans.lock().unwrap().clone());
    }

    // Each fan file is read and parsed exactly once, so each fan contributes
    // at most one entry.
    let fans: Vec<u32> = self
        .fan_paths
        .iter()
        .filter_map(|path| fs::read_to_string(path).ok())
        .filter_map(|raw| raw.trim().parse::<u32>().ok())
        .collect();

    *self.last_fans.lock().unwrap() = fans.clone();
    *last_poll = now;
    Ok(fans)
}
/// Returns the current CPU frequency in MHz.
///
/// Reads `self.freq_path` (set by the constructor to cpu0's
/// `scaling_cur_freq`) and divides the value by 1000; cpufreq reports that
/// file in kHz.
///
/// # Errors
/// Fails if the file cannot be read or does not parse as a number.
fn get_freq_mhz(&self) -> Result<f32> {
    let raw = fs::read_to_string(&self.freq_path)?;
    Ok(raw.trim().parse::<f32>()? / 1000.0)
}
}
impl ActuatorBus for DellXps9380Sal {
fn set_fan_mode(&self, mode: &str) -> Result<()> {
match mode {
"max" | "Manual" => {
Command::new("dell-bios-fan-control").arg("0").status()?;
}
"auto" | "Auto" => {
Command::new("dell-bios-fan-control").arg("1").status()?;
}
_ => {
debug!("Unknown fan mode requested: {}", mode);
}
"max" | "Manual" => { Command::new("dell-bios-fan-control").arg("0").status()?; }
"auto" | "Auto" => { Command::new("dell-bios-fan-control").arg("1").status()?; }
_ => { debug!("Unknown fan mode: {}", mode); }
}
Ok(())
}
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
let uw = (watts * 1_000_000.0) as u64;
fs::write(&self.pl1_path, uw.to_string())?;
fs::write(&self.pl1_path, ((watts * 1_000_000.0) as u64).to_string())?;
Ok(())
}
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
let uw = (watts * 1_000_000.0) as u64;
fs::write(&self.pl2_path, uw.to_string())?;
fs::write(&self.pl2_path, ((watts * 1_000_000.0) as u64).to_string())?;
Ok(())
}
}
impl HardwareWatchdog for DellXps9380Sal {
fn check_emergency(&self) -> Result<bool> {
// Check for thermal throttling or BD PROCHOT
// Simplified for now
Ok(false)
fn get_safety_status(&self) -> Result<SafetyStatus> {
let temp = self.get_temp()?;
if temp > 98.0 {
return Ok(SafetyStatus::EmergencyAbort(format!("Thermal Runaway: {:.1}°C", temp)));
}
if let Ok(msr_val) = self.read_msr(0x1FC) {
if (msr_val & 0x1) != 0 && temp < 85.0 {
let _ = self.write_msr(0x1FC, msr_val & !0x1);
return Ok(SafetyStatus::Warning("BD PROCHOT Latch Cleared".to_string()));
}
}
Ok(SafetyStatus::Nominal)
}
}
impl Drop for DellXps9380Sal {
fn drop(&mut self) {
let _ = self.restore();
}
}