252 lines
9.7 KiB
Rust
252 lines
9.7 KiB
Rust
use anyhow::{Result, anyhow};
|
|
use std::path::{Path};
|
|
use std::fs;
|
|
use std::time::{Duration, Instant};
|
|
use std::sync::Mutex;
|
|
|
|
use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError, SafetyStatus, EnvironmentCtx};
|
|
use crate::sal::safety::{TdpLimitMicroWatts, FanSpeedPercentage};
|
|
use crate::sal::heuristic::discovery::SystemFactSheet;
|
|
use crate::sal::heuristic::schema::HardwareDb;
|
|
|
|
pub struct GenericLinuxSal {
|
|
ctx: EnvironmentCtx,
|
|
fact_sheet: SystemFactSheet,
|
|
db: HardwareDb,
|
|
suppressed_services: Mutex<Vec<String>>,
|
|
last_valid_temp: Mutex<(f32, Instant)>,
|
|
current_pl1: Mutex<u64>,
|
|
last_energy: Mutex<(u64, Instant)>,
|
|
|
|
// --- Original State for Restoration ---
|
|
original_pl1: Mutex<Option<u64>>,
|
|
original_pl2: Mutex<Option<u64>>,
|
|
}
|
|
|
|
impl GenericLinuxSal {
|
|
pub fn new(ctx: EnvironmentCtx, facts: SystemFactSheet, db: HardwareDb) -> Self {
|
|
let initial_energy = if let Some(pwr_base) = facts.rapl_paths.first() {
|
|
fs::read_to_string(pwr_base.join("energy_uj")).unwrap_or_default().trim().parse().unwrap_or(0)
|
|
} else {
|
|
0
|
|
};
|
|
|
|
Self {
|
|
db,
|
|
suppressed_services: Mutex::new(Vec::new()),
|
|
last_valid_temp: Mutex::new((0.0, Instant::now())),
|
|
current_pl1: Mutex::new(15_000_000),
|
|
last_energy: Mutex::new((initial_energy, Instant::now())),
|
|
fact_sheet: facts,
|
|
ctx,
|
|
original_pl1: Mutex::new(None),
|
|
original_pl2: Mutex::new(None),
|
|
}
|
|
}
|
|
|
|
fn is_dell(&self) -> bool {
|
|
self.fact_sheet.vendor.to_lowercase().contains("dell")
|
|
}
|
|
|
|
fn read_sysfs(&self, path: &Path) -> Result<String> {
|
|
fs::read_to_string(path).map(|s| s.trim().to_string()).map_err(|e| anyhow!(e))
|
|
}
|
|
}
|
|
|
|
impl PreflightAuditor for GenericLinuxSal {
|
|
fn audit(&self) -> Box<dyn Iterator<Item = AuditStep> + '_> {
|
|
let mut steps = Vec::new();
|
|
for check in &self.db.preflight_checks {
|
|
let status = self.ctx.runner.run("sh", &["-c", &check.check_cmd]);
|
|
steps.push(AuditStep {
|
|
description: check.name.clone(),
|
|
outcome: match status {
|
|
Ok(_) => Ok(()),
|
|
_ => Err(AuditError::KernelIncompatible(check.fail_help.clone())),
|
|
}
|
|
});
|
|
}
|
|
for conflict_id in &self.fact_sheet.active_conflicts {
|
|
if let Some(conflict) = self.db.conflicts.iter().find(|c| &c.id == conflict_id) {
|
|
if conflict.severity == "Critical" {
|
|
steps.push(AuditStep {
|
|
description: format!("Conflict: {}", conflict.id),
|
|
outcome: Err(AuditError::ToolMissing(conflict.help_text.clone())),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
Box::new(steps.into_iter())
|
|
}
|
|
}
|
|
|
|
impl SensorBus for GenericLinuxSal {
|
|
fn get_temp(&self) -> Result<f32> {
|
|
let path = self.fact_sheet.temp_path.as_ref()
|
|
.ok_or_else(|| anyhow!("No temperature sensor path found"))?;
|
|
let content = self.read_sysfs(path)?;
|
|
let temp = content.parse::<f32>()? / 1000.0;
|
|
let mut last = self.last_valid_temp.lock().unwrap();
|
|
if (temp - last.0).abs() > 0.01 { *last = (temp, Instant::now()); }
|
|
Ok(temp)
|
|
}
|
|
|
|
fn get_power_w(&self) -> Result<f32> {
|
|
let rapl_path = self.fact_sheet.rapl_paths.first()
|
|
.ok_or_else(|| anyhow!("No RAPL path found"))?;
|
|
let energy_path = rapl_path.join("energy_uj");
|
|
let mut last = self.last_energy.lock().unwrap();
|
|
let e2: u64 = self.read_sysfs(&energy_path)?.parse()?;
|
|
let t2 = Instant::now();
|
|
let (e1, t1) = *last;
|
|
let delta_e = e2.wrapping_sub(e1);
|
|
let delta_t = t2.duration_since(t1).as_secs_f32();
|
|
*last = (e2, t2);
|
|
if delta_t < 0.05 { return Ok(0.0); }
|
|
Ok((delta_e as f32 / 1_000_000.0) / delta_t)
|
|
}
|
|
|
|
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
|
|
let mut rpms = Vec::new();
|
|
for path in &self.fact_sheet.fan_paths {
|
|
if let Ok(content) = self.read_sysfs(path) {
|
|
if let Ok(rpm) = content.parse() { rpms.push(rpm); }
|
|
}
|
|
}
|
|
Ok(rpms)
|
|
}
|
|
|
|
fn get_freq_mhz(&self) -> Result<f32> {
|
|
let path = self.ctx.sysfs_base.join("sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq");
|
|
if path.exists() {
|
|
Ok(self.read_sysfs(&path)?.parse::<f32>()? / 1000.0)
|
|
} else {
|
|
let cpuinfo_path = self.ctx.sysfs_base.join("proc/cpuinfo");
|
|
let cpuinfo = fs::read_to_string(cpuinfo_path)?;
|
|
for line in cpuinfo.lines() {
|
|
if line.starts_with("cpu MHz") {
|
|
if let Some((_, mhz)) = line.split_once(':') {
|
|
return Ok(mhz.trim().parse()?);
|
|
}
|
|
}
|
|
}
|
|
Err(anyhow!("Could not determine CPU frequency"))
|
|
}
|
|
}
|
|
|
|
fn get_throttling_status(&self) -> Result<bool> {
|
|
// Fallback: check if any cooling device is active (cur_state > 0)
|
|
let cooling_base = self.ctx.sysfs_base.join("sys/class/thermal");
|
|
if let Ok(entries) = fs::read_dir(cooling_base) {
|
|
for entry in entries.flatten() {
|
|
if entry.file_name().to_string_lossy().starts_with("cooling_device") {
|
|
if let Ok(state) = fs::read_to_string(entry.path().join("cur_state")) {
|
|
if state.trim().parse::<u32>().unwrap_or(0) > 0 {
|
|
return Ok(true);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Ok(false)
|
|
}
|
|
}
|
|
|
|
impl ActuatorBus for GenericLinuxSal {
|
|
fn set_fan_mode(&self, mode: &str) -> Result<()> {
|
|
if self.is_dell() {
|
|
let cmd = match mode {
|
|
"manual" | "max" => self.db.ecosystems.get("dell").and_then(|e| e.fan_manual_mode_cmd.as_ref()),
|
|
"auto" => self.db.ecosystems.get("dell").and_then(|e| e.fan_auto_mode_cmd.as_ref()),
|
|
_ => return Err(anyhow!("Unsupported fan mode: {}", mode)),
|
|
};
|
|
if let Some(cmd_str) = cmd {
|
|
let parts: Vec<&str> = cmd_str.split_whitespace().collect();
|
|
self.ctx.runner.run(parts[0], &parts[1..])?;
|
|
Ok(())
|
|
} else { Err(anyhow!("Dell fan command missing")) }
|
|
} else { Ok(()) }
|
|
}
|
|
|
|
fn set_fan_speed(&self, _speed: FanSpeedPercentage) -> Result<()> {
|
|
Ok(())
|
|
}
|
|
|
|
fn set_sustained_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
|
|
let rapl_path = self.fact_sheet.rapl_paths.first().ok_or_else(|| anyhow!("No PL1 path"))?;
|
|
fs::write(rapl_path.join("constraint_0_power_limit_uw"), limit.as_u64().to_string())?;
|
|
*self.current_pl1.lock().unwrap() = limit.as_u64();
|
|
Ok(())
|
|
}
|
|
|
|
fn set_burst_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
|
|
let rapl_path = self.fact_sheet.rapl_paths.first().ok_or_else(|| anyhow!("No PL2 path"))?;
|
|
fs::write(rapl_path.join("constraint_1_power_limit_uw"), limit.as_u64().to_string())?;
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl EnvironmentGuard for GenericLinuxSal {
|
|
fn suppress(&self) -> Result<()> {
|
|
// Snapshot Power Limits
|
|
if let Some(rapl_path) = self.fact_sheet.rapl_paths.first() {
|
|
if let Ok(pl1) = fs::read_to_string(rapl_path.join("constraint_0_power_limit_uw")) {
|
|
*self.original_pl1.lock().unwrap() = pl1.trim().parse().ok();
|
|
}
|
|
if let Ok(pl2) = fs::read_to_string(rapl_path.join("constraint_1_power_limit_uw")) {
|
|
*self.original_pl2.lock().unwrap() = pl2.trim().parse().ok();
|
|
}
|
|
}
|
|
|
|
let mut suppressed = self.suppressed_services.lock().unwrap();
|
|
for conflict_id in &self.fact_sheet.active_conflicts {
|
|
if let Some(conflict) = self.db.conflicts.iter().find(|c| &c.id == conflict_id) {
|
|
for service in &conflict.services {
|
|
if self.ctx.runner.run("systemctl", &["is-active", "--quiet", service]).is_ok() {
|
|
let _ = self.ctx.runner.run("systemctl", &["stop", service]);
|
|
suppressed.push(service.clone());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn restore(&self) -> Result<()> {
|
|
// Restore Power Limits
|
|
if let Some(rapl_path) = self.fact_sheet.rapl_paths.first() {
|
|
if let Some(pl1) = *self.original_pl1.lock().unwrap() {
|
|
let _ = fs::write(rapl_path.join("constraint_0_power_limit_uw"), pl1.to_string());
|
|
}
|
|
if let Some(pl2) = *self.original_pl2.lock().unwrap() {
|
|
let _ = fs::write(rapl_path.join("constraint_1_power_limit_uw"), pl2.to_string());
|
|
}
|
|
}
|
|
|
|
let mut suppressed = self.suppressed_services.lock().unwrap();
|
|
for service in suppressed.drain(..) {
|
|
let _ = self.ctx.runner.run("systemctl", &["start", &service]);
|
|
}
|
|
if self.is_dell() { let _ = self.set_fan_mode("auto"); }
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl HardwareWatchdog for GenericLinuxSal {
|
|
fn get_safety_status(&self) -> Result<SafetyStatus> {
|
|
let temp = self.get_temp()?;
|
|
if temp > 100.0 {
|
|
return Ok(SafetyStatus::EmergencyAbort(format!("Thermal runaway: {:.1}°C", temp)));
|
|
}
|
|
let last = self.last_valid_temp.lock().unwrap();
|
|
if last.1.elapsed() > Duration::from_secs(5) {
|
|
return Ok(SafetyStatus::EmergencyAbort("Temperature sensor stalled".to_string()));
|
|
}
|
|
Ok(SafetyStatus::Nominal)
|
|
}
|
|
}
|
|
|
|
impl Drop for GenericLinuxSal {
|
|
fn drop(&mut self) { let _ = self.restore(); }
|
|
}
|