implemented safety features to prevent system damage

This commit is contained in:
2026-02-27 02:47:51 +01:00
parent 4c4026a600
commit f0925a3ab3
9 changed files with 373 additions and 83 deletions

View File

@@ -1,10 +1,10 @@
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError, AuditStep, SafetyStatus, EnvironmentCtx};
use crate::sal::safety::TdpLimitMicroWatts;
use anyhow::{Result, Context, anyhow};
use std::fs;
use std::path::{PathBuf};
use std::time::{Duration, Instant};
use std::sync::Mutex;
use tracing::{debug};
use crate::sal::heuristic::discovery::SystemFactSheet;
pub struct DellXps9380Sal {
@@ -151,7 +151,6 @@ impl EnvironmentGuard for DellXps9380Sal {
let mut suppressed = self.suppressed_services.lock().unwrap();
for s in services {
if self.ctx.runner.run("systemctl", &["is-active", "--quiet", s]).is_ok() {
debug!("Suppressing service: {}", s);
let _ = self.ctx.runner.run("systemctl", &["stop", s]);
suppressed.push(s.to_string());
}
@@ -251,18 +250,18 @@ impl ActuatorBus for DellXps9380Sal {
match mode {
"max" | "Manual" => { self.ctx.runner.run(&tool_str, &["0"])?; }
"auto" | "Auto" => { self.ctx.runner.run(&tool_str, &["1"])?; }
_ => { debug!("Unknown fan mode: {}", mode); }
_ => {}
}
Ok(())
}
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
fs::write(&self.pl1_path, ((watts * 1_000_000.0) as u64).to_string())?;
fn set_sustained_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
fs::write(&self.pl1_path, limit.as_u64().to_string())?;
Ok(())
}
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
fs::write(&self.pl2_path, ((watts * 1_000_000.0) as u64).to_string())?;
fn set_burst_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
fs::write(&self.pl2_path, limit.as_u64().to_string())?;
Ok(())
}
}

View File

@@ -6,6 +6,7 @@ use std::sync::Mutex;
use tracing::{debug};
use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError, SafetyStatus, EnvironmentCtx};
use crate::sal::safety::TdpLimitMicroWatts;
use crate::sal::heuristic::discovery::SystemFactSheet;
use crate::sal::heuristic::schema::HardwareDb;
@@ -15,7 +16,7 @@ pub struct GenericLinuxSal {
db: HardwareDb,
suppressed_services: Mutex<Vec<String>>,
last_valid_temp: Mutex<(f32, Instant)>,
current_pl1: Mutex<f32>,
current_pl1: Mutex<u64>,
last_energy: Mutex<(u64, Instant)>,
// --- Original State for Restoration ---
@@ -35,7 +36,7 @@ impl GenericLinuxSal {
db,
suppressed_services: Mutex::new(Vec::new()),
last_valid_temp: Mutex::new((0.0, Instant::now())),
current_pl1: Mutex::new(15.0),
current_pl1: Mutex::new(15_000_000),
last_energy: Mutex::new((initial_energy, Instant::now())),
fact_sheet: facts,
ctx,
@@ -151,16 +152,16 @@ impl ActuatorBus for GenericLinuxSal {
} else { Ok(()) }
}
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
fn set_sustained_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
let rapl_path = self.fact_sheet.rapl_paths.first().ok_or_else(|| anyhow!("No PL1 path"))?;
fs::write(rapl_path.join("constraint_0_power_limit_uw"), ((watts * 1_000_000.0) as u64).to_string())?;
*self.current_pl1.lock().unwrap() = watts;
fs::write(rapl_path.join("constraint_0_power_limit_uw"), limit.as_u64().to_string())?;
*self.current_pl1.lock().unwrap() = limit.as_u64();
Ok(())
}
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
fn set_burst_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
let rapl_path = self.fact_sheet.rapl_paths.first().ok_or_else(|| anyhow!("No PL2 path"))?;
fs::write(rapl_path.join("constraint_1_power_limit_uw"), ((watts * 1_000_000.0) as u64).to_string())?;
fs::write(rapl_path.join("constraint_1_power_limit_uw"), limit.as_u64().to_string())?;
Ok(())
}
}

View File

@@ -1,4 +1,5 @@
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditStep, SafetyStatus};
use crate::sal::safety::TdpLimitMicroWatts;
use anyhow::Result;
pub struct MockSal {
@@ -59,10 +60,10 @@ impl ActuatorBus for MockSal {
fn set_fan_mode(&self, _mode: &str) -> Result<()> {
Ok(())
}
fn set_sustained_power_limit(&self, _watts: f32) -> Result<()> {
fn set_sustained_power_limit(&self, _limit: TdpLimitMicroWatts) -> Result<()> {
Ok(())
}
fn set_burst_power_limit(&self, _watts: f32) -> Result<()> {
fn set_burst_power_limit(&self, _limit: TdpLimitMicroWatts) -> Result<()> {
Ok(())
}
}

View File

@@ -3,3 +3,4 @@ pub mod mock;
pub mod dell_xps_9380;
pub mod generic_linux;
pub mod heuristic;
pub mod safety;

175
src/sal/safety.rs Normal file
View File

@@ -0,0 +1,175 @@
//! Universal Safeguard Architecture (USA) and Hardware Primitives.
//!
//! This module provides the `HardwareStateGuard` for guaranteed state
//! restoration and type-safe primitives to prevent dangerous hardware states.
use anyhow::{Result, bail, Context};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use tracing::{info, warn, error};
// --- Type-Driven Safety Primitives ---
/// A TDP (power) limit expressed in microwatts.
///
/// The inner value is private; [`TdpLimitMicroWatts::new`] only produces
/// values inside `MIN_SAFE_UW..=MAX_SAFE_UW`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct TdpLimitMicroWatts(u64);

impl TdpLimitMicroWatts {
    /// Lower bound accepted by [`TdpLimitMicroWatts::new`] (5 Watts).
    /// Part of the strict bounds meant to prevent hardware bricking.
    pub const MIN_SAFE_UW: u64 = 5_000_000;
    /// Upper bound accepted by [`TdpLimitMicroWatts::new`] (80 Watts).
    /// Part of the strict bounds meant to prevent hardware bricking.
    pub const MAX_SAFE_UW: u64 = 80_000_000;

    /// Validates `microwatts` against the safety bounds and wraps it.
    ///
    /// # Errors
    /// Returns an `anyhow` error whose message starts with
    /// `HardwareSafetyError:` when the value is below `MIN_SAFE_UW` or
    /// above `MAX_SAFE_UW`.
    pub fn new(microwatts: u64) -> Result<Self> {
        match microwatts {
            uw if uw < Self::MIN_SAFE_UW => {
                bail!("HardwareSafetyError: Requested TDP {} uW is below the absolute safety floor of {} uW.", uw, Self::MIN_SAFE_UW)
            }
            uw if uw > Self::MAX_SAFE_UW => {
                bail!("HardwareSafetyError: Requested TDP {} uW exceeds absolute maximum of {} uW.", uw, Self::MAX_SAFE_UW)
            }
            uw => Ok(Self(uw)),
        }
    }

    /// Returns the wrapped limit in microwatts.
    pub fn as_u64(&self) -> u64 {
        let Self(raw_uw) = *self;
        raw_uw
    }

    /// Returns the wrapped limit converted to Watts as an `f32`.
    pub fn as_watts(&self) -> f32 {
        let micro = self.0 as f32;
        micro / 1_000_000.0
    }
}
/// A fan speed expressed as a percentage in the range 0-100.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FanSpeedPercentage(u8);

impl FanSpeedPercentage {
    /// Validates `percent` and wraps it.
    ///
    /// # Errors
    /// Returns an `anyhow` error whose message starts with
    /// `HardwareSafetyError:` when `percent` is greater than 100.
    pub fn new(percent: u8) -> Result<Self> {
        match percent {
            0..=100 => Ok(Self(percent)),
            _ => bail!("HardwareSafetyError: Fan speed percentage {} exceeds 100%.", percent),
        }
    }

    /// Returns the wrapped percentage.
    pub fn as_u8(&self) -> u8 {
        let Self(value) = *self;
        value
    }
}
/// Represents a safe Thermal Threshold in Celsius.
///
/// The inner value is private; [`ThermalThresholdCelsius::new`] only
/// produces finite values inside `0.0..=MAX_SAFE_C`.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct ThermalThresholdCelsius(f32);

impl ThermalThresholdCelsius {
    /// Highest threshold accepted by [`ThermalThresholdCelsius::new`].
    pub const MAX_SAFE_C: f32 = 98.0;

    /// Constructs a new ThermalThresholdCelsius, enforcing safety bounds.
    ///
    /// # Errors
    /// Returns an `anyhow` error whose message starts with
    /// `HardwareSafetyError:` when `celsius` is below `0.0`, above
    /// `MAX_SAFE_C`, or NaN.
    pub fn new(celsius: f32) -> Result<Self> {
        // A pair of `<` / `>` comparisons is false for NaN on both sides and
        // would let NaN through as a "validated" threshold. The range check
        // is false for NaN, so negating it rejects NaN together with every
        // out-of-range value (including the infinities).
        if !(0.0..=Self::MAX_SAFE_C).contains(&celsius) {
            bail!("HardwareSafetyError: Thermal threshold {}°C is outside safe bounds (0.0 - {}).", celsius, Self::MAX_SAFE_C);
        }
        Ok(Self(celsius))
    }

    /// Returns the wrapped threshold in degrees Celsius.
    pub fn as_f32(&self) -> f32 {
        self.0
    }
}
// --- The HardwareStateGuard (RAII Restorer) ---
/// Represents a deep snapshot of the system state before benchmarking.
///
/// Filled in by `HardwareStateGuard::acquire` and read back by
/// `HardwareStateGuard::release` when the recorded state is restored.
#[derive(Debug, Default, Clone)]
pub struct SystemSnapshot {
/// Maps file paths to their raw string content (e.g., RAPL limits).
/// `HardwareStateGuard::acquire` stores the content with surrounding
/// whitespace trimmed.
pub sysfs_nodes: HashMap<PathBuf, String>,
/// Names of services that `systemctl is-active` reported as active when the
/// snapshot was taken. `HardwareStateGuard::release` runs `systemctl start`
/// for each of them; `acquire` itself does not stop any service.
pub suppressed_services: Vec<String>,
}
/// The Universal Safeguard wrapper.
///
/// Implements the "Ironclad Restorer" pattern via the [Drop] trait: while the
/// guard is armed, dropping it writes the snapshotted state back.
///
/// The guard must be bound to a variable that lives for the whole run. An
/// unbound guard is dropped at the end of the statement that created it and
/// restores the snapshot immediately, hence the `#[must_use]`.
#[must_use = "dropping the guard restores the snapshot immediately; bind it to a variable for the duration of the run"]
pub struct HardwareStateGuard {
    /// State captured when the guard was acquired.
    snapshot: SystemSnapshot,
    /// `true` until `release` has run once; prevents a second restoration.
    is_armed: bool,
}
impl HardwareStateGuard {
/// Arms the safeguard by taking a snapshot of the target files and services.
///
/// # Errors
/// Returns an error if any critical sysfs node cannot be read.
pub fn acquire(target_files: &[PathBuf], target_services: &[String]) -> Result<Self> {
let mut snapshot = SystemSnapshot::default();
info!("USA: Arming safeguard and snapshotting system state...");
for path in target_files {
if path.exists() {
let content = fs::read_to_string(path)
.with_context(|| format!("Failed to snapshot {:?}", path))?;
snapshot.sysfs_nodes.insert(path.clone(), content.trim().to_string());
} else {
warn!("USA: Target node {:?} does not exist, skipping snapshot.", path);
}
}
for service in target_services {
let status = std::process::Command::new("systemctl")
.args(["is-active", "--quiet", service])
.status();
if let Ok(s) = status {
if s.success() {
snapshot.suppressed_services.push(service.clone());
}
}
}
Ok(Self {
snapshot,
is_armed: true,
})
}
/// Explicit manual restoration (can be called upon successful exit).
pub fn release(&mut self) -> Result<()> {
if !self.is_armed {
return Ok(());
}
info!("USA: Initiating Ironclad Restoration...");
// 1. Restore Power/Sysfs states
for (path, content) in &self.snapshot.sysfs_nodes {
if let Err(e) = fs::write(path, content) {
error!("USA RESTORATION FAILURE: Could not revert {:?}: {}", path, e);
}
}
// 2. Restart Services
for service in &self.snapshot.suppressed_services {
let _ = std::process::Command::new("systemctl")
.args(["start", service])
.status();
}
self.is_armed = false;
Ok(())
}
}
impl Drop for HardwareStateGuard {
    /// Restores the snapshot if the guard is still armed when it goes out of
    /// scope; does nothing once `release` has already run.
    fn drop(&mut self) {
        if !self.is_armed {
            return;
        }
        warn!("USA: HardwareStateGuard triggered via Drop (panic/unexpected exit). Reverting system state...");
        // Nothing can be propagated from `drop`, so the result is discarded.
        let _ = self.release();
    }
}

View File

@@ -157,6 +157,8 @@ impl<T: SensorBus + ?Sized> SensorBus for Arc<T> {
}
}
use crate::sal::safety::TdpLimitMicroWatts;
/// Provides a write-only interface for hardware actuators.
pub trait ActuatorBus: Send + Sync {
/// Sets the fan control mode (e.g., "auto" or "max").
@@ -165,28 +167,28 @@ pub trait ActuatorBus: Send + Sync {
/// Returns an error if the fan control command or `sysfs` write fails.
fn set_fan_mode(&self, mode: &str) -> Result<()>;
/// Sets the sustained power limit (PL1) in Watts.
/// Sets the sustained power limit (PL1) using a validated wrapper.
///
/// # Errors
/// Returns an error if the RAPL `sysfs` node cannot be written to.
fn set_sustained_power_limit(&self, watts: f32) -> Result<()>;
fn set_sustained_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()>;
/// Sets the burst power limit (PL2) in Watts.
/// Sets the burst power limit (PL2) using a validated wrapper.
///
/// # Errors
/// Returns an error if the RAPL `sysfs` node cannot be written to.
fn set_burst_power_limit(&self, watts: f32) -> Result<()>;
fn set_burst_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()>;
}
impl<T: ActuatorBus + ?Sized> ActuatorBus for Arc<T> {
fn set_fan_mode(&self, mode: &str) -> Result<()> {
(**self).set_fan_mode(mode)
}
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
(**self).set_sustained_power_limit(watts)
fn set_sustained_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
(**self).set_sustained_power_limit(limit)
}
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
(**self).set_burst_power_limit(watts)
fn set_burst_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
(**self).set_burst_power_limit(limit)
}
}