implemented safety features to prevent system damage
This commit is contained in:
@@ -1,10 +1,10 @@
|
||||
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError, AuditStep, SafetyStatus, EnvironmentCtx};
|
||||
use crate::sal::safety::TdpLimitMicroWatts;
|
||||
use anyhow::{Result, Context, anyhow};
|
||||
use std::fs;
|
||||
use std::path::{PathBuf};
|
||||
use std::time::{Duration, Instant};
|
||||
use std::sync::Mutex;
|
||||
use tracing::{debug};
|
||||
use crate::sal::heuristic::discovery::SystemFactSheet;
|
||||
|
||||
pub struct DellXps9380Sal {
|
||||
@@ -151,7 +151,6 @@ impl EnvironmentGuard for DellXps9380Sal {
|
||||
let mut suppressed = self.suppressed_services.lock().unwrap();
|
||||
for s in services {
|
||||
if self.ctx.runner.run("systemctl", &["is-active", "--quiet", s]).is_ok() {
|
||||
debug!("Suppressing service: {}", s);
|
||||
let _ = self.ctx.runner.run("systemctl", &["stop", s]);
|
||||
suppressed.push(s.to_string());
|
||||
}
|
||||
@@ -251,18 +250,18 @@ impl ActuatorBus for DellXps9380Sal {
|
||||
match mode {
|
||||
"max" | "Manual" => { self.ctx.runner.run(&tool_str, &["0"])?; }
|
||||
"auto" | "Auto" => { self.ctx.runner.run(&tool_str, &["1"])?; }
|
||||
_ => { debug!("Unknown fan mode: {}", mode); }
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
|
||||
fs::write(&self.pl1_path, ((watts * 1_000_000.0) as u64).to_string())?;
|
||||
fn set_sustained_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
|
||||
fs::write(&self.pl1_path, limit.as_u64().to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
|
||||
fs::write(&self.pl2_path, ((watts * 1_000_000.0) as u64).to_string())?;
|
||||
fn set_burst_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
|
||||
fs::write(&self.pl2_path, limit.as_u64().to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use std::sync::Mutex;
|
||||
use tracing::{debug};
|
||||
|
||||
use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError, SafetyStatus, EnvironmentCtx};
|
||||
use crate::sal::safety::TdpLimitMicroWatts;
|
||||
use crate::sal::heuristic::discovery::SystemFactSheet;
|
||||
use crate::sal::heuristic::schema::HardwareDb;
|
||||
|
||||
@@ -15,7 +16,7 @@ pub struct GenericLinuxSal {
|
||||
db: HardwareDb,
|
||||
suppressed_services: Mutex<Vec<String>>,
|
||||
last_valid_temp: Mutex<(f32, Instant)>,
|
||||
current_pl1: Mutex<f32>,
|
||||
current_pl1: Mutex<u64>,
|
||||
last_energy: Mutex<(u64, Instant)>,
|
||||
|
||||
// --- Original State for Restoration ---
|
||||
@@ -35,7 +36,7 @@ impl GenericLinuxSal {
|
||||
db,
|
||||
suppressed_services: Mutex::new(Vec::new()),
|
||||
last_valid_temp: Mutex::new((0.0, Instant::now())),
|
||||
current_pl1: Mutex::new(15.0),
|
||||
current_pl1: Mutex::new(15_000_000),
|
||||
last_energy: Mutex::new((initial_energy, Instant::now())),
|
||||
fact_sheet: facts,
|
||||
ctx,
|
||||
@@ -151,16 +152,16 @@ impl ActuatorBus for GenericLinuxSal {
|
||||
} else { Ok(()) }
|
||||
}
|
||||
|
||||
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
|
||||
fn set_sustained_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
|
||||
let rapl_path = self.fact_sheet.rapl_paths.first().ok_or_else(|| anyhow!("No PL1 path"))?;
|
||||
fs::write(rapl_path.join("constraint_0_power_limit_uw"), ((watts * 1_000_000.0) as u64).to_string())?;
|
||||
*self.current_pl1.lock().unwrap() = watts;
|
||||
fs::write(rapl_path.join("constraint_0_power_limit_uw"), limit.as_u64().to_string())?;
|
||||
*self.current_pl1.lock().unwrap() = limit.as_u64();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
|
||||
fn set_burst_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
|
||||
let rapl_path = self.fact_sheet.rapl_paths.first().ok_or_else(|| anyhow!("No PL2 path"))?;
|
||||
fs::write(rapl_path.join("constraint_1_power_limit_uw"), ((watts * 1_000_000.0) as u64).to_string())?;
|
||||
fs::write(rapl_path.join("constraint_1_power_limit_uw"), limit.as_u64().to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditStep, SafetyStatus};
|
||||
use crate::sal::safety::TdpLimitMicroWatts;
|
||||
use anyhow::Result;
|
||||
|
||||
pub struct MockSal {
|
||||
@@ -59,10 +60,10 @@ impl ActuatorBus for MockSal {
|
||||
fn set_fan_mode(&self, _mode: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
fn set_sustained_power_limit(&self, _watts: f32) -> Result<()> {
|
||||
fn set_sustained_power_limit(&self, _limit: TdpLimitMicroWatts) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
fn set_burst_power_limit(&self, _watts: f32) -> Result<()> {
|
||||
fn set_burst_power_limit(&self, _limit: TdpLimitMicroWatts) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,3 +3,4 @@ pub mod mock;
|
||||
pub mod dell_xps_9380;
|
||||
pub mod generic_linux;
|
||||
pub mod heuristic;
|
||||
pub mod safety;
|
||||
|
||||
175
src/sal/safety.rs
Normal file
175
src/sal/safety.rs
Normal file
@@ -0,0 +1,175 @@
|
||||
//! Universal Safeguard Architecture (USA) and Hardware Primitives.
|
||||
//!
|
||||
//! This module provides the `HardwareStateGuard` for guaranteed state
|
||||
//! restoration and type-safe primitives to prevent dangerous hardware states.
|
||||
|
||||
use anyhow::{Result, bail, Context};
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use tracing::{info, warn, error};
|
||||
|
||||
// --- Type-Driven Safety Primitives ---
|
||||
|
||||
/// Represents a safe TDP limit in microwatts.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct TdpLimitMicroWatts(u64);
|
||||
|
||||
impl TdpLimitMicroWatts {
|
||||
/// Strict bounds to prevent hardware bricking.
|
||||
pub const MIN_SAFE_UW: u64 = 5_000_000; // 5 Watts
|
||||
pub const MAX_SAFE_UW: u64 = 80_000_000; // 80 Watts
|
||||
|
||||
/// Constructs a new TdpLimitMicroWatts, enforcing safety bounds.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns a `HardwareSafetyError` (via `anyhow::bail`) if the value is out of bounds.
|
||||
pub fn new(microwatts: u64) -> Result<Self> {
|
||||
if microwatts < Self::MIN_SAFE_UW {
|
||||
bail!("HardwareSafetyError: Requested TDP {} uW is below the absolute safety floor of {} uW.", microwatts, Self::MIN_SAFE_UW);
|
||||
}
|
||||
if microwatts > Self::MAX_SAFE_UW {
|
||||
bail!("HardwareSafetyError: Requested TDP {} uW exceeds absolute maximum of {} uW.", microwatts, Self::MAX_SAFE_UW);
|
||||
}
|
||||
Ok(Self(microwatts))
|
||||
}
|
||||
|
||||
pub fn as_u64(&self) -> u64 {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn as_watts(&self) -> f32 {
|
||||
self.0 as f32 / 1_000_000.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a safe Fan Speed in Percentage (0-100).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct FanSpeedPercentage(u8);
|
||||
|
||||
impl FanSpeedPercentage {
|
||||
/// Constructs a new FanSpeedPercentage, enforcing safety bounds.
|
||||
pub fn new(percent: u8) -> Result<Self> {
|
||||
if percent > 100 {
|
||||
bail!("HardwareSafetyError: Fan speed percentage {} exceeds 100%.", percent);
|
||||
}
|
||||
Ok(Self(percent))
|
||||
}
|
||||
|
||||
pub fn as_u8(&self) -> u8 {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a safe Thermal Threshold in Celsius.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
|
||||
pub struct ThermalThresholdCelsius(f32);
|
||||
|
||||
impl ThermalThresholdCelsius {
|
||||
pub const MAX_SAFE_C: f32 = 98.0;
|
||||
|
||||
/// Constructs a new ThermalThresholdCelsius, enforcing safety bounds.
|
||||
pub fn new(celsius: f32) -> Result<Self> {
|
||||
if celsius < 0.0 || celsius > Self::MAX_SAFE_C {
|
||||
bail!("HardwareSafetyError: Thermal threshold {}°C is outside safe bounds (0.0 - {}).", celsius, Self::MAX_SAFE_C);
|
||||
}
|
||||
Ok(Self(celsius))
|
||||
}
|
||||
|
||||
pub fn as_f32(&self) -> f32 {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
// --- The HardwareStateGuard (RAII Restorer) ---
|
||||
|
||||
/// Captured system state, recorded so that it can be written back later.
#[derive(Clone, Debug, Default)]
pub struct SystemSnapshot {
    /// Raw string content of each captured file, keyed by its path
    /// (e.g., RAPL limits).
    pub sysfs_nodes: HashMap<PathBuf, String>,
    /// Names of the services recorded as active when the snapshot was taken.
    pub suppressed_services: Vec<String>,
}
|
||||
|
||||
/// The Universal Safeguard wrapper.
|
||||
///
|
||||
/// Implements the "Ironclad Restorer" pattern via the [Drop] trait.
|
||||
pub struct HardwareStateGuard {
|
||||
snapshot: SystemSnapshot,
|
||||
is_armed: bool,
|
||||
}
|
||||
|
||||
impl HardwareStateGuard {
|
||||
/// Arms the safeguard by taking a snapshot of the target files and services.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns an error if any critical sysfs node cannot be read.
|
||||
pub fn acquire(target_files: &[PathBuf], target_services: &[String]) -> Result<Self> {
|
||||
let mut snapshot = SystemSnapshot::default();
|
||||
|
||||
info!("USA: Arming safeguard and snapshotting system state...");
|
||||
|
||||
for path in target_files {
|
||||
if path.exists() {
|
||||
let content = fs::read_to_string(path)
|
||||
.with_context(|| format!("Failed to snapshot {:?}", path))?;
|
||||
snapshot.sysfs_nodes.insert(path.clone(), content.trim().to_string());
|
||||
} else {
|
||||
warn!("USA: Target node {:?} does not exist, skipping snapshot.", path);
|
||||
}
|
||||
}
|
||||
|
||||
for service in target_services {
|
||||
let status = std::process::Command::new("systemctl")
|
||||
.args(["is-active", "--quiet", service])
|
||||
.status();
|
||||
|
||||
if let Ok(s) = status {
|
||||
if s.success() {
|
||||
snapshot.suppressed_services.push(service.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
snapshot,
|
||||
is_armed: true,
|
||||
})
|
||||
}
|
||||
|
||||
/// Explicit manual restoration (can be called upon successful exit).
|
||||
pub fn release(&mut self) -> Result<()> {
|
||||
if !self.is_armed {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!("USA: Initiating Ironclad Restoration...");
|
||||
|
||||
// 1. Restore Power/Sysfs states
|
||||
for (path, content) in &self.snapshot.sysfs_nodes {
|
||||
if let Err(e) = fs::write(path, content) {
|
||||
error!("USA RESTORATION FAILURE: Could not revert {:?}: {}", path, e);
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Restart Services
|
||||
for service in &self.snapshot.suppressed_services {
|
||||
let _ = std::process::Command::new("systemctl")
|
||||
.args(["start", service])
|
||||
.status();
|
||||
}
|
||||
|
||||
self.is_armed = false;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for HardwareStateGuard {
|
||||
fn drop(&mut self) {
|
||||
if self.is_armed {
|
||||
warn!("USA: HardwareStateGuard triggered via Drop (panic/unexpected exit). Reverting system state...");
|
||||
let _ = self.release();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -157,6 +157,8 @@ impl<T: SensorBus + ?Sized> SensorBus for Arc<T> {
|
||||
}
|
||||
}
|
||||
|
||||
use crate::sal::safety::TdpLimitMicroWatts;
|
||||
|
||||
/// Provides a write-only interface for hardware actuators.
|
||||
pub trait ActuatorBus: Send + Sync {
|
||||
/// Sets the fan control mode (e.g., "auto" or "max").
|
||||
@@ -165,28 +167,28 @@ pub trait ActuatorBus: Send + Sync {
|
||||
/// Returns an error if the fan control command or `sysfs` write fails.
|
||||
fn set_fan_mode(&self, mode: &str) -> Result<()>;
|
||||
|
||||
/// Sets the sustained power limit (PL1) in Watts.
|
||||
/// Sets the sustained power limit (PL1) using a validated wrapper.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns an error if the RAPL `sysfs` node cannot be written to.
|
||||
fn set_sustained_power_limit(&self, watts: f32) -> Result<()>;
|
||||
fn set_sustained_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()>;
|
||||
|
||||
/// Sets the burst power limit (PL2) in Watts.
|
||||
/// Sets the burst power limit (PL2) using a validated wrapper.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns an error if the RAPL `sysfs` node cannot be written to.
|
||||
fn set_burst_power_limit(&self, watts: f32) -> Result<()>;
|
||||
fn set_burst_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()>;
|
||||
}
|
||||
|
||||
impl<T: ActuatorBus + ?Sized> ActuatorBus for Arc<T> {
|
||||
fn set_fan_mode(&self, mode: &str) -> Result<()> {
|
||||
(**self).set_fan_mode(mode)
|
||||
}
|
||||
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
|
||||
(**self).set_sustained_power_limit(watts)
|
||||
fn set_sustained_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
|
||||
(**self).set_sustained_power_limit(limit)
|
||||
}
|
||||
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
|
||||
(**self).set_burst_power_limit(watts)
|
||||
fn set_burst_power_limit(&self, limit: TdpLimitMicroWatts) -> Result<()> {
|
||||
(**self).set_burst_power_limit(limit)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user