fixed dangerous states to be applied
This commit is contained in:
@@ -22,6 +22,11 @@ pub struct DellXps9380Sal {
|
||||
suppressed_services: Mutex<Vec<String>>,
|
||||
msr_file: Mutex<fs::File>,
|
||||
last_energy: Mutex<(u64, Instant)>,
|
||||
|
||||
// --- Original State for Restoration ---
|
||||
original_pl1: Mutex<Option<u64>>,
|
||||
original_pl2: Mutex<Option<u64>>,
|
||||
original_fan_mode: Mutex<Option<String>>,
|
||||
}
|
||||
|
||||
impl DellXps9380Sal {
|
||||
@@ -53,6 +58,9 @@ impl DellXps9380Sal {
|
||||
last_energy: Mutex::new((initial_energy, Instant::now())),
|
||||
fact_sheet: facts,
|
||||
ctx,
|
||||
original_pl1: Mutex::new(None),
|
||||
original_pl2: Mutex::new(None),
|
||||
original_fan_mode: Mutex::new(None),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -126,12 +134,25 @@ impl PreflightAuditor for DellXps9380Sal {
|
||||
|
||||
impl EnvironmentGuard for DellXps9380Sal {
|
||||
fn suppress(&self) -> Result<()> {
|
||||
// 1. Snapshot Power Limits
|
||||
if let Ok(pl1) = fs::read_to_string(&self.pl1_path) {
|
||||
*self.original_pl1.lock().unwrap() = pl1.trim().parse().ok();
|
||||
}
|
||||
if let Ok(pl2) = fs::read_to_string(&self.pl2_path) {
|
||||
*self.original_pl2.lock().unwrap() = pl2.trim().parse().ok();
|
||||
}
|
||||
|
||||
// 2. Snapshot Fan Mode (Assumption: Dell BIOS Fan Control is active)
|
||||
// We can't easily read the current state of dell-bios-fan-control, so we assume 'auto' (1)
|
||||
*self.original_fan_mode.lock().unwrap() = Some("1".to_string());
|
||||
|
||||
// 3. Stop Services
|
||||
let services = ["tlp", "thermald", "i8kmon"];
|
||||
let mut suppressed = self.suppressed_services.lock().unwrap();
|
||||
for s in services {
|
||||
if self.ctx.runner.run("systemctl", &["is-active", "--quiet", s]).is_ok() {
|
||||
debug!("Suppressing service: {}", s);
|
||||
self.ctx.runner.run("systemctl", &["stop", s])?;
|
||||
let _ = self.ctx.runner.run("systemctl", &["stop", s]);
|
||||
suppressed.push(s.to_string());
|
||||
}
|
||||
}
|
||||
@@ -139,6 +160,20 @@ impl EnvironmentGuard for DellXps9380Sal {
|
||||
}
|
||||
|
||||
fn restore(&self) -> Result<()> {
|
||||
// 1. Restore Power Limits
|
||||
if let Some(pl1) = *self.original_pl1.lock().unwrap() {
|
||||
let _ = fs::write(&self.pl1_path, pl1.to_string());
|
||||
}
|
||||
if let Some(pl2) = *self.original_pl2.lock().unwrap() {
|
||||
let _ = fs::write(&self.pl2_path, pl2.to_string());
|
||||
}
|
||||
|
||||
// 2. Restore Fan Mode (BIOS Control)
|
||||
if let Some(tool_path) = self.fact_sheet.paths.tools.get("dell_fan_ctrl") {
|
||||
let _ = self.ctx.runner.run(&tool_path.to_string_lossy(), &["1"]);
|
||||
}
|
||||
|
||||
// 3. Restart Services
|
||||
let mut suppressed = self.suppressed_services.lock().unwrap();
|
||||
for s in suppressed.drain(..) {
|
||||
let _ = self.ctx.runner.run("systemctl", &["start", &s]);
|
||||
@@ -162,17 +197,23 @@ impl SensorBus for DellXps9380Sal {
|
||||
}
|
||||
|
||||
fn get_power_w(&self) -> Result<f32> {
|
||||
if self.pwr_path.to_string_lossy().contains("energy_uj") {
|
||||
// FIX: Ensure we always read from energy_uj if available for delta calculation
|
||||
let rapl_base = self.pl1_path.parent().context("RAPL path error")?;
|
||||
let energy_path = rapl_base.join("energy_uj");
|
||||
|
||||
if energy_path.exists() {
|
||||
let mut last = self.last_energy.lock().unwrap();
|
||||
let e2 = fs::read_to_string(&self.pwr_path)?.trim().parse::<u64>()?;
|
||||
let e2_str = fs::read_to_string(&energy_path)?;
|
||||
let e2 = e2_str.trim().parse::<u64>()?;
|
||||
let t2 = Instant::now();
|
||||
let (e1, t1) = *last;
|
||||
let delta_e = e2.wrapping_sub(e1);
|
||||
let delta_t = t2.duration_since(t1).as_secs_f32();
|
||||
*last = (e2, t2);
|
||||
if delta_t < 0.01 { return Ok(0.0); }
|
||||
if delta_t < 0.05 { return Ok(0.0); }
|
||||
Ok((delta_e as f32 / 1_000_000.0) / delta_t)
|
||||
} else {
|
||||
// Fall back to power1_average if it exists (units are µW)
|
||||
let s = fs::read_to_string(&self.pwr_path)?;
|
||||
Ok(s.trim().parse::<f32>()? / 1000000.0)
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::path::{Path};
|
||||
use std::fs;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::sync::Mutex;
|
||||
use tracing::{debug};
|
||||
|
||||
use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError, SafetyStatus, EnvironmentCtx};
|
||||
use crate::sal::heuristic::discovery::SystemFactSheet;
|
||||
@@ -16,6 +17,10 @@ pub struct GenericLinuxSal {
|
||||
last_valid_temp: Mutex<(f32, Instant)>,
|
||||
current_pl1: Mutex<f32>,
|
||||
last_energy: Mutex<(u64, Instant)>,
|
||||
|
||||
// --- Original State for Restoration ---
|
||||
original_pl1: Mutex<Option<u64>>,
|
||||
original_pl2: Mutex<Option<u64>>,
|
||||
}
|
||||
|
||||
impl GenericLinuxSal {
|
||||
@@ -34,6 +39,8 @@ impl GenericLinuxSal {
|
||||
last_energy: Mutex::new((initial_energy, Instant::now())),
|
||||
fact_sheet: facts,
|
||||
ctx,
|
||||
original_pl1: Mutex::new(None),
|
||||
original_pl2: Mutex::new(None),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,7 +102,7 @@ impl SensorBus for GenericLinuxSal {
|
||||
let delta_e = e2.wrapping_sub(e1);
|
||||
let delta_t = t2.duration_since(t1).as_secs_f32();
|
||||
*last = (e2, t2);
|
||||
if delta_t < 0.01 { return Ok(0.0); }
|
||||
if delta_t < 0.05 { return Ok(0.0); }
|
||||
Ok((delta_e as f32 / 1_000_000.0) / delta_t)
|
||||
}
|
||||
|
||||
@@ -160,12 +167,22 @@ impl ActuatorBus for GenericLinuxSal {
|
||||
|
||||
impl EnvironmentGuard for GenericLinuxSal {
|
||||
fn suppress(&self) -> Result<()> {
|
||||
// Snapshot Power Limits
|
||||
if let Some(rapl_path) = self.fact_sheet.rapl_paths.first() {
|
||||
if let Ok(pl1) = fs::read_to_string(rapl_path.join("constraint_0_power_limit_uw")) {
|
||||
*self.original_pl1.lock().unwrap() = pl1.trim().parse().ok();
|
||||
}
|
||||
if let Ok(pl2) = fs::read_to_string(rapl_path.join("constraint_1_power_limit_uw")) {
|
||||
*self.original_pl2.lock().unwrap() = pl2.trim().parse().ok();
|
||||
}
|
||||
}
|
||||
|
||||
let mut suppressed = self.suppressed_services.lock().unwrap();
|
||||
for conflict_id in &self.fact_sheet.active_conflicts {
|
||||
if let Some(conflict) = self.db.conflicts.iter().find(|c| &c.id == conflict_id) {
|
||||
for service in &conflict.services {
|
||||
if self.ctx.runner.run("systemctl", &["is-active", "--quiet", service]).is_ok() {
|
||||
self.ctx.runner.run("systemctl", &["stop", service])?;
|
||||
let _ = self.ctx.runner.run("systemctl", &["stop", service]);
|
||||
suppressed.push(service.clone());
|
||||
}
|
||||
}
|
||||
@@ -175,6 +192,16 @@ impl EnvironmentGuard for GenericLinuxSal {
|
||||
}
|
||||
|
||||
fn restore(&self) -> Result<()> {
|
||||
// Restore Power Limits
|
||||
if let Some(rapl_path) = self.fact_sheet.rapl_paths.first() {
|
||||
if let Some(pl1) = *self.original_pl1.lock().unwrap() {
|
||||
let _ = fs::write(rapl_path.join("constraint_0_power_limit_uw"), pl1.to_string());
|
||||
}
|
||||
if let Some(pl2) = *self.original_pl2.lock().unwrap() {
|
||||
let _ = fs::write(rapl_path.join("constraint_1_power_limit_uw"), pl2.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let mut suppressed = self.suppressed_services.lock().unwrap();
|
||||
for service in suppressed.drain(..) {
|
||||
let _ = self.ctx.runner.run("systemctl", &["start", &service]);
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::time::{Duration};
|
||||
use std::thread;
|
||||
use std::sync::mpsc;
|
||||
use std::collections::HashMap;
|
||||
use crate::sal::heuristic::schema::{SensorDiscovery, ActuatorDiscovery, Conflict, Discovery, Benchmarking};
|
||||
use crate::sys::SyscallRunner;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
/// Registry of dynamically discovered paths for configs and tools.
|
||||
@@ -31,6 +31,7 @@ pub struct SystemFactSheet {
|
||||
/// Probes the system for hardware sensors, actuators, service conflicts, and paths.
|
||||
pub fn discover_facts(
|
||||
base_path: &Path,
|
||||
runner: &dyn SyscallRunner,
|
||||
discovery: &Discovery,
|
||||
conflicts: &[Conflict],
|
||||
bench_config: Benchmarking,
|
||||
@@ -45,7 +46,7 @@ pub fn discover_facts(
|
||||
let mut active_conflicts = Vec::new();
|
||||
for conflict in conflicts {
|
||||
for service in &conflict.services {
|
||||
if is_service_active(service) {
|
||||
if is_service_active(runner, service) {
|
||||
debug!("Detected active conflict: {} (Service: {})", conflict.id, service);
|
||||
active_conflicts.push(conflict.id.clone());
|
||||
break;
|
||||
@@ -93,7 +94,6 @@ fn discover_paths(base_path: &Path, discovery: &Discovery) -> PathRegistry {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If not found, use the first one as default if any exist
|
||||
if !registry.configs.contains_key(id) {
|
||||
if let Some(first) = candidates.first() {
|
||||
registry.configs.insert(id.clone(), PathBuf::from(first));
|
||||
@@ -142,7 +142,6 @@ fn discover_hwmon(base_path: &Path, cfg: &SensorDiscovery) -> (Option<PathBuf>,
|
||||
for hw_entry in hw_entries.flatten() {
|
||||
let file_name = hw_entry.file_name().into_string().unwrap_or_default();
|
||||
|
||||
// Temperature Sensors
|
||||
if file_name.starts_with("temp") && file_name.ends_with("_label") {
|
||||
if let Some(label) = read_sysfs_with_timeout(&hw_entry.path(), Duration::from_millis(100)) {
|
||||
if cfg.temp_labels.iter().any(|l| label.contains(l)) {
|
||||
@@ -154,7 +153,6 @@ fn discover_hwmon(base_path: &Path, cfg: &SensorDiscovery) -> (Option<PathBuf>,
|
||||
}
|
||||
}
|
||||
|
||||
// Fan Sensors
|
||||
if file_name.starts_with("fan") && file_name.ends_with("_label") {
|
||||
if let Some(label) = read_sysfs_with_timeout(&hw_entry.path(), Duration::from_millis(100)) {
|
||||
if cfg.fan_labels.iter().any(|l| label.contains(l)) {
|
||||
@@ -206,18 +204,9 @@ fn discover_rapl(base_path: &Path, cfg: &ActuatorDiscovery) -> Vec<PathBuf> {
|
||||
paths
|
||||
}
|
||||
|
||||
/// Checks if a systemd service is currently active.
|
||||
pub fn is_service_active(service: &str) -> bool {
|
||||
let status = Command::new("systemctl")
|
||||
.arg("is-active")
|
||||
.arg("--quiet")
|
||||
.arg(service)
|
||||
.status();
|
||||
|
||||
match status {
|
||||
Ok(s) => s.success(),
|
||||
Err(_) => false,
|
||||
}
|
||||
/// Checks if a systemd service is currently active using the injected runner.
///
/// Invokes `systemctl is-active --quiet <service>` through `runner` instead of
/// spawning a process directly, and returns `true` when that call returns `Ok`.
/// Any `Err` from the runner is reported as `false`; the error value itself is
/// discarded.
///
/// NOTE(review): presumably the runner maps a non-zero exit status to `Err` —
/// confirm against the `SyscallRunner` implementation.
|
||||
pub fn is_service_active(runner: &dyn SyscallRunner, service: &str) -> bool {
|
||||
runner.run("systemctl", &["is-active", "--quiet", service]).is_ok()
|
||||
}
|
||||
|
||||
/// Helper to read a sysfs file with a timeout.
|
||||
|
||||
@@ -24,7 +24,7 @@ impl HeuristicEngine {
|
||||
.context("Failed to parse hardware_db.toml")?;
|
||||
|
||||
// 2. Discover Facts
|
||||
let facts = discover_facts(&ctx.sysfs_base, &db.discovery, &db.conflicts, db.benchmarking.clone());
|
||||
let facts = discover_facts(&ctx.sysfs_base, ctx.runner.as_ref(), &db.discovery, &db.conflicts, db.benchmarking.clone());
|
||||
info!("System Identity: {} {}", facts.vendor, facts.model);
|
||||
|
||||
// 3. Routing Logic
|
||||
|
||||
Reference in New Issue
Block a user