release/1.2.0 #2

Merged
nvrl merged 13 commits from release/1.2.0 into main 2026-03-01 00:32:04 +01:00
9 changed files with 116 additions and 29 deletions
Showing only changes of commit 4c4026a600 - Show all commits

View File

@@ -7,6 +7,7 @@
use serde::{Serialize, Deserialize}; use serde::{Serialize, Deserialize};
use std::collections::HashMap; use std::collections::HashMap;
use std::path::PathBuf; use std::path::PathBuf;
use tracing::warn;
pub mod formatters; pub mod formatters;
@@ -180,6 +181,18 @@ impl OptimizerEngine {
} }
} }
let best_pl = if max_score > f32::MIN {
best_pl
} else {
profile.points.last().map(|p| p.power_w).unwrap_or(15.0)
};
// Safety Floor: Never recommend a TDP below 5W, as this bricks system performance.
if best_pl < 5.0 {
warn!("Heuristic suggested dangerously low PL1 ({:.1}W). Falling back to 15W safety floor.", best_pl);
return 15.0;
}
best_pl best_pl
} }
} }

View File

@@ -161,6 +161,7 @@ fn main() -> Result<()> {
// 5. Spawn Backend Orchestrator // 5. Spawn Backend Orchestrator
let sal_backend = sal.clone(); let sal_backend = sal.clone();
let facts_backend = facts.clone(); let facts_backend = facts.clone();
let config_out = args.config_out.clone();
let backend_handle = thread::spawn(move || { let backend_handle = thread::spawn(move || {
let workload = Box::new(StressNg::new()); let workload = Box::new(StressNg::new());
let mut orchestrator = BenchmarkOrchestrator::new( let mut orchestrator = BenchmarkOrchestrator::new(
@@ -169,6 +170,7 @@ fn main() -> Result<()> {
workload, workload,
telemetry_tx, telemetry_tx,
command_rx, command_rx,
config_out,
); );
orchestrator.run() orchestrator.run()
}); });

View File

@@ -12,6 +12,7 @@ use sysinfo::System;
use std::sync::Arc; use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex; use std::sync::Mutex;
use std::path::PathBuf;
use crate::sal::traits::{PlatformSal, SafetyStatus}; use crate::sal::traits::{PlatformSal, SafetyStatus};
use crate::sal::heuristic::discovery::SystemFactSheet; use crate::sal::heuristic::discovery::SystemFactSheet;
@@ -40,6 +41,8 @@ pub struct BenchmarkOrchestrator {
profile: ThermalProfile, profile: ThermalProfile,
/// Mathematics engine for data smoothing and optimization. /// Mathematics engine for data smoothing and optimization.
engine: OptimizerEngine, engine: OptimizerEngine,
/// CLI override for the configuration output path.
optional_config_out: Option<PathBuf>,
/// Sliding window of power readings (Watts). /// Sliding window of power readings (Watts).
history_watts: VecDeque<f32>, history_watts: VecDeque<f32>,
@@ -67,6 +70,7 @@ impl BenchmarkOrchestrator {
workload: Box<dyn Workload>, workload: Box<dyn Workload>,
telemetry_tx: mpsc::Sender<TelemetryState>, telemetry_tx: mpsc::Sender<TelemetryState>,
command_rx: mpsc::Receiver<UiCommand>, command_rx: mpsc::Receiver<UiCommand>,
optional_config_out: Option<PathBuf>,
) -> Self { ) -> Self {
let mut sys = System::new_all(); let mut sys = System::new_all();
sys.refresh_all(); sys.refresh_all();
@@ -92,6 +96,7 @@ impl BenchmarkOrchestrator {
total_ram_gb, total_ram_gb,
emergency_abort: Arc::new(AtomicBool::new(false)), emergency_abort: Arc::new(AtomicBool::new(false)),
emergency_reason: Arc::new(Mutex::new(None)), emergency_reason: Arc::new(Mutex::new(None)),
optional_config_out,
} }
} }
@@ -222,12 +227,18 @@ impl BenchmarkOrchestrator {
trip_temp: res.max_temp_c.max(95.0), trip_temp: res.max_temp_c.max(95.0),
}; };
if let Some(throttled_path) = self.facts.paths.configs.get("throttled") { // 1. Throttled (Merged if exists)
crate::engine::formatters::throttled::ThrottledTranslator::save(throttled_path, &config)?; // PRIORITY: optional_config_out > facts discovery > fallback
self.log(&format!("✓ Saved '{}' (merged).", throttled_path.display()))?; let throttled_path = self.optional_config_out.clone()
res.config_paths.insert("throttled".to_string(), throttled_path.clone()); .or_else(|| self.facts.paths.configs.get("throttled").cloned());
if let Some(path) = throttled_path {
crate::engine::formatters::throttled::ThrottledTranslator::save(&path, &config)?;
self.log(&format!("✓ Saved '{}'.", path.display()))?;
res.config_paths.insert("throttled".to_string(), path.clone());
} }
// 2. i8kmon
if let Some(i8k_path) = self.facts.paths.configs.get("i8kmon") { if let Some(i8k_path) = self.facts.paths.configs.get("i8kmon") {
let i8k_config = crate::engine::formatters::i8kmon::I8kmonConfig { let i8k_config = crate::engine::formatters::i8kmon::I8kmonConfig {
t_ambient: self.profile.ambient_temp, t_ambient: self.profile.ambient_temp,

View File

@@ -22,6 +22,11 @@ pub struct DellXps9380Sal {
suppressed_services: Mutex<Vec<String>>, suppressed_services: Mutex<Vec<String>>,
msr_file: Mutex<fs::File>, msr_file: Mutex<fs::File>,
last_energy: Mutex<(u64, Instant)>, last_energy: Mutex<(u64, Instant)>,
// --- Original State for Restoration ---
original_pl1: Mutex<Option<u64>>,
original_pl2: Mutex<Option<u64>>,
original_fan_mode: Mutex<Option<String>>,
} }
impl DellXps9380Sal { impl DellXps9380Sal {
@@ -53,6 +58,9 @@ impl DellXps9380Sal {
last_energy: Mutex::new((initial_energy, Instant::now())), last_energy: Mutex::new((initial_energy, Instant::now())),
fact_sheet: facts, fact_sheet: facts,
ctx, ctx,
original_pl1: Mutex::new(None),
original_pl2: Mutex::new(None),
original_fan_mode: Mutex::new(None),
}) })
} }
@@ -126,12 +134,25 @@ impl PreflightAuditor for DellXps9380Sal {
impl EnvironmentGuard for DellXps9380Sal { impl EnvironmentGuard for DellXps9380Sal {
fn suppress(&self) -> Result<()> { fn suppress(&self) -> Result<()> {
// 1. Snapshot Power Limits
if let Ok(pl1) = fs::read_to_string(&self.pl1_path) {
*self.original_pl1.lock().unwrap() = pl1.trim().parse().ok();
}
if let Ok(pl2) = fs::read_to_string(&self.pl2_path) {
*self.original_pl2.lock().unwrap() = pl2.trim().parse().ok();
}
// 2. Snapshot Fan Mode (Assumption: Dell BIOS Fan Control is active)
// We can't easily read current state of dell-bios-fan-control, so we assume 'auto' (1)
*self.original_fan_mode.lock().unwrap() = Some("1".to_string());
// 3. Stop Services
let services = ["tlp", "thermald", "i8kmon"]; let services = ["tlp", "thermald", "i8kmon"];
let mut suppressed = self.suppressed_services.lock().unwrap(); let mut suppressed = self.suppressed_services.lock().unwrap();
for s in services { for s in services {
if self.ctx.runner.run("systemctl", &["is-active", "--quiet", s]).is_ok() { if self.ctx.runner.run("systemctl", &["is-active", "--quiet", s]).is_ok() {
debug!("Suppressing service: {}", s); debug!("Suppressing service: {}", s);
self.ctx.runner.run("systemctl", &["stop", s])?; let _ = self.ctx.runner.run("systemctl", &["stop", s]);
suppressed.push(s.to_string()); suppressed.push(s.to_string());
} }
} }
@@ -139,6 +160,20 @@ impl EnvironmentGuard for DellXps9380Sal {
} }
fn restore(&self) -> Result<()> { fn restore(&self) -> Result<()> {
// 1. Restore Power Limits
if let Some(pl1) = *self.original_pl1.lock().unwrap() {
let _ = fs::write(&self.pl1_path, pl1.to_string());
}
if let Some(pl2) = *self.original_pl2.lock().unwrap() {
let _ = fs::write(&self.pl2_path, pl2.to_string());
}
// 2. Restore Fan Mode (BIOS Control)
if let Some(tool_path) = self.fact_sheet.paths.tools.get("dell_fan_ctrl") {
let _ = self.ctx.runner.run(&tool_path.to_string_lossy(), &["1"]);
}
// 3. Restart Services
let mut suppressed = self.suppressed_services.lock().unwrap(); let mut suppressed = self.suppressed_services.lock().unwrap();
for s in suppressed.drain(..) { for s in suppressed.drain(..) {
let _ = self.ctx.runner.run("systemctl", &["start", &s]); let _ = self.ctx.runner.run("systemctl", &["start", &s]);
@@ -162,17 +197,23 @@ impl SensorBus for DellXps9380Sal {
} }
fn get_power_w(&self) -> Result<f32> { fn get_power_w(&self) -> Result<f32> {
if self.pwr_path.to_string_lossy().contains("energy_uj") { // FIX: Ensure we always read from energy_uj if available for delta calculation
let rapl_base = self.pl1_path.parent().context("RAPL path error")?;
let energy_path = rapl_base.join("energy_uj");
if energy_path.exists() {
let mut last = self.last_energy.lock().unwrap(); let mut last = self.last_energy.lock().unwrap();
let e2 = fs::read_to_string(&self.pwr_path)?.trim().parse::<u64>()?; let e2_str = fs::read_to_string(&energy_path)?;
let e2 = e2_str.trim().parse::<u64>()?;
let t2 = Instant::now(); let t2 = Instant::now();
let (e1, t1) = *last; let (e1, t1) = *last;
let delta_e = e2.wrapping_sub(e1); let delta_e = e2.wrapping_sub(e1);
let delta_t = t2.duration_since(t1).as_secs_f32(); let delta_t = t2.duration_since(t1).as_secs_f32();
*last = (e2, t2); *last = (e2, t2);
if delta_t < 0.01 { return Ok(0.0); } if delta_t < 0.05 { return Ok(0.0); }
Ok((delta_e as f32 / 1_000_000.0) / delta_t) Ok((delta_e as f32 / 1_000_000.0) / delta_t)
} else { } else {
// Fallback to power1_average if it exists (units are µW)
let s = fs::read_to_string(&self.pwr_path)?; let s = fs::read_to_string(&self.pwr_path)?;
Ok(s.trim().parse::<f32>()? / 1000000.0) Ok(s.trim().parse::<f32>()? / 1000000.0)
} }

View File

@@ -3,6 +3,7 @@ use std::path::{Path};
use std::fs; use std::fs;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use std::sync::Mutex; use std::sync::Mutex;
use tracing::{debug};
use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError, SafetyStatus, EnvironmentCtx}; use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError, SafetyStatus, EnvironmentCtx};
use crate::sal::heuristic::discovery::SystemFactSheet; use crate::sal::heuristic::discovery::SystemFactSheet;
@@ -16,6 +17,10 @@ pub struct GenericLinuxSal {
last_valid_temp: Mutex<(f32, Instant)>, last_valid_temp: Mutex<(f32, Instant)>,
current_pl1: Mutex<f32>, current_pl1: Mutex<f32>,
last_energy: Mutex<(u64, Instant)>, last_energy: Mutex<(u64, Instant)>,
// --- Original State for Restoration ---
original_pl1: Mutex<Option<u64>>,
original_pl2: Mutex<Option<u64>>,
} }
impl GenericLinuxSal { impl GenericLinuxSal {
@@ -34,6 +39,8 @@ impl GenericLinuxSal {
last_energy: Mutex::new((initial_energy, Instant::now())), last_energy: Mutex::new((initial_energy, Instant::now())),
fact_sheet: facts, fact_sheet: facts,
ctx, ctx,
original_pl1: Mutex::new(None),
original_pl2: Mutex::new(None),
} }
} }
@@ -95,7 +102,7 @@ impl SensorBus for GenericLinuxSal {
let delta_e = e2.wrapping_sub(e1); let delta_e = e2.wrapping_sub(e1);
let delta_t = t2.duration_since(t1).as_secs_f32(); let delta_t = t2.duration_since(t1).as_secs_f32();
*last = (e2, t2); *last = (e2, t2);
if delta_t < 0.01 { return Ok(0.0); } if delta_t < 0.05 { return Ok(0.0); }
Ok((delta_e as f32 / 1_000_000.0) / delta_t) Ok((delta_e as f32 / 1_000_000.0) / delta_t)
} }
@@ -160,12 +167,22 @@ impl ActuatorBus for GenericLinuxSal {
impl EnvironmentGuard for GenericLinuxSal { impl EnvironmentGuard for GenericLinuxSal {
fn suppress(&self) -> Result<()> { fn suppress(&self) -> Result<()> {
// Snapshot Power Limits
if let Some(rapl_path) = self.fact_sheet.rapl_paths.first() {
if let Ok(pl1) = fs::read_to_string(rapl_path.join("constraint_0_power_limit_uw")) {
*self.original_pl1.lock().unwrap() = pl1.trim().parse().ok();
}
if let Ok(pl2) = fs::read_to_string(rapl_path.join("constraint_1_power_limit_uw")) {
*self.original_pl2.lock().unwrap() = pl2.trim().parse().ok();
}
}
let mut suppressed = self.suppressed_services.lock().unwrap(); let mut suppressed = self.suppressed_services.lock().unwrap();
for conflict_id in &self.fact_sheet.active_conflicts { for conflict_id in &self.fact_sheet.active_conflicts {
if let Some(conflict) = self.db.conflicts.iter().find(|c| &c.id == conflict_id) { if let Some(conflict) = self.db.conflicts.iter().find(|c| &c.id == conflict_id) {
for service in &conflict.services { for service in &conflict.services {
if self.ctx.runner.run("systemctl", &["is-active", "--quiet", service]).is_ok() { if self.ctx.runner.run("systemctl", &["is-active", "--quiet", service]).is_ok() {
self.ctx.runner.run("systemctl", &["stop", service])?; let _ = self.ctx.runner.run("systemctl", &["stop", service]);
suppressed.push(service.clone()); suppressed.push(service.clone());
} }
} }
@@ -175,6 +192,16 @@ impl EnvironmentGuard for GenericLinuxSal {
} }
fn restore(&self) -> Result<()> { fn restore(&self) -> Result<()> {
// Restore Power Limits
if let Some(rapl_path) = self.fact_sheet.rapl_paths.first() {
if let Some(pl1) = *self.original_pl1.lock().unwrap() {
let _ = fs::write(rapl_path.join("constraint_0_power_limit_uw"), pl1.to_string());
}
if let Some(pl2) = *self.original_pl2.lock().unwrap() {
let _ = fs::write(rapl_path.join("constraint_1_power_limit_uw"), pl2.to_string());
}
}
let mut suppressed = self.suppressed_services.lock().unwrap(); let mut suppressed = self.suppressed_services.lock().unwrap();
for service in suppressed.drain(..) { for service in suppressed.drain(..) {
let _ = self.ctx.runner.run("systemctl", &["start", &service]); let _ = self.ctx.runner.run("systemctl", &["start", &service]);

View File

@@ -1,11 +1,11 @@
use std::fs; use std::fs;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration}; use std::time::{Duration};
use std::thread; use std::thread;
use std::sync::mpsc; use std::sync::mpsc;
use std::collections::HashMap; use std::collections::HashMap;
use crate::sal::heuristic::schema::{SensorDiscovery, ActuatorDiscovery, Conflict, Discovery, Benchmarking}; use crate::sal::heuristic::schema::{SensorDiscovery, ActuatorDiscovery, Conflict, Discovery, Benchmarking};
use crate::sys::SyscallRunner;
use tracing::{debug, warn}; use tracing::{debug, warn};
/// Registry of dynamically discovered paths for configs and tools. /// Registry of dynamically discovered paths for configs and tools.
@@ -31,6 +31,7 @@ pub struct SystemFactSheet {
/// Probes the system for hardware sensors, actuators, service conflicts, and paths. /// Probes the system for hardware sensors, actuators, service conflicts, and paths.
pub fn discover_facts( pub fn discover_facts(
base_path: &Path, base_path: &Path,
runner: &dyn SyscallRunner,
discovery: &Discovery, discovery: &Discovery,
conflicts: &[Conflict], conflicts: &[Conflict],
bench_config: Benchmarking, bench_config: Benchmarking,
@@ -45,7 +46,7 @@ pub fn discover_facts(
let mut active_conflicts = Vec::new(); let mut active_conflicts = Vec::new();
for conflict in conflicts { for conflict in conflicts {
for service in &conflict.services { for service in &conflict.services {
if is_service_active(service) { if is_service_active(runner, service) {
debug!("Detected active conflict: {} (Service: {})", conflict.id, service); debug!("Detected active conflict: {} (Service: {})", conflict.id, service);
active_conflicts.push(conflict.id.clone()); active_conflicts.push(conflict.id.clone());
break; break;
@@ -93,7 +94,6 @@ fn discover_paths(base_path: &Path, discovery: &Discovery) -> PathRegistry {
break; break;
} }
} }
// If not found, use the first one as default if any exist
if !registry.configs.contains_key(id) { if !registry.configs.contains_key(id) {
if let Some(first) = candidates.first() { if let Some(first) = candidates.first() {
registry.configs.insert(id.clone(), PathBuf::from(first)); registry.configs.insert(id.clone(), PathBuf::from(first));
@@ -142,7 +142,6 @@ fn discover_hwmon(base_path: &Path, cfg: &SensorDiscovery) -> (Option<PathBuf>,
for hw_entry in hw_entries.flatten() { for hw_entry in hw_entries.flatten() {
let file_name = hw_entry.file_name().into_string().unwrap_or_default(); let file_name = hw_entry.file_name().into_string().unwrap_or_default();
// Temperature Sensors
if file_name.starts_with("temp") && file_name.ends_with("_label") { if file_name.starts_with("temp") && file_name.ends_with("_label") {
if let Some(label) = read_sysfs_with_timeout(&hw_entry.path(), Duration::from_millis(100)) { if let Some(label) = read_sysfs_with_timeout(&hw_entry.path(), Duration::from_millis(100)) {
if cfg.temp_labels.iter().any(|l| label.contains(l)) { if cfg.temp_labels.iter().any(|l| label.contains(l)) {
@@ -154,7 +153,6 @@ fn discover_hwmon(base_path: &Path, cfg: &SensorDiscovery) -> (Option<PathBuf>,
} }
} }
// Fan Sensors
if file_name.starts_with("fan") && file_name.ends_with("_label") { if file_name.starts_with("fan") && file_name.ends_with("_label") {
if let Some(label) = read_sysfs_with_timeout(&hw_entry.path(), Duration::from_millis(100)) { if let Some(label) = read_sysfs_with_timeout(&hw_entry.path(), Duration::from_millis(100)) {
if cfg.fan_labels.iter().any(|l| label.contains(l)) { if cfg.fan_labels.iter().any(|l| label.contains(l)) {
@@ -206,18 +204,9 @@ fn discover_rapl(base_path: &Path, cfg: &ActuatorDiscovery) -> Vec<PathBuf> {
paths paths
} }
/// Checks if a systemd service is currently active. /// Checks if a systemd service is currently active using the injected runner.
pub fn is_service_active(service: &str) -> bool { pub fn is_service_active(runner: &dyn SyscallRunner, service: &str) -> bool {
let status = Command::new("systemctl") runner.run("systemctl", &["is-active", "--quiet", service]).is_ok()
.arg("is-active")
.arg("--quiet")
.arg(service)
.status();
match status {
Ok(s) => s.success(),
Err(_) => false,
}
} }
/// Helper to read a sysfs file with a timeout. /// Helper to read a sysfs file with a timeout.

View File

@@ -24,7 +24,7 @@ impl HeuristicEngine {
.context("Failed to parse hardware_db.toml")?; .context("Failed to parse hardware_db.toml")?;
// 2. Discover Facts // 2. Discover Facts
let facts = discover_facts(&ctx.sysfs_base, &db.discovery, &db.conflicts, db.benchmarking.clone()); let facts = discover_facts(&ctx.sysfs_base, ctx.runner.as_ref(), &db.discovery, &db.conflicts, db.benchmarking.clone());
info!("System Identity: {} {}", facts.vendor, facts.model); info!("System Identity: {} {}", facts.vendor, facts.model);
// 3. Routing Logic // 3. Routing Logic

View File

@@ -1,5 +1,6 @@
use ember_tune_rs::sal::heuristic::discovery::discover_facts; use ember_tune_rs::sal::heuristic::discovery::discover_facts;
use ember_tune_rs::sal::heuristic::schema::{Discovery, SensorDiscovery, ActuatorDiscovery, Benchmarking}; use ember_tune_rs::sal::heuristic::schema::{Discovery, SensorDiscovery, ActuatorDiscovery, Benchmarking};
use ember_tune_rs::sys::MockSyscallRunner;
use crate::common::fakesys::FakeSysBuilder; use crate::common::fakesys::FakeSysBuilder;
mod common; mod common;
@@ -35,7 +36,9 @@ fn test_heuristic_discovery_with_fakesys() {
power_steps_watts: vec![10.0, 15.0], power_steps_watts: vec![10.0, 15.0],
}; };
let facts = discover_facts(&fake.base_path(), &discovery, &[], benchmarking); let runner = MockSyscallRunner::new();
let facts = discover_facts(&fake.base_path(), &runner, &discovery, &[], benchmarking);
assert_eq!(facts.vendor, "Dell Inc."); assert_eq!(facts.vendor, "Dell Inc.");
assert_eq!(facts.model, "XPS 13 9380"); assert_eq!(facts.model, "XPS 13 9380");

View File

@@ -28,6 +28,7 @@ fn test_orchestrator_e2e_state_machine() {
workload, workload,
telemetry_tx, telemetry_tx,
command_rx, command_rx,
None,
); );
// For the purpose of this architecture audit, we've demonstrated the // For the purpose of this architecture audit, we've demonstrated the