1 Commits

Author SHA1 Message Date
9f00d6475b Merge pull request 'release/1.1.0' (#1) from release/1.1.0 into main
All checks were successful
Build and Release / release (push) Successful in 53s
Reviewed-on: #1
2026-02-26 14:25:29 +01:00
31 changed files with 474 additions and 2012 deletions

102
Cargo.lock generated
View File

@@ -513,7 +513,7 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "ember-tune-rs"
version = "1.2.0"
version = "1.1.0"
dependencies = [
"anyhow",
"clap",
@@ -526,17 +526,13 @@ dependencies = [
"num_cpus",
"owo-colors",
"ratatui",
"regex",
"serde",
"serde_json",
"sysinfo",
"tempfile",
"thiserror 2.0.18",
"toml",
"tracing",
"tracing-appender",
"tracing-subscriber",
"which",
]
[[package]]
@@ -545,12 +541,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "env_home"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe"
[[package]]
name = "equivalent"
version = "1.0.2"
@@ -596,12 +586,6 @@ dependencies = [
"regex",
]
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "filedescriptor"
version = "0.8.3"
@@ -1550,15 +1534,6 @@ dependencies = [
"zmij",
]
[[package]]
name = "serde_spanned"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
dependencies = [
"serde_core",
]
[[package]]
name = "sha2"
version = "0.10.9"
@@ -1712,19 +1687,6 @@ dependencies = [
"windows",
]
[[package]]
name = "tempfile"
version = "3.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1"
dependencies = [
"fastrand",
"getrandom 0.4.1",
"once_cell",
"rustix",
"windows-sys 0.61.2",
]
[[package]]
name = "terminal_size"
version = "0.4.3"
@@ -1890,45 +1852,6 @@ dependencies = [
"time-core",
]
[[package]]
name = "toml"
version = "1.0.3+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7614eaf19ad818347db24addfa201729cf2a9b6fdfd9eb0ab870fcacc606c0c"
dependencies = [
"indexmap",
"serde_core",
"serde_spanned",
"toml_datetime",
"toml_parser",
"toml_writer",
"winnow",
]
[[package]]
name = "toml_datetime"
version = "1.0.0+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e"
dependencies = [
"serde_core",
]
[[package]]
name = "toml_parser"
version = "1.0.9+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4"
dependencies = [
"winnow",
]
[[package]]
name = "toml_writer"
version = "1.0.6+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"
[[package]]
name = "tracing"
version = "0.1.44"
@@ -2281,17 +2204,6 @@ dependencies = [
"wezterm-dynamic",
]
[[package]]
name = "which"
version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3fabb953106c3c8eea8306e4393700d7657561cb43122571b172bbfb7c7ba1d"
dependencies = [
"env_home",
"rustix",
"winsafe",
]
[[package]]
name = "winapi"
version = "0.3.9"
@@ -2580,18 +2492,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
[[package]]
name = "winnow"
version = "0.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
[[package]]
name = "winsafe"
version = "0.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904"
[[package]]
name = "wit-bindgen"
version = "0.51.0"

View File

@@ -1,6 +1,6 @@
[package]
name = "ember-tune-rs"
version = "1.2.0"
version = "1.1.0"
edition = "2024"
authors = ["Nils Pukropp <nils@narl.io>"]
readme = "README.md"
@@ -28,9 +28,3 @@ tracing-appender = "0.2"
sysinfo = "0.38"
libc = "0.2"
num_cpus = "1.17"
toml = "1.0.3"
regex = "1.12.3"
which = "8.0.0"
[dev-dependencies]
tempfile = "3"

View File

@@ -1,82 +0,0 @@
## ⚙️ Development Setup
`ember-tune` is a standard Cargo project. You will need a recent Rust toolchain and common build utilities.
**Prerequisites:**
- `rustup`
- `build-essential` (or equivalent for your distribution)
- `libudev-dev`
```bash
# 1. Clone the repository
git clone https://gitea.com/narl/ember-tune.git
cd ember-tune
# 2. Build the release binary
cargo build --release
# 3. Run the test suite (safe, uses a virtual environment)
# This requires no special permissions and does not touch your hardware.
cargo test
```
**Running:**
Due to its direct hardware access, `ember-tune` requires root privileges.
```bash
# Run a full benchmark and generate optimized configs
sudo ./target/release/ember-tune
# Run a mock benchmark for UI/logic testing
sudo ./target/release/ember-tune --mock
```
---
## 🤝 Contributing Quirk Data (`hardware_db.toml`)
**This is the most impactful way to contribute.** `ember-tune`'s strength comes from its `assets/hardware_db.toml`, which encodes community knowledge about how to manage specific laptops. If your hardware isn't working perfectly, you can likely fix it by adding a new entry here.
The database is composed of four key sections: `conflicts`, `ecosystems`, `quirks`, and `discovery`.
### A. Reporting a Service Conflict
If a background service on your system interferes with `ember-tune`, add a new `[[conflicts]]` entry for it.
**Example:** Adding `laptop-mode-tools`.
```toml
[[conflicts]]
id = "laptop_mode_conflict"
services = ["laptop-mode.service"]
contention = "Multiple - I/O schedulers, Power limits"
severity = "Medium"
fix_action = "SuspendService" # Orchestrator will stop/start this service
help_text = "laptop-mode-tools can override power-related sysfs settings."
```
### B. Adding a New Hardware Ecosystem
If your laptop manufacturer (e.g., Razer) has a unique fan control tool or ACPI platform profile path, define it as a new `[ecosystems.<vendor>]` table (for example, `[ecosystems.razer]`).
**Example:** A hypothetical "Razer" ecosystem.
```toml
[ecosystems.razer]
vendor_regex = "Razer"
# Path to the sysfs node that controls performance profiles
profiles_path = "/sys/bus/platform/drivers/razer_acpi/power_mode"
# Map human-readable names to the values the driver expects
policy_map = { Balanced = 0, Boost = 1, Silent = 2 }
```
### C. Defining a Model-Specific Quirk
If a specific laptop model has a bug (like a stuck sensor or incorrect fan reporting), define a `[[quirks]]` entry.
**Example:** A laptop whose fans report 0 RPM even when spinning.
```toml
[[quirks]]
model_regex = "HP Envy 15-ep.*"
id = "hp_fan_stuck_sensor"
issue = "Fan sensor reports 0 RPM when active."
# The 'action' tells the SAL to use a different method for fan detection.
action = "UseThermalVelocityFallback"
```
After adding your changes, run the test suite (`cargo test`) and then submit a Pull Request!

View File

@@ -1,5 +1,5 @@
[metadata]
version = "1.2.0"
version = "1.0.0"
updated = "2026-02-26"
description = "Hardware and Conflict Database for ember-tune Thermal Engine"
@@ -29,14 +29,6 @@ severity = "Medium"
fix_action = "SuspendService"
help_text = "Auto-cpufreq interferes with deterministic Silicon Knee identification."
[[conflicts]]
id = "dell_fan_collision"
services = ["i8kmon.service"]
contention = "Dell SMM Fan Control"
severity = "High"
fix_action = "SuspendService"
help_text = "i8kmon fights with ember-tune for SMM fan duty cycles. Suspend during benchmark."
# manufacturer wide logic
[ecosystems.dell]
@@ -46,7 +38,6 @@ drivers = ["dell_smm_hwmon"]
fan_manual_mode_cmd = "dell-bios-fan-control 0"
fan_auto_mode_cmd = "dell-bios-fan-control 1"
safety_register = "0x1FC" # BD PROCHOT MSR
help_text = "Dell systems often require 'SMM Security Mitigation' disabled in BIOS for fan control."
[ecosystems.lenovo]
vendor_regex = "LENOVO"
@@ -69,13 +60,6 @@ fan_boost_path = "/sys/devices/platform/hp-wmi/hwmon/hwmon*/pwm1_enable"
vendor_regex = "Framework"
ec_tool = "ectool"
optimization = "Direct-FFI-SMC"
polling_cap_ms = 500
[ecosystems.surface]
vendor_regex = "Microsoft Corporation"
product_regex = "Surface.*"
drivers = ["surface_acpi"]
profiles_path = "/sys/bus/platform/devices/surface_performance/platform_profile"
# quirks: model quirks and fixes
@@ -101,7 +85,6 @@ id = "asus_fan_hex_support"
issue = "Custom Hex Curve Interface"
target_path = "/sys/devices/platform/asus-nb-wmi/fan_curve"
format = "HexPair16"
action = "ManualFanControlRequired"
[[quirks]]
model_regex = "Spectre x360"
@@ -109,45 +92,20 @@ id = "hp_rapl_lockout"
issue = "Hardware MSR Lockout"
action = "WarnUserMSRLocked"
[[quirks]]
model_regex = "Framework.*"
id = "framework_prochot_stuck"
issue = "BD PROCHOT wedged at 200MHz"
monitor_msr = "0x1FC"
reset_bit = 0
action = "ClearBitOnSafeTemp"
# heuristic discovery
[discovery.sensors]
temp_labels = ["Package id 0", "Tdie", "Tctl", "CPU Temperature", "Core 0", "Composite"]
fan_labels = ["CPU Fan", "GPU Fan", "System Fan", "Processor Fan"]
hwmon_priority = ["coretemp", "zenpower", "k10temp", "dell_smm", "thinkpad", "asus"]
temp_labels = ["Package id 0", "Tdie", "Tctl", "CPU Temperature"]
fan_labels = ["CPU Fan", "GPU Fan", "System Fan"]
hwmon_priority = ["coretemp", "zenpower", "k10temp", "dell_smm"]
[discovery.actuators]
rapl_paths = ["intel-rapl:0", "package-0", "intel-rapl:1"]
rapl_paths = ["intel-rapl:0", "package-0"]
amd_energy_paths = ["zenpower/energy1_input", "k10temp/energy1_input"]
governor_files = ["energy_performance_preference", "energy_performance_hint", "scaling_governor"]
[discovery.configs]
throttled = ["/etc/throttled.conf", "/usr/local/etc/throttled.conf", "/etc/lenovo_fix.conf"]
i8kmon = ["/etc/i8kmon.conf", "/etc/default/i8kmon"]
tlp = ["/etc/tlp.conf", "/etc/default/tlp"]
[discovery.tools]
dell_fan_ctrl = "dell-bios-fan-control"
ectool = "ectool"
ryzenadj = "ryzenadj"
# env health verification
[benchmarking]
idle_duration_s = 10
stress_duration_min_s = 15
stress_duration_max_s = 45
cool_down_s = 5
power_steps_watts = [15.0, 20.0, 25.0, 30.0, 35.0]
[[preflight_checks]]
name = "MSR Write Access"
check_cmd = "grep -q 'msr.allow_writes=on' /proc/cmdline"
@@ -155,10 +113,5 @@ fail_help = "Add 'msr.allow_writes=on' to kernel parameters to allow power limit
[[preflight_checks]]
name = "Kernel Lockdown Status"
check_cmd = "cat /sys/kernel/security/lockdown | grep -q '\\[none\\]' || ! [ -f /sys/kernel/security/lockdown ]"
check_cmd = "cat /sys/kernel/security/lockdown | grep -q '\\[none\\]'"
fail_help = "Kernel Lockdown is enabled. MMIO/MSR actuators are restricted by the Linux Security Module."
[[preflight_checks]]
name = "Intel P-State Check"
check_cmd = "[ -d /sys/devices/system/cpu/intel_pstate ] || [ -d /sys/devices/system/cpu/cpufreq/policy0 ]"
fail_help = "CPU Frequency scaling driver not detected. Ensure intel_pstate or acpi-cpufreq is loaded."

View File

@@ -1,117 +0,0 @@
[metadata]
version = "1.0.0"
updated = "2026-02-26"
description = "Hardware and Conflict Database for ember-tune Thermal Engine"
# service collision
[[conflicts]]
id = "tlp_vs_ppd"
services = ["tlp.service", "power-profiles-daemon.service"]
contention = "ACPI Platform Profile / EPP"
severity = "Critical"
fix_action = "MaskBoth"
help_text = "TLP and Power-Profiles-Daemon fight over power envelopes. Mask both to allow ember-tune deterministic control."
[[conflicts]]
id = "thermal_logic_collision"
services = ["thermald.service", "throttled.service"]
contention = "RAPL / MSR / BD-PROCHOT"
severity = "High"
fix_action = "SuspendService"
help_text = "Thermald and Throttled create a 'register ping-pong' loop. Disable throttled; ember-tune will manage RAPL limits."
[[conflicts]]
id = "freq_scaling_collision"
services = ["auto-cpufreq.service"]
contention = "CPU Scaling Governor"
severity = "Medium"
fix_action = "SuspendService"
help_text = "Auto-cpufreq interferes with deterministic Silicon Knee identification."
# manufacturer wide logic
[ecosystems.dell]
vendor_regex = "(Dell.*|Precision.*|Latitude.*|XPS.*)"
polling_cap_ms = 1000
drivers = ["dell_smm_hwmon"]
fan_manual_mode_cmd = "dell-bios-fan-control 0"
fan_auto_mode_cmd = "dell-bios-fan-control 1"
safety_register = "0x1FC" # BD PROCHOT MSR
[ecosystems.lenovo]
vendor_regex = "LENOVO"
lap_mode_path = "/sys/devices/platform/thinkpad_acpi/dytc_lapmode"
profiles_path = "/sys/firmware/acpi/platform_profile"
ec_write_required = false # Varies by model
[ecosystems.asus]
vendor_regex = "ASUSTeK.*"
thermal_policy_path = "/sys/devices/platform/asus-nb-wmi/throttle_thermal_policy"
policy_map = { Balanced = 0, Turbo = 1, Silent = 2 }
[ecosystems.hp]
vendor_regex = "HP"
msr_lock_register = "0x610"
msr_lock_bit = 63
fan_boost_path = "/sys/devices/platform/hp-wmi/hwmon/hwmon*/pwm1_enable"
[ecosystems.framework]
vendor_regex = "Framework"
ec_tool = "ectool"
optimization = "Direct-FFI-SMC"
# quirks: model quirks and fixes
[[quirks]]
model_regex = "XPS 13 93.*"
id = "dell_bd_prochot_fix"
issue = "False Positive 400MHz Lock"
monitor_msr = "0x1FC"
reset_bit = 0
action = "ClearBitOnSafeTemp"
[[quirks]]
model_regex = "ThinkPad T14.*"
id = "lenovo_lap_throttling"
issue = "11W TDP Lock in Lap Mode"
trigger_path = "/sys/devices/platform/thinkpad_acpi/dytc_lapmode"
trigger_value = "1"
action = "AbortOnLapMode"
[[quirks]]
model_regex = "ROG Zephyrus G14"
id = "asus_fan_hex_support"
issue = "Custom Hex Curve Interface"
target_path = "/sys/devices/platform/asus-nb-wmi/fan_curve"
format = "HexPair16"
[[quirks]]
model_regex = "Spectre x360"
id = "hp_rapl_lockout"
issue = "Hardware MSR Lockout"
action = "WarnUserMSRLocked"
# heuristic discovery
[discovery.sensors]
temp_labels = ["Package id 0", "Tdie", "Tctl", "CPU Temperature"]
fan_labels = ["CPU Fan", "GPU Fan", "System Fan"]
hwmon_priority = ["coretemp", "zenpower", "k10temp", "dell_smm"]
[discovery.actuators]
rapl_paths = ["intel-rapl:0", "package-0"]
amd_energy_paths = ["zenpower/energy1_input", "k10temp/energy1_input"]
governor_files = ["energy_performance_preference", "energy_performance_hint", "scaling_governor"]
# env health verification
[[preflight_checks]]
name = "MSR Write Access"
check_cmd = "grep -q 'msr.allow_writes=on' /proc/cmdline"
fail_help = "Add 'msr.allow_writes=on' to kernel parameters to allow power limit manipulation."
[[preflight_checks]]
name = "Kernel Lockdown Status"
check_cmd = "cat /sys/kernel/security/lockdown | grep -q '\\[none\\]'"
fail_help = "Kernel Lockdown is enabled. MMIO/MSR actuators are restricted by the Linux Security Module."

View File

@@ -1,8 +1,3 @@
//! Defines the command-line interface for `ember-tune`.
//!
//! This module uses the `clap` crate to define the CLI arguments, subcommands,
//! and help text.
use clap::{Parser, builder::styling};
use std::path::PathBuf;
@@ -12,28 +7,27 @@ const STYLES: styling::Styles = styling::Styles::styled()
.literal(styling::AnsiColor::Cyan.on_default().bold())
.placeholder(styling::AnsiColor::Cyan.on_default());
/// Scientifically-driven hardware power and thermal optimizer.
#[derive(Parser, Debug)]
#[command(
name = "ember-tune",
author = "Nils Pukropp <nils@narl.io>",
version = "1.1.0",
about = "ember-tune: A physically-grounded thermal and power optimizer for Linux.",
long_about = "ember-tune transforms manual laptop tuning into a rigorous, automated engineering workflow. \nIt executes a state machine to find the 'Physical Sweet Spot' of your specific hardware by measuring \nthe Silicon Knee, Thermal Resistance (Rθ), and Thermal Inertia, then outputs optimal \nconfigurations for tools like 'throttled' or 'i8kmon'.",
version = "1.0.0",
about = "ember-tune: Scientifically-driven hardware power and thermal optimizer.",
long_about = "ember-tune transforms manual laptop tuning into a rigorous, automated engineering workflow. \nIt executes a state machine to find the 'Physical Sweet Spot' of your specific hardware by measuring \nthe Silicon Knee, Thermal Resistance (Rθ), and Thermal Inertia, then outputs optimal \nconfigurations for tools like 'throttled' or 'ryzenadj'.",
styles = STYLES,
after_help = "EXAMPLES:\n sudo ember-tune # Run standard optimization\n sudo ember-tune --audit-only # Validate system requirements only\n sudo ember-tune --mock # Safe demo with fake hardware"
after_help = "EXAMPLES:\n sudo ember-tune run # Run standard optimization\n sudo ember-tune run --dry-run # Audit and simulate without changes\n sudo ember-tune run --mock # Safe demo with fake hardware"
)]
pub struct Cli {
/// Path to output the final `throttled.conf` file.
/// Path to output the optimized configuration file
#[arg(
short,
long,
value_name = "THROTTLED_PATH",
help = "Optional: Overrides the discovered or default path for throttled.conf."
default_value = "throttled.conf",
help = "Destination for the generated configuration file (e.g. /etc/throttled.conf)"
)]
pub config_out: Option<PathBuf>,
pub config_out: PathBuf,
/// Maximum safe temperature (Celsius) for the benchmark.
/// Maximum safe temperature (Celsius) for the benchmark
#[arg(
short,
long,
@@ -42,7 +36,7 @@ pub struct Cli {
)]
pub max_temp: f32,
/// Enable verbose debug logging.
/// Enable verbose debug logging
#[arg(
short,
long,
@@ -50,17 +44,17 @@ pub struct Cli {
)]
pub verbose: bool,
/// Use a mock hardware layer for safe testing.
/// Use a mock hardware layer for safe testing
#[arg(
long,
help = "Emulates hardware responses. Ideal for testing UI/Logic on unsupported systems."
)]
pub mock: bool,
/// Run pre-flight audit only, then exit.
/// Run pre-flight audit only
#[arg(
long,
help = "Validate system requirements and conflicts without starting the benchmark."
help = "Validate system requirements and conflict management without starting the benchmark."
)]
pub audit_only: bool,
}

View File

@@ -1,66 +1,41 @@
use std::path::Path;
use anyhow::Result;
pub struct I8kmonConfig {
pub t_ambient: f32,
pub t_max_fan: f32,
pub thermal_resistance_kw: f32,
}
pub struct I8kmonTranslator;
impl I8kmonTranslator {
pub fn generate_conf(config: &I8kmonConfig) -> String {
// Higher resistance means we need to start fans sooner.
// If R_theta is 2.5 K/W, it's quite high for a laptop.
// We'll scale the 'low' threshold based on R_theta.
let aggression_factor = (config.thermal_resistance_kw / 1.5).clamp(0.8, 1.5);
let t_off = config.t_ambient + 5.0;
let t_low_on = config.t_ambient + (10.0 / aggression_factor);
let t_low_off = t_low_on - 2.0;
let t_low_on = config.t_ambient + 12.0;
let t_low_off = config.t_ambient + 10.0;
let t_high_on = config.t_max_fan;
let t_high_off = t_high_on - 5.0;
let t_mid_on = (t_low_on + t_high_on) / 2.0;
let t_mid_off = t_mid_on - 3.0;
let t_high_off = config.t_max_fan - 5.0;
let t_low_trigger = (config.t_max_fan - 15.0).max(t_low_on + 2.0);
format!(
r#"# Generated by ember-tune Optimizer
# Grounded in physical thermal resistance (Rθ = {r_theta:.3} K/W)
# Grounded in physical thermal resistance
set config(gen_shadow) 1
set config(i8k_ignore_dmi) 1
# Fan states: {{state_low state_high temp_on temp_off}}
# 0: Off
set config(0) {{0 0 {t_low_on:.0} {t_off:.0}}}
# 1: Low
set config(1) {{1 1 {t_mid_on:.0} {t_low_off:.0}}}
# 2: High
set config(2) {{2 2 {t_high_on:.0} {t_mid_off:.0}}}
set config(1) {{1 1 {t_low_trigger:.0} {t_low_off:.0}}}
set config(2) {{2 2 {t_high_on:.0} {t_high_off:.0}}}
# Hysteresis reference (internal use)
# High Off Threshold: {t_high_off:.0}
# Speed thresholds
# Speed thresholds (approximate for XPS 9380)
set config(speed_low) 2500
set config(speed_high) 4500
"#,
r_theta = config.thermal_resistance_kw,
t_low_on = t_low_on,
t_off = t_off,
t_mid_on = t_mid_on,
t_low_trigger = t_low_trigger,
t_low_off = t_low_off,
t_high_on = t_high_on,
t_mid_off = t_mid_off
t_high_off = t_high_off
)
}
pub fn save(path: &Path, config: &I8kmonConfig) -> Result<()> {
let content = Self::generate_conf(config);
std::fs::write(path, content)?;
Ok(())
}
}

View File

@@ -1,6 +1,4 @@
use std::collections::HashSet;
use std::path::Path;
use anyhow::{Result};
pub struct ThrottledConfig {
pub pl1_limit: f32,
@@ -40,11 +38,13 @@ Trip_Temp_C: {trip:.0}
}
/// Merges benchmarked values into an existing throttled.conf content.
/// Preserves all other sections (like [UnderVOLT]), comments, and formatting.
pub fn merge_conf(existing_content: &str, config: &ThrottledConfig) -> String {
let mut sections = Vec::new();
let mut current_section_name = String::new();
let mut current_section_lines = Vec::new();
// 1. Parse into sections to ensure we only update keys in [BATTERY] and [AC]
for line in existing_content.lines() {
let trimmed = line.trim();
if trimmed.starts_with('[') && trimmed.ends_with(']') {
@@ -68,14 +68,17 @@ Trip_Temp_C: {trip:.0}
let mut result_lines = Vec::new();
let mut handled_sections = HashSet::new();
// 2. Process sections
for (name, mut lines) in sections {
if name == "BATTERY" || name == "AC" {
handled_sections.insert(name.clone());
let mut updated_keys = HashSet::new();
let mut new_lines = Vec::new();
for line in lines {
let mut updated = false;
let trimmed = line.trim();
if !trimmed.starts_with('#') && !trimmed.is_empty() {
if let Some((key, _)) = trimmed.split_once(':') {
let key = key.trim();
@@ -84,7 +87,11 @@ Trip_Temp_C: {trip:.0}
if let Some(colon_idx) = line.find(':') {
let prefix = &line[..colon_idx + 1];
let rest = &line[colon_idx + 1..];
let comment = if let Some(hash_idx) = rest.find('#') { &rest[hash_idx..] } else { "" };
let comment = if let Some(hash_idx) = rest.find('#') {
&rest[hash_idx..]
} else {
""
};
new_lines.push(format!("{} {}{}", prefix, new_value, comment));
updated_keys.insert(*target_key);
updated = true;
@@ -94,8 +101,12 @@ Trip_Temp_C: {trip:.0}
}
}
}
if !updated { new_lines.push(line); }
if !updated {
new_lines.push(line);
}
}
for (target_key, new_value) in &target_keys {
if !updated_keys.contains(*target_key) {
new_lines.push(format!("{}: {}", target_key, new_value));
@@ -106,6 +117,7 @@ Trip_Temp_C: {trip:.0}
result_lines.extend(lines);
}
// 3. Add missing sections if they didn't exist at all
for section_name in &["BATTERY", "AC"] {
if !handled_sections.contains(*section_name) {
result_lines.push(String::new());
@@ -115,13 +127,7 @@ Trip_Temp_C: {trip:.0}
}
}
}
result_lines.join("\n")
}
pub fn save(path: &Path, config: &ThrottledConfig) -> Result<()> {
let existing = if path.exists() { std::fs::read_to_string(path)? } else { String::new() };
let content = if existing.is_empty() { Self::generate_conf(config) } else { Self::merge_conf(&existing, config) };
std::fs::write(path, content)?;
Ok(())
}
}

View File

@@ -1,16 +1,7 @@
//! The core mathematics and physics engine for `ember-tune`.
//!
//! This module contains the `OptimizerEngine`, which is responsible for all
//! data smoothing, thermal resistance calculations, and the heuristic scoring
//! used to identify the "Silicon Knee".
use serde::{Serialize, Deserialize};
use std::collections::HashMap;
use std::path::PathBuf;
pub mod formatters;
/// A single, atomic data point captured during the benchmark.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ThermalPoint {
pub power_w: f32,
@@ -20,53 +11,33 @@ pub struct ThermalPoint {
pub throughput: f64,
}
/// A complete thermal profile containing all data points for a benchmark run.
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
pub struct ThermalProfile {
pub points: Vec<ThermalPoint>,
pub ambient_temp: f32,
}
/// The final, recommended parameters derived from the thermal benchmark.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationResult {
/// The full thermal profile used for calculations.
pub profile: ThermalProfile,
/// The power level (in Watts) where performance-per-watt plateaus.
pub silicon_knee_watts: f32,
/// The measured thermal resistance of the system (Kelvin/Watt).
pub thermal_resistance_kw: f32,
/// The recommended sustained power limit (PL1).
pub recommended_pl1: f32,
/// The recommended burst power limit (PL2).
pub recommended_pl2: f32,
/// The maximum temperature reached during the test.
pub max_temp_c: f32,
/// Indicates if the benchmark was aborted before completion.
pub is_partial: bool,
/// A map of configuration files that were written to.
pub config_paths: HashMap<String, PathBuf>,
}
/// Pure mathematics engine for thermal optimization.
///
/// Contains no hardware I/O and operates solely on the collected [ThermalProfile].
pub struct OptimizerEngine {
/// The size of the sliding window for the `smooth` function.
window_size: usize,
}
impl OptimizerEngine {
/// Creates a new `OptimizerEngine`.
pub fn new(window_size: usize) -> Self {
Self { window_size }
}
/// Applies a simple moving average (SMA) filter with outlier rejection.
///
/// This function smooths noisy sensor data. It rejects any value in the
/// window that is more than 20.0 units away from the window's average
/// before calculating the final smoothed value.
/// Applies a simple moving average (SMA) filter to a stream of values.
pub fn smooth(&self, data: &[f32]) -> Vec<f32> {
if data.is_empty() { return vec![]; }
let mut smoothed = Vec::with_capacity(data.len());
@@ -74,35 +45,23 @@ impl OptimizerEngine {
for i in 0..data.len() {
let start = if i < self.window_size { 0 } else { i - self.window_size + 1 };
let end = i + 1;
let window = &data[start..end];
let avg: f32 = window.iter().sum::<f32>() / window.len() as f32;
let filtered: Vec<f32> = window.iter()
.filter(|&&v| (v - avg).abs() < 20.0) // Reject spikes > 20 units
.cloned().collect();
if filtered.is_empty() {
smoothed.push(avg);
} else {
smoothed.push(filtered.iter().sum::<f32>() / filtered.len() as f32);
}
let sum: f32 = data[start..end].iter().sum();
smoothed.push(sum / (end - start) as f32);
}
smoothed
}
/// Calculates Thermal Resistance: R_theta = (T_core - T_ambient) / P_package.
///
/// This function uses the data point with the highest power draw to ensure
/// the calculation reflects a system under maximum thermal load.
/// Calculates Thermal Resistance: R_theta = (T_core - T_ambient) / P_package
pub fn calculate_thermal_resistance(&self, profile: &ThermalProfile) -> f32 {
profile.points.iter()
.filter(|p| p.power_w > 1.0 && p.temp_c > 30.0) // Filter invalid data
.max_by(|a, b| a.power_w.partial_cmp(&b.power_w).unwrap_or(std::cmp::Ordering::Equal))
.map(|p| (p.temp_c - profile.ambient_temp) / p.power_w)
.map(|p| {
if p.power_w < 1.0 { 0.0 }
else { (p.temp_c - profile.ambient_temp) / p.power_w }
})
.unwrap_or(0.0)
}
/// Returns the maximum temperature recorded in the profile.
pub fn get_max_temp(&self, profile: &ThermalProfile) -> f32 {
profile.points.iter()
.map(|p| p.temp_c)
@@ -110,27 +69,14 @@ impl OptimizerEngine {
.unwrap_or(0.0)
}
/// Finds the "Silicon Knee" - the point where performance-per-watt (efficiency)
/// Finds the "Silicon Knee" - the point where performance per watt (efficiency)
/// starts to diminish significantly and thermal density spikes.
///
/// This heuristic scoring model balances several factors:
/// 1. **Efficiency Drop:** How quickly does performance-per-watt decrease as power increases?
/// 2. **Thermal Acceleration:** How quickly does temperature rise per additional Watt?
/// 3. **Throttling Penalty:** A large penalty is applied if absolute performance drops, indicating a thermal wall.
///
/// The "Knee" is the power level with the highest score, representing the optimal
/// balance before thermal saturation causes diminishing returns.
pub fn find_silicon_knee(&self, profile: &ThermalProfile) -> f32 {
let valid_points: Vec<_> = profile.points.iter()
.filter(|p| p.power_w > 5.0 && p.temp_c > 40.0) // Filter idle/noise
.cloned()
.collect();
if valid_points.len() < 3 {
if profile.points.len() < 3 {
return profile.points.last().map(|p| p.power_w).unwrap_or(15.0);
}
let mut points = valid_points;
let mut points = profile.points.clone();
points.sort_by(|a, b| a.power_w.partial_cmp(&b.power_w).unwrap_or(std::cmp::Ordering::Equal));
let mut best_pl = points[0].power_w;
@@ -142,36 +88,35 @@ impl OptimizerEngine {
let curr = &points[i];
let next = &points[i + 1];
// 1. Efficiency Metric (Throughput per Watt or Freq per Watt)
// 1. Efficiency Metric (Throughput per Watt)
// If throughput is 0 (unsupported), fallback to Frequency per Watt
let efficiency_curr = if curr.throughput > 0.0 {
curr.throughput as f32 / curr.power_w.max(1.0)
curr.throughput as f32 / curr.power_w.max(0.1)
} else {
curr.freq_mhz / curr.power_w.max(1.0)
curr.freq_mhz / curr.power_w.max(0.1)
};
let efficiency_next = if next.throughput > 0.0 {
next.throughput as f32 / next.power_w.max(1.0)
next.throughput as f32 / next.power_w.max(0.1)
} else {
next.freq_mhz / next.power_w.max(1.0)
next.freq_mhz / next.power_w.max(0.1)
};
let p_delta = (next.power_w - curr.power_w).max(0.5);
let efficiency_drop = (efficiency_curr - efficiency_next) / p_delta;
// Diminishing returns: how much efficiency drops per additional watt
let efficiency_drop = (efficiency_curr - efficiency_next) / (next.power_w - curr.power_w).max(0.1);
// 2. Thermal Acceleration (d2T/dW2)
let p_delta_prev = (curr.power_w - prev.power_w).max(0.5);
let p_delta_next = (next.power_w - curr.power_w).max(0.5);
let dt_dw_prev = (curr.temp_c - prev.temp_c) / (curr.power_w - prev.power_w).max(0.1);
let dt_dw_next = (next.temp_c - curr.temp_c) / (next.power_w - curr.power_w).max(0.1);
let temp_accel = (dt_dw_next - dt_dw_prev) / (next.power_w - prev.power_w).max(0.1);
let dt_dw_prev = (curr.temp_c - prev.temp_c) / p_delta_prev;
let dt_dw_next = (next.temp_c - curr.temp_c) / p_delta_next;
let p_total_delta = (next.power_w - prev.power_w).max(1.0);
let temp_accel = (dt_dw_next - dt_dw_prev) / p_total_delta;
// 3. Wall Detection (Any drop in absolute performance is a hard wall)
// 3. Wall Detection (Any drop in absolute frequency/throughput is a hard wall)
let is_throttling = next.freq_mhz < curr.freq_mhz || (next.throughput > 0.0 && next.throughput < curr.throughput);
let penalty = if is_throttling { 5000.0 } else { 0.0 };
// Heuristic scoring:
// - Higher score is "Better" (The Knee is the peak of this curve)
// - We want high efficiency (low drop) and low thermal acceleration.
let score = (efficiency_curr * 10.0) - (efficiency_drop * 50.0) - (temp_accel * 20.0) - penalty;
if score > max_score {

View File

@@ -1,14 +0,0 @@
//! # ember-tune: A physically-grounded thermal and power optimizer for Linux.
//!
//! This crate provides the core library for `ember-tune`, a tool that
//! scientifically determines the optimal power and thermal settings for laptops
//! by measuring physical properties like Thermal Resistance and the "Silicon Knee".
pub mod mediator;
pub mod sal;
pub mod load;
pub mod orchestrator;
pub mod ui;
pub mod engine;
pub mod cli;
pub mod sys;

View File

@@ -1,35 +1,16 @@
//! Defines the `Workload` trait for generating synthetic CPU/GPU load.
use anyhow::Result;
use std::process::Child;
use std::time::{Duration, Instant};
use std::thread;
/// A trait for objects that can generate a measurable system load.
pub trait Workload: Send + Sync {
/// Starts the workload with the specified number of threads and load percentage.
///
/// # Errors
/// Returns an error if the underlying stress test process fails to spawn.
pub trait Workload {
/// Starts the workload with specified threads and load percentage.
fn start(&mut self, threads: usize, load_percent: usize) -> Result<()>;
/// Stops the workload gracefully.
///
/// # Errors
/// This method should aim to not fail, but may return an error if
/// forcefully killing the child process fails.
/// Stops the workload.
fn stop(&mut self) -> Result<()>;
/// Returns the current throughput of the workload (e.g., ops/sec).
///
/// # Errors
/// Returns an error if throughput cannot be measured.
/// Returns the current throughput (e.g., ops/sec).
fn get_throughput(&self) -> Result<f64>;
}
/// An implementation of `Workload` that uses the `stress-ng` utility.
pub struct StressNg {
child: Option<Child>,
child: Option<std::process::Child>,
}
impl StressNg {
@@ -40,7 +21,7 @@ impl StressNg {
impl Workload for StressNg {
fn start(&mut self, threads: usize, load_percent: usize) -> Result<()> {
self.stop()?;
self.stop()?; // Ensure any previous instance is stopped
let child = std::process::Command::new("stress-ng")
.args([
@@ -56,34 +37,15 @@ impl Workload for StressNg {
fn stop(&mut self) -> Result<()> {
if let Some(mut child) = self.child.take() {
#[cfg(unix)]
{
use libc::{kill, SIGTERM};
unsafe { kill(child.id() as i32, SIGTERM); }
}
let start = Instant::now();
let mut exited = false;
while start.elapsed() < Duration::from_secs(2) {
if let Ok(Some(_)) = child.try_wait() {
exited = true;
break;
}
thread::sleep(Duration::from_millis(100));
}
if !exited {
let _ = child.kill();
let _ = child.wait();
}
let _ = child.kill();
let _ = child.wait();
}
Ok(())
}
/// Reports the workload's current throughput (e.g., ops/sec).
///
/// Placeholder implementation: `stress-ng` output is not parsed, so the
/// reported value is always `0.0`.
fn get_throughput(&self) -> Result<f64> {
    // A real implementation would parse stress-ng's temporary results, or
    // switch to a workload that exposes live throughput.
    let placeholder_throughput: f64 = 0.0;
    Ok(placeholder_throughput)
}
}

View File

@@ -1,3 +1,11 @@
mod mediator;
mod sal;
mod load;
mod orchestrator;
mod ui;
mod engine;
mod cli;
use miette::{Result, IntoDiagnostic, Diagnostic, Report};
use thiserror::Error;
use std::sync::mpsc;
@@ -17,16 +25,15 @@ use crossterm::{
};
use ratatui::{backend::CrosstermBackend, Terminal};
use ember_tune_rs::cli::Cli;
use ember_tune_rs::mediator::{TelemetryState, UiCommand, BenchmarkPhase};
use ember_tune_rs::sal::traits::{AuditError, PlatformSal};
use ember_tune_rs::sal::mock::MockSal;
use ember_tune_rs::sal::heuristic::engine::HeuristicEngine;
use ember_tune_rs::sal::heuristic::discovery::SystemFactSheet;
use ember_tune_rs::load::{StressNg};
use ember_tune_rs::orchestrator::BenchmarkOrchestrator;
use ember_tune_rs::ui::dashboard::{draw_dashboard, DashboardState};
use ember_tune_rs::engine::OptimizationResult;
use cli::Cli;
use mediator::{TelemetryState, UiCommand, BenchmarkPhase};
use sal::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError};
use sal::mock::{MockAuditor, MockGuard, MockSensorBus, MockActuatorBus, MockWatchdog};
use sal::dell_xps_9380::DellXps9380Sal;
use load::StressNg;
use orchestrator::BenchmarkOrchestrator;
use ui::dashboard::{draw_dashboard, DashboardState};
use engine::OptimizationResult;
use owo_colors::OwoColorize;
#[derive(Error, Diagnostic, Debug)]
@@ -60,10 +67,9 @@ fn print_summary_report(result: &OptimizationResult) {
println!("│ Burst (PL2): {:>5.1} W │", result.recommended_pl2);
println!("│ │");
println!("{}", "Apply these to your system:".bold().magenta());
for (id, path) in &result.config_paths {
println!("{:<10}: {:<34}", id, path.display());
}
println!("{}", "Apply to /etc/throttled.conf:".bold().magenta());
println!("│ PL1_Tdp_W: {:<5.1}", result.recommended_pl1);
println!("PL2_Tdp_W: {:<5.1}", result.recommended_pl2);
println!("╰──────────────────────────────────────────────────╯");
println!();
}
@@ -101,25 +107,27 @@ fn main() -> Result<()> {
info!("ember-tune starting with args: {:?}", args);
let ctx = ember_tune_rs::sal::traits::EnvironmentCtx::production();
// 2. Platform Detection & Audit
let (sal_box, facts): (Box<dyn PlatformSal>, SystemFactSheet) = if args.mock {
(Box::new(MockSal::new()), SystemFactSheet::default())
// 2. Pre-flight Audit (Before TUI)
let auditor: Arc<dyn PreflightAuditor> = if args.mock {
Arc::new(MockAuditor)
} else {
HeuristicEngine::detect_and_build(ctx)?
match DellXps9380Sal::init() {
Ok(sal) => Arc::new(sal),
Err(e) => return Err(miette::miette!("Failed to initialize Dell SAL: {}", e)),
}
};
let sal: Arc<dyn PlatformSal> = sal_box.into();
println!("{}", console::style("─── Pre-flight System Audit ───").bold().cyan());
let mut audit_failures = Vec::new();
for step in sal.audit() {
for step in auditor.audit() {
print!(" Checking {:<40} ", step.description);
io::Write::flush(&mut io::stdout()).into_diagnostic()?;
match step.outcome {
Ok(_) => { println!("{}", console::style("[✓]").green()); }
Ok(_) => {
println!("{}", console::style("[✓]").green());
}
Err(e) => {
println!("{}", console::style("[✗]").red());
audit_failures.push(e);
@@ -132,8 +140,10 @@ fn main() -> Result<()> {
return Err(Report::new(MultiAuditError { errors: audit_failures }));
}
println!("{}", console::style("✓ All pre-flight audits passed.").green().bold());
thread::sleep(Duration::from_secs(1));
if args.audit_only {
println!("{}", console::style("✓ All pre-flight audits passed.").green().bold());
return Ok(());
}
@@ -141,9 +151,8 @@ fn main() -> Result<()> {
enable_raw_mode().into_diagnostic()?;
let mut stdout = io::stdout();
execute!(stdout, EnterAlternateScreen).into_diagnostic()?;
let backend_stdout = io::stdout();
let backend_term = CrosstermBackend::new(backend_stdout);
let mut terminal = Terminal::new(backend_term).into_diagnostic()?;
let backend = CrosstermBackend::new(stdout);
let mut terminal = Terminal::new(backend).into_diagnostic()?;
// 4. State & Communication Setup
let running = Arc::new(AtomicBool::new(true));
@@ -152,24 +161,50 @@ fn main() -> Result<()> {
let (telemetry_tx, telemetry_rx) = mpsc::channel::<TelemetryState>();
let (command_tx, command_rx) = mpsc::channel::<UiCommand>();
let c_tx = command_tx.clone();
ctrlc::set_handler(move || {
let _ = c_tx.send(UiCommand::Abort);
r.store(false, Ordering::SeqCst);
}).expect("Error setting Ctrl-C handler");
// 5. Spawn Backend Orchestrator
let sal_backend = sal.clone();
let facts_backend = facts.clone();
let is_mock = args.mock;
let b_auditor = auditor.clone();
let backend_handle = thread::spawn(move || {
let (guard, sensors, actuators, watchdog): (
Box<dyn EnvironmentGuard>,
Box<dyn SensorBus>,
Box<dyn ActuatorBus>,
Box<dyn HardwareWatchdog>,
) = if is_mock {
(
Box::new(MockGuard::new()),
Box::new(MockSensorBus),
Box::new(MockActuatorBus),
Box::new(MockWatchdog),
)
} else {
// Re-init or share the SAL
let sal = Arc::new(DellXps9380Sal::init().expect("Failed to init Dell SAL in backend"));
(
Box::new(sal::dell_xps_9380::DellXps9380Guard::new()),
Box::new(sal.clone() as Arc<dyn SensorBus>),
Box::new(sal.clone() as Arc<dyn ActuatorBus>),
Box::new(sal as Arc<dyn HardwareWatchdog>),
)
};
let workload = Box::new(StressNg::new());
let mut orchestrator = BenchmarkOrchestrator::new(
sal_backend,
facts_backend,
Box::new(b_auditor),
guard,
sensors,
actuators,
watchdog,
workload,
telemetry_tx,
command_rx,
);
orchestrator.run()
});
@@ -193,8 +228,6 @@ fn main() -> Result<()> {
history_mhz: Vec::new(),
log_event: None,
metadata: std::collections::HashMap::new(),
is_emergency: false,
emergency_reason: None,
};
let tick_rate = Duration::from_millis(100);
@@ -223,7 +256,7 @@ fn main() -> Result<()> {
while let Ok(new_state) = telemetry_rx.try_recv() {
if let Some(log) = &new_state.log_event {
ui_state.add_log(log.clone());
ui_state.logs.push(log.clone());
debug!("Backend Log: {}", log);
} else {
ui_state.update(&new_state);
@@ -231,38 +264,29 @@ fn main() -> Result<()> {
}
}
if last_tick.elapsed() >= tick_rate { last_tick = Instant::now(); }
if backend_handle.is_finished() { break; }
if last_tick.elapsed() >= tick_rate {
last_tick = Instant::now();
}
if backend_handle.is_finished() {
thread::sleep(Duration::from_secs(1));
break;
}
}
// 7. Terminal Restoration
let _ = disable_raw_mode();
let _ = execute!(terminal.backend_mut(), LeaveAlternateScreen);
let _ = terminal.show_cursor();
disable_raw_mode().into_diagnostic()?;
execute!(terminal.backend_mut(), LeaveAlternateScreen).into_diagnostic()?;
terminal.show_cursor().into_diagnostic()?;
// 8. Final Report & Hardware Restoration
let join_res = backend_handle.join();
// Explicit hardware restoration
info!("Restoring hardware state...");
if let Err(e) = sal.restore() {
error!("Failed to restore hardware state: {}", e);
}
match join_res {
// 8. Final Report (Post-TUI)
match backend_handle.join() {
Ok(Ok(result)) => {
print_summary_report(&result);
}
Ok(Err(e)) => {
let err_str = e.to_string();
if err_str == "ABORTED" {
println!("{}", "Benchmark aborted by user.".yellow());
} else if err_str.contains("EMERGENCY_ABORT") {
println!();
println!("{}", " 🚨 EMERGENCY ABORT TRIGGERED ".bold().on_red().white());
println!("Reason: {}", err_str.replace("EMERGENCY_ABORT: ", "").red().bold());
println!("{}", "Hardware state has been restored to safe defaults.".yellow());
println!();
if e.to_string() == "ABORTED" {
println!("{}", "Benchmark aborted by user. No summary available.".yellow());
} else {
error!("Orchestrator encountered error: {}", e);
eprintln!("{} {}", "Error:".red().bold(), e);

View File

@@ -1,13 +1,5 @@
//! Defines the data structures used for communication between the frontend and backend.
//!
//! This module acts as the "Mediator" in the Mediator Pattern, providing the
//! message-passing interface for the MPSC channels that connect the TUI thread
//! with the `BenchmarkOrchestrator` thread.
use serde::{Serialize, Deserialize};
use std::collections::HashMap;
/// Defines the current high-level phase of the benchmark.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BenchmarkPhase {
Auditing,
@@ -17,41 +9,42 @@ pub enum BenchmarkPhase {
Finalizing,
}
/// A complete snapshot of system telemetry at a single point in time.
/// This struct is sent from the backend to the frontend on every tick.
#[derive(Debug, Clone, Serialize, Deserialize)]
impl Default for BenchmarkPhase {
fn default() -> Self {
Self::Auditing
}
}
#[derive(Debug, Clone)]
pub struct TelemetryState {
// --- Static System Info ---
// --- Static Info ---
pub cpu_model: String,
pub total_ram_gb: u64,
// --- Dynamic Metrics ---
// --- Dynamic States ---
pub tick: u64,
pub cpu_temp: f32,
pub power_w: f32,
pub current_freq: f32,
pub fans: Vec<u32>,
pub phase: BenchmarkPhase,
pub governor: String,
pub pl1_limit: f32,
pub pl2_limit: f32,
pub fan_tier: String,
pub phase: BenchmarkPhase,
// --- High-res History ---
// --- Instantaneous Metrics ---
pub cpu_temp: f32,
pub power_w: f32,
pub current_freq: f32,
pub fans: Vec<u32>,
// --- High-res History (Last 60s @ 500ms = 120 points) ---
pub history_watts: Vec<f32>,
pub history_temp: Vec<f32>,
pub history_mhz: Vec<f32>,
// --- Events & Metadata ---
pub log_event: Option<String>,
pub metadata: HashMap<String, String>,
pub is_emergency: bool,
pub emergency_reason: Option<String>,
pub metadata: std::collections::HashMap<String, String>,
}
/// Commands sent from the frontend (UI) to the backend (`BenchmarkOrchestrator`).
///
/// Values travel over the `mpsc::channel::<UiCommand>()` created in `main`;
/// the orchestrator polls the receiving end with `try_recv` in `check_abort`.
#[derive(Debug, Clone)]
pub enum UiCommand {
/// Signals the orchestrator to gracefully abort the benchmark.
/// The Ctrl-C handler installed in `main` sends this variant.
Abort,
}

View File

@@ -1,69 +1,45 @@
//! The central state machine responsible for coordinating the thermal benchmark.
//!
//! It manages hardware interactions through the [PlatformSal], generates stress
//! using a [Workload], and feeds telemetry to the frontend via MPSC channels.
use anyhow::{Result, Context};
use std::sync::mpsc;
use std::time::{Duration, Instant};
use std::thread;
use std::collections::VecDeque;
use sysinfo::System;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;
use crate::sal::traits::{PlatformSal, SafetyStatus};
use crate::sal::heuristic::discovery::SystemFactSheet;
use crate::sal::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog};
use crate::load::Workload;
use crate::mediator::{TelemetryState, UiCommand, BenchmarkPhase};
use crate::engine::{OptimizerEngine, ThermalProfile, ThermalPoint, OptimizationResult};
/// The central state machine responsible for coordinating the thermal benchmark.
///
/// It manages hardware interactions through the [PlatformSal], generates stress
/// using a [Workload], and feeds telemetry to the frontend via MPSC channels.
pub struct BenchmarkOrchestrator {
/// Injected hardware abstraction layer.
sal: Arc<dyn PlatformSal>,
/// Discovered system facts and paths.
facts: SystemFactSheet,
/// Heat generation workload.
auditor: Box<dyn PreflightAuditor>,
guard: Box<dyn EnvironmentGuard>,
sensors: Box<dyn SensorBus>,
actuators: Box<dyn ActuatorBus>,
watchdog: Box<dyn HardwareWatchdog>,
workload: Box<dyn Workload>,
/// Channel for sending telemetry updates to the UI.
telemetry_tx: mpsc::Sender<TelemetryState>,
/// Channel for receiving commands from the UI.
command_rx: mpsc::Receiver<UiCommand>,
/// Current phase of the benchmark.
phase: BenchmarkPhase,
/// Accumulated thermal data points.
profile: ThermalProfile,
/// Mathematics engine for data smoothing and optimization.
engine: OptimizerEngine,
/// Sliding window of power readings (Watts).
// --- History Buffers (120 points for 60s @ 500ms) ---
history_watts: VecDeque<f32>,
/// Sliding window of temperature readings (Celsius).
history_temp: VecDeque<f32>,
/// Sliding window of CPU frequency (MHz).
history_mhz: VecDeque<f32>,
/// Detected CPU model string.
// --- Static Info ---
cpu_model: String,
/// Total system RAM in Gigabytes.
total_ram_gb: u64,
/// Atomic flag indicating a safety-triggered abort.
emergency_abort: Arc<AtomicBool>,
/// Human-readable reason for the emergency abort.
emergency_reason: Arc<Mutex<Option<String>>>,
}
impl BenchmarkOrchestrator {
/// Creates a new orchestrator instance with injected dependencies.
pub fn new(
sal: Arc<dyn PlatformSal>,
facts: SystemFactSheet,
auditor: Box<dyn PreflightAuditor>,
guard: Box<dyn EnvironmentGuard>,
sensors: Box<dyn SensorBus>,
actuators: Box<dyn ActuatorBus>,
watchdog: Box<dyn HardwareWatchdog>,
workload: Box<dyn Workload>,
telemetry_tx: mpsc::Sender<TelemetryState>,
command_rx: mpsc::Receiver<UiCommand>,
@@ -77,8 +53,11 @@ impl BenchmarkOrchestrator {
let total_ram_gb = sys.total_memory() / 1024 / 1024 / 1024;
Self {
sal,
facts,
auditor,
guard,
sensors,
actuators,
watchdog,
workload,
telemetry_tx,
command_rx,
@@ -90,89 +69,75 @@ impl BenchmarkOrchestrator {
history_mhz: VecDeque::with_capacity(120),
cpu_model,
total_ram_gb,
emergency_abort: Arc::new(AtomicBool::new(false)),
emergency_reason: Arc::new(Mutex::new(None)),
}
}
/// Executes the full benchmark sequence.
///
/// This method guarantees that [crate::sal::traits::EnvironmentGuard::restore] and [Workload::stop]
/// are called regardless of whether the benchmark succeeds or fails.
pub fn run(&mut self) -> Result<OptimizationResult> {
self.log("Starting ember-tune Benchmark Sequence.")?;
let _watchdog_handle = self.spawn_watchdog_monitor();
let result = self.execute_benchmark();
self.log("Benchmark sequence finished. Restoring hardware defaults...")?;
let _ = self.workload.stop();
if let Err(e) = self.sal.restore() {
anyhow::bail!("CRITICAL: Failed to restore hardware state: {}", e);
}
self.log("✓ Hardware state restored.")?;
result
}
/// Internal execution logic for the benchmark phases.
fn execute_benchmark(&mut self) -> Result<OptimizationResult> {
let bench_cfg = self.facts.bench_config.clone().context("Benchmarking config missing in facts")?;
// Phase 1: Audit & Baseline
self.phase = BenchmarkPhase::Auditing;
for step in self.sal.audit() {
for step in self.auditor.audit() {
if let Err(e) = step.outcome {
return Err(anyhow::anyhow!("Audit failed ({}): {:?}", step.description, e));
}
}
self.log("Suppressing background services (tlp, thermald)...")?;
self.sal.suppress().context("Failed to suppress background services")?;
self.guard.suppress().context("Failed to suppress background services")?;
// Baseline (Idle Calibration)
self.phase = BenchmarkPhase::IdleCalibration;
self.log(&format!("Phase 1: Recording Idle Baseline ({}s)...", bench_cfg.idle_duration_s))?;
self.sal.set_fan_mode("auto")?;
self.log("Phase 1: Recording Idle Baseline (10s)...")?;
self.actuators.set_fan_mode("auto")?; // Use auto for idle
let mut idle_temps = Vec::new();
let start = Instant::now();
let mut tick = 0;
while start.elapsed() < Duration::from_secs(bench_cfg.idle_duration_s) {
while start.elapsed() < Duration::from_secs(10) {
self.check_abort()?;
self.send_telemetry(tick)?;
idle_temps.push(self.sal.get_temp().unwrap_or(0.0));
idle_temps.push(self.sensors.get_temp().unwrap_or(0.0));
tick += 1;
thread::sleep(Duration::from_millis(500));
}
self.profile.ambient_temp = self.engine.smooth(&idle_temps).last().cloned().unwrap_or(0.0);
self.log(&format!("✓ Idle Baseline: {:.1}°C", self.profile.ambient_temp))?;
// Phase 2: Stress Stepping
self.phase = BenchmarkPhase::StressTesting;
self.log("Phase 2: Starting Synthetic Stress Matrix.")?;
self.sal.set_fan_mode("max")?;
self.actuators.set_fan_mode("max")?; // Lock fans for consistent resistance
let steps = bench_cfg.power_steps_watts.clone();
for &pl in &steps {
let power_steps = [15.0, 20.0, 25.0, 30.0, 35.0];
for &pl in &power_steps {
self.log(&format!("Testing PL1 = {:.0}W...", pl))?;
self.sal.set_sustained_power_limit(pl)?;
self.sal.set_burst_power_limit(pl + 5.0)?;
self.actuators.set_sustained_power_limit(pl)?;
self.actuators.set_burst_power_limit(pl + 5.0)?;
self.workload.start(num_cpus::get(), 100)?;
// Wait for equilibrium: Hybrid approach (15s min, 45s max)
let step_start = Instant::now();
let mut step_temps = VecDeque::with_capacity(30);
let mut step_temps = VecDeque::with_capacity(30); // Last 15s @ 500ms
while step_start.elapsed() < Duration::from_secs(bench_cfg.stress_duration_max_s) {
while step_start.elapsed() < Duration::from_secs(45) {
self.check_abort()?;
if self.watchdog.check_emergency()? {
self.log("⚠ EMERGENCY ABORT: Watchdog triggered!")?;
self.workload.stop()?;
return Err(anyhow::anyhow!("Hardware Watchdog Triggered"));
}
let t = self.sal.get_temp().unwrap_or(0.0);
let t = self.sensors.get_temp().unwrap_or(0.0);
step_temps.push_back(t);
if step_temps.len() > 10 { step_temps.pop_front(); }
self.send_telemetry(tick)?;
tick += 1;
if step_start.elapsed() > Duration::from_secs(bench_cfg.stress_duration_min_s) && step_temps.len() == 10 {
// Check for stability: Range < 0.5C over last 5s (10 ticks)
if step_start.elapsed() > Duration::from_secs(15) && step_temps.len() == 10 {
let min = step_temps.iter().fold(f32::MAX, |a, &b| a.min(b));
let max = step_temps.iter().fold(f32::MIN, |a, &b| a.max(b));
if (max - min) < 0.5 {
@@ -183,10 +148,11 @@ impl BenchmarkOrchestrator {
thread::sleep(Duration::from_millis(500));
}
let avg_p = self.sal.get_power_w().unwrap_or(0.0);
let avg_t = self.sal.get_temp().unwrap_or(0.0);
let avg_f = self.sal.get_freq_mhz().unwrap_or(0.0);
let fans = self.sal.get_fan_rpms().unwrap_or_default();
// Record data point
let avg_p = self.sensors.get_power_w().unwrap_or(0.0);
let avg_t = self.sensors.get_temp().unwrap_or(0.0);
let avg_f = self.sensors.get_freq_mhz().unwrap_or(0.0);
let fans = self.sensors.get_fan_rpms().unwrap_or_default();
let primary_fan = fans.first().cloned().unwrap_or(0);
let tp = self.workload.get_throughput().unwrap_or(0.0);
@@ -199,20 +165,22 @@ impl BenchmarkOrchestrator {
});
self.workload.stop()?;
self.log(&format!(" Step complete. Cooling down for {}s...", bench_cfg.cool_down_s))?;
thread::sleep(Duration::from_secs(bench_cfg.cool_down_s));
self.log(" Step complete. Cooling down for 5s...")?;
thread::sleep(Duration::from_secs(5));
}
// Phase 4: Physical Modeling
self.phase = BenchmarkPhase::PhysicalModeling;
self.log("Phase 3: Calculating Silicon Physical Sweet Spot...")?;
let mut res = self.generate_result(false);
let res = self.generate_result(false);
self.log(&format!("✓ Thermal Resistance (Rθ): {:.3} K/W", res.thermal_resistance_kw))?;
self.log(&format!("✓ Silicon Knee Found: {:.1} W", res.silicon_knee_watts))?;
thread::sleep(Duration::from_secs(3));
// Phase 5: Finalizing
self.phase = BenchmarkPhase::Finalizing;
self.log("Benchmark sequence complete. Generating configurations...")?;
@@ -222,79 +190,32 @@ impl BenchmarkOrchestrator {
trip_temp: res.max_temp_c.max(95.0),
};
if let Some(throttled_path) = self.facts.paths.configs.get("throttled") {
crate::engine::formatters::throttled::ThrottledTranslator::save(throttled_path, &config)?;
self.log(&format!("✓ Saved '{}' (merged).", throttled_path.display()))?;
res.config_paths.insert("throttled".to_string(), throttled_path.clone());
}
// 1. Throttled (Merged if exists)
let throttled_path = "throttled.conf";
let existing_throttled = std::fs::read_to_string(throttled_path).unwrap_or_default();
let throttled_content = if existing_throttled.is_empty() {
crate::engine::formatters::throttled::ThrottledTranslator::generate_conf(&config)
} else {
crate::engine::formatters::throttled::ThrottledTranslator::merge_conf(&existing_throttled, &config)
};
std::fs::write(throttled_path, throttled_content)?;
self.log("✓ Saved 'throttled.conf' (merged).")?;
if let Some(i8k_path) = self.facts.paths.configs.get("i8kmon") {
let i8k_config = crate::engine::formatters::i8kmon::I8kmonConfig {
t_ambient: self.profile.ambient_temp,
t_max_fan: res.max_temp_c - 5.0,
thermal_resistance_kw: res.thermal_resistance_kw,
};
crate::engine::formatters::i8kmon::I8kmonTranslator::save(i8k_path, &i8k_config)?;
self.log(&format!("✓ Saved '{}'.", i8k_path.display()))?;
res.config_paths.insert("i8kmon".to_string(), i8k_path.clone());
}
// 2. i8kmon
let i8k_config = crate::engine::formatters::i8kmon::I8kmonConfig {
t_ambient: self.profile.ambient_temp,
t_max_fan: res.max_temp_c - 5.0, // Aim to hit max fan before max temp
};
let i8k_content = crate::engine::formatters::i8kmon::I8kmonTranslator::generate_conf(&i8k_config);
std::fs::write("i8kmon.conf", i8k_content)?;
self.log("✓ Saved 'i8kmon.conf'.")?;
self.guard.restore()?;
self.log("✓ Environment restored.")?;
Ok(res)
}
/// Starts a background thread that polls the SAL safety status, sleeping
/// 100ms between polls.
///
/// The thread stops as soon as the shared `emergency_abort` flag is set.
/// It raises that flag itself, after recording a human-readable reason in
/// `emergency_reason`, when the SAL reports `SafetyStatus::EmergencyAbort`
/// or when reading the safety status fails. `Warning` and `Critical`
/// statuses are forwarded to the UI as a `WATCHDOG: ...` log event; a
/// failed send on the telemetry channel is ignored.
///
/// # Panics
/// The spawned thread panics if the `emergency_reason` mutex is poisoned.
fn spawn_watchdog_monitor(&self) -> thread::JoinHandle<()> {
    let abort_flag = self.emergency_abort.clone();
    let reason_slot = self.emergency_reason.clone();
    let platform = self.sal.clone();
    let telemetry = self.telemetry_tx.clone();

    thread::spawn(move || loop {
        if abort_flag.load(Ordering::SeqCst) {
            break;
        }

        // Reduce each poll to "nothing fatal" (None) or the reason to abort.
        let fatal_reason = match platform.get_safety_status() {
            Ok(SafetyStatus::Nominal) => None,
            Ok(SafetyStatus::Warning(msg)) | Ok(SafetyStatus::Critical(msg)) => {
                // Log-only telemetry frame: every metric is left at its
                // zero/empty value and only `log_event` carries data.
                let event = TelemetryState {
                    cpu_model: String::new(),
                    total_ram_gb: 0,
                    tick: 0,
                    cpu_temp: 0.0,
                    power_w: 0.0,
                    current_freq: 0.0,
                    fans: Vec::new(),
                    governor: String::new(),
                    pl1_limit: 0.0,
                    pl2_limit: 0.0,
                    fan_tier: String::new(),
                    phase: BenchmarkPhase::StressTesting,
                    history_watts: Vec::new(),
                    history_temp: Vec::new(),
                    history_mhz: Vec::new(),
                    log_event: Some(format!("WATCHDOG: {}", msg)),
                    metadata: std::collections::HashMap::new(),
                    is_emergency: false,
                    emergency_reason: None,
                };
                let _ = telemetry.send(event);
                None
            }
            Ok(SafetyStatus::EmergencyAbort(reason)) => Some(reason),
            Err(e) => Some(format!("Watchdog Sensor Failure: {}", e)),
        };

        if let Some(reason) = fatal_reason {
            // Publish the reason before raising the flag so a reader that
            // observes the flag can also find the reason.
            *reason_slot.lock().unwrap() = Some(reason);
            abort_flag.store(true, Ordering::SeqCst);
            break;
        }

        thread::sleep(Duration::from_millis(100));
    })
}
/// Generates the final [OptimizationResult] based on current measurements.
pub fn generate_result(&self, is_partial: bool) -> OptimizationResult {
let r_theta = self.engine.calculate_thermal_resistance(&self.profile);
let knee = self.engine.find_silicon_knee(&self.profile);
@@ -308,17 +229,10 @@ impl BenchmarkOrchestrator {
recommended_pl2: knee * 1.25,
max_temp_c: max_t,
is_partial,
config_paths: std::collections::HashMap::new(),
}
}
/// Checks if the benchmark has been aborted by the user or the watchdog.
fn check_abort(&self) -> Result<()> {
if self.emergency_abort.load(Ordering::SeqCst) {
let reason = self.emergency_reason.lock().unwrap().clone().unwrap_or_else(|| "Unknown safety trigger".to_string());
return Err(anyhow::anyhow!("EMERGENCY_ABORT: {}", reason));
}
if let Ok(cmd) = self.command_rx.try_recv() {
match cmd {
UiCommand::Abort => {
@@ -329,16 +243,15 @@ impl BenchmarkOrchestrator {
Ok(())
}
/// Helper to send log messages to the frontend.
fn log(&self, msg: &str) -> Result<()> {
let state = TelemetryState {
cpu_model: self.cpu_model.clone(),
total_ram_gb: self.total_ram_gb,
tick: 0,
cpu_temp: self.sal.get_temp().unwrap_or(0.0),
power_w: self.sal.get_power_w().unwrap_or(0.0),
current_freq: self.sal.get_freq_mhz().unwrap_or(0.0),
fans: self.sal.get_fan_rpms().unwrap_or_default(),
cpu_temp: self.sensors.get_temp().unwrap_or(0.0),
power_w: self.sensors.get_power_w().unwrap_or(0.0),
current_freq: self.sensors.get_freq_mhz().unwrap_or(0.0),
fans: self.sensors.get_fan_rpms().unwrap_or_default(),
governor: "unknown".to_string(),
pl1_limit: 0.0,
pl2_limit: 0.0,
@@ -349,17 +262,14 @@ impl BenchmarkOrchestrator {
history_mhz: Vec::new(),
log_event: Some(msg.to_string()),
metadata: std::collections::HashMap::new(),
is_emergency: self.emergency_abort.load(Ordering::SeqCst),
emergency_reason: self.emergency_reason.lock().unwrap().clone(),
};
self.telemetry_tx.send(state).map_err(|_| anyhow::anyhow!("Telemetry channel closed"))
}
/// Collects current sensors and sends a complete [TelemetryState] to the frontend.
fn send_telemetry(&mut self, tick: u64) -> Result<()> {
let temp = self.sal.get_temp().unwrap_or(0.0);
let pwr = self.sal.get_power_w().unwrap_or(0.0);
let freq = self.sal.get_freq_mhz().unwrap_or(0.0);
let temp = self.sensors.get_temp().unwrap_or(0.0);
let pwr = self.sensors.get_power_w().unwrap_or(0.0);
let freq = self.sensors.get_freq_mhz().unwrap_or(0.0);
self.history_temp.push_back(temp);
self.history_watts.push_back(pwr);
@@ -378,7 +288,7 @@ impl BenchmarkOrchestrator {
cpu_temp: temp,
power_w: pwr,
current_freq: freq,
fans: self.sal.get_fan_rpms().unwrap_or_default(),
fans: self.sensors.get_fan_rpms().unwrap_or_default(),
governor: "performance".to_string(),
pl1_limit: 15.0,
pl2_limit: 25.0,
@@ -389,8 +299,6 @@ impl BenchmarkOrchestrator {
history_mhz: self.history_mhz.iter().cloned().collect(),
log_event: None,
metadata: std::collections::HashMap::new(),
is_emergency: self.emergency_abort.load(Ordering::SeqCst),
emergency_reason: self.emergency_reason.lock().unwrap().clone(),
};
self.telemetry_tx.send(state).map_err(|_| anyhow::anyhow!("Telemetry channel closed"))
}

View File

@@ -1,15 +1,13 @@
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError, AuditStep, SafetyStatus, EnvironmentCtx};
use anyhow::{Result, Context, anyhow};
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditError, AuditStep};
use anyhow::{Result, Context};
use std::fs;
use std::path::{PathBuf};
use std::path::PathBuf;
use std::process::Command;
use std::time::{Duration, Instant};
use std::sync::Mutex;
use tracing::{debug};
use crate::sal::heuristic::discovery::SystemFactSheet;
use tracing::debug;
pub struct DellXps9380Sal {
ctx: EnvironmentCtx,
fact_sheet: SystemFactSheet,
temp_path: PathBuf,
pwr_path: PathBuf,
fan_paths: Vec<PathBuf>,
@@ -19,159 +17,203 @@ pub struct DellXps9380Sal {
last_poll: Mutex<Instant>,
last_temp: Mutex<f32>,
last_fans: Mutex<Vec<u32>>,
suppressed_services: Mutex<Vec<String>>,
msr_file: Mutex<fs::File>,
last_energy: Mutex<(u64, Instant)>,
}
impl DellXps9380Sal {
pub fn init(ctx: EnvironmentCtx, facts: SystemFactSheet) -> Result<Self> {
let temp_path = facts.temp_path.clone().context("Dell SAL requires temperature sensor")?;
let pwr_base = facts.rapl_paths.first().cloned().context("Dell SAL requires RAPL interface")?;
let fan_paths = facts.fan_paths.clone();
pub fn init() -> Result<Self> {
let mut temp_path = None;
let mut pwr_path = None;
let mut fan_paths = Vec::new();
let mut rapl_base_path = None;
let freq_path = ctx.sysfs_base.join("sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq");
let msr_path = ctx.sysfs_base.join("dev/cpu/0/msr");
// Dynamic hwmon discovery
if let Ok(entries) = fs::read_dir("/sys/class/hwmon") {
for entry in entries.flatten() {
let p = entry.path();
let name = fs::read_to_string(p.join("name")).unwrap_or_default().trim().to_string();
let msr_file = fs::OpenOptions::new().read(true).write(true).open(&msr_path)
.with_context(|| format!("Failed to open {:?}. Is the 'msr' module loaded?", msr_path))?;
if name == "dell_smm" {
temp_path = Some(p.join("temp1_input"));
// Discover all fans
if let Ok(fan_entries) = fs::read_dir(&p) {
for fan_entry in fan_entries.flatten() {
let fan_p = fan_entry.path();
if fan_p.file_name().unwrap_or_default().to_string_lossy().starts_with("fan") &&
fan_p.file_name().unwrap_or_default().to_string_lossy().ends_with("_input") {
fan_paths.push(fan_p);
}
}
}
fan_paths.sort();
}
let initial_energy = fs::read_to_string(pwr_base.join("energy_uj")).unwrap_or_default().trim().parse().unwrap_or(0);
if name == "intel_rapl" || name == "rapl" {
pwr_path = Some(p.join("power1_average"));
}
}
}
// Discovery for RAPL via powercap
if let Ok(entries) = fs::read_dir("/sys/class/powercap") {
for entry in entries.flatten() {
let p = entry.path();
if let Ok(name) = fs::read_to_string(p.join("name")) {
if name.trim() == "package-0" {
rapl_base_path = Some(p.clone());
if pwr_path.is_none() {
pwr_path = Some(p.join("energy_uj"));
}
break;
}
}
}
}
let rapl_base = rapl_base_path.context("Could not find RAPL package-0 path in powercap")?;
let freq_path = PathBuf::from("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq");
Ok(Self {
temp_path,
pwr_path: pwr_base.join("power1_average"),
temp_path: temp_path.context("Could not find dell_smm temperature path")?,
pwr_path: pwr_path.context("Could not find RAPL power path")?,
fan_paths,
freq_path,
pl1_path: pwr_base.join("constraint_0_power_limit_uw"),
pl2_path: pwr_base.join("constraint_1_power_limit_uw"),
pl1_path: rapl_base.join("constraint_0_power_limit_uw"),
pl2_path: rapl_base.join("constraint_1_power_limit_uw"),
last_poll: Mutex::new(Instant::now() - Duration::from_secs(2)),
last_temp: Mutex::new(0.0),
last_fans: Mutex::new(Vec::new()),
suppressed_services: Mutex::new(Vec::new()),
msr_file: Mutex::new(msr_file),
last_energy: Mutex::new((initial_energy, Instant::now())),
fact_sheet: facts,
ctx,
})
}
/// Reads one 64-bit value from the MSR device file held in `self.msr_file`.
///
/// `msr` is used as the byte offset into the device file; the 8 bytes read
/// there are decoded as a little-endian `u64`.
///
/// # Errors
/// Returns an error if the read fails or if fewer than 8 bytes are
/// available at that offset.
///
/// # Panics
/// Panics if the `msr_file` mutex is poisoned.
fn read_msr(&self, msr: u32) -> Result<u64> {
    use std::os::unix::fs::FileExt;

    let mut buf = [0u8; 8];
    let file = self.msr_file.lock().unwrap();
    // `read_at` may return after a short read, and its byte count was being
    // discarded; `read_exact_at` fails instead of handing back a partly
    // zero-filled buffer.
    file.read_exact_at(&mut buf, u64::from(msr))?;
    Ok(u64::from_le_bytes(buf))
}
/// Writes one 64-bit value to the MSR device file held in `self.msr_file`.
///
/// `msr` is used as the byte offset into the device file; `val` is written
/// there as 8 little-endian bytes.
///
/// # Errors
/// Returns an error if the write fails or if not all 8 bytes could be
/// written.
///
/// # Panics
/// Panics if the `msr_file` mutex is poisoned.
fn write_msr(&self, msr: u32, val: u64) -> Result<()> {
    use std::os::unix::fs::FileExt;

    let file = self.msr_file.lock().unwrap();
    // `write_at` may write fewer bytes than requested, and its byte count
    // was being discarded; `write_all_at` reports a partial register write
    // as an error instead of silently accepting it.
    file.write_all_at(&val.to_le_bytes(), u64::from(msr))?;
    Ok(())
}
}
// Pre-flight audit for the Dell XPS 9380 SAL.
// `audit` eagerly builds a list of `AuditStep`s (root privileges, kernel modules,
// kernel command-line parameters, kernel lockdown, fan-control tool, AC power)
// and returns it as a boxed iterator.
// NOTE(review): this span appears to hold two revisions of the same checks side by
// side — one resolving paths through `self.ctx.sysfs_base`, one using hard-coded
// `/sys` and `/proc` paths (several `let` bindings and `outcome:` fields occur twice).
// Confirm which revision is intended before editing the logic.
impl PreflightAuditor for DellXps9380Sal {
fn audit(&self) -> Box<dyn Iterator<Item = AuditStep> + '_> {
let mut steps = Vec::new();
// 1. Root check
steps.push(AuditStep {
description: "Root Privileges".to_string(),
outcome: if unsafe { libc::getuid() } == 0 { Ok(()) } else { Err(AuditError::RootRequired) }
});
// 2. Kernel modules check (simplified check via sysfs/proc)
// A module counts as loaded when its directory under sys/module exists.
let modules = ["dell_smm_hwmon", "msr", "intel_rapl_msr"];
for mod_name in modules {
let path = self.ctx.sysfs_base.join(format!("sys/module/{}", mod_name));
let path = format!("/sys/module/{}", mod_name);
steps.push(AuditStep {
description: format!("Kernel Module: {}", mod_name),
outcome: if path.exists() { Ok(()) } else {
Err(AuditError::ToolMissing(format!("Module '{}' not loaded.", mod_name)))
outcome: if PathBuf::from(path).exists() { Ok(()) } else {
Err(AuditError::ToolMissing(format!("Module '{}' not loaded. Run 'sudo modprobe {}'", mod_name, mod_name)))
}
});
}
// An unreadable command line is treated as empty, so every parameter check below fails.
let cmdline_path = self.ctx.sysfs_base.join("proc/cmdline");
let cmdline = fs::read_to_string(cmdline_path).unwrap_or_default();
let params = [
("dell_smm_hwmon.ignore_dmi=1", "dell_smm_hwmon.ignore_dmi=1"),
("dell_smm_hwmon.restricted=0", "dell_smm_hwmon.restricted=0"),
("msr.allow_writes=on", "msr.allow_writes=on"),
];
for (label, p) in params {
steps.push(AuditStep {
description: format!("Kernel Param: {}", label),
outcome: if cmdline.contains(p) { Ok(()) } else { Err(AuditError::MissingKernelParam(p.to_string())) }
});
}
// An unreadable AC status file is treated as "0" (not on AC power).
let ac_status_path = self.ctx.sysfs_base.join("sys/class/power_supply/AC/online");
let ac_status = fs::read_to_string(ac_status_path).unwrap_or_else(|_| "0".to_string());
// 3. Kernel parameters check
let cmdline = fs::read_to_string("/proc/cmdline").unwrap_or_default();
steps.push(AuditStep {
description: "AC Power Connection".to_string(),
outcome: if ac_status.trim() == "1" { Ok(()) } else {
Err(AuditError::AcPowerMissing("System must be on AC power".to_string()))
description: "Kernel Param: dell_smm_hwmon.ignore_dmi=1".to_string(),
outcome: if cmdline.contains("dell_smm_hwmon.ignore_dmi=1") { Ok(()) } else {
Err(AuditError::MissingKernelParam("dell_smm_hwmon.ignore_dmi=1".to_string()))
}
});
steps.push(AuditStep {
description: "Kernel Param: dell_smm_hwmon.restricted=0".to_string(),
outcome: if cmdline.contains("dell_smm_hwmon.restricted=0") { Ok(()) } else {
Err(AuditError::MissingKernelParam("dell_smm_hwmon.restricted=0".to_string()))
}
});
steps.push(AuditStep {
description: "Kernel Param: msr.allow_writes=on".to_string(),
outcome: if cmdline.contains("msr.allow_writes=on") { Ok(()) } else {
Err(AuditError::MissingKernelParam("msr.allow_writes=on".to_string()))
}
});
let tool_check = self.fact_sheet.paths.tools.contains_key("dell_fan_ctrl");
// 4. Lockdown check
// An unreadable or empty lockdown file is accepted the same way as "[none]".
let lockdown = fs::read_to_string("/sys/kernel/security/lockdown").unwrap_or_default();
steps.push(AuditStep {
description: "Dell Fan Control Tool".to_string(),
outcome: if tool_check { Ok(()) } else { Err(AuditError::ToolMissing("dell-bios-fan-control not found in PATH".to_string())) }
description: "Kernel Lockdown Status".to_string(),
outcome: if lockdown.contains("[none]") || lockdown.is_empty() { Ok(()) } else {
Err(AuditError::KernelIncompatible("Kernel is in lockdown mode. Set to 'none' to allow MSR/SMM writes.".to_string()))
}
});
// 5. Check AC power
let ac_status = fs::read_to_string("/sys/class/power_supply/AC/online").unwrap_or_else(|_| "0".to_string());
steps.push(AuditStep {
description: "AC Power Connection".to_string(),
outcome: if ac_status.trim() == "1" { Ok(()) } else {
Err(AuditError::AcPowerMissing("System must be on AC power for benchmarking".to_string()))
}
});
Box::new(steps.into_iter())
}
}
impl EnvironmentGuard for DellXps9380Sal {
fn suppress(&self) -> Result<()> {
/// Guard that remembers which services it stopped so they can be started again later.
#[derive(Debug, Default)]
pub struct DellXps9380Guard {
    /// Names of the services this guard has stopped and not yet restarted.
    stopped_services: Vec<String>,
}

impl DellXps9380Guard {
    /// Creates a guard that has not stopped any service yet.
    ///
    /// Equivalent to [`Default::default`]; kept so existing callers keep working.
    pub fn new() -> Self {
        Self::default()
    }
}
// Stops the services "tlp", "thermald" and "i8kmon" when `systemctl is-active`
// reports them as active, records each stopped name, and starts the recorded
// services again in `restore`.
// NOTE(review): this span appears to hold two revisions side by side — one going
// through `self.ctx.runner` with a mutex-protected `suppressed_services` list, one
// calling `Command::new("systemctl")` directly with `self.stopped_services`.
// Confirm which revision is intended before editing the logic.
impl EnvironmentGuard for DellXps9380Guard {
fn suppress(&mut self) -> Result<()> {
let services = ["tlp", "thermald", "i8kmon"];
let mut suppressed = self.suppressed_services.lock().unwrap();
for s in services {
if self.ctx.runner.run("systemctl", &["is-active", "--quiet", s]).is_ok() {
if Command::new("systemctl").args(["is-active", "--quiet", s]).status()?.success() {
debug!("Suppressing service: {}", s);
// A failed stop aborts `suppress` before the service name is recorded.
self.ctx.runner.run("systemctl", &["stop", s])?;
suppressed.push(s.to_string());
Command::new("systemctl").args(["stop", s]).status()?;
self.stopped_services.push(s.to_string());
}
}
Ok(())
}
fn restore(&self) -> Result<()> {
let mut suppressed = self.suppressed_services.lock().unwrap();
for s in suppressed.drain(..) {
// Best effort: a service that fails to start does not stop the remaining restarts.
let _ = self.ctx.runner.run("systemctl", &["start", &s]);
fn restore(&mut self) -> Result<()> {
for s in &self.stopped_services {
let _ = Command::new("systemctl").args(["start", s]).status();
}
self.stopped_services.clear();
Ok(())
}
}
impl Drop for DellXps9380Guard {
    /// Calls `restore` when the guard goes out of scope.
    fn drop(&mut self) {
        // A destructor cannot propagate errors, so the outcome is discarded.
        self.restore().ok();
    }
}
impl SensorBus for DellXps9380Sal {
/// Returns the temperature read from `self.temp_path`, divided by 1000.
///
/// Reads are rate-limited: if less than 1000 ms have passed since the instant
/// stored in `self.last_poll`, the cached `self.last_temp` is returned and the
/// file is not touched.
///
/// NOTE(review): `get_fan_rpms` consults and updates the same `last_poll`
/// instant, so a fan poll also postpones the next temperature read (and vice
/// versa) — confirm this coupling is intended.
///
/// # Errors
/// Returns an error if the file cannot be read or its content is not a number.
fn get_temp(&self) -> Result<f32> {
    let mut poll_stamp = self.last_poll.lock().unwrap();
    let current = Instant::now();
    let min_interval = Duration::from_millis(1000);

    if current.duration_since(*poll_stamp) >= min_interval {
        let raw = fs::read_to_string(&self.temp_path)?;
        let reading = raw.trim().parse::<f32>()? / 1000.0;
        *self.last_temp.lock().unwrap() = reading;
        *poll_stamp = current;
        Ok(reading)
    } else {
        // Too soon since the previous poll: serve the cached value.
        Ok(*self.last_temp.lock().unwrap())
    }
}
fn get_power_w(&self) -> Result<f32> {
if self.pwr_path.to_string_lossy().contains("energy_uj") {
let mut last = self.last_energy.lock().unwrap();
let e1 = fs::read_to_string(&self.pwr_path)?.trim().parse::<u64>()?;
std::thread::sleep(Duration::from_millis(100));
let e2 = fs::read_to_string(&self.pwr_path)?.trim().parse::<u64>()?;
let t2 = Instant::now();
let (e1, t1) = *last;
let delta_e = e2.wrapping_sub(e1);
let delta_t = t2.duration_since(t1).as_secs_f32();
*last = (e2, t2);
if delta_t < 0.01 { return Ok(0.0); }
Ok((delta_e as f32 / 1_000_000.0) / delta_t)
Ok((e2.saturating_sub(e1)) as f32 / 100000.0)
} else {
let s = fs::read_to_string(&self.pwr_path)?;
Ok(s.trim().parse::<f32>()? / 1000000.0)
@@ -181,69 +223,66 @@ impl SensorBus for DellXps9380Sal {
// Returns the RPM values parsed from every path in `self.fan_paths`.
// Polls less than 1000 ms after the instant in `self.last_poll` return a clone of
// the cached `self.last_fans` instead of reading the files. Paths that cannot be
// read or parsed are skipped silently.
// NOTE(review): the two consecutive `if let Ok(rpm)` statements look like two
// revisions of the same line; taken literally each reading is pushed twice —
// confirm only one is intended.
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
let mut last_poll = self.last_poll.lock().unwrap();
let now = Instant::now();
if now.duration_since(*last_poll) < Duration::from_millis(1000) {
return Ok(self.last_fans.lock().unwrap().clone());
}
let mut fans = Vec::new();
for path in &self.fan_paths {
if let Ok(s) = fs::read_to_string(path) {
if let Ok(rpm) = s.trim().parse::<u32>() { fans.push(rpm); }
if let Ok(rpm) = s.trim().parse::<u32>() {
fans.push(rpm);
}
}
}
// Refresh the cache and the poll timestamp for subsequent rate-limited calls.
*self.last_fans.lock().unwrap() = fans.clone();
*last_poll = now;
Ok(fans)
}
// Reads `self.freq_path`, parses the trimmed content as a number and divides it by 1000.
// NOTE(review): the one-line `Ok(...)` and the `let val` / `Ok(val)` pair look like
// two revisions of the same computation — confirm which one is intended.
fn get_freq_mhz(&self) -> Result<f32> {
let s = fs::read_to_string(&self.freq_path)?;
Ok(s.trim().parse::<f32>()? / 1000.0)
let val = s.trim().parse::<f32>()? / 1000.0;
Ok(val)
}
}
// Actuator commands for the Dell XPS 9380: fan mode via the external fan-control
// tool (argument "0" for "max"/"Manual", "1" for "auto"/"Auto") and power limits
// written as microwatts to `self.pl1_path` / `self.pl2_path`.
// NOTE(review): this span appears to hold two revisions side by side — one running
// the tool path from `fact_sheet.paths.tools` through `self.ctx.runner`, one calling
// `Command::new("dell-bios-fan-control")` directly. Confirm which is intended.
impl ActuatorBus for DellXps9380Sal {
fn set_fan_mode(&self, mode: &str) -> Result<()> {
let tool_path = self.fact_sheet.paths.tools.get("dell_fan_ctrl")
.ok_or_else(|| anyhow!("Dell fan control tool not found in PATH"))?;
let tool_str = tool_path.to_string_lossy();
match mode {
"max" | "Manual" => { self.ctx.runner.run(&tool_str, &["0"])?; }
"auto" | "Auto" => { self.ctx.runner.run(&tool_str, &["1"])?; }
// Unknown modes are only logged; the call still returns Ok(()).
_ => { debug!("Unknown fan mode: {}", mode); }
"max" | "Manual" => {
Command::new("dell-bios-fan-control").arg("0").status()?;
}
"auto" | "Auto" => {
Command::new("dell-bios-fan-control").arg("1").status()?;
}
_ => {
debug!("Unknown fan mode requested: {}", mode);
}
}
Ok(())
}
// Watts are converted to microwatts; the `as u64` cast truncates the fraction
// and saturates (negative or NaN inputs become 0).
fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
fs::write(&self.pl1_path, ((watts * 1_000_000.0) as u64).to_string())?;
let uw = (watts * 1_000_000.0) as u64;
fs::write(&self.pl1_path, uw.to_string())?;
Ok(())
}
fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
fs::write(&self.pl2_path, ((watts * 1_000_000.0) as u64).to_string())?;
let uw = (watts * 1_000_000.0) as u64;
fs::write(&self.pl2_path, uw.to_string())?;
Ok(())
}
}
impl HardwareWatchdog for DellXps9380Sal {
/// Evaluates the current safety state of the machine.
///
/// * Above 98.0 °C the result is `EmergencyAbort`.
/// * Otherwise, if bit 0 of MSR `0x1FC` is set while the temperature is below
///   85.0 °C, the bit is cleared and a `Warning` is returned. The warning now
///   states whether clearing actually succeeded instead of always claiming it did.
/// * In every other case (including an unreadable MSR) the result is `Nominal`.
///
/// NOTE(review): `0x1FC` bit 0 is presumably the BD PROCHOT control bit of
/// MSR_POWER_CTL — confirm against the CPU documentation.
///
/// # Errors
/// Returns an error if the temperature cannot be read.
fn get_safety_status(&self) -> Result<SafetyStatus> {
    let temp = self.get_temp()?;
    if temp > 98.0 {
        return Ok(SafetyStatus::EmergencyAbort(format!("Thermal Runaway: {:.1}°C", temp)));
    }
    if let Ok(msr_val) = self.read_msr(0x1FC) {
        if (msr_val & 0x1) != 0 && temp < 85.0 {
            // Only claim the bit was cleared when the write went through.
            let status = match self.write_msr(0x1FC, msr_val & !0x1) {
                Ok(()) => SafetyStatus::Warning("BD PROCHOT Latch Cleared".to_string()),
                Err(e) => SafetyStatus::Warning(format!("BD PROCHOT bit set; clearing failed: {}", e)),
            };
            return Ok(status);
        }
    }
    Ok(SafetyStatus::Nominal)
}
}
impl Drop for DellXps9380Sal {
fn drop(&mut self) {
let _ = self.restore();
// Placeholder emergency check: performs no hardware inspection and always
// reports `Ok(false)` (no emergency).
fn check_emergency(&self) -> Result<bool> {
// Check for thermal throttling or BD PROCHOT
// Simplified for now
Ok(false)
}
}

View File

@@ -1,203 +0,0 @@
use anyhow::{Result, anyhow};
use std::path::{Path};
use std::fs;
use std::time::{Duration, Instant};
use std::sync::Mutex;
use crate::sal::traits::{SensorBus, ActuatorBus, EnvironmentGuard, HardwareWatchdog, PreflightAuditor, AuditStep, AuditError, SafetyStatus, EnvironmentCtx};
use crate::sal::heuristic::discovery::SystemFactSheet;
use crate::sal::heuristic::schema::HardwareDb;
/// SAL implementation driven by the discovered [`SystemFactSheet`] and the
/// [`HardwareDb`] instead of model-specific code.
pub struct GenericLinuxSal {
/// Filesystem base and command runner used for all system access.
ctx: EnvironmentCtx,
/// Sensor/actuator paths, vendor string and active conflicts found at start-up.
fact_sheet: SystemFactSheet,
/// Hardware database providing pre-flight checks, conflicts and ecosystem commands.
db: HardwareDb,
/// Services stopped by `suppress` that `restore` still has to start again.
suppressed_services: Mutex<Vec<String>>,
/// Last temperature reading and the instant it last changed by more than 0.01.
last_valid_temp: Mutex<(f32, Instant)>,
/// Value most recently passed to `set_sustained_power_limit` (starts at 15.0).
current_pl1: Mutex<f32>,
/// Energy counter value and timestamp of the previous power sample.
last_energy: Mutex<(u64, Instant)>,
}
impl GenericLinuxSal {
    /// Builds the SAL from the injected context, the discovered facts and the hardware database.
    ///
    /// The energy baseline is seeded from `energy_uj` under the first RAPL path;
    /// a missing path, unreadable file or unparsable value yields a baseline of 0.
    pub fn new(ctx: EnvironmentCtx, facts: SystemFactSheet, db: HardwareDb) -> Self {
        let initial_energy = match facts.rapl_paths.first() {
            Some(zone) => fs::read_to_string(zone.join("energy_uj"))
                .ok()
                .and_then(|raw| raw.trim().parse::<u64>().ok())
                .unwrap_or(0),
            None => 0,
        };

        Self {
            db,
            suppressed_services: Mutex::new(Vec::new()),
            last_valid_temp: Mutex::new((0.0, Instant::now())),
            current_pl1: Mutex::new(15.0),
            last_energy: Mutex::new((initial_energy, Instant::now())),
            fact_sheet: facts,
            ctx,
        }
    }

    /// Reports whether the discovered vendor string contains "dell" (case-insensitive).
    fn is_dell(&self) -> bool {
        let vendor = self.fact_sheet.vendor.to_lowercase();
        vendor.contains("dell")
    }

    /// Reads `path` and returns its content with surrounding whitespace removed.
    fn read_sysfs(&self, path: &Path) -> Result<String> {
        match fs::read_to_string(path) {
            Ok(raw) => Ok(raw.trim().to_string()),
            Err(io_err) => Err(anyhow!(io_err)),
        }
    }
}
impl PreflightAuditor for GenericLinuxSal {
    /// Runs every database pre-flight check and reports critical active conflicts.
    ///
    /// Each check command is executed through `sh -c`; a failing command becomes a
    /// `KernelIncompatible` step carrying the check's help text. Every active
    /// conflict whose severity is "Critical" is appended as a failed `ToolMissing`
    /// step. All steps are computed before the iterator is returned.
    fn audit(&self) -> Box<dyn Iterator<Item = AuditStep> + '_> {
        let mut report: Vec<AuditStep> = self
            .db
            .preflight_checks
            .iter()
            .map(|check| {
                let passed = self.ctx.runner.run("sh", &["-c", &check.check_cmd]).is_ok();
                AuditStep {
                    description: check.name.clone(),
                    outcome: if passed {
                        Ok(())
                    } else {
                        Err(AuditError::KernelIncompatible(check.fail_help.clone()))
                    },
                }
            })
            .collect();

        let critical_conflicts = self
            .fact_sheet
            .active_conflicts
            .iter()
            .filter_map(|wanted| self.db.conflicts.iter().find(|c| &c.id == wanted))
            .filter(|conflict| conflict.severity == "Critical")
            .map(|conflict| AuditStep {
                description: format!("Conflict: {}", conflict.id),
                outcome: Err(AuditError::ToolMissing(conflict.help_text.clone())),
            });
        report.extend(critical_conflicts);

        Box::new(report.into_iter())
    }
}
impl SensorBus for GenericLinuxSal {
    /// Returns the value of the discovered temperature sensor divided by 1000.
    ///
    /// The `(value, instant)` pair in `last_valid_temp` is refreshed only when the
    /// reading differs from the stored one by more than 0.01.
    ///
    /// NOTE(review): `get_safety_status` aborts when that instant is older than
    /// five seconds, so a perfectly steady reading looks like a stalled sensor —
    /// confirm this is intended.
    ///
    /// # Errors
    /// Fails if no sensor path was discovered or the file cannot be read or parsed.
    fn get_temp(&self) -> Result<f32> {
        let path = self
            .fact_sheet
            .temp_path
            .as_ref()
            .ok_or_else(|| anyhow!("No temperature sensor path found"))?;
        let content = self.read_sysfs(path)?;
        let temp = content.parse::<f32>()? / 1000.0;
        let mut last = self.last_valid_temp.lock().unwrap();
        if (temp - last.0).abs() > 0.01 {
            *last = (temp, Instant::now());
        }
        Ok(temp)
    }

    /// Returns the average power in watts since the previous call, derived from
    /// the `energy_uj` counter under the first RAPL path.
    ///
    /// The counter restarts from zero once it reaches the range published in the
    /// sibling `max_energy_range_uj` file. When the new sample is smaller than
    /// the previous one, the delta is reconstructed from that range; if the range
    /// cannot be read, the sample is reported as 0 W rather than as the enormous
    /// value a 64-bit wrapping subtraction would give.
    ///
    /// # Errors
    /// Fails if no RAPL path was discovered or the counter cannot be read or parsed.
    fn get_power_w(&self) -> Result<f32> {
        let rapl_path = self
            .fact_sheet
            .rapl_paths
            .first()
            .ok_or_else(|| anyhow!("No RAPL path found"))?;
        let energy_path = rapl_path.join("energy_uj");
        let mut last = self.last_energy.lock().unwrap();
        let e2: u64 = self.read_sysfs(&energy_path)?.parse()?;
        let t2 = Instant::now();
        let (e1, t1) = *last;

        let delta_e = if e2 >= e1 {
            e2 - e1
        } else {
            // Counter wrapped between the two samples.
            self.read_sysfs(&rapl_path.join("max_energy_range_uj"))
                .ok()
                .and_then(|raw| raw.parse::<u64>().ok())
                .filter(|&range| range >= e1)
                .map_or(0, |range| (range - e1).saturating_add(e2))
        };
        let delta_t = t2.duration_since(t1).as_secs_f32();
        *last = (e2, t2);

        // Too short an interval gives a meaningless quotient.
        if delta_t < 0.01 {
            return Ok(0.0);
        }
        Ok((delta_e as f32 / 1_000_000.0) / delta_t)
    }

    /// Returns the RPM of every discovered fan whose file can be read and parsed;
    /// unreadable or unparsable fans are left out.
    fn get_fan_rpms(&self) -> Result<Vec<u32>> {
        let rpms = self
            .fact_sheet
            .fan_paths
            .iter()
            .filter_map(|path| self.read_sysfs(path).ok())
            .filter_map(|content| content.parse::<u32>().ok())
            .collect();
        Ok(rpms)
    }

    /// Returns the CPU frequency in MHz.
    ///
    /// Uses `scaling_cur_freq` of cpu0 (divided by 1000) when that file exists,
    /// otherwise the first `cpu MHz` line of `proc/cpuinfo`.
    ///
    /// # Errors
    /// Fails if the chosen source cannot be read or parsed, or if `proc/cpuinfo`
    /// has no usable `cpu MHz` line.
    fn get_freq_mhz(&self) -> Result<f32> {
        let path = self
            .ctx
            .sysfs_base
            .join("sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq");
        if path.exists() {
            Ok(self.read_sysfs(&path)?.parse::<f32>()? / 1000.0)
        } else {
            let cpuinfo_path = self.ctx.sysfs_base.join("proc/cpuinfo");
            let cpuinfo = fs::read_to_string(cpuinfo_path)?;
            for line in cpuinfo.lines() {
                if line.starts_with("cpu MHz") {
                    if let Some((_, mhz)) = line.split_once(':') {
                        return Ok(mhz.trim().parse()?);
                    }
                }
            }
            Err(anyhow!("Could not determine CPU frequency"))
        }
    }
}
impl ActuatorBus for GenericLinuxSal {
    /// Switches the fan mode using the commands of the "dell" ecosystem entry.
    ///
    /// On non-Dell systems this is a no-op that returns `Ok(())`. Accepted modes
    /// are "manual"/"max" and "auto". The configured command string is split on
    /// whitespace into program and arguments.
    ///
    /// # Errors
    /// Fails for an unsupported mode, a missing command, a command string that
    /// is empty or whitespace-only (previously a panic), or a failing command.
    fn set_fan_mode(&self, mode: &str) -> Result<()> {
        if !self.is_dell() {
            return Ok(());
        }
        let dell = self.db.ecosystems.get("dell");
        let cmd = match mode {
            "manual" | "max" => dell.and_then(|e| e.fan_manual_mode_cmd.as_ref()),
            "auto" => dell.and_then(|e| e.fan_auto_mode_cmd.as_ref()),
            _ => return Err(anyhow!("Unsupported fan mode: {}", mode)),
        };
        let cmd_str = cmd.ok_or_else(|| anyhow!("Dell fan command missing"))?;
        let parts: Vec<&str> = cmd_str.split_whitespace().collect();
        // `split_first` yields None for an empty command instead of panicking on `parts[0]`.
        let (program, args) = parts
            .split_first()
            .ok_or_else(|| anyhow!("Dell fan command is empty"))?;
        self.ctx.runner.run(program, args)?;
        Ok(())
    }

    /// Writes `watts` (converted to microwatts) to `constraint_0_power_limit_uw`
    /// under the first RAPL path and remembers the value in `current_pl1`.
    ///
    /// # Errors
    /// Fails if no RAPL path was discovered or the file cannot be written.
    fn set_sustained_power_limit(&self, watts: f32) -> Result<()> {
        let rapl_path = self
            .fact_sheet
            .rapl_paths
            .first()
            .ok_or_else(|| anyhow!("No PL1 path"))?;
        // The cast truncates the fraction and saturates (negative/NaN become 0).
        let microwatts = (watts * 1_000_000.0) as u64;
        fs::write(rapl_path.join("constraint_0_power_limit_uw"), microwatts.to_string())?;
        *self.current_pl1.lock().unwrap() = watts;
        Ok(())
    }

    /// Writes `watts` (converted to microwatts) to `constraint_1_power_limit_uw`
    /// under the first RAPL path.
    ///
    /// # Errors
    /// Fails if no RAPL path was discovered or the file cannot be written.
    fn set_burst_power_limit(&self, watts: f32) -> Result<()> {
        let rapl_path = self
            .fact_sheet
            .rapl_paths
            .first()
            .ok_or_else(|| anyhow!("No PL2 path"))?;
        let microwatts = (watts * 1_000_000.0) as u64;
        fs::write(rapl_path.join("constraint_1_power_limit_uw"), microwatts.to_string())?;
        Ok(())
    }
}
impl EnvironmentGuard for GenericLinuxSal {
    /// Stops every active service belonging to an active conflict and records it.
    ///
    /// A service is stopped only when `systemctl is-active --quiet` succeeds for
    /// it. A failing `stop` aborts with that error; services recorded up to then
    /// stay in the list.
    fn suppress(&self) -> Result<()> {
        let mut stopped = self.suppressed_services.lock().unwrap();
        let units = self
            .fact_sheet
            .active_conflicts
            .iter()
            .filter_map(|wanted| self.db.conflicts.iter().find(|c| &c.id == wanted))
            .flat_map(|conflict| conflict.services.iter());

        for unit in units {
            let active = self
                .ctx
                .runner
                .run("systemctl", &["is-active", "--quiet", unit])
                .is_ok();
            if active {
                self.ctx.runner.run("systemctl", &["stop", unit])?;
                stopped.push(unit.clone());
            }
        }
        Ok(())
    }

    /// Starts every recorded service again (ignoring failures), empties the
    /// record, and on Dell systems asks for fan mode "auto". Always returns `Ok(())`.
    fn restore(&self) -> Result<()> {
        let mut stopped = self.suppressed_services.lock().unwrap();
        stopped.drain(..).for_each(|unit| {
            // Best effort: one failing start must not block the others.
            let _ = self.ctx.runner.run("systemctl", &["start", &unit]);
        });
        if self.is_dell() {
            let _ = self.set_fan_mode("auto");
        }
        Ok(())
    }
}
impl HardwareWatchdog for GenericLinuxSal {
    /// Classifies the current state as nominal or as an emergency.
    ///
    /// An emergency is reported when the temperature exceeds 100.0 or when the
    /// timestamp stored in `last_valid_temp` is more than five seconds old.
    ///
    /// # Errors
    /// Fails if the temperature cannot be read.
    fn get_safety_status(&self) -> Result<SafetyStatus> {
        let reading = self.get_temp()?;
        let verdict = if reading > 100.0 {
            SafetyStatus::EmergencyAbort(format!("Thermal runaway: {:.1}°C", reading))
        } else if self.last_valid_temp.lock().unwrap().1.elapsed() > Duration::from_secs(5) {
            SafetyStatus::EmergencyAbort("Temperature sensor stalled".to_string())
        } else {
            SafetyStatus::Nominal
        };
        Ok(verdict)
    }
}
impl Drop for GenericLinuxSal {
    /// Calls `restore` when the SAL is dropped.
    fn drop(&mut self) {
        // Errors cannot be propagated from a destructor, so the result is discarded.
        self.restore().ok();
    }
}

View File

@@ -1,237 +0,0 @@
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration};
use std::thread;
use std::sync::mpsc;
use std::collections::HashMap;
use crate::sal::heuristic::schema::{SensorDiscovery, ActuatorDiscovery, Conflict, Discovery, Benchmarking};
use tracing::{debug, warn};
/// Registry of dynamically discovered paths for configs and tools.
///
/// Both maps are keyed by the identifiers used in the discovery section of the
/// hardware database.
#[derive(Debug, Clone, Default)]
pub struct PathRegistry {
/// Config id -> first existing candidate path, or a default derived from the first candidate.
pub configs: HashMap<String, PathBuf>,
/// Tool id -> location of the binary as resolved through `PATH`; absent when not found.
pub tools: HashMap<String, PathBuf>,
}
/// Strongly-typed findings about the current system.
#[derive(Debug, Clone, Default)]
pub struct SystemFactSheet {
/// DMI `sys_vendor` string, or "Unknown" when it could not be read.
pub vendor: String,
/// DMI `product_name` string, or "Unknown" when it could not be read.
pub model: String,
/// Highest-priority hwmon temperature input whose label matched, if any.
pub temp_path: Option<PathBuf>,
/// hwmon fan inputs whose labels matched, ordered by driver priority.
pub fan_paths: Vec<PathBuf>,
/// Matching directories found under `sys/class/powercap`.
pub rapl_paths: Vec<PathBuf>,
/// Ids of conflicts that had at least one active service at discovery time.
pub active_conflicts: Vec<String>,
/// Discovered tool and config locations.
pub paths: PathRegistry,
/// Benchmark settings handed to `discover_facts`; `None` only for default-constructed sheets.
pub bench_config: Option<Benchmarking>,
}
/// Collects everything the SAL needs to know about the running machine.
///
/// In order: DMI identity, hwmon sensors, RAPL zones, conflicts with at least
/// one active service, and tool/config paths. `base_path` is the root that all
/// sysfs/procfs lookups are resolved against.
pub fn discover_facts(
    base_path: &Path,
    discovery: &Discovery,
    conflicts: &[Conflict],
    bench_config: Benchmarking,
) -> SystemFactSheet {
    let (vendor, model) = read_dmi_info(base_path);
    debug!("DMI Identity: Vendor='{}', Model='{}'", vendor, model);

    let (temp_path, fan_paths) = discover_hwmon(base_path, &discovery.sensors);
    let rapl_paths = discover_rapl(base_path, &discovery.actuators);

    // A conflict is active as soon as one of its services is; later services are not probed.
    let active_conflicts: Vec<String> = conflicts
        .iter()
        .filter_map(|conflict| {
            conflict
                .services
                .iter()
                .find(|unit| is_service_active(unit))
                .map(|unit| {
                    debug!("Detected active conflict: {} (Service: {})", conflict.id, unit);
                    conflict.id.clone()
                })
        })
        .collect();

    let paths = discover_paths(base_path, discovery);

    SystemFactSheet {
        vendor,
        model,
        temp_path,
        fan_paths,
        rapl_paths,
        active_conflicts,
        paths,
        bench_config: Some(bench_config),
    }
}
/// Builds the [`PathRegistry`] for the tools and configs listed in `discovery`.
///
/// * Tools are looked up on `PATH`; tools that are not found get no entry.
/// * For each config, the first candidate that exists under `base_path` is
///   recorded. If none exists, the first candidate is recorded as the default.
///
/// Every candidate — including the default — is resolved relative to
/// `base_path`, so a virtual root never leaks paths that point at the real
/// filesystem. Leading slashes are stripped before joining because joining an
/// absolute path would discard `base_path`.
fn discover_paths(base_path: &Path, discovery: &Discovery) -> PathRegistry {
    let mut registry = PathRegistry::default();

    // 1. Discover tools via PATH.
    for (id, binary_name) in &discovery.tools {
        if let Ok(path) = which::which(binary_name) {
            debug!("Discovered tool: {} -> {:?}", id, path);
            registry.tools.insert(id.clone(), path);
        }
    }

    // 2. Discover configs via existence check.
    let resolve = |candidate: &str| base_path.join(candidate.trim_start_matches('/'));
    for (id, candidates) in &discovery.configs {
        let existing = candidates.iter().map(|c| resolve(c)).find(|p| p.exists());
        let chosen = match existing {
            Some(path) => {
                debug!("Discovered config: {} -> {:?}", id, path);
                Some(path)
            }
            // Nothing exists yet: fall back to the first candidate as the default location.
            None => candidates.first().map(|first| resolve(first)),
        };
        if let Some(path) = chosen {
            registry.configs.insert(id.clone(), path);
        }
    }

    registry
}
/// Reads DMI information from sysfs with a safety timeout.
fn read_dmi_info(base_path: &Path) -> (String, String) {
let vendor = read_sysfs_with_timeout(&base_path.join("sys/class/dmi/id/sys_vendor"), Duration::from_millis(100))
.unwrap_or_else(|| "Unknown".to_string());
let model = read_sysfs_with_timeout(&base_path.join("sys/class/dmi/id/product_name"), Duration::from_millis(100))
.unwrap_or_else(|| "Unknown".to_string());
(vendor, model)
}
/// Finds temperature and fan inputs under `sys/class/hwmon`.
///
/// For every hwmon directory the driver name decides the priority (its index in
/// `cfg.hwmon_priority`, or `usize::MAX` when unlisted). A `temp*_label` or
/// `fan*_label` file whose content contains one of the configured labels
/// selects the sibling `*_input` file, provided it exists.
///
/// Returns the best-priority temperature input (if any) and all fan inputs
/// ordered by priority. An unreadable hwmon directory yields `(None, [])`.
fn discover_hwmon(base_path: &Path, cfg: &SensorDiscovery) -> (Option<PathBuf>, Vec<PathBuf>) {
    let probe_timeout = Duration::from_millis(100);
    let mut temps: Vec<(usize, PathBuf)> = Vec::new();
    let mut fans: Vec<(usize, PathBuf)> = Vec::new();

    let hwmon_base = base_path.join("sys/class/hwmon");
    let chips = match fs::read_dir(&hwmon_base) {
        Ok(listing) => listing,
        Err(e) => {
            warn!("Could not read {:?}: {}", hwmon_base, e);
            return (None, Vec::new());
        }
    };

    for chip in chips.flatten() {
        let chip_dir = chip.path();
        let driver = read_sysfs_with_timeout(&chip_dir.join("name"), probe_timeout)
            .unwrap_or_default();
        let rank = cfg
            .hwmon_priority
            .iter()
            .position(|p| p == &driver)
            .unwrap_or(usize::MAX);

        let nodes = match fs::read_dir(&chip_dir) {
            Ok(listing) => listing,
            Err(_) => continue,
        };

        for node in nodes.flatten() {
            let leaf = node.file_name().into_string().unwrap_or_default();
            if !leaf.ends_with("_label") {
                continue;
            }
            // Pick the label list and result bucket by sensor kind.
            let (wanted, bucket) = if leaf.starts_with("temp") {
                (&cfg.temp_labels, &mut temps)
            } else if leaf.starts_with("fan") {
                (&cfg.fan_labels, &mut fans)
            } else {
                continue;
            };

            let label = match read_sysfs_with_timeout(&node.path(), probe_timeout) {
                Some(text) => text,
                None => continue,
            };
            if !wanted.iter().any(|w| label.contains(w)) {
                continue;
            }

            let input = chip_dir.join(leaf.replace("_label", "_input"));
            if input.exists() {
                bucket.push((rank, input));
            }
        }
    }

    // Stable sorts keep discovery order among equal priorities.
    temps.sort_by_key(|(rank, _)| *rank);
    fans.sort_by_key(|(rank, _)| *rank);

    let best_temp = temps.into_iter().next().map(|(_, path)| path);
    let best_fans = fans.into_iter().map(|(_, path)| path).collect();
    (best_temp, best_fans)
}
/// Lists the directories under `sys/class/powercap` that match `cfg.rapl_paths`.
///
/// A directory matches when its own name is listed, or otherwise when the
/// content of its `name` file (read with a 100 ms timeout) is listed. An
/// unreadable powercap directory yields an empty list.
fn discover_rapl(base_path: &Path, cfg: &ActuatorDiscovery) -> Vec<PathBuf> {
    let powercap_base = base_path.join("sys/class/powercap");
    let zones = match fs::read_dir(&powercap_base) {
        Ok(listing) => listing,
        Err(_) => return Vec::new(),
    };

    zones
        .flatten()
        .filter_map(|zone| {
            let zone_dir = zone.path();
            let dir_name = zone.file_name().into_string().unwrap_or_default();
            // The `name` file is only consulted when the directory name itself did not match.
            let wanted = cfg.rapl_paths.contains(&dir_name)
                || read_sysfs_with_timeout(&zone_dir.join("name"), Duration::from_millis(100))
                    .map_or(false, |name| cfg.rapl_paths.iter().any(|p| p == &name));
            if wanted {
                Some(zone_dir)
            } else {
                None
            }
        })
        .collect()
}
/// Reports whether `systemctl is-active --quiet <service>` exits successfully.
///
/// Returns `false` when the command exits with a failure status or cannot be
/// started at all.
pub fn is_service_active(service: &str) -> bool {
    Command::new("systemctl")
        .args(["is-active", "--quiet", service])
        .status()
        .map_or(false, |exit| exit.success())
}
/// Reads `path` on a helper thread and returns its trimmed content.
///
/// Returns `None` when the read fails or does not finish within `timeout`.
/// A read that overruns the timeout keeps running on its thread; its result is
/// discarded.
fn read_sysfs_with_timeout(path: &Path, timeout: Duration) -> Option<String> {
    let (sender, receiver) = mpsc::channel();
    let target = path.to_path_buf();

    thread::spawn(move || {
        let outcome = fs::read_to_string(target).map(|raw| raw.trim().to_string());
        // The receiver may already be gone after a timeout; that is fine.
        let _ = sender.send(outcome);
    });

    receiver
        .recv_timeout(timeout)
        .ok()
        .and_then(|outcome| outcome.ok())
}

View File

@@ -1,60 +0,0 @@
use miette::{Result, IntoDiagnostic, Context};
use std::fs;
use regex::Regex;
use tracing::{info, debug};
use crate::sal::traits::{PlatformSal, EnvironmentCtx};
use crate::sal::dell_xps_9380::DellXps9380Sal;
use crate::sal::generic_linux::GenericLinuxSal;
use crate::sal::heuristic::schema::HardwareDb;
use crate::sal::heuristic::discovery::{discover_facts, SystemFactSheet};
/// Entry point that selects the SAL implementation for the running machine.
pub struct HeuristicEngine;

impl HeuristicEngine {
    /// Loads the hardware database, probes the system, and builds the appropriate SAL.
    ///
    /// A Dell XPS 13 9380 (matched on the DMI vendor and model strings) gets the
    /// specialised SAL; every other machine gets `GenericLinuxSal`, which needs
    /// a discovered temperature sensor and at least one RAPL path.
    ///
    /// # Errors
    /// Fails if `assets/hardware_db.toml` cannot be read or parsed, if the
    /// specialised SAL fails to initialise, or if the generic fallback lacks a
    /// temperature sensor or RAPL interface.
    pub fn detect_and_build(ctx: EnvironmentCtx) -> Result<(Box<dyn PlatformSal>, SystemFactSheet)> {
        // 1. Load the hardware database.
        let db_path = "assets/hardware_db.toml";
        let raw_db = fs::read_to_string(db_path)
            .into_diagnostic()
            .with_context(|| format!("Failed to read hardware database at {}", db_path))?;
        let db: HardwareDb = toml::from_str(&raw_db)
            .into_diagnostic()
            .context("Failed to parse hardware_db.toml")?;

        // 2. Probe the system.
        let facts = discover_facts(&ctx.sysfs_base, &db.discovery, &db.conflicts, db.benchmarking.clone());
        info!("System Identity: {} {}", facts.vendor, facts.model);

        // 3. Route to a specialised SAL when the identity matches.
        let is_xps_9380 = is_match(&facts.vendor, "(?i)Dell.*")
            && is_match(&facts.model, "(?i)XPS.*13.*9380.*");
        if is_xps_9380 {
            info!("Specialized SAL Match Found: Dell XPS 13 9380");
            let sal = DellXps9380Sal::init(ctx, facts.clone()).map_err(|e| miette::miette!(e))?;
            return Ok((Box::new(sal), facts));
        }

        // 4. Generic fallback, which cannot work without these two interfaces.
        debug!("No specialized SAL match. Falling back to GenericLinuxSal with DB quirks.");
        if facts.temp_path.is_none() {
            return Err(miette::miette!("No temperature sensor discovered. Generic fallback impossible."));
        }
        if facts.rapl_paths.is_empty() {
            return Err(miette::miette!("No RAPL power interface discovered. Generic fallback impossible."));
        }

        let sal = GenericLinuxSal::new(ctx, facts.clone(), db);
        Ok((Box::new(sal), facts))
    }
}
/// Reports whether `input` matches the regular expression `pattern`.
///
/// A pattern that fails to compile counts as "no match".
fn is_match(input: &str, pattern: &str) -> bool {
    Regex::new(pattern).map_or(false, |re| re.is_match(input))
}

View File

@@ -1,3 +0,0 @@
pub mod schema;
pub mod discovery;
pub mod engine;

View File

@@ -1,104 +0,0 @@
use serde::Deserialize;
use std::collections::HashMap;
/// Root of the hardware database; `HeuristicEngine` deserializes it from
/// `assets/hardware_db.toml`.
#[derive(Debug, Deserialize, Clone)]
pub struct HardwareDb {
/// Descriptive information about the database itself.
pub metadata: Metadata,
/// Known service conflicts.
pub conflicts: Vec<Conflict>,
/// Vendor ecosystem entries keyed by name (the generic SAL looks up "dell").
pub ecosystems: HashMap<String, Ecosystem>,
/// Model-specific quirk entries.
pub quirks: Vec<Quirk>,
/// Rules used to discover sensors, actuators, configs and tools.
pub discovery: Discovery,
/// Benchmark settings.
pub benchmarking: Benchmarking,
/// Shell checks the generic SAL runs during the pre-flight audit.
pub preflight_checks: Vec<PreflightCheck>,
}
/// Descriptive header of the hardware database.
// NOTE(review): the expected format of `version` and `updated` is not visible here.
#[derive(Debug, Deserialize, Clone)]
pub struct Metadata {
pub version: String,
pub updated: String,
pub description: String,
}
/// A set of services that conflicts with the benchmark.
#[derive(Debug, Deserialize, Clone)]
pub struct Conflict {
/// Identifier recorded in `SystemFactSheet::active_conflicts`.
pub id: String,
/// Service names probed (and stopped by the generic SAL) through `systemctl`.
pub services: Vec<String>,
pub contention: String,
/// The generic pre-flight audit reports the conflict only when this equals "Critical".
pub severity: String,
pub fix_action: String,
/// Message attached to the failed audit step.
pub help_text: String,
}
/// Vendor-specific settings. Only the two fan mode commands are consumed by the
/// code visible alongside this definition; the remaining fields are optional data.
// NOTE(review): semantics of the fields without comments cannot be confirmed from here.
#[derive(Debug, Deserialize, Clone)]
pub struct Ecosystem {
pub vendor_regex: String,
pub product_regex: Option<String>,
pub polling_cap_ms: Option<u64>,
pub drivers: Option<Vec<String>>,
/// Command line (program and arguments separated by whitespace) for fan modes "manual"/"max".
pub fan_manual_mode_cmd: Option<String>,
/// Command line (program and arguments separated by whitespace) for fan mode "auto".
pub fan_auto_mode_cmd: Option<String>,
pub safety_register: Option<String>,
pub lap_mode_path: Option<String>,
pub profiles_path: Option<String>,
pub ec_write_required: Option<bool>,
pub thermal_policy_path: Option<String>,
pub policy_map: Option<HashMap<String, i32>>,
pub msr_lock_register: Option<String>,
pub msr_lock_bit: Option<u32>,
pub fan_boost_path: Option<String>,
pub ec_tool: Option<String>,
pub optimization: Option<String>,
pub help_text: Option<String>,
}
/// A model-specific quirk entry of the hardware database.
// NOTE(review): no visible code consumes these fields; their semantics are
// presumably implied by the names (regex on the model, MSR to monitor, paths to
// trigger/target) — confirm against the database file.
#[derive(Debug, Deserialize, Clone)]
pub struct Quirk {
pub model_regex: String,
pub id: String,
pub issue: String,
pub action: String,
pub monitor_msr: Option<String>,
pub reset_bit: Option<u32>,
pub trigger_path: Option<String>,
pub trigger_value: Option<String>,
pub target_path: Option<String>,
pub format: Option<String>,
}
/// Rules that drive system discovery.
#[derive(Debug, Deserialize, Clone)]
pub struct Discovery {
/// Label and driver rules for hwmon sensors.
pub sensors: SensorDiscovery,
/// Name rules for power-capping zones.
pub actuators: ActuatorDiscovery,
/// Config id -> candidate paths, tried in order.
pub configs: HashMap<String, Vec<String>>,
/// Tool id -> binary name looked up on `PATH`.
pub tools: HashMap<String, String>,
}
/// Benchmark settings carried from the database into the fact sheet.
// NOTE(review): the `_s` and `_watts` suffixes presumably denote seconds and watts — confirm.
#[derive(Debug, Deserialize, Clone)]
pub struct Benchmarking {
pub idle_duration_s: u64,
pub stress_duration_min_s: u64,
pub stress_duration_max_s: u64,
pub cool_down_s: u64,
pub power_steps_watts: Vec<f32>,
}
/// Matching rules for hwmon sensors.
#[derive(Debug, Deserialize, Clone)]
pub struct SensorDiscovery {
/// Substrings; a `temp*_label` file containing any of them selects that sensor.
pub temp_labels: Vec<String>,
/// Substrings; a `fan*_label` file containing any of them selects that fan.
pub fan_labels: Vec<String>,
/// Driver names in preference order; earlier entries win, unlisted drivers rank last.
pub hwmon_priority: Vec<String>,
}
/// Matching rules for actuators.
#[derive(Debug, Deserialize, Clone)]
pub struct ActuatorDiscovery {
/// Accepted powercap zones, compared with the directory name or the content of its `name` file.
pub rapl_paths: Vec<String>,
// NOTE(review): the two fields below are not consumed by the visible discovery code.
pub amd_energy_paths: Vec<String>,
pub governor_files: Vec<String>,
}
/// A shell-based pre-flight check.
#[derive(Debug, Deserialize, Clone)]
pub struct PreflightCheck {
/// Description shown for the audit step.
pub name: String,
/// Command executed through `sh -c`; the check passes when the runner reports success.
pub check_cmd: String,
/// Message reported when the check fails.
pub fail_help: String,
}

View File

@@ -1,19 +1,8 @@
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditStep, SafetyStatus};
use super::traits::{PreflightAuditor, EnvironmentGuard, SensorBus, ActuatorBus, HardwareWatchdog, AuditStep};
use anyhow::Result;
/// Mock SAL whose temperature readings advance with every call.
#[derive(Debug, Default)]
pub struct MockSal {
    /// Number of temperature reads served so far; drives the simulated warm-up.
    pub temperature_sequence: std::sync::atomic::AtomicUsize,
}

impl MockSal {
    /// Creates a mock whose temperature sequence starts at zero.
    ///
    /// Equivalent to [`Default::default`]; kept so existing callers keep working.
    pub fn new() -> Self {
        Self::default()
    }
}
impl PreflightAuditor for MockSal {
pub struct MockAuditor;
impl PreflightAuditor for MockAuditor {
fn audit(&self) -> Box<dyn Iterator<Item = AuditStep> + '_> {
let steps = vec![
AuditStep {
@@ -29,20 +18,34 @@ impl PreflightAuditor for MockSal {
}
}
impl EnvironmentGuard for MockSal {
fn suppress(&self) -> Result<()> {
/// Mock environment guard that only tracks whether suppression is in effect.
#[derive(Debug, Default)]
pub struct MockGuard {
    /// `true` between a call to `suppress` and the following `restore`.
    pub suppressed: bool,
}

impl MockGuard {
    /// Creates a guard in the non-suppressed state.
    ///
    /// Equivalent to [`Default::default`]; kept so existing callers keep working.
    pub fn new() -> Self {
        Self::default()
    }
}
// Mock guard: `suppress` sets the `suppressed` flag, `restore` clears it; both
// always succeed and touch nothing outside the struct.
// NOTE(review): the two consecutive `fn restore` headers (`&self` and `&mut self`)
// look like two revisions of the same signature — confirm which one is intended.
impl EnvironmentGuard for MockGuard {
fn suppress(&mut self) -> Result<()> {
self.suppressed = true;
Ok(())
}
fn restore(&self) -> Result<()> {
fn restore(&mut self) -> Result<()> {
self.suppressed = false;
Ok(())
}
}
impl Drop for MockGuard {
    /// Calls `restore` when the guard goes out of scope.
    fn drop(&mut self) {
        // Nothing can be done with an error inside a destructor.
        self.restore().ok();
    }
}
impl SensorBus for MockSal {
pub struct MockSensorBus;
impl SensorBus for MockSensorBus {
fn get_temp(&self) -> Result<f32> {
// Support dynamic sequence for Step 5
let seq = self.temperature_sequence.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
Ok(40.0 + (seq as f32 * 0.5).min(50.0)) // Heats up from 40 to 90
Ok(42.0)
}
fn get_power_w(&self) -> Result<f32> {
Ok(15.0)
@@ -55,7 +58,8 @@ impl SensorBus for MockSal {
}
}
impl ActuatorBus for MockSal {
pub struct MockActuatorBus;
impl ActuatorBus for MockActuatorBus {
fn set_fan_mode(&self, _mode: &str) -> Result<()> {
Ok(())
}
@@ -67,8 +71,9 @@ impl ActuatorBus for MockSal {
}
}
// Mock watchdog that never reports a problem: the status is always
// `SafetyStatus::Nominal` / the emergency flag is always `false`.
// NOTE(review): this span appears to hold two revisions side by side
// (`get_safety_status` on `MockSal` and `check_emergency` on `MockWatchdog`) —
// confirm which one is intended.
impl HardwareWatchdog for MockSal {
fn get_safety_status(&self) -> Result<SafetyStatus> {
Ok(SafetyStatus::Nominal)
pub struct MockWatchdog;
impl HardwareWatchdog for MockWatchdog {
fn check_emergency(&self) -> Result<bool> {
Ok(false)
}
}

View File

@@ -1,5 +1,3 @@
pub mod traits;
pub mod mock;
pub mod dell_xps_9380;
pub mod generic_linux;
pub mod heuristic;

View File

@@ -1,83 +1,43 @@
//! Core traits defining the System Abstraction Layer (SAL).
//!
//! This module provides a set of hardware-agnostic interfaces that the
//! `BenchmarkOrchestrator` uses to interact with the underlying system.
//! These traits allow `ember-tune` to support diverse hardware by abstracting
//! away platform-specific details.
use miette::Diagnostic;
use std::sync::Arc;
use std::path::PathBuf;
use crate::sys::SyscallRunner;
use anyhow::Result;
use thiserror::Error;
use miette::Diagnostic;
use std::sync::Arc;
/// Context holding OS abstractions (filesystem base and syscall runner).
///
/// This is injected into SAL implementations to allow for a mocked "virtual"
/// environment during testing, preventing `cargo test` from mutating the host system.
#[derive(Clone)]
pub struct EnvironmentCtx {
/// Root that sysfs/procfs-style relative paths (e.g. `sys/...`, `proc/...`) are joined onto.
pub sysfs_base: PathBuf,
/// Executes external commands on behalf of the SAL; shared between clones of the context.
pub runner: Arc<dyn SyscallRunner>,
}
impl EnvironmentCtx {
/// Creates a production-ready context pointing to the real filesystem root.
pub fn production() -> Self {
Self {
sysfs_base: PathBuf::from("/"),
runner: Arc::new(crate::sys::RealSyscallRunner),
}
}
}
/// Errors that can occur during the pre-flight system audit.
///
/// Each variant's payload is interpolated as `{0}` into both its error message
/// and its help text.
#[derive(Error, Diagnostic, Debug, Clone)]
pub enum AuditError {
/// The user does not have root privileges (`uid=0`).
#[error("Missing root privileges.")]
#[diagnostic(code(ember_tune::root_required), severity(error))]
#[help("ember-tune requires direct hardware access (MSRs, sysfs). Please run with 'sudo'.")]
RootRequired,
/// A required kernel parameter is missing from the boot command line.
#[error("Missing kernel parameter: {0}")]
#[diagnostic(code(ember_tune::missing_kernel_param), severity(error))]
#[help("Add '{0}' to your GRUB_CMDLINE_LINUX_DEFAULT in /etc/default/grub, then run 'sudo update-grub' and reboot.")]
MissingKernelParam(String),
/// The system is running on battery power.
#[error("System is running on battery: {0}")]
#[diagnostic(code(ember_tune::ac_power_missing), severity(error))]
#[help("Thermal benchmarking requires a stable AC power source to ensure consistent PL limits. Please plug in your charger.")]
AcPowerMissing(String),
/// The Linux kernel version is known to be incompatible.
// NOTE(review): SAL implementations also pass free-form messages here (kernel
// lockdown text, database `fail_help`), which the help text then quotes as a
// "kernel version" — confirm the wording is acceptable.
#[error("Incompatible kernel version: {0}")]
#[diagnostic(code(ember_tune::kernel_incompatible), severity(error))]
#[help("Your kernel version '{0}' may not support the required RAPL or SMM interfaces. Please upgrade to a recent LTS kernel (6.1+).")]
KernelIncompatible(String),
/// A required kernel module or CLI tool is not available.
// NOTE(review): SAL implementations pass whole sentences here (e.g. "Module '…'
// not loaded."), which the help text then places after 'sudo apt install' —
// confirm whether the payload should be a bare package name.
#[error("Required tool missing: {0}")]
#[diagnostic(code(ember_tune::tool_missing), severity(error))]
#[help("The utility '{0}' is required for this SAL. Please install it using your package manager (e.g., 'sudo apt install {0}').")]
ToolMissing(String),
}
/// A single, verifiable step in the pre-flight audit process.
///
/// Produced by [`PreflightAuditor::audit`]; one step corresponds to one check.
pub struct AuditStep {
/// Human-readable description of the check.
pub description: String,
/// The outcome of the check: `Ok(())` when it passed, otherwise the reason it failed.
pub outcome: Result<(), AuditError>,
}
/// Evaluates immutable system states (e.g., kernel bootline parameters, AC power status)
/// before the benchmark begins.
pub trait PreflightAuditor: Send + Sync {
/// Returns an iterator of [AuditStep] results.
/// This allows the UI to show a live checklist of system verification steps.
fn audit(&self) -> Box<dyn Iterator<Item = AuditStep> + '_>;
}
@@ -87,58 +47,17 @@ impl<T: PreflightAuditor + ?Sized> PreflightAuditor for Arc<T> {
}
}
// NOTE(review): this span appears to hold two revisions of the trait side by side
// (`&self` methods with a `Send + Sync` bound, and `&mut self` methods without it) —
// confirm which one is intended.
/// Manages system services that conflict with the benchmark.
///
/// # Invariants
/// The `Drop` trait is *not* used for guaranteed cleanup. The orchestrator must
/// explicitly call `restore()` to ensure hardware state is reset.
pub trait EnvironmentGuard: Send + Sync {
/// Stops any conflicting system daemons (e.g., `tlp`, `thermald`).
///
/// # Errors
/// Returns an error if the `systemctl` command fails.
fn suppress(&self) -> Result<()>;
/// Restarts any services that were stopped by `suppress`.
///
/// # Errors
/// Implementations may return an error; the implementations visible next to
/// this trait ignore individual start failures and return `Ok(())`.
fn restore(&self) -> Result<()>;
/// Suppresses conflicting daemons (tlp, thermald).
pub trait EnvironmentGuard {
fn suppress(&mut self) -> Result<()>;
fn restore(&mut self) -> Result<()>;
}
/// Lets a shared `Arc<T>` be used wherever an [`EnvironmentGuard`] is expected
/// by forwarding every call to the wrapped value.
impl<T: EnvironmentGuard + ?Sized> EnvironmentGuard for Arc<T> {
    fn suppress(&self) -> Result<()> {
        // Borrow the inner value explicitly so the call cannot resolve back to this impl.
        let inner: &T = self;
        inner.suppress()
    }

    fn restore(&self) -> Result<()> {
        let inner: &T = self;
        inner.restore()
    }
}
/// Provides a read-only interface to standardized system telemetry metrics.
pub trait SensorBus: Send + Sync {
/// Returns the current package temperature in degrees Celsius.
///
/// # Errors
/// Returns an error if the underlying `hwmon` or `sysfs` node cannot be read.
fn get_temp(&self) -> Result<f32>;
/// Returns the current package power consumption in Watts.
///
/// # Errors
/// Returns an error if the underlying RAPL or power sensor cannot be read.
fn get_power_w(&self) -> Result<f32>;
/// Returns the current speed of all detected fans in RPM.
///
/// # Errors
/// Returns an error if the fan sensor nodes cannot be read.
fn get_fan_rpms(&self) -> Result<Vec<u32>>;
/// Returns the current average CPU frequency in MHz.
///
/// # Errors
/// Returns an error if `/proc/cpuinfo` or a `cpufreq` sysfs node cannot be read.
fn get_freq_mhz(&self) -> Result<f32>;
}
@@ -157,24 +76,10 @@ impl<T: SensorBus + ?Sized> SensorBus for Arc<T> {
}
}
/// Provides a write-only interface for hardware actuators.
pub trait ActuatorBus: Send + Sync {
/// Sets the fan control mode (e.g., "auto" or "max").
///
/// # Errors
/// Returns an error if the fan control command or `sysfs` write fails.
/// Write-only interface for hardware commands.
pub trait ActuatorBus {
fn set_fan_mode(&self, mode: &str) -> Result<()>;
/// Sets the sustained power limit (PL1) in Watts.
///
/// # Errors
/// Returns an error if the RAPL `sysfs` node cannot be written to.
fn set_sustained_power_limit(&self, watts: f32) -> Result<()>;
/// Sets the burst power limit (PL2) in Watts.
///
/// # Errors
/// Returns an error if the RAPL `sysfs` node cannot be written to.
fn set_burst_power_limit(&self, watts: f32) -> Result<()>;
}
@@ -190,40 +95,13 @@ impl<T: ActuatorBus + ?Sized> ActuatorBus for Arc<T> {
}
}
/// Represents the high-level safety status of the system.
///
/// Every variant except `Nominal` carries a `String`; presumably a
/// human-readable description of what was detected (confirm against the code
/// that constructs these values).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SafetyStatus {
/// The system is operating within normal parameters.
Nominal,
/// A non-critical issue was detected and may have been auto-corrected.
Warning(String),
/// A potentially dangerous state was detected, but is not yet an emergency.
Critical(String),
/// A critical failure has occurred, requiring an immediate shutdown of the benchmark.
EmergencyAbort(String),
}
/// A high-frequency monitor for catastrophic hardware states.
pub trait HardwareWatchdog: Send + Sync {
/// Returns the current [SafetyStatus] of the system.
///
/// # Errors
/// This method can return an error if a sensor required for a safety check
/// (e.g., the thermal sensor) fails to read. The orchestrator must treat
/// this as an `EmergencyAbort` condition.
fn get_safety_status(&self) -> Result<SafetyStatus>;
/// Concurrent monitor for catastrophic states.
pub trait HardwareWatchdog {
fn check_emergency(&self) -> Result<bool>;
}
impl<T: HardwareWatchdog + ?Sized> HardwareWatchdog for Arc<T> {
fn get_safety_status(&self) -> Result<SafetyStatus> {
(**self).get_safety_status()
fn check_emergency(&self) -> Result<bool> {
(**self).check_emergency()
}
}
/// Aggregate trait for a complete platform implementation.
///
/// This "super-trait" combines all SAL interfaces into a single object-safe
/// trait, simplifying dependency injection into the `BenchmarkOrchestrator`.
/// It declares no methods of its own.
pub trait PlatformSal: PreflightAuditor + SensorBus + ActuatorBus + EnvironmentGuard + HardwareWatchdog {}
// Blanket implementation: any type that implements all five SAL traits is a
// `PlatformSal` automatically. The `?Sized` bound extends this to unsized types.
impl<T: PreflightAuditor + SensorBus + ActuatorBus + EnvironmentGuard + HardwareWatchdog + ?Sized> PlatformSal for T {}

View File

@@ -1,56 +0,0 @@
use anyhow::{Result, anyhow};
use std::process::Command;
use std::collections::HashMap;
use std::sync::Mutex;
/// Trait for executing system commands. Allows mocking for tests.
///
/// Implementors must be `Send + Sync`.
pub trait SyscallRunner: Send + Sync {
/// Runs `cmd` with the given `args` and returns its textual output.
///
/// # Errors
/// Returns an error when the command cannot be run or does not yield a
/// result; the exact conditions are defined by each implementation.
fn run(&self, cmd: &str, args: &[&str]) -> Result<String>;
}
/// The real implementation that executes actual OS commands.
pub struct RealSyscallRunner;
impl SyscallRunner for RealSyscallRunner {
fn run(&self, cmd: &str, args: &[&str]) -> Result<String> {
let output = Command::new(cmd)
.args(args)
.output()?;
if output.status.success() {
Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
} else {
let err = String::from_utf8_lossy(&output.stderr).trim().to_string();
Err(anyhow!("Command failed: {} {:?} -> {}", cmd, args, err))
}
}
}
/// A mocked implementation for isolated unit and E2E testing.
///
/// Starts with no canned responses; register them with
/// [`MockSyscallRunner::set_response`].
#[derive(Default)]
pub struct MockSyscallRunner {
    /// Maps "cmd arg1 arg2" to stdout response.
    responses: Mutex<HashMap<String, String>>,
}

impl MockSyscallRunner {
    /// Creates a runner with an empty response table (same as `Default`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Registers `response` as the output for the exact command line `full_cmd`.
    ///
    /// A later call with the same `full_cmd` replaces the earlier response.
    ///
    /// # Panics
    /// Panics if the internal mutex was poisoned by a panic on another thread.
    pub fn set_response(&self, full_cmd: &str, response: &str) {
        self.responses
            .lock()
            .unwrap()
            .insert(full_cmd.to_string(), response.to_string());
    }
}
impl SyscallRunner for MockSyscallRunner {
    /// Returns the canned response registered for `cmd` followed by its
    /// space-separated `args`.
    ///
    /// The lookup key is `"cmd arg1 arg2"` with surrounding whitespace trimmed,
    /// so a command without arguments is looked up as just `"cmd"`.
    ///
    /// # Errors
    /// Returns an error naming the command line when no response was registered.
    fn run(&self, cmd: &str, args: &[&str]) -> Result<String> {
        let joined = format!("{} {}", cmd, args.join(" "));
        let key = joined.trim();

        let table = self.responses.lock().unwrap();
        if let Some(reply) = table.get(key) {
            return Ok(reply.clone());
        }
        Err(anyhow!("No mocked response for command: '{}'", key))
    }
}

View File

@@ -1,3 +0,0 @@
pub mod cmd;
pub use cmd::{SyscallRunner, RealSyscallRunner, MockSyscallRunner};

View File

@@ -5,31 +5,21 @@ use ratatui::{
widgets::{Block, Borders, List, ListItem, Paragraph, Chart, Dataset, Axis, BorderType, GraphType},
symbols::Marker,
Frame,
prelude::Stylize,
};
use std::collections::VecDeque;
use crate::mediator::TelemetryState;
use crate::ui::theme::*;
/// DashboardState maintains UI-specific state that isn't part of the core telemetry,
/// such as the accumulated diagnostic logs.
pub struct DashboardState {
pub logs: VecDeque<String>,
pub logs: Vec<String>,
}
impl DashboardState {
pub fn new() -> Self {
let mut logs = VecDeque::with_capacity(100);
logs.push_back("ember-tune Initialized.".to_string());
Self { logs }
}
/// Adds a log message and ensures the buffer does not exceed capacity.
pub fn add_log(&mut self, msg: String) {
if self.logs.len() >= 100 {
self.logs.pop_front();
Self {
logs: vec!["ember-tune Initialized.".to_string()],
}
self.logs.push_back(msg);
}
/// Updates the UI state based on new telemetry.
@@ -93,55 +83,6 @@ pub fn draw_dashboard(
draw_freq_graph(f, right_side_chunks[2], state);
draw_logs(f, chunks[3], ui_state);
if state.is_emergency {
draw_emergency_overlay(f, area, state);
}
}
/// Renders the centred "emergency abort" popup over the dashboard.
///
/// The popup covers 60% of the width and 20% of the height of `area`. It shows
/// `state.emergency_reason`, or a generic fallback when no reason is recorded.
fn draw_emergency_overlay(f: &mut Frame, area: Rect, state: &TelemetryState) {
    let alert_style = Style::default().fg(Color::Red).add_modifier(Modifier::BOLD);
    let popup = centered_rect(60, 20, area);

    // Frame first: its inner rectangle is where the message text goes.
    let frame_block = Block::default()
        .borders(Borders::ALL)
        .border_type(BorderType::Double)
        .border_style(alert_style)
        .bg(Color::Black)
        .title(" 🚨 EMERGENCY ABORT 🚨 ");
    let content_area = frame_block.inner(popup);
    f.render_widget(frame_block, popup);

    let reason = match state.emergency_reason.as_deref() {
        Some(msg) => msg,
        None => "Unknown safety trigger",
    };

    let headline = Line::from(vec![Span::styled("CRITICAL SAFETY LIMIT TRIGGERED", alert_style)]);
    let reason_line = Line::from(vec![
        Span::raw("Reason: "),
        Span::styled(reason, Style::default().fg(Color::Yellow)),
    ]);

    let message = Paragraph::new(vec![
        headline,
        Line::from(""),
        reason_line,
        Line::from(""),
        Line::from("Hardware has been restored to safe defaults."),
        Line::from("Exiting in 1 second..."),
    ])
    .alignment(ratatui::layout::Alignment::Center);
    f.render_widget(message, content_area);
}
/// Returns a rectangle centred inside `r` that spans `percent_x` percent of its
/// width and `percent_y` percent of its height.
///
/// Percentages above 100 are clamped to 100. Without the clamp, the
/// `100 - percent` margin computation on `u16` would underflow (a panic in
/// debug builds, a wrapped huge value in release builds).
fn centered_rect(percent_x: u16, percent_y: u16, r: Rect) -> Rect {
    let percent_x = percent_x.min(100);
    let percent_y = percent_y.min(100);
    // The remaining space is split evenly on both sides of the popup.
    let margin_x = (100 - percent_x) / 2;
    let margin_y = (100 - percent_y) / 2;

    // First carve out the vertical band...
    let rows = Layout::default()
        .direction(Direction::Vertical)
        .constraints([
            Constraint::Percentage(margin_y),
            Constraint::Percentage(percent_y),
            Constraint::Percentage(margin_y),
        ])
        .split(r);

    // ...then take the middle column of that band.
    Layout::default()
        .direction(Direction::Horizontal)
        .constraints([
            Constraint::Percentage(margin_x),
            Constraint::Percentage(percent_x),
            Constraint::Percentage(margin_x),
        ])
        .split(rows[1])[1]
}
fn draw_header(f: &mut Frame, area: Rect, state: &TelemetryState) {

View File

@@ -1,55 +0,0 @@
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
/// Builds a throwaway directory tree that mimics parts of `/sys` and `/proc`
/// for tests.
///
/// All files live under a temporary directory owned by this builder.
pub struct FakeSysBuilder {
    temp_dir: TempDir,
}

impl FakeSysBuilder {
    /// Creates the builder together with its backing temporary directory.
    ///
    /// # Panics
    /// Panics if the temporary directory cannot be created.
    pub fn new() -> Self {
        let temp_dir = TempDir::new().expect("Failed to create temporary directory");
        Self { temp_dir }
    }

    /// Returns the root of the fake tree as an owned path.
    pub fn base_path(&self) -> PathBuf {
        PathBuf::from(self.temp_dir.path())
    }

    /// Writes `sys_vendor` and `product_name` under `sys/class/dmi/id`.
    pub fn add_dmi(&self, vendor: &str, product: &str) -> &Self {
        let dir = self.temp_dir.path().join("sys/class/dmi/id");
        fs::create_dir_all(&dir).expect("Failed to create DMI directory");
        for (file, contents, failure) in [
            ("sys_vendor", vendor, "Failed to write sys_vendor"),
            ("product_name", product, "Failed to write product_name"),
        ] {
            fs::write(dir.join(file), contents).expect(failure);
        }
        self
    }

    /// Writes `name`, `temp1_label` and `temp1_input` under
    /// `sys/class/hwmon/hwmon0`.
    pub fn add_hwmon(&self, name: &str, temp_label: &str, temp_input: &str) -> &Self {
        let dir = self.temp_dir.path().join("sys/class/hwmon/hwmon0");
        fs::create_dir_all(&dir).expect("Failed to create hwmon directory");
        for (file, contents, failure) in [
            ("name", name, "Failed to write hwmon name"),
            ("temp1_label", temp_label, "Failed to write temp label"),
            ("temp1_input", temp_input, "Failed to write temp input"),
        ] {
            fs::write(dir.join(file), contents).expect(failure);
        }
        self
    }

    /// Writes `name`, `energy_uj` and `constraint_0_power_limit_uw` under
    /// `sys/class/powercap/intel-rapl:0`.
    pub fn add_rapl(&self, name: &str, energy_uj: &str, pl1_uw: &str) -> &Self {
        let dir = self.temp_dir.path().join("sys/class/powercap/intel-rapl:0");
        fs::create_dir_all(&dir).expect("Failed to create RAPL directory");
        for (file, contents, failure) in [
            ("name", name, "Failed to write RAPL name"),
            ("energy_uj", energy_uj, "Failed to write energy_uj"),
            ("constraint_0_power_limit_uw", pl1_uw, "Failed to write pl1_uw"),
        ] {
            fs::write(dir.join(file), contents).expect(failure);
        }
        self
    }

    /// Writes `cmdline` to `proc/cmdline`.
    pub fn add_proc_cmdline(&self, cmdline: &str) -> &Self {
        let dir = self.temp_dir.path().join("proc");
        fs::create_dir_all(&dir).expect("Failed to create proc directory");
        fs::write(dir.join("cmdline"), cmdline).expect("Failed to write cmdline");
        self
    }
}

View File

@@ -1 +0,0 @@
pub mod fakesys;

View File

@@ -1,35 +0,0 @@
#[path = "../src/engine/formatters/throttled.rs"]
mod throttled;
use throttled::{ThrottledTranslator, ThrottledConfig};
use std::fs;
#[test]
fn test_throttled_formatter_non_destructive() {
    // Merge new limits into an existing fixture config.
    let original = fs::read_to_string("tests/fixtures/throttled.conf").expect("Failed to read fixture");
    let merged = ThrottledTranslator::merge_conf(
        &original,
        &ThrottledConfig {
            pl1_limit: 25.0,
            pl2_limit: 35.0,
            trip_temp: 90.0,
        },
    );
    let has = |needle: &str| merged.contains(needle);

    // The three tuned values must appear in the merged output.
    assert!(has("PL1_Tdp_W: 25"));
    assert!(has("PL2_Tdp_W: 35"));
    assert!(has("Trip_Temp_C: 90"));

    // Settings and comments the merge does not own must survive.
    assert!(has("[UNDERVOLT]"));
    assert!(has("CORE: -100"));
    assert!(has("GPU: -50"));
    assert!(has("# Important: Preserving undervolt offsets is critical!"));
    assert!(has("Update_Interval_ms: 3000"));

    // The [GENERAL] section and its header comment must still be there.
    assert!(has("[GENERAL]"));
    assert!(has("# This is a complex test fixture"));
}

View File

@@ -1,45 +0,0 @@
use ember_tune_rs::sal::heuristic::discovery::discover_facts;
use ember_tune_rs::sal::heuristic::schema::{Discovery, SensorDiscovery, ActuatorDiscovery, Benchmarking};
use crate::common::fakesys::FakeSysBuilder;
mod common;
#[test]
fn test_heuristic_discovery_with_fakesys() {
    // Populate a fake sysfs/procfs tree for a Dell XPS 13.
    let sysroot = FakeSysBuilder::new();
    sysroot.add_dmi("Dell Inc.", "XPS 13 9380");
    sysroot.add_hwmon("dell_smm", "Package id 0", "45000");
    sysroot.add_rapl("intel-rapl:0", "123456", "15000000");
    sysroot.add_proc_cmdline("quiet msr.allow_writes=on");

    // Discovery rules that match the nodes created above.
    let sensors = SensorDiscovery {
        temp_labels: vec![String::from("Package id 0")],
        fan_labels: Vec::new(),
        hwmon_priority: vec![String::from("dell_smm")],
    };
    let actuators = ActuatorDiscovery {
        rapl_paths: vec![String::from("intel-rapl:0")],
        amd_energy_paths: Vec::new(),
        governor_files: Vec::new(),
    };
    let discovery = Discovery {
        sensors,
        actuators,
        configs: std::collections::HashMap::new(),
        tools: std::collections::HashMap::new(),
    };

    let benchmarking = Benchmarking {
        idle_duration_s: 1,
        stress_duration_min_s: 1,
        stress_duration_max_s: 2,
        cool_down_s: 1,
        power_steps_watts: vec![10.0, 15.0],
    };

    let root = sysroot.base_path();
    let facts = discover_facts(&root, &discovery, &[], benchmarking);

    assert_eq!(facts.vendor, "Dell Inc.");
    assert_eq!(facts.model, "XPS 13 9380");

    // The temperature sensor must resolve to the fake hwmon node.
    assert!(facts.temp_path.is_some());
    let temp_path = facts.temp_path.unwrap();
    assert!(temp_path.to_string_lossy().contains("hwmon0/temp1_input"));

    assert_eq!(facts.rapl_paths.len(), 1);
}

View File

@@ -1,38 +0,0 @@
use ember_tune_rs::orchestrator::BenchmarkOrchestrator;
use ember_tune_rs::sal::mock::MockSal;
use ember_tune_rs::sal::heuristic::discovery::SystemFactSheet;
use ember_tune_rs::load::Workload;
use std::sync::mpsc;
use std::sync::Arc;
use anyhow::Result;
/// Stand-in workload for tests: starting and stopping do nothing, and the
/// reported throughput is a fixed value.
struct MockWorkload;

impl Workload for MockWorkload {
    /// Accepts any thread count and load percentage and always succeeds.
    fn start(&mut self, _threads: usize, _load_percent: usize) -> Result<()> {
        Ok(())
    }

    /// Always succeeds; there is nothing to stop.
    fn stop(&mut self) -> Result<()> {
        Ok(())
    }

    /// Always reports a throughput of `100.0`.
    fn get_throughput(&self) -> Result<f64> {
        let fixed_throughput = 100.0;
        Ok(fixed_throughput)
    }
}
#[test]
fn test_orchestrator_e2e_state_machine() {
    // The unused channel ends stay bound to named variables so both channels
    // remain open for the lifetime of the test.
    let (telemetry_tx, _telemetry_rx) = mpsc::channel();
    let (_command_tx, command_rx) = mpsc::channel();

    // Build the orchestrator entirely from mocked/default dependencies.
    let orchestrator = BenchmarkOrchestrator::new(
        Arc::new(MockSal::new()),
        SystemFactSheet::default(),
        Box::new(MockWorkload),
        telemetry_tx,
        command_rx,
    );

    // Smoke check: a freshly constructed orchestrator reports the expected
    // knee value without running a benchmark.
    let result = orchestrator.generate_result(false);
    assert_eq!(result.silicon_knee_watts, 15.0);
}