Compare commits
7 Commits
989e6d4325
...
9f00d6475b
| Author | SHA1 | Date | |
|---|---|---|---|
| 9f00d6475b | |||
| 48c3b46a0c | |||
| dc4c8281a9 | |||
| e27fb3c3ca | |||
| ab4d5828d5 | |||
| cab39a6478 | |||
| 7e2bef58d2 |
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -513,7 +513,7 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||
|
||||
[[package]]
|
||||
name = "ember-tune-rs"
|
||||
version = "1.0.0"
|
||||
version = "1.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "ember-tune-rs"
|
||||
version = "1.0.0"
|
||||
version = "1.1.0"
|
||||
edition = "2024"
|
||||
authors = ["Nils Pukropp <nils@narl.io>"]
|
||||
readme = "README.md"
|
||||
|
||||
87
README.md
87
README.md
@@ -1,87 +0,0 @@
|
||||
# xps-thermal-bench(8)
|
||||
|
||||
## NAME
|
||||
xps-thermal-bench - Scientific, power-aware thermal optimizer for Dell XPS laptops
|
||||
|
||||
## SYNOPSIS
|
||||
**xps-thermal-bench** [*OPTIONS*]
|
||||
|
||||
## DESCRIPTION
|
||||
**xps-thermal-bench** is a professional diagnostic utility that automates the generation and deployment of optimized **i8kmon(1)** and **throttled** configurations. It uses scientific thermal resistance modeling and real-world throughput benchmarks to find the perfect balance between performance and acoustics for your specific hardware unit.
|
||||
|
||||
### Key Scientific Features:
|
||||
* **Environmental Calibration**: Samples your natural ambient idle temperature at startup to anchor fan curves correctly for your specific session (TTY vs. Desktop).
|
||||
* **Thermal Resistance ($R_{\theta}$) Modeling**: Calculates the exact cooling efficiency (°C/W) of your heatsink to determine sustainable power limits.
|
||||
* **Thermal Inertia Tracking**: Measures how fast your laptop sheds heat to dynamically tune fan hysteresis, preventing the high-pitched "RPM hunting" common in thin-and-light chassis.
|
||||
* **Silicon Knee Detection**: Identifies your CPU's unique throttling point by monitoring frequency stability jitter during load.
|
||||
* **Interactive Deployment**: One-key installation of generated profiles directly to the system with automatic service restarts.
|
||||
|
||||
## WORKFLOW
|
||||
1. **System Audit**: The tool validates your kernel modules (`dell_smm`), power state (AC is required for accuracy), and interfering services.
|
||||
2. **Calibration**: Captures your current environmental thermal floor.
|
||||
3. **Benchmarking**: Runs a matrix of loads (25%–100%) against every fan tier.
|
||||
4. **Optimization**: Calculates Quiet, Balanced, and Performance profiles using the gathered telemetry.
|
||||
5. **Installation**: Press **'i'** in the TUI to deploy your favorite profile and restart services immediately.
|
||||
|
||||
## OPTIONS
|
||||
**-b, --baseline-temp** *CELSIUS*
|
||||
Target temperature for the cooling phase between tests. Default: 50.
|
||||
|
||||
**-c, --critical-temp** *CELSIUS*
|
||||
Safety ceiling at which a test is aborted. Default: 95.
|
||||
|
||||
**-C, --i8kmon-conf** *PATH*
|
||||
Override the path to the i8kmon configuration. (Default: Auto-discovered via systemd).
|
||||
|
||||
**-T, --throttled-conf** *PATH*
|
||||
Override the path to the throttled configuration. (Default: Auto-discovered via systemd).
|
||||
|
||||
**-e, --export-dir** *DIR*
|
||||
Directory to write generated configurations and backups.
|
||||
|
||||
**-f, --history-file** *PATH*
|
||||
Path to the JSON telemetry database. Default: thermal_history.json.
|
||||
|
||||
**-q, --quick**
|
||||
Fast-track mode (skips intermediate 50% and 75% load tiers).
|
||||
|
||||
**-s, --stressor** *STRESSOR*
|
||||
stress-ng workload type (e.g., `matrixprod`, `avx`, `cpu`, `fft`). Default: matrixprod.
|
||||
|
||||
**--no-tui**
|
||||
Disables the interactive terminal UI for a structured stdout log.
|
||||
|
||||
**--skip-checks**
|
||||
Bypasses the system audit. Use only if you know your hardware is compatible.
|
||||
|
||||
**--reprocess** [*HISTORY_JSON*]
|
||||
Generates new profiles and power limits from existing data without hardware tests.
|
||||
|
||||
## KEYBINDS (TUI Mode)
|
||||
**TAB**
|
||||
Cycle between Quiet, Balanced, and Performance profile previews.
|
||||
**i**
|
||||
**Install** the currently selected profile to the system and restart services.
|
||||
**q**
|
||||
Quit and restore original system state (governors, frequencies, services).
|
||||
|
||||
## SAFETY & CLEANUP
|
||||
The tool is designed to be "interruption-safe." Even if the program panics or is killed via `Ctrl+C`, it utilizes the Rust `Drop` trait to ensure:
|
||||
* Original CPU scaling governors are restored.
|
||||
* CPU frequency limits are unlocked.
|
||||
* Background power management services (like `auto-cpufreq` or `tlp`) are restarted.
|
||||
|
||||
## SETUP
|
||||
### Prerequisites
|
||||
- **Kernel**: `dell_smm_hwmon` must be loaded.
|
||||
- **Power**: AC adapter must be connected.
|
||||
- **Dependencies**: `stress-ng`, `i8kmon`, and `throttled`.
|
||||
|
||||
### Installation
|
||||
```bash
|
||||
cargo build --release
|
||||
sudo ./target/release/xps-thermal-bench
|
||||
```
|
||||
|
||||
## SEE ALSO
|
||||
**i8kmon(1)**, **throttled(8)**, **stress-ng(1)**
|
||||
117
assets/hardware_db.toml
Normal file
117
assets/hardware_db.toml
Normal file
@@ -0,0 +1,117 @@
|
||||
[metadata]
|
||||
version = "1.0.0"
|
||||
updated = "2026-02-26"
|
||||
description = "Hardware and Conflict Database for ember-tune Thermal Engine"
|
||||
|
||||
# service collision
|
||||
|
||||
[[conflicts]]
|
||||
id = "tlp_vs_ppd"
|
||||
services = ["tlp.service", "power-profiles-daemon.service"]
|
||||
contention = "ACPI Platform Profile / EPP"
|
||||
severity = "Critical"
|
||||
fix_action = "MaskBoth"
|
||||
help_text = "TLP and Power-Profiles-Daemon fight over power envelopes. Mask both to allow ember-tune deterministic control."
|
||||
|
||||
[[conflicts]]
|
||||
id = "thermal_logic_collision"
|
||||
services = ["thermald.service", "throttled.service"]
|
||||
contention = "RAPL / MSR / BD-PROCHOT"
|
||||
severity = "High"
|
||||
fix_action = "SuspendService"
|
||||
help_text = "Thermald and Throttled create a 'register ping-pong' loop. Disable throttled; ember-tune will manage RAPL limits."
|
||||
|
||||
[[conflicts]]
|
||||
id = "freq_scaling_collision"
|
||||
services = ["auto-cpufreq.service"]
|
||||
contention = "CPU Scaling Governor"
|
||||
severity = "Medium"
|
||||
fix_action = "SuspendService"
|
||||
help_text = "Auto-cpufreq interferes with deterministic Silicon Knee identification."
|
||||
|
||||
# manufacturer wide logic
|
||||
|
||||
[ecosystems.dell]
|
||||
vendor_regex = "(Dell.*|Precision.*|Latitude.*|XPS.*)"
|
||||
polling_cap_ms = 1000
|
||||
drivers = ["dell_smm_hwmon"]
|
||||
fan_manual_mode_cmd = "dell-bios-fan-control 0"
|
||||
fan_auto_mode_cmd = "dell-bios-fan-control 1"
|
||||
safety_register = "0x1FC" # BD PROCHOT MSR
|
||||
|
||||
[ecosystems.lenovo]
|
||||
vendor_regex = "LENOVO"
|
||||
lap_mode_path = "/sys/devices/platform/thinkpad_acpi/dytc_lapmode"
|
||||
profiles_path = "/sys/firmware/acpi/platform_profile"
|
||||
ec_write_required = false # Varies by model
|
||||
|
||||
[ecosystems.asus]
|
||||
vendor_regex = "ASUSTeK.*"
|
||||
thermal_policy_path = "/sys/devices/platform/asus-nb-wmi/throttle_thermal_policy"
|
||||
policy_map = { Balanced = 0, Turbo = 1, Silent = 2 }
|
||||
|
||||
[ecosystems.hp]
|
||||
vendor_regex = "HP"
|
||||
msr_lock_register = "0x610"
|
||||
msr_lock_bit = 63
|
||||
fan_boost_path = "/sys/devices/platform/hp-wmi/hwmon/hwmon*/pwm1_enable"
|
||||
|
||||
[ecosystems.framework]
|
||||
vendor_regex = "Framework"
|
||||
ec_tool = "ectool"
|
||||
optimization = "Direct-FFI-SMC"
|
||||
|
||||
# quirks: model quirks and fixes
|
||||
|
||||
[[quirks]]
|
||||
model_regex = "XPS 13 93.*"
|
||||
id = "dell_bd_prochot_fix"
|
||||
issue = "False Positive 400MHz Lock"
|
||||
monitor_msr = "0x1FC"
|
||||
reset_bit = 0
|
||||
action = "ClearBitOnSafeTemp"
|
||||
|
||||
[[quirks]]
|
||||
model_regex = "ThinkPad T14.*"
|
||||
id = "lenovo_lap_throttling"
|
||||
issue = "11W TDP Lock in Lap Mode"
|
||||
trigger_path = "/sys/devices/platform/thinkpad_acpi/dytc_lapmode"
|
||||
trigger_value = "1"
|
||||
action = "AbortOnLapMode"
|
||||
|
||||
[[quirks]]
|
||||
model_regex = "ROG Zephyrus G14"
|
||||
id = "asus_fan_hex_support"
|
||||
issue = "Custom Hex Curve Interface"
|
||||
target_path = "/sys/devices/platform/asus-nb-wmi/fan_curve"
|
||||
format = "HexPair16"
|
||||
|
||||
[[quirks]]
|
||||
model_regex = "Spectre x360"
|
||||
id = "hp_rapl_lockout"
|
||||
issue = "Hardware MSR Lockout"
|
||||
action = "WarnUserMSRLocked"
|
||||
|
||||
# heuristic discovery
|
||||
|
||||
[discovery.sensors]
|
||||
temp_labels = ["Package id 0", "Tdie", "Tctl", "CPU Temperature"]
|
||||
fan_labels = ["CPU Fan", "GPU Fan", "System Fan"]
|
||||
hwmon_priority = ["coretemp", "zenpower", "k10temp", "dell_smm"]
|
||||
|
||||
[discovery.actuators]
|
||||
rapl_paths = ["intel-rapl:0", "package-0"]
|
||||
amd_energy_paths = ["zenpower/energy1_input", "k10temp/energy1_input"]
|
||||
governor_files = ["energy_performance_preference", "energy_performance_hint", "scaling_governor"]
|
||||
|
||||
# env health verification
|
||||
|
||||
[[preflight_checks]]
|
||||
name = "MSR Write Access"
|
||||
check_cmd = "grep -q 'msr.allow_writes=on' /proc/cmdline"
|
||||
fail_help = "Add 'msr.allow_writes=on' to kernel parameters to allow power limit manipulation."
|
||||
|
||||
[[preflight_checks]]
|
||||
name = "Kernel Lockdown Status"
|
||||
check_cmd = "cat /sys/kernel/security/lockdown | grep -q '\\[none\\]'"
|
||||
fail_help = "Kernel Lockdown is enabled. MMIO/MSR actuators are restricted by the Linux Security Module."
|
||||
@@ -1,105 +0,0 @@
|
||||
[meta]
|
||||
vendor = "Dell Inc."
|
||||
family = "XPS"
|
||||
model = "XPS 13 9380"
|
||||
|
||||
[[requirements]]
|
||||
id = "msr-writes"
|
||||
name = "MSR Write Access"
|
||||
check_type = "Cmdline"
|
||||
target = "msr.allow_writes=on"
|
||||
action = "Manual"
|
||||
severity = "Fatal"
|
||||
message = "Throttled requires MSR write access to bypass TDP locks."
|
||||
solution = "Add msr.allow_writes=on to kernel parameters."
|
||||
|
||||
[[requirements]]
|
||||
id = "dell-smm-ignore-dmi"
|
||||
name = "Dell SMM Ignore DMI"
|
||||
check_type = "Cmdline"
|
||||
target = "dell_smm_hwmon.ignore_dmi=1"
|
||||
action = "Manual"
|
||||
severity = "Fatal"
|
||||
message = "Required to force load fan control driver on this model."
|
||||
solution = "Add dell_smm_hwmon.ignore_dmi=1 to kernel parameters."
|
||||
|
||||
[[requirements]]
|
||||
id = "bios-fan-ctrl"
|
||||
name = "BIOS Fan Control"
|
||||
check_type = "Service"
|
||||
target = "dell-bios-fan-control"
|
||||
invert = true
|
||||
action = "AutoRestore"
|
||||
severity = "Warning"
|
||||
message = "BIOS overrides i8kmon, causing fan pulsing."
|
||||
solution = "Tool will disable BIOS fan control (0) during run."
|
||||
|
||||
[[diagnostics]]
|
||||
id = "gpe-storm-fix"
|
||||
name = "Interrupt Storm Fix"
|
||||
check_type = "Cmdline"
|
||||
target = "acpi_mask_gpe=0x6E"
|
||||
action = "Manual"
|
||||
severity = "Warning"
|
||||
message = "GPE 0x6E often storms on 9380, wasting 20% CPU."
|
||||
solution = "Add acpi_mask_gpe=0x6E to kernel parameters."
|
||||
|
||||
[[diagnostics]]
|
||||
id = "s3-sleep"
|
||||
name = "S3 Deep Sleep"
|
||||
check_type = "Cmdline"
|
||||
target = "mem_sleep_default=deep"
|
||||
action = "Manual"
|
||||
severity = "Warning"
|
||||
message = "Modern Standby (s2idle) fails to sleep properly on 9380."
|
||||
solution = "Add mem_sleep_default=deep to force S3 sleep."
|
||||
|
||||
[[diagnostics]]
|
||||
id = "psr-fix"
|
||||
name = "Panel Self Refresh Fix"
|
||||
check_type = "Cmdline"
|
||||
target = "i915.enable_psr=0"
|
||||
action = "Manual"
|
||||
severity = "Warning"
|
||||
message = "PSR causes screen freezes on 9380."
|
||||
solution = "Add i915.enable_psr=0 to kernel parameters."
|
||||
|
||||
[[diagnostics]]
|
||||
id = "color-range-fix"
|
||||
name = "Full RGB Color Range"
|
||||
check_type = "Cmdline"
|
||||
target = "i915.color_range=2"
|
||||
action = "Manual"
|
||||
severity = "Warning"
|
||||
message = "Ensures full 0-255 RGB range on external monitors."
|
||||
solution = "Add i915.color_range=2 to kernel parameters."
|
||||
|
||||
[[diagnostics]]
|
||||
id = "nvme-latency-fix"
|
||||
name = "NVMe Latency Fix"
|
||||
check_type = "Cmdline"
|
||||
target = "nvme_core.default_ps_max_latency_us=5500"
|
||||
action = "Manual"
|
||||
severity = "Warning"
|
||||
message = "Prevents SSD hangs on certain 9380 firmware."
|
||||
solution = "Add nvme_core.default_ps_max_latency_us=5500 to kernel parameters."
|
||||
|
||||
[[diagnostics]]
|
||||
id = "audio-pop-fix"
|
||||
name = "Audio Pop Fix"
|
||||
check_type = "Cmdline"
|
||||
target = "snd_hda_intel.power_save=0"
|
||||
action = "Manual"
|
||||
severity = "Warning"
|
||||
message = "Prevents 'popping' sound in headphones."
|
||||
solution = "Add snd_hda_intel.power_save=0 to kernel parameters."
|
||||
|
||||
[[diagnostics]]
|
||||
id = "intel-sgx"
|
||||
name = "Intel SGX State"
|
||||
check_type = "File"
|
||||
target = "/dev/sgx"
|
||||
action = "Manual"
|
||||
severity = "Warning"
|
||||
message = "SGX must be 'Software Controlled' for MSR access."
|
||||
solution = "Set Intel SGX to 'Software Controlled' in BIOS."
|
||||
@@ -40,7 +40,7 @@ pub struct Cli {
|
||||
#[arg(
|
||||
short,
|
||||
long,
|
||||
help = "Writes high-resolution diagnostic logs to /tmp/ember-tune.log"
|
||||
help = "Writes high-resolution diagnostic logs to /var/log/ember-tune.log"
|
||||
)]
|
||||
pub verbose: bool,
|
||||
|
||||
|
||||
41
src/engine/formatters/i8kmon.rs
Normal file
41
src/engine/formatters/i8kmon.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
/// Inputs used to render an `i8kmon` fan-control configuration.
///
/// Both values are temperatures (presumably in °C, matching the `max_temp_c`
/// value the caller derives them from; confirm against the orchestrator).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct I8kmonConfig {
    /// Ambient / idle temperature; the low fan tier thresholds are fixed
    /// offsets above this value.
    pub t_ambient: f32,
    /// Temperature at which the highest fan state is switched on.
    pub t_max_fan: f32,
}

/// Stateless renderer that turns an [`I8kmonConfig`] into `i8kmon.conf` text.
#[derive(Debug)]
pub struct I8kmonTranslator;

impl I8kmonTranslator {
    /// Renders the configuration text for the three fan states.
    ///
    /// Thresholds are derived as follows (all rounded to whole degrees in the
    /// output):
    ///
    /// * state 0: on at `t_ambient + 12`, off at `t_ambient + 5`
    /// * state 1: on at `max(t_max_fan - 15, t_ambient + 14)`, off at `t_ambient + 10`
    /// * state 2: on at `t_max_fan`, off at `t_max_fan - 5`
    ///
    /// The speed thresholds are fixed constants.
    ///
    /// NOTE(review): when `t_max_fan` is less than `t_ambient + 14`, the
    /// state-1 "on" threshold ends up above the state-2 one. The inputs are
    /// not validated here; confirm callers never pass such a pair.
    pub fn generate_conf(config: &I8kmonConfig) -> String {
        let t_off = config.t_ambient + 5.0;
        let t_low_on = config.t_ambient + 12.0;
        let t_low_off = config.t_ambient + 10.0;
        let t_high_on = config.t_max_fan;
        let t_high_off = config.t_max_fan - 5.0;
        // Keep the middle tier at least 2 degrees above the low tier's "on" point.
        let t_low_trigger = (config.t_max_fan - 15.0).max(t_low_on + 2.0);

        // `{{` / `}}` are literal braces; `{name:.0}` captures the local of
        // that name and prints it without decimals.
        format!(
            r#"# Generated by ember-tune Optimizer
# Grounded in physical thermal resistance

set config(gen_shadow) 1
set config(i8k_ignore_dmi) 1

# Fan states: {{state_low state_high temp_on temp_off}}
set config(0) {{0 0 {t_low_on:.0} {t_off:.0}}}
set config(1) {{1 1 {t_low_trigger:.0} {t_low_off:.0}}}
set config(2) {{2 2 {t_high_on:.0} {t_high_off:.0}}}

# Speed thresholds (approximate for XPS 9380)
set config(speed_low) 2500
set config(speed_high) 4500
"#
        )
    }
}
|
||||
@@ -1 +1,2 @@
|
||||
pub mod throttled;
|
||||
pub mod i8kmon;
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
pub struct ThrottledConfig {
|
||||
pub pl1_limit: f32,
|
||||
@@ -11,7 +12,7 @@ impl ThrottledTranslator {
|
||||
pub fn generate_conf(config: &ThrottledConfig) -> String {
|
||||
format!(
|
||||
r#"[GENERAL]
|
||||
# Generated by FerroTherm Optimizer
|
||||
# Generated by ember-tune Optimizer
|
||||
# Physical Sweet Spot found at {pl1:.1}W
|
||||
|
||||
[BATTERY]
|
||||
@@ -35,4 +36,98 @@ Trip_Temp_C: {trip:.0}
|
||||
trip = config.trip_temp
|
||||
)
|
||||
}
|
||||
|
||||
/// Merges benchmarked values into an existing throttled.conf content.
|
||||
/// Preserves all other sections (like [UnderVOLT]), comments, and formatting.
|
||||
pub fn merge_conf(existing_content: &str, config: &ThrottledConfig) -> String {
|
||||
let mut sections = Vec::new();
|
||||
let mut current_section_name = String::new();
|
||||
let mut current_section_lines = Vec::new();
|
||||
|
||||
// 1. Parse into sections to ensure we only update keys in [BATTERY] and [AC]
|
||||
for line in existing_content.lines() {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.starts_with('[') && trimmed.ends_with(']') {
|
||||
if !current_section_lines.is_empty() || !current_section_name.is_empty() {
|
||||
sections.push((current_section_name.clone(), current_section_lines.clone()));
|
||||
}
|
||||
current_section_name = trimmed[1..trimmed.len() - 1].to_string();
|
||||
current_section_lines = vec![line.to_string()];
|
||||
} else {
|
||||
current_section_lines.push(line.to_string());
|
||||
}
|
||||
}
|
||||
sections.push((current_section_name, current_section_lines));
|
||||
|
||||
let target_keys = [
|
||||
("PL1_Tdp_W", format!("{:.0}", config.pl1_limit)),
|
||||
("PL2_Tdp_W", format!("{:.0}", config.pl2_limit)),
|
||||
("Trip_Temp_C", format!("{:.0}", config.trip_temp)),
|
||||
];
|
||||
|
||||
let mut result_lines = Vec::new();
|
||||
let mut handled_sections = HashSet::new();
|
||||
|
||||
// 2. Process sections
|
||||
for (name, mut lines) in sections {
|
||||
if name == "BATTERY" || name == "AC" {
|
||||
handled_sections.insert(name.clone());
|
||||
let mut updated_keys = HashSet::new();
|
||||
|
||||
let mut new_lines = Vec::new();
|
||||
for line in lines {
|
||||
let mut updated = false;
|
||||
let trimmed = line.trim();
|
||||
|
||||
if !trimmed.starts_with('#') && !trimmed.is_empty() {
|
||||
if let Some((key, _)) = trimmed.split_once(':') {
|
||||
let key = key.trim();
|
||||
for (target_key, new_value) in &target_keys {
|
||||
if key == *target_key {
|
||||
if let Some(colon_idx) = line.find(':') {
|
||||
let prefix = &line[..colon_idx + 1];
|
||||
let rest = &line[colon_idx + 1..];
|
||||
let comment = if let Some(hash_idx) = rest.find('#') {
|
||||
&rest[hash_idx..]
|
||||
} else {
|
||||
""
|
||||
};
|
||||
new_lines.push(format!("{} {}{}", prefix, new_value, comment));
|
||||
updated_keys.insert(*target_key);
|
||||
updated = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !updated {
|
||||
new_lines.push(line);
|
||||
}
|
||||
}
|
||||
|
||||
for (target_key, new_value) in &target_keys {
|
||||
if !updated_keys.contains(*target_key) {
|
||||
new_lines.push(format!("{}: {}", target_key, new_value));
|
||||
}
|
||||
}
|
||||
lines = new_lines;
|
||||
}
|
||||
result_lines.extend(lines);
|
||||
}
|
||||
|
||||
// 3. Add missing sections if they didn't exist at all
|
||||
for section_name in &["BATTERY", "AC"] {
|
||||
if !handled_sections.contains(*section_name) {
|
||||
result_lines.push(String::new());
|
||||
result_lines.push(format!("[{}]", section_name));
|
||||
for (target_key, new_value) in &target_keys {
|
||||
result_lines.push(format!("{}: {}", target_key, new_value));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result_lines.join("\n")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,8 +69,8 @@ impl OptimizerEngine {
|
||||
.unwrap_or(0.0)
|
||||
}
|
||||
|
||||
/// Finds the "Silicon Knee" - the point where performance per watt plateaus
|
||||
/// and thermal density spikes.
|
||||
/// Finds the "Silicon Knee" - the point where performance per watt (efficiency)
|
||||
/// starts to diminish significantly and thermal density spikes.
|
||||
pub fn find_silicon_knee(&self, profile: &ThermalProfile) -> f32 {
|
||||
if profile.points.len() < 3 {
|
||||
return profile.points.last().map(|p| p.power_w).unwrap_or(15.0);
|
||||
@@ -82,27 +82,42 @@ impl OptimizerEngine {
|
||||
let mut best_pl = points[0].power_w;
|
||||
let mut max_score = f32::MIN;
|
||||
|
||||
// Use a sliding window (3 points) to calculate gradients more robustly
|
||||
for i in 1..points.len() - 1 {
|
||||
let prev = &points[i - 1];
|
||||
let curr = &points[i];
|
||||
let next = &points[i + 1];
|
||||
|
||||
// 1. Performance Gradient (dMHz/dW)
|
||||
let dmhz_dw_prev = (curr.freq_mhz - prev.freq_mhz) / (curr.power_w - prev.power_w).max(0.1);
|
||||
let dmhz_dw_next = (next.freq_mhz - curr.freq_mhz) / (next.power_w - curr.power_w).max(0.1);
|
||||
let freq_diminish = dmhz_dw_prev - dmhz_dw_next;
|
||||
// 1. Efficiency Metric (Throughput per Watt)
|
||||
// If throughput is 0 (unsupported), fallback to Frequency per Watt
|
||||
let efficiency_curr = if curr.throughput > 0.0 {
|
||||
curr.throughput as f32 / curr.power_w.max(0.1)
|
||||
} else {
|
||||
curr.freq_mhz / curr.power_w.max(0.1)
|
||||
};
|
||||
|
||||
let efficiency_next = if next.throughput > 0.0 {
|
||||
next.throughput as f32 / next.power_w.max(0.1)
|
||||
} else {
|
||||
next.freq_mhz / next.power_w.max(0.1)
|
||||
};
|
||||
|
||||
// 2. Thermal Gradient (d2T/dW2)
|
||||
// Diminishing returns: how much efficiency drops per additional watt
|
||||
let efficiency_drop = (efficiency_curr - efficiency_next) / (next.power_w - curr.power_w).max(0.1);
|
||||
|
||||
// 2. Thermal Acceleration (d2T/dW2)
|
||||
let dt_dw_prev = (curr.temp_c - prev.temp_c) / (curr.power_w - prev.power_w).max(0.1);
|
||||
let dt_dw_next = (next.temp_c - curr.temp_c) / (next.power_w - curr.power_w).max(0.1);
|
||||
let temp_accel = (dt_dw_next - dt_dw_prev) / (next.power_w - prev.power_w).max(0.1);
|
||||
|
||||
// 3. Wall Detection
|
||||
let is_throttling = next.freq_mhz < curr.freq_mhz;
|
||||
let penalty = if is_throttling { 2000.0 } else { 0.0 };
|
||||
// 3. Wall Detection (Any drop in absolute frequency/throughput is a hard wall)
|
||||
let is_throttling = next.freq_mhz < curr.freq_mhz || (next.throughput > 0.0 && next.throughput < curr.throughput);
|
||||
let penalty = if is_throttling { 5000.0 } else { 0.0 };
|
||||
|
||||
// Heuristic scoring: Weight thermal acceleration and diminishing frequency gains
|
||||
let score = (freq_diminish * 2.0) + (temp_accel * 10.0) - penalty;
|
||||
// Heuristic scoring:
|
||||
// - Higher score is "Better" (The Knee is the peak of this curve)
|
||||
// - We want high efficiency (low drop) and low thermal acceleration.
|
||||
let score = (efficiency_curr * 10.0) - (efficiency_drop * 50.0) - (temp_accel * 20.0) - penalty;
|
||||
|
||||
if score > max_score {
|
||||
max_score = score;
|
||||
|
||||
@@ -75,7 +75,7 @@ fn print_summary_report(result: &OptimizationResult) {
|
||||
}
|
||||
|
||||
fn setup_logging(verbose: bool) -> tracing_appender::non_blocking::WorkerGuard {
|
||||
let file_appender = tracing_appender::rolling::never("/tmp", "ember-tune.log");
|
||||
let file_appender = tracing_appender::rolling::never("/var/log", "ember-tune.log");
|
||||
let (non_blocking, guard) = tracing_appender::non_blocking(file_appender);
|
||||
|
||||
let level = if verbose { tracing::Level::DEBUG } else { tracing::Level::INFO };
|
||||
@@ -217,7 +217,7 @@ fn main() -> Result<()> {
|
||||
cpu_temp: 0.0,
|
||||
power_w: 0.0,
|
||||
current_freq: 0.0,
|
||||
fan_rpm: 0,
|
||||
fans: Vec::new(),
|
||||
governor: "detecting".to_string(),
|
||||
pl1_limit: 0.0,
|
||||
pl2_limit: 0.0,
|
||||
|
||||
@@ -33,7 +33,7 @@ pub struct TelemetryState {
|
||||
pub cpu_temp: f32,
|
||||
pub power_w: f32,
|
||||
pub current_freq: f32,
|
||||
pub fan_rpm: u32,
|
||||
pub fans: Vec<u32>,
|
||||
|
||||
// --- High-res History (Last 60s @ 500ms = 120 points) ---
|
||||
pub history_watts: Vec<f32>,
|
||||
|
||||
@@ -151,15 +151,16 @@ impl BenchmarkOrchestrator {
|
||||
// Record data point
|
||||
let avg_p = self.sensors.get_power_w().unwrap_or(0.0);
|
||||
let avg_t = self.sensors.get_temp().unwrap_or(0.0);
|
||||
let avg_f = 2500.0; // Mock frequency until SensorBus expanded
|
||||
let fan = self.sensors.get_fan_rpm().unwrap_or(0);
|
||||
let avg_f = self.sensors.get_freq_mhz().unwrap_or(0.0);
|
||||
let fans = self.sensors.get_fan_rpms().unwrap_or_default();
|
||||
let primary_fan = fans.first().cloned().unwrap_or(0);
|
||||
let tp = self.workload.get_throughput().unwrap_or(0.0);
|
||||
|
||||
self.profile.points.push(ThermalPoint {
|
||||
power_w: avg_p,
|
||||
temp_c: avg_t,
|
||||
freq_mhz: avg_f,
|
||||
fan_rpm: fan,
|
||||
fan_rpm: primary_fan,
|
||||
throughput: tp,
|
||||
});
|
||||
|
||||
@@ -181,7 +182,7 @@ impl BenchmarkOrchestrator {
|
||||
|
||||
// Phase 5: Finalizing
|
||||
self.phase = BenchmarkPhase::Finalizing;
|
||||
self.log("Benchmark sequence complete. Generating configuration...")?;
|
||||
self.log("Benchmark sequence complete. Generating configurations...")?;
|
||||
|
||||
let config = crate::engine::formatters::throttled::ThrottledConfig {
|
||||
pl1_limit: res.silicon_knee_watts,
|
||||
@@ -189,9 +190,25 @@ impl BenchmarkOrchestrator {
|
||||
trip_temp: res.max_temp_c.max(95.0),
|
||||
};
|
||||
|
||||
let conf_content = crate::engine::formatters::throttled::ThrottledTranslator::generate_conf(&config);
|
||||
std::fs::write("throttled.conf", conf_content)?;
|
||||
self.log("✓ Saved 'throttled.conf'.")?;
|
||||
// 1. Throttled (Merged if exists)
|
||||
let throttled_path = "throttled.conf";
|
||||
let existing_throttled = std::fs::read_to_string(throttled_path).unwrap_or_default();
|
||||
let throttled_content = if existing_throttled.is_empty() {
|
||||
crate::engine::formatters::throttled::ThrottledTranslator::generate_conf(&config)
|
||||
} else {
|
||||
crate::engine::formatters::throttled::ThrottledTranslator::merge_conf(&existing_throttled, &config)
|
||||
};
|
||||
std::fs::write(throttled_path, throttled_content)?;
|
||||
self.log("✓ Saved 'throttled.conf' (merged).")?;
|
||||
|
||||
// 2. i8kmon
|
||||
let i8k_config = crate::engine::formatters::i8kmon::I8kmonConfig {
|
||||
t_ambient: self.profile.ambient_temp,
|
||||
t_max_fan: res.max_temp_c - 5.0, // Aim to hit max fan before max temp
|
||||
};
|
||||
let i8k_content = crate::engine::formatters::i8kmon::I8kmonTranslator::generate_conf(&i8k_config);
|
||||
std::fs::write("i8kmon.conf", i8k_content)?;
|
||||
self.log("✓ Saved 'i8kmon.conf'.")?;
|
||||
|
||||
self.guard.restore()?;
|
||||
self.log("✓ Environment restored.")?;
|
||||
@@ -233,8 +250,8 @@ impl BenchmarkOrchestrator {
|
||||
tick: 0,
|
||||
cpu_temp: self.sensors.get_temp().unwrap_or(0.0),
|
||||
power_w: self.sensors.get_power_w().unwrap_or(0.0),
|
||||
current_freq: 0.0,
|
||||
fan_rpm: self.sensors.get_fan_rpm().unwrap_or(0),
|
||||
current_freq: self.sensors.get_freq_mhz().unwrap_or(0.0),
|
||||
fans: self.sensors.get_fan_rpms().unwrap_or_default(),
|
||||
governor: "unknown".to_string(),
|
||||
pl1_limit: 0.0,
|
||||
pl2_limit: 0.0,
|
||||
@@ -252,7 +269,7 @@ impl BenchmarkOrchestrator {
|
||||
fn send_telemetry(&mut self, tick: u64) -> Result<()> {
|
||||
let temp = self.sensors.get_temp().unwrap_or(0.0);
|
||||
let pwr = self.sensors.get_power_w().unwrap_or(0.0);
|
||||
let freq = 0.0;
|
||||
let freq = self.sensors.get_freq_mhz().unwrap_or(0.0);
|
||||
|
||||
self.history_temp.push_back(temp);
|
||||
self.history_watts.push_back(pwr);
|
||||
@@ -271,7 +288,7 @@ impl BenchmarkOrchestrator {
|
||||
cpu_temp: temp,
|
||||
power_w: pwr,
|
||||
current_freq: freq,
|
||||
fan_rpm: self.sensors.get_fan_rpm().unwrap_or(0),
|
||||
fans: self.sensors.get_fan_rpms().unwrap_or_default(),
|
||||
governor: "performance".to_string(),
|
||||
pl1_limit: 15.0,
|
||||
pl2_limit: 25.0,
|
||||
|
||||
@@ -10,19 +10,20 @@ use tracing::debug;
|
||||
pub struct DellXps9380Sal {
|
||||
temp_path: PathBuf,
|
||||
pwr_path: PathBuf,
|
||||
fan_path: PathBuf,
|
||||
fan_paths: Vec<PathBuf>,
|
||||
freq_path: PathBuf,
|
||||
pl1_path: PathBuf,
|
||||
pl2_path: PathBuf,
|
||||
last_poll: Mutex<Instant>,
|
||||
last_temp: Mutex<f32>,
|
||||
last_fan: Mutex<u32>,
|
||||
last_fans: Mutex<Vec<u32>>,
|
||||
}
|
||||
|
||||
impl DellXps9380Sal {
|
||||
pub fn init() -> Result<Self> {
|
||||
let mut temp_path = None;
|
||||
let mut pwr_path = None;
|
||||
let mut fan_path = None;
|
||||
let mut fan_paths = Vec::new();
|
||||
let mut rapl_base_path = None;
|
||||
|
||||
// Dynamic hwmon discovery
|
||||
@@ -33,7 +34,17 @@ impl DellXps9380Sal {
|
||||
|
||||
if name == "dell_smm" {
|
||||
temp_path = Some(p.join("temp1_input"));
|
||||
fan_path = Some(p.join("fan1_input"));
|
||||
// Discover all fans
|
||||
if let Ok(fan_entries) = fs::read_dir(&p) {
|
||||
for fan_entry in fan_entries.flatten() {
|
||||
let fan_p = fan_entry.path();
|
||||
if fan_p.file_name().unwrap_or_default().to_string_lossy().starts_with("fan") &&
|
||||
fan_p.file_name().unwrap_or_default().to_string_lossy().ends_with("_input") {
|
||||
fan_paths.push(fan_p);
|
||||
}
|
||||
}
|
||||
}
|
||||
fan_paths.sort();
|
||||
}
|
||||
|
||||
if name == "intel_rapl" || name == "rapl" {
|
||||
@@ -59,16 +70,18 @@ impl DellXps9380Sal {
|
||||
}
|
||||
|
||||
let rapl_base = rapl_base_path.context("Could not find RAPL package-0 path in powercap")?;
|
||||
let freq_path = PathBuf::from("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq");
|
||||
|
||||
Ok(Self {
|
||||
temp_path: temp_path.context("Could not find dell_smm temperature path")?,
|
||||
pwr_path: pwr_path.context("Could not find RAPL power path")?,
|
||||
fan_path: fan_path.context("Could not find dell_smm fan path")?,
|
||||
fan_paths,
|
||||
freq_path,
|
||||
pl1_path: rapl_base.join("constraint_0_power_limit_uw"),
|
||||
pl2_path: rapl_base.join("constraint_1_power_limit_uw"),
|
||||
last_poll: Mutex::new(Instant::now() - Duration::from_secs(2)),
|
||||
last_temp: Mutex::new(0.0),
|
||||
last_fan: Mutex::new(0),
|
||||
last_fans: Mutex::new(Vec::new()),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -83,7 +96,19 @@ impl PreflightAuditor for DellXps9380Sal {
|
||||
outcome: if unsafe { libc::getuid() } == 0 { Ok(()) } else { Err(AuditError::RootRequired) }
|
||||
});
|
||||
|
||||
// 2. Kernel parameters check
|
||||
// 2. Kernel modules check (simplified check via sysfs/proc)
|
||||
let modules = ["dell_smm_hwmon", "msr", "intel_rapl_msr"];
|
||||
for mod_name in modules {
|
||||
let path = format!("/sys/module/{}", mod_name);
|
||||
steps.push(AuditStep {
|
||||
description: format!("Kernel Module: {}", mod_name),
|
||||
outcome: if PathBuf::from(path).exists() { Ok(()) } else {
|
||||
Err(AuditError::ToolMissing(format!("Module '{}' not loaded. Run 'sudo modprobe {}'", mod_name, mod_name)))
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// 3. Kernel parameters check
|
||||
let cmdline = fs::read_to_string("/proc/cmdline").unwrap_or_default();
|
||||
steps.push(AuditStep {
|
||||
description: "Kernel Param: dell_smm_hwmon.ignore_dmi=1".to_string(),
|
||||
@@ -91,6 +116,12 @@ impl PreflightAuditor for DellXps9380Sal {
|
||||
Err(AuditError::MissingKernelParam("dell_smm_hwmon.ignore_dmi=1".to_string()))
|
||||
}
|
||||
});
|
||||
steps.push(AuditStep {
|
||||
description: "Kernel Param: dell_smm_hwmon.restricted=0".to_string(),
|
||||
outcome: if cmdline.contains("dell_smm_hwmon.restricted=0") { Ok(()) } else {
|
||||
Err(AuditError::MissingKernelParam("dell_smm_hwmon.restricted=0".to_string()))
|
||||
}
|
||||
});
|
||||
steps.push(AuditStep {
|
||||
description: "Kernel Param: msr.allow_writes=on".to_string(),
|
||||
outcome: if cmdline.contains("msr.allow_writes=on") { Ok(()) } else {
|
||||
@@ -98,7 +129,16 @@ impl PreflightAuditor for DellXps9380Sal {
|
||||
}
|
||||
});
|
||||
|
||||
// 3. Check AC power
|
||||
// 4. Lockdown check
|
||||
let lockdown = fs::read_to_string("/sys/kernel/security/lockdown").unwrap_or_default();
|
||||
steps.push(AuditStep {
|
||||
description: "Kernel Lockdown Status".to_string(),
|
||||
outcome: if lockdown.contains("[none]") || lockdown.is_empty() { Ok(()) } else {
|
||||
Err(AuditError::KernelIncompatible("Kernel is in lockdown mode. Set to 'none' to allow MSR/SMM writes.".to_string()))
|
||||
}
|
||||
});
|
||||
|
||||
// 5. Check AC power
|
||||
let ac_status = fs::read_to_string("/sys/class/power_supply/AC/online").unwrap_or_else(|_| "0".to_string());
|
||||
steps.push(AuditStep {
|
||||
description: "AC Power Connection".to_string(),
|
||||
@@ -123,9 +163,10 @@ impl DellXps9380Guard {
|
||||
|
||||
impl EnvironmentGuard for DellXps9380Guard {
|
||||
fn suppress(&mut self) -> Result<()> {
|
||||
let services = ["tlp", "thermald"];
|
||||
let services = ["tlp", "thermald", "i8kmon"];
|
||||
for s in services {
|
||||
if Command::new("systemctl").args(["is-active", "--quiet", s]).status()?.success() {
|
||||
debug!("Suppressing service: {}", s);
|
||||
Command::new("systemctl").args(["stop", s]).status()?;
|
||||
self.stopped_services.push(s.to_string());
|
||||
}
|
||||
@@ -179,20 +220,32 @@ impl SensorBus for DellXps9380Sal {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fan_rpm(&self) -> Result<u32> {
|
||||
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
|
||||
let mut last_poll = self.last_poll.lock().unwrap();
|
||||
let now = Instant::now();
|
||||
|
||||
if now.duration_since(*last_poll) < Duration::from_millis(1000) {
|
||||
return Ok(*self.last_fan.lock().unwrap());
|
||||
return Ok(self.last_fans.lock().unwrap().clone());
|
||||
}
|
||||
|
||||
let s = fs::read_to_string(&self.fan_path)?;
|
||||
let val = s.trim().parse::<u32>()?;
|
||||
let mut fans = Vec::new();
|
||||
for path in &self.fan_paths {
|
||||
if let Ok(s) = fs::read_to_string(path) {
|
||||
if let Ok(rpm) = s.trim().parse::<u32>() {
|
||||
fans.push(rpm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*self.last_fan.lock().unwrap() = val;
|
||||
*self.last_fans.lock().unwrap() = fans.clone();
|
||||
*last_poll = now;
|
||||
|
||||
Ok(fans)
|
||||
}
|
||||
|
||||
fn get_freq_mhz(&self) -> Result<f32> {
|
||||
let s = fs::read_to_string(&self.freq_path)?;
|
||||
let val = s.trim().parse::<f32>()? / 1000.0;
|
||||
Ok(val)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,8 +50,11 @@ impl SensorBus for MockSensorBus {
|
||||
fn get_power_w(&self) -> Result<f32> {
|
||||
Ok(15.0)
|
||||
}
|
||||
fn get_fan_rpm(&self) -> Result<u32> {
|
||||
Ok(2500)
|
||||
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
|
||||
Ok(vec![2500])
|
||||
}
|
||||
fn get_freq_mhz(&self) -> Result<f32> {
|
||||
Ok(3200.0)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -54,10 +54,11 @@ pub trait EnvironmentGuard {
|
||||
}
|
||||
|
||||
/// Read-only interface for standardized metrics.
|
||||
pub trait SensorBus {
|
||||
pub trait SensorBus: Send + Sync {
|
||||
fn get_temp(&self) -> Result<f32>;
|
||||
fn get_power_w(&self) -> Result<f32>;
|
||||
fn get_fan_rpm(&self) -> Result<u32>;
|
||||
fn get_fan_rpms(&self) -> Result<Vec<u32>>;
|
||||
fn get_freq_mhz(&self) -> Result<f32>;
|
||||
}
|
||||
|
||||
impl<T: SensorBus + ?Sized> SensorBus for Arc<T> {
|
||||
@@ -67,8 +68,11 @@ impl<T: SensorBus + ?Sized> SensorBus for Arc<T> {
|
||||
fn get_power_w(&self) -> Result<f32> {
|
||||
(**self).get_power_w()
|
||||
}
|
||||
fn get_fan_rpm(&self) -> Result<u32> {
|
||||
(**self).get_fan_rpm()
|
||||
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
|
||||
(**self).get_fan_rpms()
|
||||
}
|
||||
fn get_freq_mhz(&self) -> Result<f32> {
|
||||
(**self).get_freq_mhz()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ pub struct DashboardState {
|
||||
impl DashboardState {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
logs: vec!["FerroTherm Initialized.".to_string()],
|
||||
logs: vec!["ember-tune Initialized.".to_string()],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ pub fn draw_dashboard(
|
||||
.direction(Direction::Vertical)
|
||||
.constraints([
|
||||
Constraint::Length(10), // Gauges
|
||||
Constraint::Length(3), // Cooling
|
||||
Constraint::Min(4), // Cooling (Increased for multiple fans)
|
||||
Constraint::Length(3), // CPU State
|
||||
Constraint::Min(4), // Metadata
|
||||
])
|
||||
@@ -92,7 +92,7 @@ fn draw_header(f: &mut Frame, area: Rect, state: &TelemetryState) {
|
||||
let hostname = std::env::var("HOSTNAME").unwrap_or_else(|_| "localhost".into());
|
||||
|
||||
let left = Span::styled(format!(" {} ", hostname), Style::default().fg(C_MAUVE).add_modifier(Modifier::BOLD));
|
||||
let center = Span::styled(" FERROTHERM THERMAL BENCH ", Style::default().fg(C_LAVENDER).add_modifier(Modifier::BOLD));
|
||||
let center = Span::styled(" EMBER-TUNE THERMAL BENCH ", Style::default().fg(C_LAVENDER).add_modifier(Modifier::BOLD));
|
||||
let right = Span::styled(format!(" UPTIME: {} ", uptime), Style::default().fg(C_SUBTEXT));
|
||||
|
||||
let total_width = area.width;
|
||||
@@ -182,13 +182,35 @@ fn draw_cooling(f: &mut Frame, area: Rect, state: &TelemetryState) {
|
||||
let inner = block.inner(area);
|
||||
f.render_widget(block, area);
|
||||
|
||||
let info = Line::from(vec![
|
||||
let mut lines = Vec::new();
|
||||
|
||||
// Line 1: Tier
|
||||
lines.push(Line::from(vec![
|
||||
Span::styled(" Tier: ", Style::default().fg(C_LAVENDER)),
|
||||
Span::styled(&state.fan_tier, Style::default().fg(C_TEAL)),
|
||||
Span::styled(" | RPM: ", Style::default().fg(C_LAVENDER)),
|
||||
Span::styled(format!("{}", state.fan_rpm), Style::default().fg(C_TEXT)),
|
||||
]);
|
||||
f.render_widget(Paragraph::new(info), inner);
|
||||
]));
|
||||
|
||||
// Line 2+: Fans
|
||||
if state.fans.is_empty() {
|
||||
lines.push(Line::from(vec![
|
||||
Span::styled(" Fans: ", Style::default().fg(C_LAVENDER)),
|
||||
Span::styled("N/A", Style::default().fg(C_SUBTEXT)),
|
||||
]));
|
||||
} else if state.fans.len() == 1 {
|
||||
lines.push(Line::from(vec![
|
||||
Span::styled(" Fan: ", Style::default().fg(C_LAVENDER)),
|
||||
Span::styled(format!("{} RPM", state.fans[0]), Style::default().fg(C_TEXT)),
|
||||
]));
|
||||
} else {
|
||||
for (i, rpm) in state.fans.iter().enumerate() {
|
||||
lines.push(Line::from(vec![
|
||||
Span::styled(format!(" Fan {}: ", i + 1), Style::default().fg(C_LAVENDER)),
|
||||
Span::styled(format!("{} RPM", rpm), Style::default().fg(C_TEXT)),
|
||||
]));
|
||||
}
|
||||
}
|
||||
|
||||
f.render_widget(Paragraph::new(lines), inner);
|
||||
}
|
||||
|
||||
fn draw_cpu_state(f: &mut Frame, area: Rect, state: &TelemetryState) {
|
||||
|
||||
Reference in New Issue
Block a user