186 lines
7.4 KiB
Rust
186 lines
7.4 KiB
Rust
//! The core mathematics and physics engine for `ember-tune`.
//!
//! This module contains the `OptimizerEngine`, which is responsible for all
//! data smoothing, thermal resistance calculations, and the heuristic scoring
//! used to identify the "Silicon Knee".
|
|
|
|
use serde::{Serialize, Deserialize};
|
|
use std::collections::HashMap;
|
|
use std::path::PathBuf;
|
|
|
|
pub mod formatters;
|
|
|
|
/// A single, atomic data point captured during the benchmark.
|
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
|
pub struct ThermalPoint {
|
|
pub power_w: f32,
|
|
pub temp_c: f32,
|
|
pub freq_mhz: f32,
|
|
pub fan_rpm: u32,
|
|
pub throughput: f64,
|
|
}
|
|
|
|
/// A complete thermal profile containing all data points for a benchmark run.
|
|
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
|
|
pub struct ThermalProfile {
|
|
pub points: Vec<ThermalPoint>,
|
|
pub ambient_temp: f32,
|
|
}
|
|
|
|
/// The final, recommended parameters derived from the thermal benchmark.
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct OptimizationResult {
|
|
/// The full thermal profile used for calculations.
|
|
pub profile: ThermalProfile,
|
|
/// The power level (in Watts) where performance-per-watt plateaus.
|
|
pub silicon_knee_watts: f32,
|
|
/// The measured thermal resistance of the system (Kelvin/Watt).
|
|
pub thermal_resistance_kw: f32,
|
|
/// The recommended sustained power limit (PL1).
|
|
pub recommended_pl1: f32,
|
|
/// The recommended burst power limit (PL2).
|
|
pub recommended_pl2: f32,
|
|
/// The maximum temperature reached during the test.
|
|
pub max_temp_c: f32,
|
|
/// Indicates if the benchmark was aborted before completion.
|
|
pub is_partial: bool,
|
|
/// A map of configuration files that were written to.
|
|
pub config_paths: HashMap<String, PathBuf>,
|
|
}
|
|
|
|
/// Pure mathematics engine for thermal optimization.
///
/// Contains no hardware I/O and operates solely on the collected [`ThermalProfile`].
/// The engine holds only its smoothing configuration, so it is cheap to clone
/// and safe to print with `{:?}`.
#[derive(Debug, Clone)]
pub struct OptimizerEngine {
    /// The size of the sliding window for the `smooth` function
    /// (number of trailing samples averaged per output value).
    window_size: usize,
}
|
|
|
|
impl OptimizerEngine {
|
|
/// Creates a new `OptimizerEngine`.
|
|
pub fn new(window_size: usize) -> Self {
|
|
Self { window_size }
|
|
}
|
|
|
|
/// Applies a simple moving average (SMA) filter with outlier rejection.
|
|
///
|
|
/// This function smooths noisy sensor data. It rejects any value in the
|
|
/// window that is more than 20.0 units away from the window's average
|
|
/// before calculating the final smoothed value.
|
|
pub fn smooth(&self, data: &[f32]) -> Vec<f32> {
|
|
if data.is_empty() { return vec![]; }
|
|
let mut smoothed = Vec::with_capacity(data.len());
|
|
|
|
for i in 0..data.len() {
|
|
let start = if i < self.window_size { 0 } else { i - self.window_size + 1 };
|
|
let end = i + 1;
|
|
|
|
let window = &data[start..end];
|
|
let avg: f32 = window.iter().sum::<f32>() / window.len() as f32;
|
|
let filtered: Vec<f32> = window.iter()
|
|
.filter(|&&v| (v - avg).abs() < 20.0) // Reject spikes > 20 units
|
|
.cloned().collect();
|
|
|
|
if filtered.is_empty() {
|
|
smoothed.push(avg);
|
|
} else {
|
|
smoothed.push(filtered.iter().sum::<f32>() / filtered.len() as f32);
|
|
}
|
|
}
|
|
smoothed
|
|
}
|
|
|
|
/// Calculates Thermal Resistance: R_theta = (T_core - T_ambient) / P_package.
|
|
///
|
|
/// This function uses the data point with the highest power draw to ensure
|
|
/// the calculation reflects a system under maximum thermal load.
|
|
pub fn calculate_thermal_resistance(&self, profile: &ThermalProfile) -> f32 {
|
|
profile.points.iter()
|
|
.filter(|p| p.power_w > 1.0 && p.temp_c > 30.0) // Filter invalid data
|
|
.max_by(|a, b| a.power_w.partial_cmp(&b.power_w).unwrap_or(std::cmp::Ordering::Equal))
|
|
.map(|p| (p.temp_c - profile.ambient_temp) / p.power_w)
|
|
.unwrap_or(0.0)
|
|
}
|
|
|
|
/// Returns the maximum temperature recorded in the profile.
|
|
pub fn get_max_temp(&self, profile: &ThermalProfile) -> f32 {
|
|
profile.points.iter()
|
|
.map(|p| p.temp_c)
|
|
.max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
|
|
.unwrap_or(0.0)
|
|
}
|
|
|
|
/// Finds the "Silicon Knee" - the point where performance-per-watt (efficiency)
|
|
/// starts to diminish significantly and thermal density spikes.
|
|
///
|
|
/// This heuristic scoring model balances several factors:
|
|
/// 1. **Efficiency Drop:** How quickly does performance-per-watt decrease as power increases?
|
|
/// 2. **Thermal Acceleration:** How quickly does temperature rise per additional Watt?
|
|
/// 3. **Throttling Penalty:** A large penalty is applied if absolute performance drops, indicating a thermal wall.
|
|
///
|
|
/// The "Knee" is the power level with the highest score, representing the optimal
|
|
/// balance before thermal saturation causes diminishing returns.
|
|
pub fn find_silicon_knee(&self, profile: &ThermalProfile) -> f32 {
|
|
let valid_points: Vec<_> = profile.points.iter()
|
|
.filter(|p| p.power_w > 5.0 && p.temp_c > 40.0) // Filter idle/noise
|
|
.cloned()
|
|
.collect();
|
|
|
|
if valid_points.len() < 3 {
|
|
return profile.points.last().map(|p| p.power_w).unwrap_or(15.0);
|
|
}
|
|
|
|
let mut points = valid_points;
|
|
points.sort_by(|a, b| a.power_w.partial_cmp(&b.power_w).unwrap_or(std::cmp::Ordering::Equal));
|
|
|
|
let mut best_pl = points[0].power_w;
|
|
let mut max_score = f32::MIN;
|
|
|
|
// Use a sliding window (3 points) to calculate gradients more robustly
|
|
for i in 1..points.len() - 1 {
|
|
let prev = &points[i - 1];
|
|
let curr = &points[i];
|
|
let next = &points[i + 1];
|
|
|
|
// 1. Efficiency Metric (Throughput per Watt or Freq per Watt)
|
|
let efficiency_curr = if curr.throughput > 0.0 {
|
|
curr.throughput as f32 / curr.power_w.max(1.0)
|
|
} else {
|
|
curr.freq_mhz / curr.power_w.max(1.0)
|
|
};
|
|
|
|
let efficiency_next = if next.throughput > 0.0 {
|
|
next.throughput as f32 / next.power_w.max(1.0)
|
|
} else {
|
|
next.freq_mhz / next.power_w.max(1.0)
|
|
};
|
|
|
|
let p_delta = (next.power_w - curr.power_w).max(0.5);
|
|
let efficiency_drop = (efficiency_curr - efficiency_next) / p_delta;
|
|
|
|
// 2. Thermal Acceleration (d2T/dW2)
|
|
let p_delta_prev = (curr.power_w - prev.power_w).max(0.5);
|
|
let p_delta_next = (next.power_w - curr.power_w).max(0.5);
|
|
|
|
let dt_dw_prev = (curr.temp_c - prev.temp_c) / p_delta_prev;
|
|
let dt_dw_next = (next.temp_c - curr.temp_c) / p_delta_next;
|
|
|
|
let p_total_delta = (next.power_w - prev.power_w).max(1.0);
|
|
let temp_accel = (dt_dw_next - dt_dw_prev) / p_total_delta;
|
|
|
|
// 3. Wall Detection (Any drop in absolute performance is a hard wall)
|
|
let is_throttling = next.freq_mhz < curr.freq_mhz || (next.throughput > 0.0 && next.throughput < curr.throughput);
|
|
let penalty = if is_throttling { 5000.0 } else { 0.0 };
|
|
|
|
let score = (efficiency_curr * 10.0) - (efficiency_drop * 50.0) - (temp_accel * 20.0) - penalty;
|
|
|
|
if score > max_score {
|
|
max_score = score;
|
|
best_pl = curr.power_w;
|
|
}
|
|
}
|
|
|
|
best_pl
|
|
}
|
|
}
|