implemented multiple fans
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -513,7 +513,7 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||
|
||||
[[package]]
|
||||
name = "ember-tune-rs"
|
||||
version = "1.0.0"
|
||||
version = "1.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "ember-tune-rs"
|
||||
version = "1.0.0"
|
||||
version = "1.1.0"
|
||||
edition = "2024"
|
||||
authors = ["Nils Pukropp <nils@narl.io>"]
|
||||
readme = "README.md"
|
||||
|
||||
@@ -69,8 +69,8 @@ impl OptimizerEngine {
|
||||
.unwrap_or(0.0)
|
||||
}
|
||||
|
||||
/// Finds the "Silicon Knee" - the point where performance per watt plateaus
|
||||
/// and thermal density spikes.
|
||||
/// Finds the "Silicon Knee" - the point where performance per watt (efficiency)
|
||||
/// starts to diminish significantly and thermal density spikes.
|
||||
pub fn find_silicon_knee(&self, profile: &ThermalProfile) -> f32 {
|
||||
if profile.points.len() < 3 {
|
||||
return profile.points.last().map(|p| p.power_w).unwrap_or(15.0);
|
||||
@@ -82,27 +82,42 @@ impl OptimizerEngine {
|
||||
let mut best_pl = points[0].power_w;
|
||||
let mut max_score = f32::MIN;
|
||||
|
||||
// Use a sliding window (3 points) to calculate gradients more robustly
|
||||
for i in 1..points.len() - 1 {
|
||||
let prev = &points[i - 1];
|
||||
let curr = &points[i];
|
||||
let next = &points[i + 1];
|
||||
|
||||
// 1. Performance Gradient (dMHz/dW)
|
||||
let dmhz_dw_prev = (curr.freq_mhz - prev.freq_mhz) / (curr.power_w - prev.power_w).max(0.1);
|
||||
let dmhz_dw_next = (next.freq_mhz - curr.freq_mhz) / (next.power_w - curr.power_w).max(0.1);
|
||||
let freq_diminish = dmhz_dw_prev - dmhz_dw_next;
|
||||
// 1. Efficiency Metric (Throughput per Watt)
|
||||
// If throughput is 0 (unsupported), fallback to Frequency per Watt
|
||||
let efficiency_curr = if curr.throughput > 0.0 {
|
||||
curr.throughput as f32 / curr.power_w.max(0.1)
|
||||
} else {
|
||||
curr.freq_mhz / curr.power_w.max(0.1)
|
||||
};
|
||||
|
||||
let efficiency_next = if next.throughput > 0.0 {
|
||||
next.throughput as f32 / next.power_w.max(0.1)
|
||||
} else {
|
||||
next.freq_mhz / next.power_w.max(0.1)
|
||||
};
|
||||
|
||||
// 2. Thermal Gradient (d2T/dW2)
|
||||
// Diminishing returns: how much efficiency drops per additional watt
|
||||
let efficiency_drop = (efficiency_curr - efficiency_next) / (next.power_w - curr.power_w).max(0.1);
|
||||
|
||||
// 2. Thermal Acceleration (d2T/dW2)
|
||||
let dt_dw_prev = (curr.temp_c - prev.temp_c) / (curr.power_w - prev.power_w).max(0.1);
|
||||
let dt_dw_next = (next.temp_c - curr.temp_c) / (next.power_w - curr.power_w).max(0.1);
|
||||
let temp_accel = (dt_dw_next - dt_dw_prev) / (next.power_w - prev.power_w).max(0.1);
|
||||
|
||||
// 3. Wall Detection
|
||||
let is_throttling = next.freq_mhz < curr.freq_mhz;
|
||||
let penalty = if is_throttling { 2000.0 } else { 0.0 };
|
||||
// 3. Wall Detection (Any drop in absolute frequency/throughput is a hard wall)
|
||||
let is_throttling = next.freq_mhz < curr.freq_mhz || (next.throughput > 0.0 && next.throughput < curr.throughput);
|
||||
let penalty = if is_throttling { 5000.0 } else { 0.0 };
|
||||
|
||||
// Heuristic scoring: Weight thermal acceleration and diminishing frequency gains
|
||||
let score = (freq_diminish * 2.0) + (temp_accel * 10.0) - penalty;
|
||||
// Heuristic scoring:
|
||||
// - Higher score is "Better" (The Knee is the peak of this curve)
|
||||
// - We want high efficiency (low drop) and low thermal acceleration.
|
||||
let score = (efficiency_curr * 10.0) - (efficiency_drop * 50.0) - (temp_accel * 20.0) - penalty;
|
||||
|
||||
if score > max_score {
|
||||
max_score = score;
|
||||
|
||||
@@ -217,7 +217,7 @@ fn main() -> Result<()> {
|
||||
cpu_temp: 0.0,
|
||||
power_w: 0.0,
|
||||
current_freq: 0.0,
|
||||
fan_rpm: 0,
|
||||
fans: Vec::new(),
|
||||
governor: "detecting".to_string(),
|
||||
pl1_limit: 0.0,
|
||||
pl2_limit: 0.0,
|
||||
|
||||
@@ -33,7 +33,7 @@ pub struct TelemetryState {
|
||||
pub cpu_temp: f32,
|
||||
pub power_w: f32,
|
||||
pub current_freq: f32,
|
||||
pub fan_rpm: u32,
|
||||
pub fans: Vec<u32>,
|
||||
|
||||
// --- High-res History (Last 60s @ 500ms = 120 points) ---
|
||||
pub history_watts: Vec<f32>,
|
||||
|
||||
@@ -151,15 +151,16 @@ impl BenchmarkOrchestrator {
|
||||
// Record data point
|
||||
let avg_p = self.sensors.get_power_w().unwrap_or(0.0);
|
||||
let avg_t = self.sensors.get_temp().unwrap_or(0.0);
|
||||
let avg_f = 2500.0; // Mock frequency until SensorBus expanded
|
||||
let fan = self.sensors.get_fan_rpm().unwrap_or(0);
|
||||
let avg_f = self.sensors.get_freq_mhz().unwrap_or(0.0);
|
||||
let fans = self.sensors.get_fan_rpms().unwrap_or_default();
|
||||
let primary_fan = fans.first().cloned().unwrap_or(0);
|
||||
let tp = self.workload.get_throughput().unwrap_or(0.0);
|
||||
|
||||
self.profile.points.push(ThermalPoint {
|
||||
power_w: avg_p,
|
||||
temp_c: avg_t,
|
||||
freq_mhz: avg_f,
|
||||
fan_rpm: fan,
|
||||
fan_rpm: primary_fan,
|
||||
throughput: tp,
|
||||
});
|
||||
|
||||
@@ -233,8 +234,8 @@ impl BenchmarkOrchestrator {
|
||||
tick: 0,
|
||||
cpu_temp: self.sensors.get_temp().unwrap_or(0.0),
|
||||
power_w: self.sensors.get_power_w().unwrap_or(0.0),
|
||||
current_freq: 0.0,
|
||||
fan_rpm: self.sensors.get_fan_rpm().unwrap_or(0),
|
||||
current_freq: self.sensors.get_freq_mhz().unwrap_or(0.0),
|
||||
fans: self.sensors.get_fan_rpms().unwrap_or_default(),
|
||||
governor: "unknown".to_string(),
|
||||
pl1_limit: 0.0,
|
||||
pl2_limit: 0.0,
|
||||
@@ -252,7 +253,7 @@ impl BenchmarkOrchestrator {
|
||||
fn send_telemetry(&mut self, tick: u64) -> Result<()> {
|
||||
let temp = self.sensors.get_temp().unwrap_or(0.0);
|
||||
let pwr = self.sensors.get_power_w().unwrap_or(0.0);
|
||||
let freq = 0.0;
|
||||
let freq = self.sensors.get_freq_mhz().unwrap_or(0.0);
|
||||
|
||||
self.history_temp.push_back(temp);
|
||||
self.history_watts.push_back(pwr);
|
||||
@@ -271,7 +272,7 @@ impl BenchmarkOrchestrator {
|
||||
cpu_temp: temp,
|
||||
power_w: pwr,
|
||||
current_freq: freq,
|
||||
fan_rpm: self.sensors.get_fan_rpm().unwrap_or(0),
|
||||
fans: self.sensors.get_fan_rpms().unwrap_or_default(),
|
||||
governor: "performance".to_string(),
|
||||
pl1_limit: 15.0,
|
||||
pl2_limit: 25.0,
|
||||
|
||||
@@ -10,19 +10,20 @@ use tracing::debug;
|
||||
pub struct DellXps9380Sal {
|
||||
temp_path: PathBuf,
|
||||
pwr_path: PathBuf,
|
||||
fan_path: PathBuf,
|
||||
fan_paths: Vec<PathBuf>,
|
||||
freq_path: PathBuf,
|
||||
pl1_path: PathBuf,
|
||||
pl2_path: PathBuf,
|
||||
last_poll: Mutex<Instant>,
|
||||
last_temp: Mutex<f32>,
|
||||
last_fan: Mutex<u32>,
|
||||
last_fans: Mutex<Vec<u32>>,
|
||||
}
|
||||
|
||||
impl DellXps9380Sal {
|
||||
pub fn init() -> Result<Self> {
|
||||
let mut temp_path = None;
|
||||
let mut pwr_path = None;
|
||||
let mut fan_path = None;
|
||||
let mut fan_paths = Vec::new();
|
||||
let mut rapl_base_path = None;
|
||||
|
||||
// Dynamic hwmon discovery
|
||||
@@ -33,7 +34,17 @@ impl DellXps9380Sal {
|
||||
|
||||
if name == "dell_smm" {
|
||||
temp_path = Some(p.join("temp1_input"));
|
||||
fan_path = Some(p.join("fan1_input"));
|
||||
// Discover all fans
|
||||
if let Ok(fan_entries) = fs::read_dir(&p) {
|
||||
for fan_entry in fan_entries.flatten() {
|
||||
let fan_p = fan_entry.path();
|
||||
if fan_p.file_name().unwrap_or_default().to_string_lossy().starts_with("fan") &&
|
||||
fan_p.file_name().unwrap_or_default().to_string_lossy().ends_with("_input") {
|
||||
fan_paths.push(fan_p);
|
||||
}
|
||||
}
|
||||
}
|
||||
fan_paths.sort();
|
||||
}
|
||||
|
||||
if name == "intel_rapl" || name == "rapl" {
|
||||
@@ -59,16 +70,18 @@ impl DellXps9380Sal {
|
||||
}
|
||||
|
||||
let rapl_base = rapl_base_path.context("Could not find RAPL package-0 path in powercap")?;
|
||||
let freq_path = PathBuf::from("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq");
|
||||
|
||||
Ok(Self {
|
||||
temp_path: temp_path.context("Could not find dell_smm temperature path")?,
|
||||
pwr_path: pwr_path.context("Could not find RAPL power path")?,
|
||||
fan_path: fan_path.context("Could not find dell_smm fan path")?,
|
||||
fan_paths,
|
||||
freq_path,
|
||||
pl1_path: rapl_base.join("constraint_0_power_limit_uw"),
|
||||
pl2_path: rapl_base.join("constraint_1_power_limit_uw"),
|
||||
last_poll: Mutex::new(Instant::now() - Duration::from_secs(2)),
|
||||
last_temp: Mutex::new(0.0),
|
||||
last_fan: Mutex::new(0),
|
||||
last_fans: Mutex::new(Vec::new()),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -179,20 +192,32 @@ impl SensorBus for DellXps9380Sal {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fan_rpm(&self) -> Result<u32> {
|
||||
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
|
||||
let mut last_poll = self.last_poll.lock().unwrap();
|
||||
let now = Instant::now();
|
||||
|
||||
if now.duration_since(*last_poll) < Duration::from_millis(1000) {
|
||||
return Ok(*self.last_fan.lock().unwrap());
|
||||
return Ok(self.last_fans.lock().unwrap().clone());
|
||||
}
|
||||
|
||||
let s = fs::read_to_string(&self.fan_path)?;
|
||||
let val = s.trim().parse::<u32>()?;
|
||||
let mut fans = Vec::new();
|
||||
for path in &self.fan_paths {
|
||||
if let Ok(s) = fs::read_to_string(path) {
|
||||
if let Ok(rpm) = s.trim().parse::<u32>() {
|
||||
fans.push(rpm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*self.last_fan.lock().unwrap() = val;
|
||||
*self.last_fans.lock().unwrap() = fans.clone();
|
||||
*last_poll = now;
|
||||
|
||||
Ok(fans)
|
||||
}
|
||||
|
||||
fn get_freq_mhz(&self) -> Result<f32> {
|
||||
let s = fs::read_to_string(&self.freq_path)?;
|
||||
let val = s.trim().parse::<f32>()? / 1000.0;
|
||||
Ok(val)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,8 +50,11 @@ impl SensorBus for MockSensorBus {
|
||||
fn get_power_w(&self) -> Result<f32> {
|
||||
Ok(15.0)
|
||||
}
|
||||
fn get_fan_rpm(&self) -> Result<u32> {
|
||||
Ok(2500)
|
||||
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
|
||||
Ok(vec![2500])
|
||||
}
|
||||
fn get_freq_mhz(&self) -> Result<f32> {
|
||||
Ok(3200.0)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -54,10 +54,11 @@ pub trait EnvironmentGuard {
|
||||
}
|
||||
|
||||
/// Read-only interface for standardized metrics.
|
||||
pub trait SensorBus {
|
||||
pub trait SensorBus: Send + Sync {
|
||||
fn get_temp(&self) -> Result<f32>;
|
||||
fn get_power_w(&self) -> Result<f32>;
|
||||
fn get_fan_rpm(&self) -> Result<u32>;
|
||||
fn get_fan_rpms(&self) -> Result<Vec<u32>>;
|
||||
fn get_freq_mhz(&self) -> Result<f32>;
|
||||
}
|
||||
|
||||
impl<T: SensorBus + ?Sized> SensorBus for Arc<T> {
|
||||
@@ -67,8 +68,11 @@ impl<T: SensorBus + ?Sized> SensorBus for Arc<T> {
|
||||
fn get_power_w(&self) -> Result<f32> {
|
||||
(**self).get_power_w()
|
||||
}
|
||||
fn get_fan_rpm(&self) -> Result<u32> {
|
||||
(**self).get_fan_rpm()
|
||||
fn get_fan_rpms(&self) -> Result<Vec<u32>> {
|
||||
(**self).get_fan_rpms()
|
||||
}
|
||||
fn get_freq_mhz(&self) -> Result<f32> {
|
||||
(**self).get_freq_mhz()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ pub struct DashboardState {
|
||||
impl DashboardState {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
logs: vec!["FerroTherm Initialized.".to_string()],
|
||||
logs: vec!["ember-tune Initialized.".to_string()],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,7 +92,7 @@ fn draw_header(f: &mut Frame, area: Rect, state: &TelemetryState) {
|
||||
let hostname = std::env::var("HOSTNAME").unwrap_or_else(|_| "localhost".into());
|
||||
|
||||
let left = Span::styled(format!(" {} ", hostname), Style::default().fg(C_MAUVE).add_modifier(Modifier::BOLD));
|
||||
let center = Span::styled(" FERROTHERM THERMAL BENCH ", Style::default().fg(C_LAVENDER).add_modifier(Modifier::BOLD));
|
||||
let center = Span::styled(" EMBER-TUNE THERMAL BENCH ", Style::default().fg(C_LAVENDER).add_modifier(Modifier::BOLD));
|
||||
let right = Span::styled(format!(" UPTIME: {} ", uptime), Style::default().fg(C_SUBTEXT));
|
||||
|
||||
let total_width = area.width;
|
||||
@@ -182,11 +182,20 @@ fn draw_cooling(f: &mut Frame, area: Rect, state: &TelemetryState) {
|
||||
let inner = block.inner(area);
|
||||
f.render_widget(block, area);
|
||||
|
||||
let fan_info = if state.fans.is_empty() {
|
||||
"N/A".to_string()
|
||||
} else {
|
||||
state.fans.iter()
|
||||
.map(|rpm| format!("{} RPM", rpm))
|
||||
.collect::<Vec<String>>()
|
||||
.join(" | ")
|
||||
};
|
||||
|
||||
let info = Line::from(vec![
|
||||
Span::styled(" Tier: ", Style::default().fg(C_LAVENDER)),
|
||||
Span::styled(&state.fan_tier, Style::default().fg(C_TEAL)),
|
||||
Span::styled(" | RPM: ", Style::default().fg(C_LAVENDER)),
|
||||
Span::styled(format!("{}", state.fan_rpm), Style::default().fg(C_TEXT)),
|
||||
Span::styled(" | ", Style::default().fg(C_LAVENDER)),
|
||||
Span::styled(fan_info, Style::default().fg(C_TEXT)),
|
||||
]);
|
||||
f.render_widget(Paragraph::new(info), inner);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user