Implement performance monitor
build / build-linux-amd64 (push) Successful in 1m54s

This commit is contained in:
2026-05-22 21:41:54 +02:00
parent 62a8870ea2
commit 3ab67e80e1
7 changed files with 1149 additions and 4 deletions
+427 -3
View File
@@ -6,7 +6,7 @@ use crate::api::admin::i18n::{t, tf1, tf2, tf3, Lang};
use crate::api::error::ApiError;
use crate::api::middleware::AuthedUser;
use crate::api::state::AppState;
use crate::database::{DashboardDeviceRow, LoginEventRow};
use crate::database::{DashboardDeviceRow, LoginEventRow, MetricsSampleRow, PerfEventRow};
use axum::extract::{Extension, Form, Path, Query};
use axum::response::Html;
use serde::Deserialize;
@@ -470,7 +470,36 @@ pub async fn detail(
.login_events_for_peer(&d.id, 50)
.await
.unwrap_or_default();
render_detail(lang, &d, &events)
// Performance: pull the most recent metrics sample for the
// "right now" card, plus 24 h of samples for the sparkline,
// plus the most recent perf events (boot/shutdown/memory-
// exhaustion etc.) for the "recent slow events" table.
// All three are best-effort — none of them is required for
// the detail page to render meaningfully.
let metrics_latest = state
.db
.metrics_latest(&d.id)
.await
.unwrap_or_default();
let since_24h = chrono::Utc::now().timestamp() - 24 * 3600;
let metrics_24h = state
.db
.metrics_samples_since(&d.id, since_24h)
.await
.unwrap_or_default();
let perf_events = state
.db
.perf_events_for_peer(&d.id, 20)
.await
.unwrap_or_default();
render_detail(
lang,
&d,
&events,
metrics_latest.as_ref(),
&metrics_24h,
&perf_events,
)
}
None => format!(
r##"<div class="space-y-4">
@@ -1142,7 +1171,14 @@ fn fmt_inv_value(v: Option<&serde_json::Value>) -> String {
}
}
fn render_detail(lang: Lang, d: &DashboardDeviceRow, login_events: &[LoginEventRow]) -> String {
fn render_detail(
lang: Lang,
d: &DashboardDeviceRow,
login_events: &[LoginEventRow],
metrics_latest: Option<&MetricsSampleRow>,
metrics_24h: &[MetricsSampleRow],
perf_events: &[PerfEventRow],
) -> String {
let parsed: serde_json::Value =
serde_json::from_str(&d.sysinfo_payload).unwrap_or(serde_json::Value::Null);
let pick = |k: &str| -> String {
@@ -1237,6 +1273,7 @@ fn render_detail(lang: Lang, d: &DashboardDeviceRow, login_events: &[LoginEventR
};
let login_section = render_login_events(lang, login_events);
let perf_section = render_performance(lang, metrics_latest, metrics_24h, perf_events);
format!(
r##"<div class="space-y-4">
@@ -1245,6 +1282,8 @@ fn render_detail(lang: Lang, d: &DashboardDeviceRow, login_events: &[LoginEventR
<div class="text-xs text-slate-500">{detail_view}</div>
</div>
{header}
<h3 class="text-sm font-semibold text-slate-300 mt-4">{performance}</h3>
{perf}
<h3 class="text-sm font-semibold text-slate-300 mt-4">{inventory}</h3>
{inv}
<h3 class="text-sm font-semibold text-slate-300 mt-4">{login_history}</h3>
@@ -1252,6 +1291,8 @@ fn render_detail(lang: Lang, d: &DashboardDeviceRow, login_events: &[LoginEventR
</div>"##,
back = back_button(lang),
detail_view = t(lang, "devices.detail_view"),
performance = t(lang, "devices.performance"),
perf = perf_section,
inventory = t(lang, "devices.inventory"),
header = header,
inv = inventory_section,
@@ -1260,6 +1301,389 @@ fn render_detail(lang: Lang, d: &DashboardDeviceRow, login_events: &[LoginEventR
)
}
/// Builds the Performance section of the device detail page.
///
/// The section is made of the "right now" snapshot card, a CPU sparkline
/// and a memory sparkline placed side by side, and the recent perf-events
/// table. When there is no latest sample, no series, and no events at all,
/// a single "no data yet" panel is returned instead of the three parts.
fn render_performance(
    lang: Lang,
    latest: Option<&MetricsSampleRow>,
    series: &[MetricsSampleRow],
    events: &[PerfEventRow],
) -> String {
    let nothing_reported = latest.is_none() && series.is_empty() && events.is_empty();
    if nothing_reported {
        return format!(
            r##"<div class="rounded-md border border-slate-700 bg-slate-900 p-3 text-sm text-slate-400">
{msg}
</div>"##,
            msg = t(lang, "devices.perf_none"),
        );
    }

    let snapshot_html = render_perf_snapshot(lang, latest);

    // CPU is already a percentage, so it is charted as-is on a 0..100 axis.
    let cpu_points: Vec<(i64, f64)> = series.iter().map(|row| (row.at, row.cpu_pct)).collect();
    let cpu_html = render_sparkline(lang, cpu_points, 100.0, true, t(lang, "devices.perf_cpu"));

    // Memory arrives as used/total MB; convert to a percentage so both
    // charts share the same y-axis. Samples without a positive total are
    // left out rather than dividing by zero.
    let mem_points: Vec<(i64, f64)> = series
        .iter()
        .filter_map(|row| {
            (row.mem_total_mb > 0).then(|| {
                let pct = 100.0 * (row.mem_used_mb as f64) / (row.mem_total_mb as f64);
                (row.at, pct)
            })
        })
        .collect();
    let mem_html = render_sparkline(lang, mem_points, 100.0, true, t(lang, "devices.perf_mem"));

    let events_html = render_perf_events_table(lang, events);

    format!(
        r##"<div class="space-y-4">
{snapshot}
<div class="grid grid-cols-1 gap-4 md:grid-cols-2">
{cpu}
{mem}
</div>
{events}
</div>"##,
        snapshot = snapshot_html,
        cpu = cpu_html,
        mem = mem_html,
        events = events_html,
    )
}
/// Renders the "right now" card from the most recent metrics sample.
///
/// The card shows six stat tiles: CPU %, memory % (with used / total GB
/// underneath), the top CPU process, the top memory process, uptime, and
/// the process count. The header carries how long ago the sample was
/// taken, with the full UTC timestamp in its tooltip. When `latest` is
/// `None`, a thin "no live data" panel is returned instead.
fn render_perf_snapshot(lang: Lang, latest: Option<&MetricsSampleRow>) -> String {
    let sample = match latest {
        Some(row) => row,
        None => {
            return format!(
                r##"<div class="rounded-md border border-slate-800 bg-slate-900 p-3 text-xs text-slate-500">
{msg}
</div>"##,
                msg = t(lang, "devices.perf_no_live"),
            );
        }
    };

    // Sample age; clamped at zero so a sample stamped slightly in the
    // future never produces a negative age.
    let sampled_ago = fmt_age((chrono::Utc::now().timestamp() - sample.at).max(0));

    // Memory as a percentage; a non-positive total is shown as 0%.
    let memory_pct = if sample.mem_total_mb > 0 {
        100.0 * (sample.mem_used_mb as f64) / (sample.mem_total_mb as f64)
    } else {
        0.0
    };

    // Top CPU process: escaped name followed by its rounded CPU share.
    // Stays empty when no process name is present.
    let top_cpu_html = if !sample.top_cpu_name.is_empty() {
        format!(
            "{name} <span class=\"text-xs text-slate-400\">{pct:.0}%</span>",
            name = html_escape(&sample.top_cpu_name),
            pct = sample.top_cpu_pct,
        )
    } else {
        String::new()
    };

    // Top memory process: escaped name followed by its footprint, switching
    // from MB to GB once it reaches 1024 MB.
    let top_mem_html = if !sample.top_mem_name.is_empty() {
        let footprint = match sample.top_mem_mb {
            mb if mb >= 1024 => format!("{:.1} GB", (mb as f64) / 1024.0),
            mb => format!("{} MB", mb),
        };
        format!(
            "{name} <span class=\"text-xs text-slate-400\">{disp}</span>",
            name = html_escape(&sample.top_mem_name),
            disp = html_escape(&footprint),
        )
    } else {
        String::new()
    };

    // Uptime tile is left blank unless the reported uptime is positive.
    let uptime_text = match sample.uptime_secs {
        secs if secs > 0 => fmt_age(secs),
        _ => String::new(),
    };

    format!(
        r##"<div class="rounded-md border border-slate-800 bg-slate-900 p-4">
<div class="flex items-baseline justify-between mb-3">
<h4 class="text-sm font-semibold text-slate-200">{l_now}</h4>
<span class="text-xs text-slate-500" title="{at_full} UTC">{l_age}</span>
</div>
<dl class="grid grid-cols-2 gap-x-6 gap-y-3 text-sm md:grid-cols-4">
<div>
<dt class="text-xs text-slate-500">{l_cpu}</dt>
<dd class="text-lg font-semibold {cpu_cls} tabular-nums">{cpu:.0}%</dd>
</div>
<div>
<dt class="text-xs text-slate-500">{l_mem}</dt>
<dd class="text-lg font-semibold {mem_cls} tabular-nums">{mem_pct:.0}%</dd>
<dd class="text-xs text-slate-500 tabular-nums">{used:.1} / {total:.1} GB</dd>
</div>
<div>
<dt class="text-xs text-slate-500">{l_top_cpu}</dt>
<dd class="text-slate-200 font-mono text-xs truncate" title="{top_cpu_raw}">{top_cpu}</dd>
</div>
<div>
<dt class="text-xs text-slate-500">{l_top_mem}</dt>
<dd class="text-slate-200 font-mono text-xs truncate" title="{top_mem_raw}">{top_mem}</dd>
</div>
<div>
<dt class="text-xs text-slate-500">{l_uptime}</dt>
<dd class="text-slate-300 tabular-nums">{uptime}</dd>
</div>
<div>
<dt class="text-xs text-slate-500">{l_procs}</dt>
<dd class="text-slate-300 tabular-nums">{procs}</dd>
</div>
</dl>
</div>"##,
        l_now = t(lang, "devices.perf_now"),
        l_age = tf1(lang, "devices.perf_sampled_ago", &sampled_ago),
        at_full = html_escape(&fmt_unix_utc(sample.at)),
        l_cpu = t(lang, "devices.perf_cpu"),
        cpu_cls = pct_color(sample.cpu_pct),
        cpu = sample.cpu_pct,
        l_mem = t(lang, "devices.perf_mem"),
        mem_cls = pct_color(memory_pct),
        mem_pct = memory_pct,
        used = (sample.mem_used_mb as f64) / 1024.0,
        total = (sample.mem_total_mb as f64) / 1024.0,
        l_top_cpu = t(lang, "devices.perf_top_cpu"),
        top_cpu_raw = html_escape(&sample.top_cpu_name),
        top_cpu = top_cpu_html,
        l_top_mem = t(lang, "devices.perf_top_mem"),
        top_mem_raw = html_escape(&sample.top_mem_name),
        top_mem = top_mem_html,
        l_uptime = t(lang, "devices.perf_uptime"),
        uptime = html_escape(&uptime_text),
        l_procs = t(lang, "devices.perf_proc_count"),
        procs = sample.proc_count,
    )
}
/// Maps a percentage (0–100) to a Tailwind text-colour class for the
/// snapshot stat tiles: emerald below 60, amber from 60 up to (but not
/// including) 85, rose from 85 upwards. Anything that fails both
/// thresholds — including NaN — falls through to emerald.
fn pct_color(pct: f64) -> &'static str {
    match pct {
        p if p >= 85.0 => "text-rose-400",
        p if p >= 60.0 => "text-amber-300",
        _ => "text-emerald-300",
    }
}
/// Renders an inline-SVG sparkline card.
///
/// * `series` — `(unix-seconds, value)` points. The first element is
///   treated as the oldest and the last as the newest (assumes the caller
///   passes the series in ascending time order — confirm against the
///   query that produces it).
/// * `max_y` — top of the y-axis; values are clamped into `0..=max_y` so
///   side-by-side charts can share a scale. A non-positive `max_y` draws
///   every point on the baseline instead of emitting NaN coordinates.
/// * `bucketed` — when `true` and the series has more than 96 points, it
///   is mean-pooled via `downsample_avg` so the path string stays short.
/// * `title` — heading shown above the chart (HTML-escaped here).
///
/// Returns the card HTML. An empty series yields a small "no chart" card.
/// The header shows the peak and the latest plotted value; the footer
/// shows the time span covered on the left and a "now" label on the right.
fn render_sparkline(
    lang: Lang,
    series: Vec<(i64, f64)>,
    max_y: f64,
    bucketed: bool,
    title: &str,
) -> String {
    // Function-scope import so `write!` can append straight into the path
    // strings without an intermediate `String` per point.
    use std::fmt::Write as _;

    const WIDTH: f64 = 600.0;
    const HEIGHT: f64 = 80.0;
    const PAD: f64 = 4.0;
    const MAX_POINTS: usize = 96;

    if series.is_empty() {
        return format!(
            r##"<div class="rounded-md border border-slate-800 bg-slate-900 p-3">
<h4 class="text-xs uppercase text-slate-500 mb-1">{title}</h4>
<div class="text-xs text-slate-500">{msg}</div>
</div>"##,
            title = html_escape(title),
            msg = t(lang, "devices.perf_no_chart"),
        );
    }

    // `series` is owned, so the non-downsampled path can use it directly
    // instead of cloning it.
    let points = if bucketed && series.len() > MAX_POINTS {
        downsample_avg(&series, MAX_POINTS)
    } else {
        series
    };

    let min_x = points.first().map_or(0, |p| p.0);
    let max_x = points.last().map_or(0, |p| p.0);
    // At least 1 so a single-timestamp series does not divide by zero.
    let span_x = (max_x - min_x).max(1) as f64;
    let plot_w = WIDTH - 2.0 * PAD;
    let plot_h = HEIGHT - 2.0 * PAD;
    let baseline = PAD + plot_h;

    let mut path = String::new();
    let mut area = String::new();
    let mut peak: f64 = 0.0;
    let mut last: f64 = 0.0;
    let mut last_x = PAD;
    for (i, &(ts, v)) in points.iter().enumerate() {
        let x = PAD + plot_w * ((ts - min_x) as f64) / span_x;
        let y_norm = if max_y > 0.0 {
            (v / max_y).clamp(0.0, 1.0)
        } else {
            0.0
        };
        let y = PAD + plot_h * (1.0 - y_norm);
        // Writing into a `String` cannot fail, so the results are ignored.
        if i == 0 {
            let _ = write!(path, "M{:.1},{:.1}", x, y);
            // The filled area starts on the baseline under the first point.
            let _ = write!(area, "M{:.1},{:.1} L{:.1},{:.1}", x, baseline, x, y);
        } else {
            let _ = write!(path, " L{:.1},{:.1}", x, y);
            let _ = write!(area, " L{:.1},{:.1}", x, y);
        }
        peak = peak.max(v);
        last = v;
        last_x = x;
    }
    // Drop back to the baseline directly under the last plotted point.
    // Closing at the chart's right edge instead would fill a bogus triangle
    // whenever all points share one timestamp (e.g. a single sample).
    let _ = write!(area, " L{:.1},{:.1} Z", last_x, baseline);

    // Left-hand axis label: the time span between the oldest and newest
    // plotted points, in the largest whole unit that fits.
    let span_secs = (max_x - min_x).max(0);
    let span_label = if span_secs >= 3600 {
        format!("-{}h", span_secs / 3600)
    } else if span_secs >= 60 {
        format!("-{}m", span_secs / 60)
    } else {
        format!("-{}s", span_secs)
    };

    format!(
        r##"<div class="rounded-md border border-slate-800 bg-slate-900 p-3">
<div class="flex items-baseline justify-between mb-1">
<h4 class="text-xs uppercase text-slate-500">{title}</h4>
<span class="text-[11px] text-slate-500 tabular-nums">{l_peak} <span class="text-slate-300">{peak:.0}%</span> &nbsp; {l_now} <span class="text-slate-300">{last:.0}%</span></span>
</div>
<svg viewBox="0 0 {w} {h}" preserveAspectRatio="none" class="w-full h-20">
<line x1="{pad}" y1="{ymid:.1}" x2="{xend:.1}" y2="{ymid:.1}" stroke="#1f2937" stroke-width="1" stroke-dasharray="2,3"/>
<path d="{area}" fill="#0ea5e9" fill-opacity="0.10" stroke="none"/>
<path d="{path}" fill="none" stroke="#38bdf8" stroke-width="1.5" stroke-linejoin="round"/>
</svg>
<div class="flex justify-between text-[10px] text-slate-500 mt-1 tabular-nums">
<span>{older}</span>
<span>{l_now_short}</span>
</div>
</div>"##,
        title = html_escape(title),
        l_peak = t(lang, "devices.perf_peak"),
        peak = peak,
        l_now = t(lang, "devices.perf_latest"),
        last = last,
        w = WIDTH,
        h = HEIGHT,
        pad = PAD,
        ymid = PAD + plot_h * 0.5,
        xend = WIDTH - PAD,
        area = area,
        path = path,
        older = html_escape(&span_label),
        l_now_short = t(lang, "devices.perf_now_short"),
    )
}
/// Mean-pools a `(timestamp, value)` series down to at most `target`
/// points.
///
/// The time range from the first to the last timestamp is split into
/// `target` equal-width buckets. Each non-empty bucket becomes one output
/// point whose x is the mean timestamp of its samples (truncated to whole
/// seconds) and whose y is the mean of their values. Empty buckets are
/// dropped, so stretches with no samples do not produce zero-valued
/// points.
///
/// A series that already has `target` points or fewer is returned
/// unchanged. A `target` of 0 is treated as 1.
///
/// The input is expected in ascending timestamp order: consecutive samples
/// that map to the same bucket are merged, so for sorted input the result
/// never exceeds `target` points. Out-of-order samples are clamped into
/// the first/last bucket range and may start additional points.
fn downsample_avg(series: &[(i64, f64)], target: usize) -> Vec<(i64, f64)> {
    if series.len() <= target {
        return series.to_vec();
    }

    /// Running totals for one output point.
    struct Bucket {
        index: i128,
        t_sum: i128,
        v_sum: f64,
        count: i128,
    }

    // All bucket arithmetic is done in i128 so that neither the timestamp
    // difference nor `offset * target` can overflow.
    let wanted = target.max(1) as i128;
    let min_x = i128::from(series.first().map_or(0, |p| p.0));
    let max_x = i128::from(series.last().map_or(0, |p| p.0));
    // Number of distinct whole-second offsets covered, always >= 1.
    let width = (max_x - min_x).max(0) + 1;

    let mut buckets: Vec<Bucket> = Vec::with_capacity(target.max(1));
    for &(t, v) in series {
        let offset = (i128::from(t) - min_x).clamp(0, width - 1);
        // Proportional mapping keeps the index within 0..wanted; a floored
        // bucket width would let the bucket count overshoot `target`.
        let index = offset * wanted / width;
        match buckets.last_mut() {
            Some(open) if open.index == index => {
                open.t_sum += i128::from(t);
                open.v_sum += v;
                open.count += 1;
            }
            _ => buckets.push(Bucket {
                index,
                t_sum: i128::from(t),
                v_sum: v,
                count: 1,
            }),
        }
    }

    buckets
        .into_iter()
        // The mean of i64 timestamps always fits back into an i64.
        .map(|b| ((b.t_sum / b.count) as i64, b.v_sum / (b.count as f64)))
        .collect()
}
/// Renders the recent perf-events table (boot/shutdown/sleep degradation,
/// memory exhaustion, BSODs, unexpected reboots).
///
/// Each row shows the event's UTC time, a badge with the event source and
/// event id coloured by severity, and the event summary. All
/// agent-supplied text is HTML-escaped. An empty `events` slice yields a
/// neutral "nothing flagged yet" panel so the section heading still has a
/// body.
fn render_perf_events_table(lang: Lang, events: &[PerfEventRow]) -> String {
    if events.is_empty() {
        return format!(
            r##"<div class="rounded-md border border-slate-800 bg-slate-900 p-3 text-xs text-slate-500">
{msg}
</div>"##,
            msg = t(lang, "devices.perf_events_none"),
        );
    }
    let mut s = format!(
        r##"<div>
<h4 class="text-xs uppercase text-slate-500 mb-1">{l_events}</h4>
<div class="rounded-md border border-slate-800 bg-slate-900 overflow-hidden">
<table class="w-full text-sm">
<thead class="text-xs uppercase text-slate-500 bg-slate-950">
<tr>
<th class="text-left font-medium px-3 py-2">{c_when}</th>
<th class="text-left font-medium px-3 py-2">{c_source}</th>
<th class="text-left font-medium px-3 py-2">{c_summary}</th>
</tr>
</thead>
<tbody class="divide-y divide-slate-800">"##,
        l_events = t(lang, "devices.perf_events_heading"),
        c_when = t(lang, "devices.perf_events_col_when"),
        c_source = t(lang, "devices.perf_events_col_source"),
        c_summary = t(lang, "devices.perf_events_col_summary"),
    );
    for ev in events {
        let when = fmt_unix_utc(ev.at);
        // Badge colours by severity level: 1 = critical, 2 = error,
        // 3 = warning, anything else = info. Only the CSS classes are
        // rendered; no textual severity label is shown.
        // NOTE(review): the numbering looks like Windows event-log levels —
        // confirm against the agent.
        let level_cls = match ev.level {
            1 => "bg-rose-900/40 text-rose-300 border-rose-800",
            2 => "bg-rose-900/30 text-rose-300 border-rose-900",
            3 => "bg-amber-900/40 text-amber-300 border-amber-800",
            _ => "bg-slate-800 text-slate-300 border-slate-700",
        };
        // Known providers get a translated label; unknown ones are shown
        // verbatim (and escaped below).
        let source_label = match ev.provider.as_str() {
            "diag-perf" => t(lang, "devices.perf_src_diag_perf"),
            "res-exh" => t(lang, "devices.perf_src_res_exh"),
            "system" => t(lang, "devices.perf_src_system"),
            other => other,
        };
        // Writing into a `String` cannot fail, so the result is ignored.
        let _ = write!(
            s,
            r##"<tr class="hover:bg-slate-800/40 align-top">
<td class="px-3 py-2 font-mono text-xs text-slate-300 whitespace-nowrap">{when}</td>
<td class="px-3 py-2">
<span class="inline-block text-[11px] px-1.5 py-0.5 rounded border {lvl_cls}">{src} · {eid}</span>
</td>
<td class="px-3 py-2 text-slate-200 text-xs">{summary}</td>
</tr>"##,
            when = html_escape(&when),
            lvl_cls = level_cls,
            src = html_escape(source_label),
            eid = ev.event_id,
            summary = html_escape(&ev.summary),
        );
    }
    s.push_str("</tbody></table></div></div>");
    s
}
/// Render the per-device login history table. Empty input → a neutral
/// "no events yet" panel so the heading still has something under it.
/// We render the agent-reported `at` in the standard SQLite UTC format