This commit is contained in:
+10
-1
@@ -1,7 +1,7 @@
|
||||
# Agent API authentication
|
||||
|
||||
Reference for the per-device signature gate on the agent-facing HTTP
|
||||
API. Five endpoints are gated:
|
||||
API. Seven endpoints are gated:
|
||||
|
||||
- `POST /api/heartbeat`
|
||||
- `POST /api/sysinfo`
|
||||
@@ -11,6 +11,15 @@ API. Five endpoints are gated:
|
||||
by the agent. Same TOFU lifecycle as heartbeat / sysinfo: stock
|
||||
RustDesk doesn't post here at all, so in practice every caller is a
|
||||
managed agent; the legacy/unsigned path is kept for symmetry.
|
||||
- `POST /api/agent/metrics` — continuous CPU / memory / top-process
|
||||
samples (≈1 / minute). Surfaced on the admin Devices detail page as
|
||||
a 24 h sparkline + live snapshot card.
|
||||
- `POST /api/agent/perf-events` — sparse Windows-event-log entries
|
||||
flagged by `Microsoft-Windows-Diagnostics-Performance/Operational`,
|
||||
`Microsoft-Windows-Resource-Exhaustion-Detector/Operational`, and
|
||||
hand-picked `System` IDs (41 / 6008 / 1001 — unexpected reboot /
|
||||
dirty shutdown / BSOD). Server dedups via UNIQUE (peer_id, provider,
|
||||
record_id).
|
||||
|
||||
For the operator workflow — turning it on, the dashboard toggle, what
|
||||
happens when a managed agent is uninstalled — see the matching section
|
||||
|
||||
@@ -1224,6 +1224,137 @@ pub fn t(lang: Lang, key: &str) -> &'static str {
|
||||
"Istoric autentificări",
|
||||
"Historial de inicio de sesión",
|
||||
),
|
||||
"devices.performance" => (
|
||||
"Performance",
|
||||
"Leistung",
|
||||
"Performances",
|
||||
"Performanță",
|
||||
"Rendimiento",
|
||||
),
|
||||
"devices.perf_none" => (
|
||||
"No performance data reported yet. The agent collects CPU / memory samples once per minute and Windows-reported performance events as they happen.",
|
||||
"Noch keine Leistungsdaten gemeldet. Der Agent sammelt CPU-/Speicher-Stichproben einmal pro Minute und die von Windows gemeldeten Leistungsereignisse, sobald sie auftreten.",
|
||||
"Aucune donnée de performance signalée pour l'instant. L'agent collecte des échantillons CPU / mémoire une fois par minute et les événements de performance signalés par Windows au fur et à mesure.",
|
||||
"Niciun fel de date de performanță raportate încă. Agentul colectează eșantioane CPU / memorie o dată pe minut și evenimentele de performanță raportate de Windows pe măsură ce se întâmplă.",
|
||||
"Aún no se han reportado datos de rendimiento. El agente recopila muestras de CPU / memoria una vez por minuto y los eventos de rendimiento reportados por Windows a medida que ocurren.",
|
||||
),
|
||||
"devices.perf_no_live" => (
|
||||
"No live snapshot yet — waiting for the agent's first sample.",
|
||||
"Noch keine Live-Stichprobe – warte auf die erste Probe des Agenten.",
|
||||
"Pas encore d'instantané en direct — en attente du premier échantillon de l'agent.",
|
||||
"Niciun instantaneu live încă — se așteaptă prima probă a agentului.",
|
||||
"Aún no hay instantánea en vivo — esperando la primera muestra del agente.",
|
||||
),
|
||||
"devices.perf_now" => ("Live", "Live", "En direct", "Live", "En vivo"),
|
||||
"devices.perf_sampled_ago" => (
|
||||
"Sampled {0} ago",
|
||||
"Vor {0} aufgenommen",
|
||||
"Échantillonné il y a {0}",
|
||||
"Eșantionat acum {0}",
|
||||
"Muestreado hace {0}",
|
||||
),
|
||||
"devices.perf_cpu" => ("CPU", "CPU", "Processeur", "CPU", "CPU"),
|
||||
"devices.perf_mem" => (
|
||||
"Memory",
|
||||
"Speicher",
|
||||
"Mémoire",
|
||||
"Memorie",
|
||||
"Memoria",
|
||||
),
|
||||
"devices.perf_top_cpu" => (
|
||||
"Top CPU",
|
||||
"Top-CPU",
|
||||
"Plus gros CPU",
|
||||
"Top CPU",
|
||||
"Mayor CPU",
|
||||
),
|
||||
"devices.perf_top_mem" => (
|
||||
"Top memory",
|
||||
"Top-Speicher",
|
||||
"Plus grosse mémoire",
|
||||
"Top memorie",
|
||||
"Mayor memoria",
|
||||
),
|
||||
"devices.perf_uptime" => (
|
||||
"Uptime",
|
||||
"Laufzeit",
|
||||
"Disponibilité",
|
||||
"Timp activ",
|
||||
"Tiempo activo",
|
||||
),
|
||||
"devices.perf_proc_count" => (
|
||||
"Processes",
|
||||
"Prozesse",
|
||||
"Processus",
|
||||
"Procese",
|
||||
"Procesos",
|
||||
),
|
||||
"devices.perf_no_chart" => (
|
||||
"No samples in the last 24 h",
|
||||
"Keine Daten in den letzten 24 Std",
|
||||
"Aucun échantillon ces 24 dernières h",
|
||||
"Niciun eșantion în ultimele 24 h",
|
||||
"Sin muestras en las últimas 24 h",
|
||||
),
|
||||
"devices.perf_peak" => ("peak", "Spitze", "max.", "vârf", "máx."),
|
||||
"devices.perf_latest" => ("last", "letzter", "dernier", "ultim", "último"),
|
||||
"devices.perf_now_short" => ("now", "jetzt", "maint.", "acum", "ahora"),
|
||||
"devices.perf_events_heading" => (
|
||||
"Recent performance events",
|
||||
"Aktuelle Leistungsereignisse",
|
||||
"Événements de performance récents",
|
||||
"Evenimente recente de performanță",
|
||||
"Eventos de rendimiento recientes",
|
||||
),
|
||||
"devices.perf_events_none" => (
|
||||
"No performance events reported. Windows flags boot / shutdown / sleep slow paths, memory exhaustion, unexpected reboots and BSODs here.",
|
||||
"Keine Leistungsereignisse gemeldet. Windows markiert hier verlangsamte Start-/Herunterfahren-/Standby-Vorgänge, Speichermangel, unerwartete Neustarts und Bluescreens.",
|
||||
"Aucun événement de performance signalé. Windows signale ici les démarrages / arrêts / veilles lents, l'épuisement de la mémoire, les redémarrages inattendus et les BSOD.",
|
||||
"Niciun eveniment de performanță raportat. Windows marchează aici pornirile / opririle / repausurile lente, epuizarea memoriei, repornirile neașteptate și BSOD-urile.",
|
||||
"No se han reportado eventos de rendimiento. Windows registra aquí arranques / apagados / suspensiones lentos, agotamiento de memoria, reinicios inesperados y BSOD.",
|
||||
),
|
||||
"devices.perf_events_col_when" => (
|
||||
"When (UTC)",
|
||||
"Wann (UTC)",
|
||||
"Quand (UTC)",
|
||||
"Când (UTC)",
|
||||
"Cuándo (UTC)",
|
||||
),
|
||||
"devices.perf_events_col_source" => (
|
||||
"Source",
|
||||
"Quelle",
|
||||
"Source",
|
||||
"Sursă",
|
||||
"Origen",
|
||||
),
|
||||
"devices.perf_events_col_summary" => (
|
||||
"Summary",
|
||||
"Zusammenfassung",
|
||||
"Résumé",
|
||||
"Rezumat",
|
||||
"Resumen",
|
||||
),
|
||||
"devices.perf_src_diag_perf" => (
|
||||
"Diag-Perf",
|
||||
"Diag-Perf",
|
||||
"Diag-Perf",
|
||||
"Diag-Perf",
|
||||
"Diag-Perf",
|
||||
),
|
||||
"devices.perf_src_res_exh" => (
|
||||
"Res-Exh",
|
||||
"Res-Exh",
|
||||
"Res-Exh",
|
||||
"Res-Exh",
|
||||
"Res-Exh",
|
||||
),
|
||||
"devices.perf_src_system" => (
|
||||
"System",
|
||||
"System",
|
||||
"Système",
|
||||
"Sistem",
|
||||
"Sistema",
|
||||
),
|
||||
"devices.login_none" => (
|
||||
"No login events recorded yet. The agent reports logons and logoffs as it observes them.",
|
||||
"Noch keine Anmeldeereignisse aufgezeichnet. Der Agent meldet An- und Abmeldungen, sobald er sie beobachtet.",
|
||||
|
||||
@@ -6,7 +6,7 @@ use crate::api::admin::i18n::{t, tf1, tf2, tf3, Lang};
|
||||
use crate::api::error::ApiError;
|
||||
use crate::api::middleware::AuthedUser;
|
||||
use crate::api::state::AppState;
|
||||
use crate::database::{DashboardDeviceRow, LoginEventRow};
|
||||
use crate::database::{DashboardDeviceRow, LoginEventRow, MetricsSampleRow, PerfEventRow};
|
||||
use axum::extract::{Extension, Form, Path, Query};
|
||||
use axum::response::Html;
|
||||
use serde::Deserialize;
|
||||
@@ -470,7 +470,36 @@ pub async fn detail(
|
||||
.login_events_for_peer(&d.id, 50)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
render_detail(lang, &d, &events)
|
||||
// Performance: pull the most recent metrics sample for the
|
||||
// "right now" card, plus 24 h of samples for the sparkline,
|
||||
// plus the most recent perf events (boot/shutdown/memory-
|
||||
// exhaustion etc.) for the "recent slow events" table.
|
||||
// All three are best-effort — none of them is required for
|
||||
// the detail page to render meaningfully.
|
||||
let metrics_latest = state
|
||||
.db
|
||||
.metrics_latest(&d.id)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let since_24h = chrono::Utc::now().timestamp() - 24 * 3600;
|
||||
let metrics_24h = state
|
||||
.db
|
||||
.metrics_samples_since(&d.id, since_24h)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let perf_events = state
|
||||
.db
|
||||
.perf_events_for_peer(&d.id, 20)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
render_detail(
|
||||
lang,
|
||||
&d,
|
||||
&events,
|
||||
metrics_latest.as_ref(),
|
||||
&metrics_24h,
|
||||
&perf_events,
|
||||
)
|
||||
}
|
||||
None => format!(
|
||||
r##"<div class="space-y-4">
|
||||
@@ -1142,7 +1171,14 @@ fn fmt_inv_value(v: Option<&serde_json::Value>) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
fn render_detail(lang: Lang, d: &DashboardDeviceRow, login_events: &[LoginEventRow]) -> String {
|
||||
fn render_detail(
|
||||
lang: Lang,
|
||||
d: &DashboardDeviceRow,
|
||||
login_events: &[LoginEventRow],
|
||||
metrics_latest: Option<&MetricsSampleRow>,
|
||||
metrics_24h: &[MetricsSampleRow],
|
||||
perf_events: &[PerfEventRow],
|
||||
) -> String {
|
||||
let parsed: serde_json::Value =
|
||||
serde_json::from_str(&d.sysinfo_payload).unwrap_or(serde_json::Value::Null);
|
||||
let pick = |k: &str| -> String {
|
||||
@@ -1237,6 +1273,7 @@ fn render_detail(lang: Lang, d: &DashboardDeviceRow, login_events: &[LoginEventR
|
||||
};
|
||||
|
||||
let login_section = render_login_events(lang, login_events);
|
||||
let perf_section = render_performance(lang, metrics_latest, metrics_24h, perf_events);
|
||||
|
||||
format!(
|
||||
r##"<div class="space-y-4">
|
||||
@@ -1245,6 +1282,8 @@ fn render_detail(lang: Lang, d: &DashboardDeviceRow, login_events: &[LoginEventR
|
||||
<div class="text-xs text-slate-500">{detail_view}</div>
|
||||
</div>
|
||||
{header}
|
||||
<h3 class="text-sm font-semibold text-slate-300 mt-4">{performance}</h3>
|
||||
{perf}
|
||||
<h3 class="text-sm font-semibold text-slate-300 mt-4">{inventory}</h3>
|
||||
{inv}
|
||||
<h3 class="text-sm font-semibold text-slate-300 mt-4">{login_history}</h3>
|
||||
@@ -1252,6 +1291,8 @@ fn render_detail(lang: Lang, d: &DashboardDeviceRow, login_events: &[LoginEventR
|
||||
</div>"##,
|
||||
back = back_button(lang),
|
||||
detail_view = t(lang, "devices.detail_view"),
|
||||
performance = t(lang, "devices.performance"),
|
||||
perf = perf_section,
|
||||
inventory = t(lang, "devices.inventory"),
|
||||
header = header,
|
||||
inv = inventory_section,
|
||||
@@ -1260,6 +1301,389 @@ fn render_detail(lang: Lang, d: &DashboardDeviceRow, login_events: &[LoginEventR
|
||||
)
|
||||
}
|
||||
|
||||
/// Top-level Performance section: snapshot card, two sparklines (CPU /
|
||||
/// memory), and a recent-events table. The whole thing is omitted in
|
||||
/// favour of a "no data yet" panel when the agent hasn't reported.
|
||||
fn render_performance(
|
||||
lang: Lang,
|
||||
latest: Option<&MetricsSampleRow>,
|
||||
series: &[MetricsSampleRow],
|
||||
events: &[PerfEventRow],
|
||||
) -> String {
|
||||
if latest.is_none() && series.is_empty() && events.is_empty() {
|
||||
return format!(
|
||||
r##"<div class="rounded-md border border-slate-700 bg-slate-900 p-3 text-sm text-slate-400">
|
||||
{msg}
|
||||
</div>"##,
|
||||
msg = t(lang, "devices.perf_none"),
|
||||
);
|
||||
}
|
||||
|
||||
let snapshot = render_perf_snapshot(lang, latest);
|
||||
let cpu_chart = render_sparkline(
|
||||
lang,
|
||||
series.iter().map(|s| (s.at, s.cpu_pct)).collect(),
|
||||
100.0,
|
||||
true,
|
||||
t(lang, "devices.perf_cpu"),
|
||||
);
|
||||
let mem_chart = {
|
||||
// Mem is reported as MB used / MB total; chart uses % so the
|
||||
// y-axis stays comparable to the CPU panel.
|
||||
let series_pct: Vec<(i64, f64)> = series
|
||||
.iter()
|
||||
.filter(|s| s.mem_total_mb > 0)
|
||||
.map(|s| {
|
||||
let pct = 100.0 * (s.mem_used_mb as f64) / (s.mem_total_mb as f64);
|
||||
(s.at, pct)
|
||||
})
|
||||
.collect();
|
||||
render_sparkline(lang, series_pct, 100.0, true, t(lang, "devices.perf_mem"))
|
||||
};
|
||||
let events_section = render_perf_events_table(lang, events);
|
||||
|
||||
format!(
|
||||
r##"<div class="space-y-4">
|
||||
{snapshot}
|
||||
<div class="grid grid-cols-1 gap-4 md:grid-cols-2">
|
||||
{cpu}
|
||||
{mem}
|
||||
</div>
|
||||
{events}
|
||||
</div>"##,
|
||||
snapshot = snapshot,
|
||||
cpu = cpu_chart,
|
||||
mem = mem_chart,
|
||||
events = events_section,
|
||||
)
|
||||
}
|
||||
|
||||
/// "Right now" card — the most recent metrics sample. Drawn as a 4-up
|
||||
/// stat tile so the supporter can glance at CPU / memory / top
|
||||
/// processes without reading a chart. Falls back to a thin "no live
|
||||
/// data" pill when the agent has never reported.
|
||||
fn render_perf_snapshot(lang: Lang, latest: Option<&MetricsSampleRow>) -> String {
|
||||
let Some(s) = latest else {
|
||||
return format!(
|
||||
r##"<div class="rounded-md border border-slate-800 bg-slate-900 p-3 text-xs text-slate-500">
|
||||
{msg}
|
||||
</div>"##,
|
||||
msg = t(lang, "devices.perf_no_live"),
|
||||
);
|
||||
};
|
||||
let now = chrono::Utc::now().timestamp();
|
||||
let age = (now - s.at).max(0);
|
||||
let age_str = fmt_age(age);
|
||||
let cpu_color = pct_color(s.cpu_pct);
|
||||
let mem_pct = if s.mem_total_mb > 0 {
|
||||
100.0 * (s.mem_used_mb as f64) / (s.mem_total_mb as f64)
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
let mem_color = pct_color(mem_pct);
|
||||
let mem_used_gb = (s.mem_used_mb as f64) / 1024.0;
|
||||
let mem_total_gb = (s.mem_total_mb as f64) / 1024.0;
|
||||
let top_cpu = if s.top_cpu_name.is_empty() {
|
||||
"—".to_string()
|
||||
} else {
|
||||
format!(
|
||||
"{name} <span class=\"text-xs text-slate-400\">{pct:.0}%</span>",
|
||||
name = html_escape(&s.top_cpu_name),
|
||||
pct = s.top_cpu_pct,
|
||||
)
|
||||
};
|
||||
let top_mem = if s.top_mem_name.is_empty() {
|
||||
"—".to_string()
|
||||
} else {
|
||||
let mb = s.top_mem_mb;
|
||||
let mem_disp = if mb >= 1024 {
|
||||
format!("{:.1} GB", (mb as f64) / 1024.0)
|
||||
} else {
|
||||
format!("{} MB", mb)
|
||||
};
|
||||
format!(
|
||||
"{name} <span class=\"text-xs text-slate-400\">{disp}</span>",
|
||||
name = html_escape(&s.top_mem_name),
|
||||
disp = html_escape(&mem_disp),
|
||||
)
|
||||
};
|
||||
let uptime_str = if s.uptime_secs > 0 {
|
||||
fmt_age(s.uptime_secs)
|
||||
} else {
|
||||
"—".to_string()
|
||||
};
|
||||
|
||||
format!(
|
||||
r##"<div class="rounded-md border border-slate-800 bg-slate-900 p-4">
|
||||
<div class="flex items-baseline justify-between mb-3">
|
||||
<h4 class="text-sm font-semibold text-slate-200">{l_now}</h4>
|
||||
<span class="text-xs text-slate-500" title="{at_full} UTC">{l_age}</span>
|
||||
</div>
|
||||
<dl class="grid grid-cols-2 gap-x-6 gap-y-3 text-sm md:grid-cols-4">
|
||||
<div>
|
||||
<dt class="text-xs text-slate-500">{l_cpu}</dt>
|
||||
<dd class="text-lg font-semibold {cpu_cls} tabular-nums">{cpu:.0}%</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt class="text-xs text-slate-500">{l_mem}</dt>
|
||||
<dd class="text-lg font-semibold {mem_cls} tabular-nums">{mem_pct:.0}%</dd>
|
||||
<dd class="text-xs text-slate-500 tabular-nums">{used:.1} / {total:.1} GB</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt class="text-xs text-slate-500">{l_top_cpu}</dt>
|
||||
<dd class="text-slate-200 font-mono text-xs truncate" title="{top_cpu_raw}">{top_cpu}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt class="text-xs text-slate-500">{l_top_mem}</dt>
|
||||
<dd class="text-slate-200 font-mono text-xs truncate" title="{top_mem_raw}">{top_mem}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt class="text-xs text-slate-500">{l_uptime}</dt>
|
||||
<dd class="text-slate-300 tabular-nums">{uptime}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt class="text-xs text-slate-500">{l_procs}</dt>
|
||||
<dd class="text-slate-300 tabular-nums">{procs}</dd>
|
||||
</div>
|
||||
</dl>
|
||||
</div>"##,
|
||||
l_now = t(lang, "devices.perf_now"),
|
||||
l_age = tf1(lang, "devices.perf_sampled_ago", &age_str),
|
||||
at_full = html_escape(&fmt_unix_utc(s.at)),
|
||||
l_cpu = t(lang, "devices.perf_cpu"),
|
||||
cpu_cls = cpu_color,
|
||||
cpu = s.cpu_pct,
|
||||
l_mem = t(lang, "devices.perf_mem"),
|
||||
mem_cls = mem_color,
|
||||
mem_pct = mem_pct,
|
||||
used = mem_used_gb,
|
||||
total = mem_total_gb,
|
||||
l_top_cpu = t(lang, "devices.perf_top_cpu"),
|
||||
top_cpu_raw = html_escape(&s.top_cpu_name),
|
||||
top_cpu = top_cpu,
|
||||
l_top_mem = t(lang, "devices.perf_top_mem"),
|
||||
top_mem_raw = html_escape(&s.top_mem_name),
|
||||
top_mem = top_mem,
|
||||
l_uptime = t(lang, "devices.perf_uptime"),
|
||||
uptime = html_escape(&uptime_str),
|
||||
l_procs = t(lang, "devices.perf_proc_count"),
|
||||
procs = s.proc_count,
|
||||
)
|
||||
}
|
||||
|
||||
/// Maps a percentage to the Tailwind text-colour class used on the
/// snapshot stat tiles: green below 60, amber from 60 up to (but not
/// including) 85, red from 85 upwards. A NaN input fails both comparisons
/// and therefore comes out green.
fn pct_color(pct: f64) -> &'static str {
    match pct {
        p if p >= 85.0 => "text-rose-400",
        p if p >= 60.0 => "text-amber-300",
        _ => "text-emerald-300",
    }
}
|
||||
|
||||
/// Render an inline-SVG sparkline. `series` is a (unix-seconds, value)
|
||||
/// vector; `max_y` clamps the y-axis (so two side-by-side charts share
|
||||
/// a scale); `bucketed = true` downsamples by averaging into 96 buckets
|
||||
/// so the polyline string stays short for a wide time window.
|
||||
fn render_sparkline(
|
||||
lang: Lang,
|
||||
series: Vec<(i64, f64)>,
|
||||
max_y: f64,
|
||||
bucketed: bool,
|
||||
title: &str,
|
||||
) -> String {
|
||||
const WIDTH: f64 = 600.0;
|
||||
const HEIGHT: f64 = 80.0;
|
||||
const PAD: f64 = 4.0;
|
||||
|
||||
if series.is_empty() {
|
||||
return format!(
|
||||
r##"<div class="rounded-md border border-slate-800 bg-slate-900 p-3">
|
||||
<h4 class="text-xs uppercase text-slate-500 mb-1">{title}</h4>
|
||||
<div class="text-xs text-slate-500">{msg}</div>
|
||||
</div>"##,
|
||||
title = html_escape(title),
|
||||
msg = t(lang, "devices.perf_no_chart"),
|
||||
);
|
||||
}
|
||||
|
||||
let points = if bucketed && series.len() > 96 {
|
||||
downsample_avg(&series, 96)
|
||||
} else {
|
||||
series.clone()
|
||||
};
|
||||
|
||||
let min_x = points.first().map(|p| p.0).unwrap_or(0);
|
||||
let max_x = points.last().map(|p| p.0).unwrap_or(0);
|
||||
let span_x = (max_x - min_x).max(1) as f64;
|
||||
|
||||
let plot_w = WIDTH - 2.0 * PAD;
|
||||
let plot_h = HEIGHT - 2.0 * PAD;
|
||||
|
||||
let mut path = String::new();
|
||||
let mut area = String::new();
|
||||
let mut peak: f64 = 0.0;
|
||||
let mut last: f64 = 0.0;
|
||||
for (i, (t, v)) in points.iter().enumerate() {
|
||||
let x = PAD + plot_w * ((t - min_x) as f64) / span_x;
|
||||
let y_norm = (v / max_y).clamp(0.0, 1.0);
|
||||
let y = PAD + plot_h * (1.0 - y_norm);
|
||||
if i == 0 {
|
||||
path.push_str(&format!("M{:.1},{:.1}", x, y));
|
||||
area.push_str(&format!("M{:.1},{:.1}", x, PAD + plot_h));
|
||||
area.push_str(&format!(" L{:.1},{:.1}", x, y));
|
||||
} else {
|
||||
path.push_str(&format!(" L{:.1},{:.1}", x, y));
|
||||
area.push_str(&format!(" L{:.1},{:.1}", x, y));
|
||||
}
|
||||
peak = peak.max(*v);
|
||||
last = *v;
|
||||
}
|
||||
let last_x = PAD + plot_w;
|
||||
area.push_str(&format!(" L{:.1},{:.1} Z", last_x, PAD + plot_h));
|
||||
|
||||
// Hours-from-now labels: oldest point's age, "now" on the right.
|
||||
let span_secs = (max_x - min_x).max(0);
|
||||
let span_label = if span_secs >= 3600 {
|
||||
format!("-{}h", span_secs / 3600)
|
||||
} else if span_secs >= 60 {
|
||||
format!("-{}m", span_secs / 60)
|
||||
} else {
|
||||
format!("-{}s", span_secs)
|
||||
};
|
||||
|
||||
format!(
|
||||
r##"<div class="rounded-md border border-slate-800 bg-slate-900 p-3">
|
||||
<div class="flex items-baseline justify-between mb-1">
|
||||
<h4 class="text-xs uppercase text-slate-500">{title}</h4>
|
||||
<span class="text-[11px] text-slate-500 tabular-nums">{l_peak} <span class="text-slate-300">{peak:.0}%</span> {l_now} <span class="text-slate-300">{last:.0}%</span></span>
|
||||
</div>
|
||||
<svg viewBox="0 0 {w} {h}" preserveAspectRatio="none" class="w-full h-20">
|
||||
<line x1="{pad}" y1="{ymid:.1}" x2="{xend:.1}" y2="{ymid:.1}" stroke="#1f2937" stroke-width="1" stroke-dasharray="2,3"/>
|
||||
<path d="{area}" fill="#0ea5e9" fill-opacity="0.10" stroke="none"/>
|
||||
<path d="{path}" fill="none" stroke="#38bdf8" stroke-width="1.5" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
<div class="flex justify-between text-[10px] text-slate-500 mt-1 tabular-nums">
|
||||
<span>{older}</span>
|
||||
<span>{l_now_short}</span>
|
||||
</div>
|
||||
</div>"##,
|
||||
title = html_escape(title),
|
||||
l_peak = t(lang, "devices.perf_peak"),
|
||||
peak = peak,
|
||||
l_now = t(lang, "devices.perf_latest"),
|
||||
last = last,
|
||||
w = WIDTH,
|
||||
h = HEIGHT,
|
||||
pad = PAD,
|
||||
ymid = PAD + plot_h * 0.5,
|
||||
xend = WIDTH - PAD,
|
||||
area = area,
|
||||
path = path,
|
||||
older = html_escape(&span_label),
|
||||
l_now_short = t(lang, "devices.perf_now_short"),
|
||||
)
|
||||
}
|
||||
|
||||
/// Mean-pools a `(timestamp, value)` series into at most `target`
/// equal-width time buckets.
///
/// * `series` — points in ascending timestamp order. Consecutive points
///   that fall into the same bucket are merged; out-of-order input is not
///   re-sorted.
/// * `target` — upper bound on the number of points returned (`0` is
///   treated as `1`).
///
/// Each returned point carries the mean timestamp and the mean value of
/// the samples in its bucket. Buckets that received no samples produce no
/// point, so a stretch where the agent was offline shows up as a gap
/// between two points rather than as a run of zeroes.
///
/// A series that already has `target` points or fewer is returned
/// unchanged.
fn downsample_avg(series: &[(i64, f64)], target: usize) -> Vec<(i64, f64)> {
    if series.len() <= target {
        return series.to_vec();
    }
    let (Some(&(min_x, _)), Some(&(max_x, _))) = (series.first(), series.last()) else {
        return Vec::new();
    };

    let max_buckets = target.max(1);
    let span = max_x.saturating_sub(min_x).max(0);
    // Bucket width is ceil((span + 1) / target), which equals
    // span / target + 1 in integer arithmetic. Rounding up guarantees the
    // highest bucket index, span / width, stays below `target`; rounding
    // down could produce almost twice as many buckets as requested.
    let divisor = i64::try_from(max_buckets).unwrap_or(i64::MAX);
    let bucket_secs = (span / divisor).saturating_add(1);

    // Per bucket: (timestamp sum, value sum, sample count). The timestamp
    // sum is kept in i128 so adding many i64 values cannot overflow.
    let mut buckets: Vec<(i128, f64, i64)> = Vec::with_capacity(max_buckets);
    let mut current_bucket: Option<i64> = None;
    for &(ts, value) in series {
        let bucket = ts.saturating_sub(min_x) / bucket_secs;
        if current_bucket == Some(bucket) {
            if let Some(acc) = buckets.last_mut() {
                acc.0 += i128::from(ts);
                acc.1 += value;
                acc.2 += 1;
            }
        } else {
            buckets.push((i128::from(ts), value, 1));
            current_bucket = Some(bucket);
        }
    }

    buckets
        .into_iter()
        .map(|(ts_sum, value_sum, n)| {
            // The mean of i64 timestamps always fits back into an i64.
            let mean_ts = (ts_sum / i128::from(n)) as i64;
            (mean_ts, value_sum / (n as f64))
        })
        .collect()
}
|
||||
|
||||
/// Recent perf-events table — boot/shutdown/sleep degradation, memory
|
||||
/// exhaustion, BSODs, unexpected reboots. Empty list → a neutral
|
||||
/// "nothing flagged yet" panel so the heading still has a body.
|
||||
fn render_perf_events_table(lang: Lang, events: &[PerfEventRow]) -> String {
|
||||
if events.is_empty() {
|
||||
return format!(
|
||||
r##"<div class="rounded-md border border-slate-800 bg-slate-900 p-3 text-xs text-slate-500">
|
||||
{msg}
|
||||
</div>"##,
|
||||
msg = t(lang, "devices.perf_events_none"),
|
||||
);
|
||||
}
|
||||
let mut s = format!(
|
||||
r##"<div>
|
||||
<h4 class="text-xs uppercase text-slate-500 mb-1">{l_events}</h4>
|
||||
<div class="rounded-md border border-slate-800 bg-slate-900 overflow-hidden">
|
||||
<table class="w-full text-sm">
|
||||
<thead class="text-xs uppercase text-slate-500 bg-slate-950">
|
||||
<tr>
|
||||
<th class="text-left font-medium px-3 py-2">{c_when}</th>
|
||||
<th class="text-left font-medium px-3 py-2">{c_source}</th>
|
||||
<th class="text-left font-medium px-3 py-2">{c_summary}</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="divide-y divide-slate-800">"##,
|
||||
l_events = t(lang, "devices.perf_events_heading"),
|
||||
c_when = t(lang, "devices.perf_events_col_when"),
|
||||
c_source = t(lang, "devices.perf_events_col_source"),
|
||||
c_summary = t(lang, "devices.perf_events_col_summary"),
|
||||
);
|
||||
for ev in events {
|
||||
let when = fmt_unix_utc(ev.at);
|
||||
let (level_cls, _level_label) = match ev.level {
|
||||
1 => ("bg-rose-900/40 text-rose-300 border-rose-800", "critical"),
|
||||
2 => ("bg-rose-900/30 text-rose-300 border-rose-900", "error"),
|
||||
3 => ("bg-amber-900/40 text-amber-300 border-amber-800", "warning"),
|
||||
_ => ("bg-slate-800 text-slate-300 border-slate-700", "info"),
|
||||
};
|
||||
let source_label = match ev.provider.as_str() {
|
||||
"diag-perf" => t(lang, "devices.perf_src_diag_perf"),
|
||||
"res-exh" => t(lang, "devices.perf_src_res_exh"),
|
||||
"system" => t(lang, "devices.perf_src_system"),
|
||||
other => other,
|
||||
};
|
||||
let _ = write!(
|
||||
s,
|
||||
r##"<tr class="hover:bg-slate-800/40 align-top">
|
||||
<td class="px-3 py-2 font-mono text-xs text-slate-300 whitespace-nowrap">{when}</td>
|
||||
<td class="px-3 py-2">
|
||||
<span class="inline-block text-[11px] px-1.5 py-0.5 rounded border {lvl_cls}">{src} · {eid}</span>
|
||||
</td>
|
||||
<td class="px-3 py-2 text-slate-200 text-xs">{summary}</td>
|
||||
</tr>"##,
|
||||
when = html_escape(&when),
|
||||
lvl_cls = level_cls,
|
||||
src = html_escape(source_label),
|
||||
eid = ev.event_id,
|
||||
summary = html_escape(&ev.summary),
|
||||
);
|
||||
}
|
||||
s.push_str("</tbody></table></div></div>");
|
||||
s
|
||||
}
|
||||
|
||||
/// Render the per-device login history table. Empty input → a neutral
|
||||
/// "no events yet" panel so the heading still has something under it.
|
||||
/// We render the agent-reported `at` in the standard SQLite UTC format
|
||||
|
||||
@@ -0,0 +1,183 @@
|
||||
//! `POST /api/agent/metrics` — continuous performance time-series the
|
||||
//! agent samples at ~1/min. The admin Devices detail page renders this
|
||||
//! as a CPU / memory sparkline plus a "current snapshot" card.
|
||||
//!
|
||||
//! Auth: same per-peer signed-API gate as the other agent endpoints —
|
||||
//! see [`crate::api::device_auth`]. Body shape (batched so an agent
|
||||
//! that's catching up after a transport outage can land everything in
|
||||
//! one POST):
|
||||
//!
|
||||
//! ```json
|
||||
//! {
|
||||
//! "id": "<peer id>",
|
||||
//! "uuid": "<peer uuid>",
|
||||
//! "samples": [
|
||||
//! {
|
||||
//! "at": 1717920000,
|
||||
//! "cpu_pct": 42.5,
|
||||
//! "mem_used_mb": 7820,
|
||||
//! "mem_total_mb": 16384,
|
||||
//! "proc_count": 341,
|
||||
//! "uptime_secs": 173000,
|
||||
//! "top_cpu_name": "chrome.exe",
|
||||
//! "top_cpu_pct": 18.3,
|
||||
//! "top_mem_name": "chrome.exe",
|
||||
//! "top_mem_mb": 1240
|
||||
//! }
|
||||
//! ]
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use crate::api::device_auth::{self, AuthOutcome};
|
||||
use crate::api::error::ApiError;
|
||||
use crate::api::state::AppState;
|
||||
use crate::database::MetricsSampleRow;
|
||||
use axum::body::Bytes;
|
||||
use axum::extract::Extension;
|
||||
use axum::http::HeaderMap;
|
||||
use serde::Deserialize;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// One performance sample as posted by the agent inside
/// [`MetricsBody::samples`]. Only `at` is mandatory; every other field
/// falls back to its type's default (0 / empty string) when omitted.
#[derive(Debug, Deserialize)]
pub struct MetricsSampleIn {
    /// Sample time as a unix timestamp (the admin charts treat it as
    /// seconds). Stored as sent.
    pub at: i64,
    /// Overall CPU usage in percent; the handler clamps it to 0–100.
    #[serde(default)]
    pub cpu_pct: f64,
    /// Memory in use, in MB; negative values are stored as 0.
    #[serde(default)]
    pub mem_used_mb: i64,
    /// Total memory, in MB; negative values are stored as 0.
    #[serde(default)]
    pub mem_total_mb: i64,
    /// Number of processes; negative values are stored as 0.
    #[serde(default)]
    pub proc_count: i64,
    /// Machine uptime in seconds; negative values are stored as 0.
    #[serde(default)]
    pub uptime_secs: i64,
    /// Name of the process using the most CPU; truncated by the handler
    /// to `MAX_PROC_NAME_LEN` characters.
    #[serde(default)]
    pub top_cpu_name: String,
    /// CPU share of that process in percent; clamped to 0–100.
    #[serde(default)]
    pub top_cpu_pct: f64,
    /// Name of the process using the most memory; truncated by the
    /// handler to `MAX_PROC_NAME_LEN` characters.
    #[serde(default)]
    pub top_mem_name: String,
    /// Memory held by that process, in MB; negative values are stored as 0.
    #[serde(default)]
    pub top_mem_mb: i64,
}
|
||||
|
||||
/// JSON body of `POST /api/agent/metrics`. All three fields are required
/// by the deserializer.
#[derive(Debug, Deserialize)]
pub struct MetricsBody {
    /// Peer id of the reporting device. Must be non-empty and, for a
    /// signed request, equal to the id the signature was verified for.
    pub id: String,
    /// Peer uuid; must be non-empty. Passed through to the sample insert.
    pub uuid: String,
    /// Batch of samples. An empty batch is acknowledged with `OK`; more
    /// than `MAX_SAMPLES_PER_POST` entries is rejected as a bad request.
    pub samples: Vec<MetricsSampleIn>,
}
|
||||
|
||||
/// Cap per request. At 60s sampling cadence + the agent's 30-minute
|
||||
/// retry-and-drain budget, even a long outage should fit well under this.
|
||||
const MAX_SAMPLES_PER_POST: usize = 512;
|
||||
|
||||
/// Defensive bound on string fields the agent puts in `top_*_name` — a
|
||||
/// runaway process name doesn't get to balloon the DB row.
|
||||
const MAX_PROC_NAME_LEN: usize = 128;
|
||||
|
||||
pub async fn metrics(
|
||||
Extension(state): Extension<Arc<AppState>>,
|
||||
headers: HeaderMap,
|
||||
body: Bytes,
|
||||
) -> Result<String, ApiError> {
|
||||
let outcome =
|
||||
device_auth::verify(&state, "POST", "/api/agent/metrics", &headers, &body).await?;
|
||||
|
||||
let payload: MetricsBody = serde_json::from_slice(&body)
|
||||
.map_err(|_| ApiError::BadRequest("invalid json".into()))?;
|
||||
|
||||
if payload.id.is_empty() || payload.uuid.is_empty() {
|
||||
return Err(ApiError::BadRequest("id and uuid are required".into()));
|
||||
}
|
||||
if payload.samples.is_empty() {
|
||||
return Ok("OK".to_string());
|
||||
}
|
||||
if payload.samples.len() > MAX_SAMPLES_PER_POST {
|
||||
return Err(ApiError::BadRequest(format!(
|
||||
"too many samples in one POST (max {MAX_SAMPLES_PER_POST})"
|
||||
)));
|
||||
}
|
||||
|
||||
let id = match outcome {
|
||||
AuthOutcome::Verified { id: signed_id } => {
|
||||
if payload.id != signed_id {
|
||||
return Err(ApiError::Unauthorized);
|
||||
}
|
||||
signed_id
|
||||
}
|
||||
AuthOutcome::LegacyUnsigned => {
|
||||
device_auth::enforce_managed_for_id(&state, &payload.id).await?;
|
||||
payload.id.clone()
|
||||
}
|
||||
};
|
||||
|
||||
let peer = state
|
||||
.db
|
||||
.get_peer(&id)
|
||||
.await
|
||||
.map_err(|e| ApiError::Internal(e.to_string()))?;
|
||||
if peer.is_none() {
|
||||
return Ok("ID_NOT_FOUND".to_string());
|
||||
}
|
||||
|
||||
let mut accepted = 0usize;
|
||||
for s in &payload.samples {
|
||||
// Sanity-clamp the floats and string lengths. The agent should
|
||||
// produce well-formed values, but the public-API shape means
|
||||
// garbage-in shouldn't propagate to garbage-on-screen.
|
||||
let cpu_pct = clamp_pct(s.cpu_pct);
|
||||
let top_cpu_pct = clamp_pct(s.top_cpu_pct);
|
||||
let row = MetricsSampleRow {
|
||||
at: s.at,
|
||||
cpu_pct,
|
||||
mem_used_mb: s.mem_used_mb.max(0),
|
||||
mem_total_mb: s.mem_total_mb.max(0),
|
||||
proc_count: s.proc_count.max(0),
|
||||
uptime_secs: s.uptime_secs.max(0),
|
||||
top_cpu_name: truncate(&s.top_cpu_name, MAX_PROC_NAME_LEN),
|
||||
top_cpu_pct,
|
||||
top_mem_name: truncate(&s.top_mem_name, MAX_PROC_NAME_LEN),
|
||||
top_mem_mb: s.top_mem_mb.max(0),
|
||||
};
|
||||
if let Err(e) = state
|
||||
.db
|
||||
.metrics_sample_insert(&id, &payload.uuid, &row)
|
||||
.await
|
||||
{
|
||||
hbb_common::log::warn!(
|
||||
"metrics_sample_insert for peer {} failed: {}",
|
||||
id,
|
||||
e
|
||||
);
|
||||
continue;
|
||||
}
|
||||
accepted += 1;
|
||||
}
|
||||
|
||||
hbb_common::log::debug!(
|
||||
"metrics: peer={} accepted={}/{}",
|
||||
id,
|
||||
accepted,
|
||||
payload.samples.len()
|
||||
);
|
||||
Ok("OK".to_string())
|
||||
}
|
||||
|
||||
/// Forces a percentage into the `0.0..=100.0` range. NaN becomes `0.0`;
/// values already inside the range are returned untouched.
fn clamp_pct(v: f64) -> f64 {
    match v {
        x if x.is_nan() => 0.0,
        x if x < 0.0 => 0.0,
        x if x > 100.0 => 100.0,
        x => x,
    }
}
|
||||
|
||||
/// Returns `s` cut down to at most `max_chars` characters. The cut is
/// made on a `char` boundary, so a multi-byte character is never split.
/// Strings that are already short enough are copied unchanged.
fn truncate(s: &str, max_chars: usize) -> String {
    // The byte offset of the first character past the limit, if there is
    // one, is exactly where the kept prefix ends.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => s[..cut].to_string(),
        None => s.to_string(),
    }
}
|
||||
@@ -16,6 +16,8 @@ pub mod groups;
|
||||
pub mod heartbeat;
|
||||
pub mod http_proxy;
|
||||
pub mod login_event;
|
||||
pub mod metrics;
|
||||
pub mod perf_events;
|
||||
pub mod middleware;
|
||||
pub mod oidc;
|
||||
pub mod pagination;
|
||||
@@ -53,6 +55,8 @@ pub fn router(state: Arc<AppState>) -> Router {
|
||||
.route("/api/sysinfo", post(sysinfo::sysinfo))
|
||||
.route("/api/agent/exec-result", post(agent_exec::exec_result))
|
||||
.route("/api/agent/login-event", post(login_event::login_event))
|
||||
.route("/api/agent/metrics", post(metrics::metrics))
|
||||
.route("/api/agent/perf-events", post(perf_events::perf_events))
|
||||
.route(
|
||||
"/api/unattended-password",
|
||||
post(unattended::unattended_password),
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
//! `POST /api/agent/perf-events` — performance-related Windows event log
|
||||
//! entries the agent surfaced from `Microsoft-Windows-Diagnostics-
|
||||
//! Performance/Operational`, `Microsoft-Windows-Resource-Exhaustion-
|
||||
//! Detector/Operational`, and a few hand-picked IDs from `System`
|
||||
//! (unexpected reboots, BSODs, dirty shutdowns). The admin UI shows
|
||||
//! the recent ones in the device's Performance section.
|
||||
//!
|
||||
//! Auth: same per-peer signed-API gate as the other agent endpoints.
|
||||
//! Server-side dedup is via the UNIQUE (peer_id, provider, record_id)
|
||||
//! index — the agent persists a per-channel cursor to disk, but a
|
||||
//! restart that loses the cursor can safely re-emit overlapping ranges.
|
||||
|
||||
use crate::api::device_auth::{self, AuthOutcome};
|
||||
use crate::api::error::ApiError;
|
||||
use crate::api::state::AppState;
|
||||
use crate::database::PerfEventRow;
|
||||
use axum::body::Bytes;
|
||||
use axum::extract::Extension;
|
||||
use axum::http::HeaderMap;
|
||||
use serde::Deserialize;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// One entry of the `events` array in a `POST /api/agent/perf-events` body.
///
/// `at`, `provider` and `event_id` carry no serde default and must be present;
/// the remaining fields fall back to their defaults when the agent omits them.
#[derive(Debug, Deserialize)]
pub struct PerfEventIn {
    /// Event timestamp as reported by the agent; stored unchanged.
    // NOTE(review): presumably unix epoch seconds — confirm against the agent.
    pub at: i64,
    /// Event-log channel identifier. The handler trims it, skips the event
    /// when it is empty, and truncates it to `MAX_PROVIDER_LEN` characters.
    pub provider: String,
    /// Numeric event id; stored unchanged.
    pub event_id: i64,
    /// Severity level; defaults to `default_level()` when absent.
    #[serde(default = "default_level")]
    pub level: i64,
    /// Event-log record id; defaults to 0 when absent.
    // NOTE(review): the module docs describe server-side dedup on
    // (peer_id, provider, record_id). Events that omit `record_id` all land on
    // 0 and would therefore dedup against each other — confirm that agents
    // always send it.
    #[serde(default)]
    pub record_id: i64,
    /// Short display string; truncated to `MAX_SUMMARY_LEN` characters.
    #[serde(default)]
    pub summary: String,
    /// Structured payload as a string; truncated to `MAX_DETAIL_LEN`
    /// characters.
    // NOTE(review): truncation is character-based, so an oversized value is
    // cut mid-document and may no longer parse as JSON — confirm consumers
    // tolerate that.
    #[serde(default)]
    pub detail_json: String,
}
|
||||
|
||||
/// Serde fallback for `PerfEventIn::level` when the agent omits the field.
fn default_level() -> i64 {
    // WEL "Information"
    const INFORMATION: i64 = 4;
    INFORMATION
}
|
||||
|
||||
/// JSON body of `POST /api/agent/perf-events`.
///
/// All three fields are required by the deserializer; the handler additionally
/// rejects empty `id` / `uuid` values.
#[derive(Debug, Deserialize)]
pub struct PerfEventsBody {
    /// Peer id the events belong to. For signed requests it must equal the id
    /// the signature was verified for.
    pub id: String,
    /// Peer uuid; stored alongside every inserted event.
    pub uuid: String,
    /// Events in this batch. An empty list is answered with `OK` without
    /// touching the database; more than `MAX_EVENTS_PER_POST` is rejected.
    pub events: Vec<PerfEventIn>,
}
|
||||
|
||||
/// Upper bound on `events` per request; larger batches are rejected with a
/// bad-request error.
const MAX_EVENTS_PER_POST: usize = 128;
/// Maximum stored length of `provider`, in characters (not bytes).
const MAX_PROVIDER_LEN: usize = 64;
/// Maximum stored length of `summary`, in characters (not bytes).
const MAX_SUMMARY_LEN: usize = 512;
/// Maximum stored length of `detail_json`, in characters (not bytes).
const MAX_DETAIL_LEN: usize = 8 * 1024;
|
||||
|
||||
pub async fn perf_events(
|
||||
Extension(state): Extension<Arc<AppState>>,
|
||||
headers: HeaderMap,
|
||||
body: Bytes,
|
||||
) -> Result<String, ApiError> {
|
||||
let outcome =
|
||||
device_auth::verify(&state, "POST", "/api/agent/perf-events", &headers, &body).await?;
|
||||
|
||||
let payload: PerfEventsBody = serde_json::from_slice(&body)
|
||||
.map_err(|_| ApiError::BadRequest("invalid json".into()))?;
|
||||
|
||||
if payload.id.is_empty() || payload.uuid.is_empty() {
|
||||
return Err(ApiError::BadRequest("id and uuid are required".into()));
|
||||
}
|
||||
if payload.events.is_empty() {
|
||||
return Ok("OK".to_string());
|
||||
}
|
||||
if payload.events.len() > MAX_EVENTS_PER_POST {
|
||||
return Err(ApiError::BadRequest(format!(
|
||||
"too many events in one POST (max {MAX_EVENTS_PER_POST})"
|
||||
)));
|
||||
}
|
||||
|
||||
let id = match outcome {
|
||||
AuthOutcome::Verified { id: signed_id } => {
|
||||
if payload.id != signed_id {
|
||||
return Err(ApiError::Unauthorized);
|
||||
}
|
||||
signed_id
|
||||
}
|
||||
AuthOutcome::LegacyUnsigned => {
|
||||
device_auth::enforce_managed_for_id(&state, &payload.id).await?;
|
||||
payload.id.clone()
|
||||
}
|
||||
};
|
||||
|
||||
let peer = state
|
||||
.db
|
||||
.get_peer(&id)
|
||||
.await
|
||||
.map_err(|e| ApiError::Internal(e.to_string()))?;
|
||||
if peer.is_none() {
|
||||
return Ok("ID_NOT_FOUND".to_string());
|
||||
}
|
||||
|
||||
let mut accepted = 0usize;
|
||||
for e in &payload.events {
|
||||
let provider = e.provider.trim();
|
||||
if provider.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let row = PerfEventRow {
|
||||
at: e.at,
|
||||
provider: truncate(provider, MAX_PROVIDER_LEN),
|
||||
event_id: e.event_id,
|
||||
level: e.level,
|
||||
record_id: e.record_id,
|
||||
summary: truncate(&e.summary, MAX_SUMMARY_LEN),
|
||||
detail_json: truncate(&e.detail_json, MAX_DETAIL_LEN),
|
||||
received_at: 0, // server fills via DEFAULT on INSERT
|
||||
};
|
||||
if let Err(err) = state.db.perf_event_insert(&id, &payload.uuid, &row).await {
|
||||
hbb_common::log::warn!(
|
||||
"perf_event_insert for peer {} failed: {}",
|
||||
id,
|
||||
err
|
||||
);
|
||||
continue;
|
||||
}
|
||||
accepted += 1;
|
||||
}
|
||||
|
||||
hbb_common::log::debug!(
|
||||
"perf-events: peer={} accepted={}/{}",
|
||||
id,
|
||||
accepted,
|
||||
payload.events.len()
|
||||
);
|
||||
Ok("OK".to_string())
|
||||
}
|
||||
|
||||
/// Returns at most the first `max_chars` characters of `s` as an owned
/// `String`.
///
/// The limit is counted in `char`s, not bytes, and the cut always lands on a
/// `char` boundary, so multi-byte UTF-8 sequences are never split.
fn truncate(s: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the byte offset of the first character past the
    // cap, or `None` when the string already fits. The scan stops at the cap
    // instead of counting the whole (agent-supplied, possibly oversized) input
    // and then iterating it again to copy.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => s[..cut].to_owned(),
        None => s.to_owned(),
    }
}
|
||||
+253
@@ -260,6 +260,22 @@ pub struct ExecQueued {
|
||||
pub script: String,
|
||||
}
|
||||
|
||||
fn metrics_sample_row_from(r: sqlx::sqlite::SqliteRow) -> MetricsSampleRow {
|
||||
use sqlx::Row;
|
||||
MetricsSampleRow {
|
||||
at: r.try_get("at").unwrap_or(0),
|
||||
cpu_pct: r.try_get("cpu_pct").unwrap_or(0.0),
|
||||
mem_used_mb: r.try_get("mem_used_mb").unwrap_or(0),
|
||||
mem_total_mb: r.try_get("mem_total_mb").unwrap_or(0),
|
||||
proc_count: r.try_get("proc_count").unwrap_or(0),
|
||||
uptime_secs: r.try_get("uptime_secs").unwrap_or(0),
|
||||
top_cpu_name: r.try_get("top_cpu_name").unwrap_or_default(),
|
||||
top_cpu_pct: r.try_get("top_cpu_pct").unwrap_or(0.0),
|
||||
top_mem_name: r.try_get("top_mem_name").unwrap_or_default(),
|
||||
top_mem_mb: r.try_get("top_mem_mb").unwrap_or(0),
|
||||
}
|
||||
}
|
||||
|
||||
fn exec_row_from(r: sqlx::sqlite::SqliteRow) -> ExecRow {
|
||||
use sqlx::Row;
|
||||
ExecRow {
|
||||
@@ -284,6 +300,43 @@ fn exec_row_from(r: sqlx::sqlite::SqliteRow) -> ExecRow {
|
||||
/// `received_at` is when the row landed in the DB. `kind` is currently
|
||||
/// `"logon"` or `"logoff"` — anything else is treated as an unknown kind
|
||||
/// by the renderer and shown verbatim.
|
||||
// NOTE(review): the doc lines directly above this point (`received_at` …
// `kind` is `"logon"` / `"logoff"` …) describe `LoginEventRow`. This struct
// and `PerfEventRow` appear to have been inserted between that comment and
// its struct, so rustdoc now attaches the text here — move it back.
/// One agent-reported continuous-metrics sample (1/min cadence).
/// `cpu_pct` is the overall system CPU% averaged over the sample window;
/// `top_cpu_*` / `top_mem_*` are the heaviest single process at the
/// moment of capture — enough to answer "what was eating the CPU at
/// 2 AM" without storing a full process tree on every row.
#[derive(Debug, Clone, Default)]
pub struct MetricsSampleRow {
    /// Sample timestamp; range queries compare it against a unix epoch
    /// second.
    pub at: i64,
    /// Overall system CPU usage in percent.
    pub cpu_pct: f64,
    /// Memory in use, in MB.
    pub mem_used_mb: i64,
    /// Total memory, in MB.
    pub mem_total_mb: i64,
    /// Number of processes; the ingest handler floors it at 0.
    pub proc_count: i64,
    /// Uptime in seconds; the ingest handler floors it at 0.
    pub uptime_secs: i64,
    /// Name of the top CPU consumer; length-capped by the ingest handler.
    pub top_cpu_name: String,
    /// CPU usage of the top CPU consumer, in percent.
    pub top_cpu_pct: f64,
    /// Name of the top memory consumer; length-capped by the ingest handler.
    pub top_mem_name: String,
    /// Memory of the top memory consumer, in MB; floored at 0 on ingest.
    pub top_mem_mb: i64,
}
|
||||
|
||||
/// One row from `device_perf_events`. `provider` is the event-log
/// channel short-name (`diag-perf`, `res-exh`, `system`); `event_id` is
/// the numeric Windows event id. `summary` is a UI-ready short string
/// the agent prepared (we don't re-localize Windows event messages
/// server-side); `detail_json` carries the structured payload for the
/// detail-row expansion.
#[derive(Debug, Clone, Default)]
pub struct PerfEventRow {
    /// Event timestamp as reported by the agent.
    // NOTE(review): presumably unix epoch seconds — confirm.
    pub at: i64,
    /// Event-log channel short-name.
    pub provider: String,
    /// Numeric Windows event id.
    pub event_id: i64,
    /// Severity level; the column defaults to 4 when not supplied.
    pub level: i64,
    /// Event-log record id; forms the dedup key together with the peer id
    /// and `provider`.
    pub record_id: i64,
    /// UI-ready short description prepared by the agent.
    pub summary: String,
    /// Structured payload, stored as text.
    pub detail_json: String,
    /// Set by the database's column default on insert (the insert statement
    /// does not bind it); only meaningful on rows read back.
    pub received_at: i64,
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct LoginEventRow {
|
||||
pub at: i64,
|
||||
@@ -472,6 +525,12 @@ impl Database {
|
||||
.execute(self.pool.get().await?.deref_mut())
|
||||
.await?;
|
||||
}
|
||||
// M8 schema: agent-reported performance time-series + perf events.
|
||||
for stmt in M8_SCHEMA {
|
||||
sqlx::query(stmt)
|
||||
.execute(self.pool.get().await?.deref_mut())
|
||||
.await?;
|
||||
}
|
||||
// Soft-ALTERs run after schema creation. SQLite < 3.35 lacks
|
||||
// `ADD COLUMN IF NOT EXISTS`; swallow the duplicate-column error
|
||||
// so re-runs are idempotent. Newly-added soft alters get appended
|
||||
@@ -3517,6 +3576,141 @@ impl Database {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ────────────────────── device_metrics_samples (M8) ────────────────────
|
||||
//
|
||||
// Time-series performance samples reported by the agent. INSERT OR
|
||||
// IGNORE keeps the on-the-wire retry path idempotent.
|
||||
|
||||
pub async fn metrics_sample_insert(
|
||||
&self,
|
||||
peer_id: &str,
|
||||
peer_uuid: &str,
|
||||
s: &MetricsSampleRow,
|
||||
) -> ResultType<()> {
|
||||
sqlx::query(
|
||||
"insert or ignore into device_metrics_samples \
|
||||
(peer_id, peer_uuid, at, cpu_pct, mem_used_mb, mem_total_mb, \
|
||||
proc_count, uptime_secs, top_cpu_name, top_cpu_pct, \
|
||||
top_mem_name, top_mem_mb) \
|
||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
)
|
||||
.bind(peer_id)
|
||||
.bind(peer_uuid)
|
||||
.bind(s.at)
|
||||
.bind(s.cpu_pct)
|
||||
.bind(s.mem_used_mb)
|
||||
.bind(s.mem_total_mb)
|
||||
.bind(s.proc_count)
|
||||
.bind(s.uptime_secs)
|
||||
.bind(&s.top_cpu_name)
|
||||
.bind(s.top_cpu_pct)
|
||||
.bind(&s.top_mem_name)
|
||||
.bind(s.top_mem_mb)
|
||||
.execute(self.pool.get().await?.deref_mut())
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Range query — used by the sparkline renderer. `since_at` is a
|
||||
/// unix epoch second; rows are returned oldest-first because the
|
||||
/// chart polyline draws left-to-right.
|
||||
pub async fn metrics_samples_since(
|
||||
&self,
|
||||
peer_id: &str,
|
||||
since_at: i64,
|
||||
) -> ResultType<Vec<MetricsSampleRow>> {
|
||||
let rows = sqlx::query(
|
||||
"select at, cpu_pct, mem_used_mb, mem_total_mb, proc_count, \
|
||||
uptime_secs, top_cpu_name, top_cpu_pct, top_mem_name, top_mem_mb \
|
||||
from device_metrics_samples \
|
||||
where peer_id = ? and at >= ? \
|
||||
order by at asc",
|
||||
)
|
||||
.bind(peer_id)
|
||||
.bind(since_at)
|
||||
.fetch_all(self.pool.get().await?.deref_mut())
|
||||
.await?;
|
||||
Ok(rows.into_iter().map(metrics_sample_row_from).collect())
|
||||
}
|
||||
|
||||
/// "What's happening right now" — the most recent sample, for the
|
||||
/// snapshot card on the device detail page. None if the agent has
|
||||
/// never reported.
|
||||
pub async fn metrics_latest(
|
||||
&self,
|
||||
peer_id: &str,
|
||||
) -> ResultType<Option<MetricsSampleRow>> {
|
||||
let row = sqlx::query(
|
||||
"select at, cpu_pct, mem_used_mb, mem_total_mb, proc_count, \
|
||||
uptime_secs, top_cpu_name, top_cpu_pct, top_mem_name, top_mem_mb \
|
||||
from device_metrics_samples \
|
||||
where peer_id = ? \
|
||||
order by at desc limit 1",
|
||||
)
|
||||
.bind(peer_id)
|
||||
.fetch_optional(self.pool.get().await?.deref_mut())
|
||||
.await?;
|
||||
Ok(row.map(metrics_sample_row_from))
|
||||
}
|
||||
|
||||
// ─────────────────────── device_perf_events (M8) ────────────────────────
|
||||
|
||||
pub async fn perf_event_insert(
|
||||
&self,
|
||||
peer_id: &str,
|
||||
peer_uuid: &str,
|
||||
e: &PerfEventRow,
|
||||
) -> ResultType<()> {
|
||||
sqlx::query(
|
||||
"insert or ignore into device_perf_events \
|
||||
(peer_id, peer_uuid, at, provider, event_id, level, record_id, \
|
||||
summary, detail_json) \
|
||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
)
|
||||
.bind(peer_id)
|
||||
.bind(peer_uuid)
|
||||
.bind(e.at)
|
||||
.bind(&e.provider)
|
||||
.bind(e.event_id)
|
||||
.bind(e.level)
|
||||
.bind(e.record_id)
|
||||
.bind(&e.summary)
|
||||
.bind(&e.detail_json)
|
||||
.execute(self.pool.get().await?.deref_mut())
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn perf_events_for_peer(
|
||||
&self,
|
||||
peer_id: &str,
|
||||
limit: i64,
|
||||
) -> ResultType<Vec<PerfEventRow>> {
|
||||
let rows = sqlx::query(
|
||||
"select at, provider, event_id, level, record_id, summary, detail_json, received_at \
|
||||
from device_perf_events \
|
||||
where peer_id = ? \
|
||||
order by at desc, id desc limit ?",
|
||||
)
|
||||
.bind(peer_id)
|
||||
.bind(limit)
|
||||
.fetch_all(self.pool.get().await?.deref_mut())
|
||||
.await?;
|
||||
Ok(rows
|
||||
.into_iter()
|
||||
.map(|r| PerfEventRow {
|
||||
at: r.try_get("at").unwrap_or(0),
|
||||
provider: r.try_get("provider").unwrap_or_default(),
|
||||
event_id: r.try_get("event_id").unwrap_or(0),
|
||||
level: r.try_get("level").unwrap_or(4),
|
||||
record_id: r.try_get("record_id").unwrap_or(0),
|
||||
summary: r.try_get("summary").unwrap_or_default(),
|
||||
detail_json: r.try_get("detail_json").unwrap_or_default(),
|
||||
received_at: r.try_get("received_at").unwrap_or(0),
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Most-recent-first list for the device detail page.
|
||||
pub async fn login_events_for_peer(
|
||||
&self,
|
||||
@@ -3989,6 +4183,65 @@ const M7_SCHEMA: &[&str] = &[
|
||||
ON device_login_events(peer_id, kind, session_id, at, username)",
|
||||
];
|
||||
|
||||
/// M8: agent-reported performance data — both continuous time-series
/// samples (1/min) and episodic events the OS itself flagged in the
/// `Microsoft-Windows-Diagnostics-Performance` / `Resource-Exhaustion-
/// Detector` / `System` event logs. Sized for "supporter confirms a
/// user's slow-laptop complaint":
///
///   * `device_metrics_samples` — overall CPU%, memory used/total, top CPU
///     / memory process. 60-second cadence, ~1440 rows/device/day,
///     ~10K/week. The UI plots a 24 h sparkline off this.
///   * `device_perf_events` — boot / shutdown / sleep degradation, memory
///     exhaustion, unexpected reboots, BSODs. Sparse — one device might
///     produce a handful per week. UI shows the most recent on the detail
///     page.
///
/// Both tables carry a UNIQUE INDEX that pairs with `INSERT OR IGNORE`,
/// so an agent that re-POSTs after a transient transport failure (or
/// across a restart that loses its in-memory queue) doesn't pile up
/// duplicates. Retention isn't enforced server-side yet — rows
/// accumulate, same model as `audit_conn` / `exec_history`.
///
/// Every statement is idempotent (`IF NOT EXISTS`), so the list can be
/// re-run on each start.
const M8_SCHEMA: &[&str] = &[
    "CREATE TABLE IF NOT EXISTS device_metrics_samples (
        id              INTEGER PRIMARY KEY AUTOINCREMENT,
        peer_id         TEXT NOT NULL,
        peer_uuid       TEXT NOT NULL,
        at              INTEGER NOT NULL,
        cpu_pct         REAL NOT NULL DEFAULT 0,
        mem_used_mb     INTEGER NOT NULL DEFAULT 0,
        mem_total_mb    INTEGER NOT NULL DEFAULT 0,
        proc_count      INTEGER NOT NULL DEFAULT 0,
        uptime_secs     INTEGER NOT NULL DEFAULT 0,
        top_cpu_name    TEXT NOT NULL DEFAULT '',
        top_cpu_pct     REAL NOT NULL DEFAULT 0,
        top_mem_name    TEXT NOT NULL DEFAULT '',
        top_mem_mb      INTEGER NOT NULL DEFAULT 0,
        received_at     INTEGER NOT NULL DEFAULT (strftime('%s','now'))
    )",
    // No separate (peer_id, at DESC) index on this table: the unique index
    // below has the same key, and SQLite can walk an index in either
    // direction, so it already serves both the `at >= ? order by at asc`
    // range scan and the `order by at desc limit 1` lookup. A second index
    // would only double index maintenance on the highest-volume table.
    // Databases that already created `idx_dms_peer_at` keep it; it is
    // harmless and can be dropped manually.
    "CREATE UNIQUE INDEX IF NOT EXISTS uq_dms \
     ON device_metrics_samples(peer_id, at)",
    "CREATE TABLE IF NOT EXISTS device_perf_events (
        id              INTEGER PRIMARY KEY AUTOINCREMENT,
        peer_id         TEXT NOT NULL,
        peer_uuid       TEXT NOT NULL,
        at              INTEGER NOT NULL,
        provider        TEXT NOT NULL,
        event_id        INTEGER NOT NULL,
        level           INTEGER NOT NULL DEFAULT 4,
        record_id       INTEGER NOT NULL DEFAULT 0,
        summary         TEXT NOT NULL DEFAULT '',
        detail_json     TEXT NOT NULL DEFAULT '',
        received_at     INTEGER NOT NULL DEFAULT (strftime('%s','now'))
    )",
    // Needed here: the unique index below is keyed on provider/record_id and
    // cannot serve the per-peer "most recent first" listing.
    "CREATE INDEX IF NOT EXISTS idx_dpe_peer_at \
     ON device_perf_events(peer_id, at DESC)",
    // NOTE(review): the dedup key relies on `record_id` being unique per
    // (peer, provider). Rows stored with the default `record_id = 0` collide
    // with each other, and record ids that restart (for example after an
    // event log is cleared) would be ignored as duplicates — confirm this is
    // acceptable or widen the key.
    "CREATE UNIQUE INDEX IF NOT EXISTS uq_dpe \
     ON device_perf_events(peer_id, provider, record_id)",
];
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use hbb_common::tokio;
|
||||
|
||||
Reference in New Issue
Block a user