//! `POST /api/agent/metrics` — continuous performance time-series the //! agent samples at ~1/min. The admin Devices detail page renders this //! as a CPU / memory sparkline plus a "current snapshot" card. //! //! Auth: same per-peer signed-API gate as the other agent endpoints — //! see [`crate::api::device_auth`]. Body shape (batched so an agent //! that's catching up after a transport outage can land everything in //! one POST): //! //! ```json //! { //! "id": "", //! "uuid": "", //! "samples": [ //! { //! "at": 1717920000, //! "cpu_pct": 42.5, //! "mem_used_mb": 7820, //! "mem_total_mb": 16384, //! "proc_count": 341, //! "uptime_secs": 173000, //! "top_cpu_name": "chrome.exe", //! "top_cpu_pct": 18.3, //! "top_mem_name": "chrome.exe", //! "top_mem_mb": 1240 //! } //! ] //! } //! ``` use crate::api::device_auth::{self, AuthOutcome}; use crate::api::error::ApiError; use crate::api::state::AppState; use crate::database::MetricsSampleRow; use axum::body::Bytes; use axum::extract::Extension; use axum::http::HeaderMap; use serde::Deserialize; use std::sync::Arc; #[derive(Debug, Deserialize)] pub struct MetricsSampleIn { pub at: i64, #[serde(default)] pub cpu_pct: f64, #[serde(default)] pub mem_used_mb: i64, #[serde(default)] pub mem_total_mb: i64, #[serde(default)] pub proc_count: i64, #[serde(default)] pub uptime_secs: i64, #[serde(default)] pub top_cpu_name: String, #[serde(default)] pub top_cpu_pct: f64, #[serde(default)] pub top_mem_name: String, #[serde(default)] pub top_mem_mb: i64, } #[derive(Debug, Deserialize)] pub struct MetricsBody { pub id: String, pub uuid: String, pub samples: Vec, } /// Cap per request. At 60s sampling cadence + the agent's 30-minute /// retry-and-drain budget, even a long outage should fit well under this. const MAX_SAMPLES_PER_POST: usize = 512; /// Defensive bound on string fields the agent puts in `top_*_name` — a /// runaway process name doesn't get to balloon the DB row. const MAX_PROC_NAME_LEN: usize = 128; pub async fn metrics( Extension(state): Extension>, headers: HeaderMap, body: Bytes, ) -> Result { let outcome = device_auth::verify(&state, "POST", "/api/agent/metrics", &headers, &body).await?; let payload: MetricsBody = serde_json::from_slice(&body) .map_err(|_| ApiError::BadRequest("invalid json".into()))?; if payload.id.is_empty() || payload.uuid.is_empty() { return Err(ApiError::BadRequest("id and uuid are required".into())); } if payload.samples.is_empty() { return Ok("OK".to_string()); } if payload.samples.len() > MAX_SAMPLES_PER_POST { return Err(ApiError::BadRequest(format!( "too many samples in one POST (max {MAX_SAMPLES_PER_POST})" ))); } let id = match outcome { AuthOutcome::Verified { id: signed_id } => { if payload.id != signed_id { return Err(ApiError::Unauthorized); } signed_id } AuthOutcome::LegacyUnsigned => { device_auth::enforce_managed_for_id(&state, &payload.id).await?; payload.id.clone() } }; let peer = state .db .get_peer(&id) .await .map_err(|e| ApiError::Internal(e.to_string()))?; if peer.is_none() { return Ok("ID_NOT_FOUND".to_string()); } let mut accepted = 0usize; for s in &payload.samples { // Sanity-clamp the floats and string lengths. The agent should // produce well-formed values, but the public-API shape means // garbage-in shouldn't propagate to garbage-on-screen. let cpu_pct = clamp_pct(s.cpu_pct); let top_cpu_pct = clamp_pct(s.top_cpu_pct); let row = MetricsSampleRow { at: s.at, cpu_pct, mem_used_mb: s.mem_used_mb.max(0), mem_total_mb: s.mem_total_mb.max(0), proc_count: s.proc_count.max(0), uptime_secs: s.uptime_secs.max(0), top_cpu_name: truncate(&s.top_cpu_name, MAX_PROC_NAME_LEN), top_cpu_pct, top_mem_name: truncate(&s.top_mem_name, MAX_PROC_NAME_LEN), top_mem_mb: s.top_mem_mb.max(0), }; if let Err(e) = state .db .metrics_sample_insert(&id, &payload.uuid, &row) .await { hbb_common::log::warn!( "metrics_sample_insert for peer {} failed: {}", id, e ); continue; } accepted += 1; } hbb_common::log::debug!( "metrics: peer={} accepted={}/{}", id, accepted, payload.samples.len() ); Ok("OK".to_string()) } fn clamp_pct(v: f64) -> f64 { if v.is_nan() { 0.0 } else { v.clamp(0.0, 100.0) } } /// Char-aware truncate (so we don't slice mid-multibyte). The cap is /// generous so process names that include arguments or Unicode survive. fn truncate(s: &str, max_chars: usize) -> String { if s.chars().count() <= max_chars { s.to_string() } else { s.chars().take(max_chars).collect() } }