Implement signed API communication to improve security

2026-05-22 12:50:42 +02:00
parent 21b25bcc1b
commit 475da0e950
12 changed files with 906 additions and 24 deletions
@@ -0,0 +1,184 @@
+//! Ed25519-signature gate for the agent-facing HTTP API
+//! (`/api/heartbeat`, `/api/sysinfo`).
+//!
+//! Trust root: the device's Ed25519 public key is already written into
+//! `peer.pk` during the rendezvous `RegisterPk` handshake (TCP/protobuf,
+//! port 21116). That handshake proves possession of the matching private key
+//! to the rendezvous server — so any later HTTP request signed by the same
+//! key is provably from the same device.
+//!
+//! Cutover: per-peer. `peer.managed = 0` (default) keeps stock-client
+//! behaviour — no signature required. `managed = 1` requires a valid sig on
+//! every request. The flag flips from 0→1 on the first valid signature we
+//! observe (TOFU) or via the admin endpoint. It never flips back from a
+//! request — only an admin can downgrade.
+//!
+//! Wire format (both headers required on signed requests):
+//!   X-RD-Device-Id: <id>
+//!   X-RD-Signature: v1.<unix_ts>.<base64(ed25519_sig)>
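+//! where the signed message is the byte concatenation below. TS is the
+//! decimal timestamp text exactly as it appears in the header, and
+//! sha256(body) is the raw 32-byte digest (not hex-encoded):
+//!   "rd-api-v1\n" || METHOD || "\n" || PATH || "\n" || TS || "\n" || sha256(body)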
+
+use crate::api::error::ApiError;
+use crate::api::state::AppState;
+use axum::http::HeaderMap;
+use lazy_static::lazy_static;
+use std::collections::HashMap;
+use std::sync::Mutex;
+use std::sync::Arc;
+
+/// Signature scheme version. Must equal the first dot-separated field of the
+/// `X-RD-Signature` header value; anything else is rejected.
+const SIG_VERSION: &str = "v1";
+/// Name of the header carrying the claimed device id (written in lower case).
+const HEADER_ID: &str = "x-rd-device-id";
+/// Name of the header carrying `v1.<unix_ts>.<base64 sig>` (written in lower case).
+const HEADER_SIG: &str = "x-rd-signature";
+/// Maximum allowed absolute difference, in seconds, between the server clock
+/// and the timestamp embedded in the signature header.
+const SKEW_TOLERANCE_SECS: i64 = 300;
+/// How long, in seconds, a replay-cache entry is kept after it is inserted.
+const REPLAY_WINDOW_SECS: i64 = 600;
+/// Upper bound on the number of replay-cache entries. Once reached, new
+/// requests are no longer recorded in the cache.
+const REPLAY_CACHE_MAX: usize = 16_384;
+
+/// Outcome of running the gate. The handler uses this to decide which `id`
+/// to trust as the device identity:
+///   - `Verified` → caller is cryptographically that device.
+///   - `LegacyUnsigned` → managed=0 peer that sent no sig headers; the
+///     handler may proceed but the body `id` is trusted only weakly
+///     (same risk as today). The handler still calls `get_peer` to confirm
+///     the id is known.
+pub enum AuthOutcome {
+    /// The request carried both signature headers and the signature checked
+    /// out against the public key stored for the peer. `id` is the value of
+    /// the device-id header, i.e. the identity the signature was verified for.
+    Verified { id: String },
+    /// The request carried neither signature header. The gate has NOT
+    /// checked the peer's `managed` flag at this point; the handler must do
+    /// that via `enforce_managed_for_id` once it knows the body id.
+    LegacyUnsigned,
+}
+
+lazy_static! {
+    /// Replay cache. Key: "<id>|<ts>|<sig_first32>", where `sig_first32` is
+    /// the first 32 characters of the base64 signature text taken from the
+    /// header. Value: expiry unix ts; entries whose expiry has passed are
+    /// swept whenever the cache is touched for an insert.
+    /// Small enough that the sweep-on-insert cost is negligible (the number
+    /// of entries is capped at `REPLAY_CACHE_MAX`).
+    static ref REPLAY: Mutex<HashMap<String, i64>> = Mutex::new(HashMap::new());
+}
+
+/// Runs the Ed25519 signature gate for one agent-facing HTTP request.
+///
+/// `method` and `path` are fed into the signed message verbatim, so they must
+/// be byte-identical to what the agent signed. `body` is the raw request
+/// body; only its SHA-256 digest is signed.
+///
+/// Returns:
+///   - `Ok(AuthOutcome::LegacyUnsigned)` when neither signature header is
+///     present. The handler doesn't know the body id yet, so the managed
+///     check is deferred: the caller must call `enforce_managed_for_id`
+///     after it has parsed the body.
+///   - `Ok(AuthOutcome::Verified { id })` when the signature is valid for
+///     the public key stored for `id` and the request is not a replay.
+///
+/// # Errors
+///   - `ApiError::Unauthorized` for partial/undecodable headers, a malformed
+///     signature header, a timestamp outside the skew window, an unknown
+///     peer, a peer without a registered key, a bad signature, or a replay.
+///   - `ApiError::Internal` when the peer lookup itself fails.
+pub async fn verify(
+    state: &Arc<AppState>,
+    method: &str,
+    path: &str,
+    headers: &HeaderMap,
+    body: &[u8],
+) -> Result<AuthOutcome, ApiError> {
+    let sig_raw = headers.get(HEADER_SIG);
+    let id_raw = headers.get(HEADER_ID);
+
+    // The legacy path is chosen on header *presence*, not on whether the
+    // value decodes: a header that is present but not valid text is a broken
+    // signing attempt and must not silently downgrade to the unsigned path.
+    if sig_raw.is_none() && id_raw.is_none() {
+        return Ok(AuthOutcome::LegacyUnsigned);
+    }
+
+    // Partial, empty or undecodable headers: someone tried to sign but
+    // messed up the request. Treat as an outright failure.
+    let sig_hdr = sig_raw
+        .and_then(|v| v.to_str().ok())
+        .filter(|s| !s.is_empty());
+    let id_hdr = id_raw
+        .and_then(|v| v.to_str().ok())
+        .filter(|s| !s.is_empty());
+    let (sig_hdr, id_hdr) = match (sig_hdr, id_hdr) {
+        (Some(s), Some(i)) => (s, i),
+        _ => return Err(ApiError::Unauthorized),
+    };
+
+    // Parse "v1.<ts>.<b64>".
+    let mut parts = sig_hdr.splitn(3, '.');
+    let ver = parts.next().unwrap_or("");
+    let ts_s = parts.next().unwrap_or("");
+    let sig_b64 = parts.next().unwrap_or("");
+    if ver != SIG_VERSION || ts_s.is_empty() || sig_b64.is_empty() {
+        return Err(ApiError::Unauthorized);
+    }
+    let ts: i64 = ts_s.parse().map_err(|_| ApiError::Unauthorized)?;
+    let now = chrono::Utc::now().timestamp();
+    if !timestamp_within_skew(now, ts) {
+        return Err(ApiError::Unauthorized);
+    }
+    let sig_bytes = base64::decode(sig_b64).map_err(|_| ApiError::Unauthorized)?;
+
+    // Cheap replay pre-check before the DB lookup and the crypto. This is
+    // read-only: nothing is recorded until the signature has been verified,
+    // so unauthenticated traffic cannot fill the cache and thereby switch
+    // replay protection off.
+    let replay_key = {
+        let prefix: String = sig_b64.chars().take(32).collect();
+        format!("{}|{}|{}", id_hdr, ts, prefix)
+    };
+    if replay_seen(&replay_key, now) {
+        return Err(ApiError::Unauthorized);
+    }
+
+    // Look up the peer's pk and managed flag in one query.
+    let (pk_bytes, managed) = state
+        .db
+        .peer_get_auth(id_hdr)
+        .await
+        .map_err(|e| ApiError::Internal(e.to_string()))?
+        .ok_or(ApiError::Unauthorized)?;
+    if pk_bytes.is_empty() {
+        // No PK registered — rendezvous hasn't completed. Can't verify.
+        return Err(ApiError::Unauthorized);
+    }
+
+    // Build the canonical signed message:
+    //   "rd-api-v1\n" || METHOD || "\n" || PATH || "\n" || TS || "\n" || sha256(body)
+    // TS is the header text as received (not the re-formatted integer) and
+    // the digest is appended as raw bytes.
+    let body_sha = sodiumoxide::crypto::hash::sha256::hash(body);
+    let mut msg = Vec::with_capacity(64 + method.len() + path.len());
+    msg.extend_from_slice(b"rd-api-v1\n");
+    msg.extend_from_slice(method.as_bytes());
+    msg.push(b'\n');
+    msg.extend_from_slice(path.as_bytes());
+    msg.push(b'\n');
+    msg.extend_from_slice(ts_s.as_bytes());
+    msg.push(b'\n');
+    msg.extend_from_slice(body_sha.as_ref());
+
+    let pk = sodiumoxide::crypto::sign::PublicKey::from_slice(&pk_bytes)
+        .ok_or(ApiError::Unauthorized)?;
+    let sig = sodiumoxide::crypto::sign::Signature::from_bytes(&sig_bytes)
+        .map_err(|_| ApiError::Unauthorized)?;
+    if !sodiumoxide::crypto::sign::verify_detached(&sig, &msg, &pk) {
+        return Err(ApiError::Unauthorized);
+    }
+
+    // The signature is genuine: record it now. Check-and-insert happens
+    // under a single lock acquisition so two concurrent copies of the same
+    // request cannot both get through.
+    if !replay_record(replay_key, now) {
+        return Err(ApiError::Unauthorized);
+    }
+
+    // TOFU promote: first valid sig flips managed=0 → 1. After this, the
+    // same device can no longer fall back to the legacy unsigned path.
+    if !managed {
+        if let Err(e) = state.db.peer_set_managed(id_hdr, true).await {
+            hbb_common::log::warn!("peer_set_managed({}) failed: {}", id_hdr, e);
+            // Don't fail the request — the sig was valid, the promote is
+            // best-effort. Next request will retry the promote.
+        } else {
+            hbb_common::log::info!("peer {} TOFU-promoted to managed=1", id_hdr);
+        }
+    }
+
+    Ok(AuthOutcome::Verified {
+        id: id_hdr.to_string(),
+    })
+}
+
+/// True when `ts` is within `SKEW_TOLERANCE_SECS` of `now`. Uses checked
+/// arithmetic because `ts` comes straight from a request header and may be
+/// any `i64`; an overflowing difference is simply out of range.
+fn timestamp_within_skew(now: i64, ts: i64) -> bool {
+    now.checked_sub(ts)
+        .and_then(i64::checked_abs)
+        .map_or(false, |delta| delta <= SKEW_TOLERANCE_SECS)
+}
+
+/// Locks the replay cache, recovering the guard if a previous holder
+/// panicked. The map holds only independent key/expiry pairs, so it is
+/// still usable after a poison and the gate keeps serving requests.
+fn replay_cache() -> std::sync::MutexGuard<'static, HashMap<String, i64>> {
+    REPLAY.lock().unwrap_or_else(|poisoned| poisoned.into_inner())
+}
+
+/// Read-only replay probe: true when `key` is cached and not yet expired.
+fn replay_seen(key: &str, now: i64) -> bool {
+    replay_cache().get(key).map_or(false, |exp| *exp >= now)
+}
+
+/// Sweeps expired entries, then atomically checks and records `key`.
+/// Returns false when `key` was already present (a replay). Entries live
+/// through second `now + REPLAY_WINDOW_SECS` inclusive, which covers the
+/// last second in which a maximally future-dated request is still inside
+/// the skew window.
+fn replay_record(key: String, now: i64) -> bool {
+    let mut cache = replay_cache();
+    cache.retain(|_, exp| *exp >= now);
+    if cache.contains_key(&key) {
+        return false;
+    }
+    // If the cache is full we accept without recording (no DoS via cache
+    // exhaustion). The skew window already bounds replay risk, and only
+    // verified signatures ever reach this point.
+    if cache.len() < REPLAY_CACHE_MAX {
+        cache.insert(key, now + REPLAY_WINDOW_SECS);
+    }
+    true
+}
+
+/// Second half of the gate for unsigned requests. Handlers invoke this once
+/// the body has been parsed and the device id is known; it only matters when
+/// `verify` answered `LegacyUnsigned`.
+///
+/// Rule enforced: a peer whose stored `managed` flag is set may not be
+/// addressed by an unsigned request. An empty `id`, an unknown peer, or a
+/// peer that is not managed all pass.
+///
+/// # Errors
+///   - `ApiError::Unauthorized` when the peer is managed.
+///   - `ApiError::Internal` when the peer lookup fails.
+pub async fn enforce_managed_for_id(
+    state: &Arc<AppState>,
+    id: &str,
+) -> Result<(), ApiError> {
+    if !id.is_empty() {
+        let auth = state
+            .db
+            .peer_get_auth(id)
+            .await
+            .map_err(|e| ApiError::Internal(e.to_string()))?;
+        // Only the managed flag matters here; the stored key is ignored.
+        if let Some((_, true)) = auth {
+            return Err(ApiError::Unauthorized);
+        }
+    }
+    Ok(())
+}