Implement auto-update routine
build-windows / build-hello-agent-x64 (push) Successful in 5m5s
build-windows / sign-hello-agent-x64 (push) Successful in 5s
build-windows / validate-hello-agent-x64 (push) Successful in 6s

This commit is contained in:
2026-05-21 13:34:02 +02:00
parent d10e547b70
commit e45abbe64d
10 changed files with 654 additions and 201 deletions
+268 -69
View File
@@ -3,7 +3,7 @@
// Three responsibilities:
//
// 1. `install()` — copy the binary to %ProgramFiles%\hello-agent, mirror the
// calling user's `HelloAgent.toml` into the LocalService-effective
// calling user's `hello-agent.toml` into the LocalService-effective
// config dir so the SYSTEM service inherits the --config blob, register
// the service with the SCM pointing at the installed exe, and start it.
// Idempotent.
@@ -29,14 +29,21 @@ use std::sync::Arc;
use std::time::{Duration, Instant};
use windows_service::service::{
ServiceAccess, ServiceControl, ServiceControlAccept, ServiceErrorControl, ServiceExitCode,
ServiceAccess, ServiceAction, ServiceActionType, ServiceControl, ServiceControlAccept,
ServiceErrorControl, ServiceExitCode, ServiceFailureActions, ServiceFailureResetPeriod,
ServiceInfo, ServiceStartType, ServiceState, ServiceStatus, ServiceType,
};
use windows_service::service_control_handler::{self, ServiceControlHandlerResult};
use windows_service::service_dispatcher;
use windows_service::service_manager::{ServiceManager, ServiceManagerAccess};
const SERVICE_NAME: &str = "HelloAgent";
/// Internal service name registered with the SCM. Must equal `crate::APP_NAME`
/// because upstream `librustdesk::platform::is_self_service_running` queries
/// `is_service_running(&crate::get_app_name())` — i.e. it looks up the
/// service whose name *is* the app name. If these diverge, the `--update`
/// path's `sc stop` / `sc start` use the wrong name and the service is
/// left in a Stopped state after a self-update.
const SERVICE_NAME: &str = crate::APP_NAME;
const DISPLAY_NAME: &str = "HelloAgent Remote Support";
const SERVICE_DESCRIPTION: &str =
"HelloAgent — headless remote-support agent (RustDesk-protocol-compatible). \
@@ -47,6 +54,11 @@ const SERVICE_TYPE: ServiceType = ServiceType::OWN_PROCESS;
const INSTALL_SUBDIR: &str = "hello-agent";
const INSTALLED_EXE_NAME: &str = "hello-agent.exe";
/// Display name used for the Windows Firewall rule. Stable across versions
/// so `--uninstall` (or a re-install that clears it before re-adding) can
/// find and delete the prior entry by name.
const FIREWALL_RULE_NAME: &str = "HelloAgent";
// ----------------------------- paths ---------------------------------------
/// `%ProgramFiles%\hello-agent`. Falls back to `C:\Program Files\hello-agent`
@@ -68,9 +80,9 @@ fn install_dir() -> PathBuf {
/// Note the trailing `config` segment: `directories_next::ProjectDirs`,
/// which hbb_common uses on Windows, appends a literal `\config` to the
/// app's roaming dir (so the user-side path is
/// `%APPDATA%\HelloAgent\config\HelloAgent.toml`, not
/// `…\HelloAgent\…`). The SYSTEM-side path follows the same convention.
/// The `HelloAgent` segment is sourced from `crate::APP_NAME` so it stays
/// `%APPDATA%\hello-agent\config\hello-agent.toml`, not
/// `…\hello-agent\…`). The SYSTEM-side path follows the same convention.
/// The `hello-agent` segment is sourced from `crate::APP_NAME` so it stays
/// in lockstep with the `APP_NAME` we install into hbb_common at startup.
fn service_config_dir() -> PathBuf {
let system_root = std::env::var_os("SystemRoot")
@@ -88,11 +100,15 @@ fn service_config_dir() -> PathBuf {
// ----------------------------- install --------------------------------------
pub fn install() -> Result<()> {
// Probe-open the SCM with CREATE_SERVICE rights up front; if the caller
// isn't elevated this fails with ERROR_ACCESS_DENIED (raw_os_error == 5)
// and we surface a single human-readable message instead of bubbling
// up a Win32 errno string. Anything else propagates as-is.
let scm = ServiceManager::local_computer(
None::<&str>,
ServiceManagerAccess::CONNECT | ServiceManagerAccess::CREATE_SERVICE,
)
.context("open SCM")?;
.map_err(map_scm_open_error)?;
// 1. If a previous install left a running service, stop it before we
// overwrite its binary. Otherwise the file copy in step 2 fails
@@ -106,8 +122,8 @@ pub fn install() -> Result<()> {
// idempotent / usable as an in-place update — without it, the
// `stage_binary` file copy below fails with "access denied"
// whenever a `--cm` child is still holding the old exe open.
// `kill_orphan_processes` uses taskkill with `/FI "PID ne <ours>"`
// so it never kills the running installer.
// `kill_orphan_processes` walks the process table via sysinfo and
// filters out our own pid so the installer doesn't suicide.
kill_orphan_processes();
// 2. Pin the binary to %ProgramFiles%\hello-agent. The user might be
@@ -120,17 +136,17 @@ pub fn install() -> Result<()> {
// first, fall back to popup). Older hello-agent installs wrote
// "click" here, which disabled the password path; clearing it
// every install makes upgrades idempotent. These write into the
// *calling user's* %APPDATA%\HelloAgent\ — we mirror the result
// *calling user's* %APPDATA%\hello-agent\ — we mirror the result
// into the service's effective dir in step 4.
hbb_common::config::Config::set_option("stop-service".into(), "".into());
hbb_common::config::Config::set_option("approve-mode".into(), "".into());
// 4. Mirror the calling user's `HelloAgent.toml` / `HelloAgent2.toml`
// 4. Mirror the calling user's `hello-agent.toml` / `hello-agent2.toml`
// into the LocalService-effective config root that the SYSTEM
// service will actually read. Without this, --config writes to e.g.
// C:\Users\Admin\AppData\Roaming\HelloAgent\, but the service runs
// C:\Users\Admin\AppData\Roaming\hello-agent\, but the service runs
// as LocalSystem and (via hbb_common's `patch()`) reads from
// C:\Windows\ServiceProfiles\LocalService\AppData\Roaming\HelloAgent\.
// C:\Windows\ServiceProfiles\LocalService\AppData\Roaming\hello-agent\.
if let Err(e) = mirror_config_to_service_dir() {
log::warn!(
"could not mirror config to service dir ({e:#}); the service may not see --config until first heartbeat"
@@ -183,6 +199,60 @@ pub fn install() -> Result<()> {
let _ = svc.set_description(SERVICE_DESCRIPTION);
// 5b. Configure SCM auto-restart on unexpected exit. Without this,
// a panic in the `--service` supervisor leaves the agent permanently
// Stopped until the host reboots. The schedule restarts after 5s,
// then 30s, then 60s; for any further failure inside the reset
// window the SCM repeats the last action, so later restarts keep the
// 60s delay. The failure-count reset window is one day, so transient
// hiccups don't accumulate and stable hosts converge back to
// "running" within a minute.
//
// `set_failure_actions_on_non_crash_failures(true)` is what makes
// these actions fire when the service reports Stopped with a non-zero
// exit code, not just when the process dies without reporting Stopped
// (which is what the SCM treats as a crash). Both calls are
// best-effort: if the underlying ChangeServiceConfig2 fails for some
// reason, we log a warning and continue with the install.
let failure_actions = ServiceFailureActions {
reset_period: ServiceFailureResetPeriod::After(Duration::from_secs(60 * 60 * 24)),
reboot_msg: None,
command: None,
actions: Some(vec![
ServiceAction {
action_type: ServiceActionType::Restart,
delay: Duration::from_secs(5),
},
ServiceAction {
action_type: ServiceActionType::Restart,
delay: Duration::from_secs(30),
},
ServiceAction {
action_type: ServiceActionType::Restart,
delay: Duration::from_secs(60),
},
]),
};
if let Err(e) = svc.update_failure_actions(failure_actions) {
log::warn!("could not set SCM failure actions ({e}); auto-restart-on-crash disabled");
}
if let Err(e) = svc.set_failure_actions_on_non_crash_failures(true) {
log::warn!(
"could not enable failure actions for clean-exit-with-error ({e}); only hard crashes will trigger restart"
);
}
// 5c. Allow inbound TCP/UDP to hello-agent.exe at the Windows Firewall.
// A vanilla deploy doesn't actually need it (the rendezvous/relay
// connections are outbound), but operators who enable `direct-server`
// (TCP 21118) or `enable-lan-discovery` (UDP 21119) via the --config
// blob need this rule or those features silently fail. Cheaper to
// add it always than to discover at support-call time that the
// deploy never matched a firewall rule. Best-effort: if netsh
// isn't present (extremely stripped-down server SKUs) we log and
// continue.
if let Err(e) = install_firewall_rule(&target_exe) {
log::warn!("could not install firewall rule ({e:#}); inbound connections may be blocked");
}
// 6. Start the service. (Step 1 already stopped any prior instance.)
svc.start::<&str>(&[]).context("start service")?;
log::info!(
@@ -250,7 +320,7 @@ fn stage_binary() -> Result<PathBuf> {
Ok(dest)
}
/// Copy the calling user's `HelloAgent.toml` + `HelloAgent2.toml` into
/// Copy the calling user's `hello-agent.toml` + `hello-agent2.toml` into
/// the LocalService-effective config dir so the SYSTEM service sees them.
fn mirror_config_to_service_dir() -> Result<()> {
let dest_dir = service_config_dir();
@@ -272,7 +342,7 @@ fn mirror_config_to_service_dir() -> Result<()> {
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
// Calling user never had this file (e.g. --install without
// --config, or first ever run on this machine, or the user
// wiped %APPDATA%\HelloAgent\ between tests). Logged at
// wiped %APPDATA%\hello-agent\ between tests). Logged at
// info so the post-install log shows clearly which toml
// files were available and which weren't.
log::info!(
@@ -298,6 +368,16 @@ fn mirror_config_to_service_dir() -> Result<()> {
// ----------------------------- uninstall ------------------------------------
pub fn uninstall() -> Result<()> {
// Probe-open the SCM with the rights we'll need (CONNECT for the SCM
// handle itself, and DELETE on the per-service open below). The same
// elevation-error mapping as install() — surface a single clear message
// when the operator forgot the elevated prompt.
let scm = ServiceManager::local_computer(
None::<&str>,
ServiceManagerAccess::CONNECT,
)
.map_err(map_scm_open_error)?;
// Kill every hello-agent.exe process except ourselves *first*. We can't
// rely on the SCM Stop control alone because the `--cm` child spawned
// via `run_as_user` runs under the logged-in user's token, not SYSTEM,
@@ -305,15 +385,9 @@ pub fn uninstall() -> Result<()> {
// Doing this up front means the SCM stop below is usually a no-op
// (service process already gone) and the rmdir at the end no longer
// races a lingering child holding hello-agent.exe open. Our own PID
// is excluded via taskkill's `/FI` so the uninstaller doesn't suicide.
// is excluded via the sysinfo filter so the uninstaller doesn't suicide.
kill_orphan_processes();
let scm = ServiceManager::local_computer(
None::<&str>,
ServiceManagerAccess::CONNECT,
)
.context("open SCM")?;
match scm.open_service(
SERVICE_NAME,
ServiceAccess::QUERY_STATUS | ServiceAccess::STOP | ServiceAccess::DELETE,
@@ -343,9 +417,17 @@ pub fn uninstall() -> Result<()> {
Err(e) => return Err(anyhow!("open_service: {e}")),
}
// Remove the firewall rule we installed (best-effort). netsh delete is
// idempotent — if the rule was never there (or someone manually removed
// it) netsh returns 1 with "No rules match the specified criteria",
// which we treat as success.
if let Err(e) = delete_firewall_rule() {
log::warn!("could not delete firewall rule ({e:#}); remove it manually if needed");
}
cleanup_install_dir();
// We deliberately do NOT delete the LocalService config dir here.
// `HelloAgent.toml` in that directory holds the agent's id + keypair,
// `hello-agent.toml` in that directory holds the agent's id + keypair,
// which the rustdesk-server / rendezvous server has registered against
// the agent's id. Wiping it forces the next --install to generate
// fresh keys, which the rendezvous server's cached entry (and any
@@ -354,7 +436,7 @@ pub fn uninstall() -> Result<()> {
// the connection sits idle until the peer times out.
//
// Operators who want a true hard wipe can run:
// rmdir /s /q "%SystemRoot%\ServiceProfiles\LocalService\AppData\Roaming\HelloAgent"
// rmdir /s /q "%SystemRoot%\ServiceProfiles\LocalService\AppData\Roaming\hello-agent"
// and then delete the device record from the rustdesk-server admin UI.
log::info!("preserved LocalService config dir to keep agent keys/id stable across reinstalls");
Ok(())
@@ -365,58 +447,175 @@ pub fn uninstall() -> Result<()> {
/// old `--cm` child holding the exe open) and `--uninstall` (so the
/// rmdir at the end isn't racing a lingering child).
///
/// Shells out to the built-in `taskkill` rather than re-implementing the
/// Toolhelp32 enumeration in winapi: taskkill ships in every Windows
/// install since XP, runs in milliseconds, and the `/FI "PID ne <ours>"`
/// filter handles the "don't suicide ourselves" requirement declaratively.
///
/// Exit code 128 from taskkill means "no matching processes" — common
/// case when there's no orphan to clean up — and we treat it the same
/// as success. Anything else gets logged but does not fail the caller.
/// Walks the process table via `hbb_common::sysinfo` (the same enumerator
/// the vendored rustdesk uses internally) and calls `Process::kill` —
/// equivalent to `TerminateProcess` under the hood. After issuing the
/// kills we poll the process table for actual exit rather than guessing
/// at a 500 ms sleep: `TerminateProcess` marks the process as exited but
/// the kernel takes a variable amount of time to release the image-file
/// handle, and we only want to return once those handles are gone (so
/// the install-time file copy and uninstall-time rmdir don't race a
/// half-finalized victim).
fn kill_orphan_processes() {
// hbb_common pulls the rustdesk-org sysinfo 0.29 fork, which exposes
// System/Process/Pid with inherent methods (no SystemExt/ProcessExt
// trait imports needed — that style was removed when this fork
// diverged from upstream 0.30).
use hbb_common::sysinfo::{Pid, System};
let our_pid = std::process::id();
let pid_filter = format!("PID ne {our_pid}");
let output = std::process::Command::new("taskkill")
.args([
"/F",
"/IM",
INSTALLED_EXE_NAME,
"/FI",
&pid_filter,
])
.output();
match output {
Ok(out) => {
let code = out.status.code();
let stdout = String::from_utf8_lossy(&out.stdout);
let stderr = String::from_utf8_lossy(&out.stderr);
if out.status.success() {
log::info!(
"taskkill killed orphan {INSTALLED_EXE_NAME} processes (excluding pid {our_pid}): {}",
stdout.trim()
);
// TerminateProcess is synchronous w.r.t. the kernel marking
// the process as exited, but kernel-mode finalization
// (releasing file handles, paging out the image section)
// can lag by up to a few hundred ms. The rmdir that follows
// races against this: without the pause, an immediate
// remove_dir_all can still see "file in use" on the just-
// killed process's exe.
std::thread::sleep(Duration::from_millis(500));
} else if code == Some(128) {
log::info!("no orphan {INSTALLED_EXE_NAME} processes to kill");
let target = INSTALLED_EXE_NAME;
let mut system = System::new();
system.refresh_processes();
let victims: Vec<Pid> = system
.processes()
.iter()
.filter(|(pid, p)| {
pid.as_u32() != our_pid && p.name().eq_ignore_ascii_case(target)
})
.map(|(pid, _)| *pid)
.collect();
if victims.is_empty() {
log::info!("no orphan {target} processes to kill");
return;
}
let killed: Vec<u32> = victims
.iter()
.filter_map(|pid| {
let process = system.process(*pid)?;
if process.kill() {
Some(pid.as_u32())
} else {
log::warn!(
"taskkill returned {code:?}: stdout={} stderr={}",
stdout.trim(),
stderr.trim(),
);
log::warn!("Process::kill failed for pid {}", pid.as_u32());
None
}
})
.collect();
log::info!("issued kill on {} {target} process(es): {killed:?}", killed.len());
// Poll for actual exit. 5 s ceiling is generous (TerminateProcess
// usually finalizes within tens of ms) but cheap — we only burn it
// when the kernel really is dragging its feet, which is the exact
// case the old `sleep(500ms)` heuristic couldn't handle.
let deadline = Instant::now() + Duration::from_secs(5);
while Instant::now() < deadline {
system.refresh_processes();
let still_alive = victims.iter().any(|pid| system.process(*pid).is_some());
if !still_alive {
return;
}
Err(e) => {
log::warn!("could not invoke taskkill: {e}");
std::thread::sleep(Duration::from_millis(50));
}
log::warn!(
"some {target} processes were still alive after 5 s; subsequent file ops may fail with sharing violation"
);
}
/// Translate a `windows_service::Error` from `ServiceManager::local_computer`
/// into a friendlier user-facing message. ERROR_ACCESS_DENIED (Win32 err 5)
/// is the overwhelmingly common case — operator forgot to elevate — and
/// deserves a single clear line rather than the raw Win32 errno string.
fn map_scm_open_error(e: windows_service::Error) -> anyhow::Error {
if let windows_service::Error::Winapi(ref ioe) = e {
if ioe.raw_os_error() == Some(5) {
return anyhow!(
"requires an elevated (Administrator) prompt — re-run from \"Run as administrator\""
);
}
}
anyhow!("open SCM: {e}")
}
/// Add a Windows Firewall rule allowing inbound TCP/UDP to the installed
/// hello-agent.exe. Idempotent: we delete any prior rule by the same name
/// first, so re-running --install (or upgrading in place) doesn't pile up
/// duplicate entries in the firewall's per-name list.
///
/// We use the program-scoped form (`program=<path>`) rather than port-scoped
/// rules because hello-agent's optional listeners (direct-server TCP 21118,
/// LAN-discovery UDP 21119) are gated on operator-controlled config flags;
/// rule-by-program covers whatever ports the agent actually decides to bind.
fn install_firewall_rule(exe_path: &PathBuf) -> Result<()> {
// Drop any pre-existing rule first; netsh quietly succeeds-with-exit-1
// when nothing matches, so we ignore the result.
let _ = run_netsh(&[
"advfirewall",
"firewall",
"delete",
"rule",
&format!("name={FIREWALL_RULE_NAME}"),
]);
let program_arg = format!(
"program={}",
exe_path.to_str().ok_or_else(|| anyhow!(
"non-UTF-8 install path can't be passed to netsh: {}",
exe_path.display()
))?
);
let status = run_netsh(&[
"advfirewall",
"firewall",
"add",
"rule",
&format!("name={FIREWALL_RULE_NAME}"),
"dir=in",
"action=allow",
"enable=yes",
"profile=any",
&program_arg,
])?;
if !status {
return Err(anyhow!("netsh add rule failed"));
}
log::info!(
"added firewall rule '{FIREWALL_RULE_NAME}' for {}",
exe_path.display()
);
Ok(())
}
/// Delete the firewall rule named `FIREWALL_RULE_NAME`.
///
/// netsh exits non-zero when no rule matches; that outcome counts as success
/// here because the post-condition (no rule by that name) already holds.
///
/// # Errors
///
/// Returns an error only when `run_netsh` itself fails, i.e. netsh could not
/// be launched.
fn delete_firewall_rule() -> Result<()> {
    let rule_selector = format!("name={FIREWALL_RULE_NAME}");

    // The boolean exit status is discarded on purpose: both "deleted" and
    // "nothing matched" are acceptable. Only a launch failure propagates.
    run_netsh(&["advfirewall", "firewall", "delete", "rule", &rule_selector])?;

    log::info!("removed firewall rule '{FIREWALL_RULE_NAME}' (or none was present)");
    Ok(())
}
/// Shell out to netsh.exe with the given args and wait for it to finish.
///
/// Returns `Ok(true)` on exit-0, `Ok(false)` on a non-zero exit that *netsh
/// itself* produced (e.g. "rule already exists" or "no rules match"), and
/// `Err` only when the binary couldn't be invoked at all (PATH stripped,
/// etc.).
///
/// On a non-zero exit the captured output is logged at debug level. Both
/// streams are included: netsh generally prints its diagnostics on stdout,
/// so logging stderr alone tends to leave the message empty.
fn run_netsh(args: &[&str]) -> Result<bool> {
    let out = std::process::Command::new("netsh")
        .args(args)
        .output()
        .context("invoke netsh")?;

    let succeeded = out.status.success();
    if !succeeded {
        let stdout = String::from_utf8_lossy(&out.stdout);
        let stderr = String::from_utf8_lossy(&out.stderr);
        log::debug!(
            "netsh {args:?} exited {:?}: stdout={} stderr={}",
            out.status.code(),
            stdout.trim(),
            stderr.trim()
        );
    }
    Ok(succeeded)
}
/// Remove %ProgramFiles%\hello-agent. Best-effort: if the user ran