Files
hello-agent/ci/runners/linux/provision-signer.sh

352 lines
14 KiB
Bash
Executable File

#!/usr/bin/env bash
# Provisions a Debian 13 (Trixie) container or VM as a Gitea Actions runner
# that does Authenticode code signing for hello-agent via osslsigncode.
#
# Idempotent: safe to re-run. Does NOT generate or import the signing key —
# operators do that out-of-band after provisioning. The script only sets up
# the directory layout, ACLs, runner, and systemd sandbox.
#
# Designed for an unprivileged Incus/LXC container on a hardened host:
# * No build toolchains. Smallest possible attack surface.
# * Service unit is heavily sandboxed (Read*Only*Paths, NoNewPrivileges, …).
# * Outbound network restriction is the LXC HOST's responsibility — the
# container itself can't enforce it because nothing inside the namespace
# is privileged enough to load nf_tables. Configure on the host.
#
# Usage:
# sudo ./provision-signer.sh \
# --gitea-url https://gitea.example.com \
# --runner-token <token>
set -euo pipefail
# ---- pinned versions (keep in step with .gitea/workflows/build-windows.yml where both pin the same tool) ----
# act_runner release that gets downloaded later in this script.
RUNNER_VERSION='0.2.11'
# Node.js major line. act_runner launches Node to execute JS actions
# (upload-artifact / download-artifact).
NODE_MAJOR='20'

# ---- defaults ----
# Required inputs. They start empty so that a missing --gitea-url or
# --runner-token can be detected once argument parsing has finished.
GITEA_URL=
RUNNER_TOKEN=
# Directory that holds the certificate chain and the signing key.
PKI_DIR='/etc/pki/hello-agent'
# Account the runner daemon executes as (overridable with --service-user).
SERVICE_USER='hello-signer'
# The `:host` suffix on each label makes act_runner execute jobs directly on
# this machine instead of inside a Docker container (the usual Linux runner
# behaviour). This runner intentionally has no Docker daemon: all it does is
# call osslsigncode and upload the result, and the LXC + systemd sandbox
# already provides the isolation we want. Workflows select this runner with
# `runs-on: [self-hosted, linux, signing]`; matching is by label name only,
# so workflow authors never see the `:host` part.
RUNNER_LABELS='self-hosted:host,linux:host,signing:host'
# Name shown for this runner: "<hostname>-helloagent-sign"
# (overridable with --runner-name).
RUNNER_NAME="$(hostname)"
RUNNER_NAME="${RUNNER_NAME}-helloagent-sign"
# ---- arg parse ----
# require_value FLAG [VALUE ...]
#   Guard used by parse_args for flags that take a value. When FLAG is the
#   last word on the command line there is no "$2" to read; under `set -u`
#   that would abort with a bare "unbound variable" error. Report a usage
#   error instead and exit 2, matching the other usage failures.
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Missing value for $1" >&2
    exit 2
  fi
}

# parse_args ARGS...
#   Walks the command-line words and stores each flag's value in the
#   matching global (GITEA_URL, RUNNER_TOKEN, RUNNER_NAME, RUNNER_LABELS,
#   SERVICE_USER). Globals for flags that are not given are left untouched.
#   -h/--help prints lines 2-20 of this script (the header comment) and
#   exits 0. An unknown argument or a flag without a value exits 2.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --gitea-url)     require_value "$@"; GITEA_URL="$2";     shift 2 ;;
      --runner-token)  require_value "$@"; RUNNER_TOKEN="$2";  shift 2 ;;
      --runner-name)   require_value "$@"; RUNNER_NAME="$2";   shift 2 ;;
      --runner-labels) require_value "$@"; RUNNER_LABELS="$2"; shift 2 ;;
      --service-user)  require_value "$@"; SERVICE_USER="$2";  shift 2 ;;
      -h|--help)
        sed -n '2,20p' "$0"
        exit 0 ;;
      *) echo "Unknown arg: $1" >&2; exit 2 ;;
    esac
  done
}
parse_args "$@"
# Preconditions, checked only after argument parsing so that --help works
# for unprivileged users too.
# 1. Must be root: the steps below install packages, create a user and
#    write under /etc.
if (( EUID != 0 )); then
  echo "Run as root (use sudo)." >&2
  exit 1
fi
# 2. Both the Gitea URL and the registration token are mandatory.
if [[ -z "$GITEA_URL" || -z "$RUNNER_TOKEN" ]]; then
  echo "Missing --gitea-url or --runner-token" >&2
  exit 2
fi
# Distribution check (advisory only — a mismatch warns and pauses, it does
# not abort).
. /etc/os-release
# A released Debian 13 reports VERSION_ID=13. Images that carry no
# VERSION_ID (as Debian testing images typically do) would otherwise
# produce "debian-", so fall back to VERSION_CODENAME; that is what lets
# the `debian-trixie` arm match. Every os-release field is read with a
# default because the script runs under `set -u` and none of them is
# guaranteed to be present.
os_release_tag="${ID:-unknown}-${VERSION_ID:-${VERSION_CODENAME:-}}"
case "$os_release_tag" in
  debian-13|debian-trixie) ;;
  *)
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "WARNING: tested only on Debian 13 (Trixie). You're on ${PRETTY_NAME:-an unidentified distribution}." >&2
    sleep 3 ;;
esac
# log WORDS...
#   Prints a section banner on stdout: an empty line, then "==> " followed
#   by all arguments joined into one string, then a newline.
log() {
  local message="$*"
  printf '\n==> %s\n' "$message"
}
# ---- 1. apt packages (deliberately minimal — no compilers on a signing host) ----
log "Installing apt packages"
# Stop apt/dpkg from prompting for input.
export DEBIAN_FRONTEND=noninteractive
# Everything this host needs from the distribution archive. Recommended
# packages are skipped to keep the install small.
apt_packages=(
  osslsigncode openssl ca-certificates
  curl wget git
  sudo gnupg
)
apt-get update -qq
apt-get install -y --no-install-recommends "${apt_packages[@]}"
# Node.js: act_runner starts `node` for JS actions such as
# actions/download-artifact. An existing `node` on PATH is left as it is;
# otherwise the NodeSource setup script for NODE_MAJOR is run and the
# nodejs package installed.
if command -v node >/dev/null; then
  : # node already present — nothing to install
else
  log "Installing Node.js ${NODE_MAJOR} LTS"
  curl -fsSL "https://deb.nodesource.com/setup_${NODE_MAJOR}.x" | bash -
  apt-get install -y --no-install-recommends nodejs
fi
# Confirm osslsigncode runs and reports a version. (Per the original note,
# Debian 13 ships 2.9, which supports -ts / RFC 3161 timestamping; only the
# presence of a version string is checked here, not its value.)
# The version is taken from the second field of the first output line that
# starts with "osslsigncode".
ver="$(osslsigncode --version 2>&1 | awk '/^osslsigncode/ {print $2; exit}')"
[[ -n "$ver" ]] || { echo "could not parse osslsigncode version" >&2; exit 1; }
log "osslsigncode $ver OK"
# ---- 2. dedicated runner user ----
# We pin the user's home to RUNNER_DIR (defined just below, ahead of
# section 4) rather than letting useradd default to /home/$SERVICE_USER.
# Two reasons:
#
# 1. The systemd unit sets ProtectHome=yes, which masks /home, /root,
# /run/user with empty tmpfs. If HOME points into /home, anything
# act_runner spawns (Node for JS actions, etc.) inherits a HOME path
# that doesn't exist from the sandbox's view, and crashes on first
# cache write with "mkdir /home/<user>: permission denied".
# 2. The runner user has no real "home" — it's a system account that
# exists only to run a daemon. Pointing HOME at /var/lib/gitea-runner
# reflects what's actually true.
#
# RUNNER_DIR is defined here, ahead of section 4 where it is mainly used,
# because the path has to be baked into /etc/passwd at user-creation time.
# Section 4 reuses this value rather than redefining it, so there is only
# one definition to maintain.
# State directory of the runner; it doubles as the service account's home.
RUNNER_DIR=/var/lib/gitea-runner
mkdir -p "$RUNNER_DIR"
if id -u "$SERVICE_USER" >/dev/null 2>&1; then
  # The account already exists (a re-run, or a deployment provisioned
  # before the home directory was pinned). Make sure its passwd home is
  # RUNNER_DIR, which fixes hosts that hit the ProtectHome=yes /
  # HOME=/home/<user> mismatch.
  current_home="$(getent passwd "$SERVICE_USER" | cut -d: -f6)"
  if [[ "$current_home" != "$RUNNER_DIR" ]]; then
    log "Re-pointing $SERVICE_USER home: $current_home -> $RUNNER_DIR"
    # NOTE(review): usermod may refuse to change the home while the user
    # still has running processes (e.g. the runner service) — confirm and
    # stop the service first if so.
    usermod --home "$RUNNER_DIR" "$SERVICE_USER"
    # Runner state lives under RUNNER_DIR, so the legacy home is normally
    # empty and can be removed. A non-empty one is left in place for the
    # operator to inspect. A failed rmdir is deliberately non-fatal.
    if [[ -d "$current_home" && -z "$(ls -A "$current_home" 2>/dev/null)" ]]; then
      rmdir "$current_home" || true
    fi
  fi
else
  log "Creating system user $SERVICE_USER (home=$RUNNER_DIR)"
  # --shell /usr/sbin/nologin: the account only ever runs the systemd
  #   service and never logs in.
  # --no-create-home: RUNNER_DIR was created above; useradd would fail
  #   trying to copy /etc/skel into a non-empty dir.
  useradd_opts=(
    --system
    --home-dir "$RUNNER_DIR"
    --no-create-home
    --shell /usr/sbin/nologin
  )
  useradd "${useradd_opts[@]}" "$SERVICE_USER"
fi
# Home directory as recorded in the passwd database after the steps above.
RUNNER_HOME="$(getent passwd "$SERVICE_USER" | cut -d: -f6)"
# ---- 3. PKI directory ----
# Layout:
#   /etc/pki/hello-agent/
#     chain.pem     leaf || intermediate || root      0444 root:root
#     codesign.key  PEM private key (or PKCS#11 stub) 0440 root:hello-signer
#
# Why root owns the key file but the hello-signer group can read it: the
# runner user cannot rewrite or delete the key (rotation requires root),
# yet osslsigncode running as that user can still open it for signing.
# The mode must be 0440, not 0400: with owner root, mode 0400 grants the
# group no read bit at all, so the runner user would get "permission
# denied" on the key.
log "Preparing PKI directory at $PKI_DIR"
install -d -m 0755 -o root -g root "$PKI_DIR"
# Create empty stub files if they don't exist yet so systemd's ReadOnlyPaths
# resolves cleanly on first start. Operator overwrites these post-provision.
[[ -f "$PKI_DIR/chain.pem" ]] \
  || install -m 0444 -o root -g root /dev/null "$PKI_DIR/chain.pem"
[[ -f "$PKI_DIR/codesign.key" ]] \
  || install -m 0440 -o root -g "$SERVICE_USER" /dev/null "$PKI_DIR/codesign.key"
# Re-assert ownership and modes unconditionally — defends against an
# operator copying files in with an overly-permissive umask, and repairs
# keys left at 0400 by earlier runs of this script.
chmod 0755 "$PKI_DIR"
chown root:root "$PKI_DIR/chain.pem"
chmod 0444 "$PKI_DIR/chain.pem"
chown root:"$SERVICE_USER" "$PKI_DIR/codesign.key"
chmod 0440 "$PKI_DIR/codesign.key"
# ---- 4. act_runner ----
# RUNNER_DIR was defined and created back in section 2, because useradd
# needed it as the account's home. The account exists by now, so hand the
# whole tree over to it.
chown -R "$SERVICE_USER:$SERVICE_USER" "$RUNNER_DIR"
runner_bin="$RUNNER_DIR/act_runner"
# Download the pinned act_runner release unless an executable is already
# in place.
# NOTE(review): the download is not checked against a checksum or
# signature — consider pinning one for a signing host.
if [[ ! -x "$runner_bin" ]]; then
  log "Downloading act_runner $RUNNER_VERSION"
  # Translate the kernel's machine name into the suffix used by the
  # release asset names.
  machine="$(uname -m)"
  case "$machine" in
    x86_64)  arch_label="amd64" ;;
    aarch64) arch_label="arm64" ;;
    *) echo "Unsupported arch: $machine" >&2; exit 1 ;;
  esac
  release_url="https://gitea.com/gitea/act_runner/releases/download/v${RUNNER_VERSION}/act_runner-${RUNNER_VERSION}-linux-${arch_label}"
  curl -fsSL -o "$runner_bin" "$release_url"
  chmod +x "$runner_bin"
  chown "$SERVICE_USER:$SERVICE_USER" "$runner_bin"
fi
# Register the runner with Gitea once. act_runner records the registration
# in $RUNNER_DIR/.runner, so the presence of that file means "already
# registered" and the step is skipped on re-runs.
#
# The values are handed to the inner shell as positional parameters
# ($1..$5) and the command string itself is single-quoted, so nothing is
# interpolated into shell source. A URL, token, name or label list that
# contains quotes, spaces or other metacharacters is therefore passed
# through verbatim instead of breaking (or injecting into) the command.
if [[ ! -f "$RUNNER_DIR/.runner" ]]; then
  log "Registering runner with $GITEA_URL (labels: $RUNNER_LABELS)"
  # "register-runner" only fills $0 of the inner shell; the real arguments
  # start at $1. -H makes sudo set HOME to the service user's home.
  sudo -u "$SERVICE_USER" -H bash -c '
    cd "$1" && \
    ./act_runner register --no-interactive \
      --instance "$2" \
      --token "$3" \
      --name "$4" \
      --labels "$5"
  ' register-runner \
    "$RUNNER_DIR" "$GITEA_URL" "$RUNNER_TOKEN" "$RUNNER_NAME" "$RUNNER_LABELS"
fi
# act_runner config.yaml: pin host-mode workdir under RUNNER_DIR.
#
# Without this, host-mode jobs default to /workspace/<owner>/<repo> as
# $GITHUB_WORKSPACE — a path that doesn't exist and, under the systemd
# ProtectSystem=strict + ReadWritePaths=$RUNNER_DIR sandbox below, can't
# be created. The first JS action that writes there (e.g. actions/download-
# artifact populating ./incoming) fails with EROFS and the job dies before
# osslsigncode is ever invoked.
WORKDIR_PARENT="$RUNNER_DIR/workspace"
install -d -m 0755 -o "$SERVICE_USER" -g "$SERVICE_USER" "$WORKDIR_PARENT"
CONFIG_FILE="$RUNNER_DIR/config.yaml"
# Written only when missing, so operator edits to an existing config
# survive re-runs of this script.
if [[ ! -f "$CONFIG_FILE" ]]; then
  log "Writing act_runner config at $CONFIG_FILE"
  # The indentation inside the heredoc is significant: level, capacity and
  # workdir_parent must be nested under log:, runner: and host:. At column 0
  # they would become unrelated top-level keys and the workdir pin above
  # would not take effect. The delimiter is unquoted on purpose so that
  # $WORKDIR_PARENT is expanded.
  cat > "$CONFIG_FILE" <<EOF
log:
  level: info
runner:
  capacity: 1
host:
  workdir_parent: $WORKDIR_PARENT
EOF
  chown "$SERVICE_USER:$SERVICE_USER" "$CONFIG_FILE"
  chmod 0644 "$CONFIG_FILE"
fi
# ---- 5. systemd unit (heavily sandboxed) ----
#
# Rationale for the sandbox flags: this runner's whole job is to fetch a PE
# file, run osslsigncode on it and upload the result, so it can be confined
# much more tightly than the rustdesk build runner.
#
# Deliberately left out / worth knowing:
# * MemoryDenyWriteExecute=yes is NOT set. Node.js (V8 JIT) needs w+x
#   mappings, and runners that execute JS actions (download-artifact etc.)
#   break under MDWX. The remaining flags still cover the realistic
#   post-exploitation paths.
# * PrivateDevices=yes is fine while the key is a software key. After a move
#   to a USB hardware token (YubiKey via opensc-pkcs11), set
#   PrivateDevices=no and add a DeviceAllow= line for
#   /dev/bus/usb/<bus>/<dev>.
#
# The unit file is rewritten on every run. The heredoc delimiter is
# unquoted so the ${...} references are filled in when the file is written.
unit_name="gitea-act-runner.service"
unit_path="/etc/systemd/system/${unit_name}"
log "Installing systemd unit"
cat > "$unit_path" <<EOF
[Unit]
Description=Gitea Actions runner (hello-agent code signing)
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=${SERVICE_USER}
WorkingDirectory=${RUNNER_DIR}
ExecStart=${RUNNER_DIR}/act_runner daemon --config ${CONFIG_FILE}
Restart=on-failure
RestartSec=5
# --- sandbox ---
NoNewPrivileges=yes
PrivateTmp=yes
PrivateDevices=yes
ProtectSystem=strict
ProtectHome=yes
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectKernelLogs=yes
ProtectControlGroups=yes
ProtectClock=yes
ProtectHostname=yes
RestrictNamespaces=yes
RestrictRealtime=yes
RestrictSUIDSGID=yes
LockPersonality=yes
SystemCallArchitectures=native
# No SystemCallFilter=. We tried @system-service with various exclusions and
# Node 20 (spawned by act_runner for JS actions) hits a syscall outside the
# allowed set, getting killed with SIGSYS ("signal: bad system call") before
# producing any stderr — a silent kill that's miserable to diagnose. The
# other sandbox flags above (NoNewPrivileges, ProtectSystem=strict,
# ProtectHome, RestrictNamespaces, RestrictSUIDSGID, LockPersonality, plus
# the LXC and host-firewall layers) already cover the realistic threats for
# a signing-only service. Re-enable a tightened seccomp policy here only
# after auditing the exact syscalls Node + osslsigncode use end-to-end.
# --- filesystem access ---
ReadWritePaths=${RUNNER_DIR}
ReadOnlyPaths=${PKI_DIR}
# --- network ---
# Pull-mode runner: never binds. Disallow listening implicitly via
# RestrictAddressFamilies (no AF_PACKET, etc.). The host firewall enforces
# *which* outbound destinations are reachable.
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
LimitNOFILE=65535
TasksMax=512
[Install]
WantedBy=multi-user.target
EOF
# Reload systemd so it sees the new unit text, enable the unit at boot,
# then (re)start it so a re-provision picks up any changes right away.
systemctl daemon-reload
systemctl enable "$unit_name"
systemctl restart "$unit_name"
# print_next_steps
#   Writes the manual follow-up checklist for the operator to stdout, with
#   PKI_DIR, SERVICE_USER, GITEA_URL, RUNNER_NAME and RUNNER_LABELS filled
#   in. In the heredoc, "\\" produces a literal backslash so the sample
#   commands keep their line continuations.
#
#   The private-key install command uses mode 0440 (owner root, group =
#   service user). With owner root, mode 0400 would leave the group without
#   a read bit, and the runner user could not open the key.
#
#   NOTE(review): the step-2 smoke test signs an ELF binary, not a PE, so
#   osslsigncode presumably exits non-zero and prints the FAIL line even
#   when the key is readable — confirm and consider a real PE sample.
print_next_steps() {
  cat <<EOF
----------------------------------------------------------------
Next steps (manual, on this host):
1. Import the signing key + cert chain.
The provisioning intentionally does NOT pull these from anywhere —
keys must move under operator control. Once you have them locally:
# Public chain (leaf || intermediate || root):
install -m 0444 -o root -g root \\
/path/to/chain.pem ${PKI_DIR}/chain.pem
# Private key:
install -m 0440 -o root -g ${SERVICE_USER} \\
/path/to/codesign.key ${PKI_DIR}/codesign.key
# Sanity-check the cert subject, EKU, and expiry:
openssl x509 -in ${PKI_DIR}/chain.pem -noout \\
-subject -enddate -ext extendedKeyUsage
Required: extendedKeyUsage MUST contain "Code Signing" and NOTHING ELSE.
2. Smoke-test signing as the runner user (uses an empty PE — fails fast
but proves osslsigncode can read the key):
sudo -u ${SERVICE_USER} osslsigncode sign \\
-certs ${PKI_DIR}/chain.pem \\
-key ${PKI_DIR}/codesign.key \\
-h sha256 \\
-in /usr/bin/osslsigncode -out /tmp/signtest.exe \\
&& echo "OK: signing key reachable" \\
|| echo "FAIL: check perms and PEM format"
3. Confirm the runner came online:
systemctl status gitea-act-runner
journalctl -u gitea-act-runner -n 50 --no-pager
# Then check ${GITEA_URL} > Site Admin > Actions > Runners
# for "${RUNNER_NAME}" with labels "${RUNNER_LABELS}"
4. Lock the LXC HOST firewall down. Outbound from the container should
reach ONLY:
- your Gitea instance (HTTPS, your Gitea host)
- the RFC 3161 timestamp authority (HTTP, e.g. timestamp.digicert.com)
- apt + node mirrors (HTTPS, only during provisioning;
revoke after first successful run)
Drop all inbound. Configure on the host (nftables / Proxmox firewall /
Incus proxy device) — the container can't enforce this on itself.
----------------------------------------------------------------
EOF
}
log "Done."
print_next_steps