#!/usr/bin/env bash
# Provisions a Debian 13 (Trixie) container or VM as a Gitea Actions runner
# that does Authenticode code signing for hello-agent via osslsigncode.
#
# Idempotent: safe to re-run. Does NOT generate or import the signing key —
# operators do that out-of-band after provisioning. The script only sets up
# the directory layout, ACLs, runner, and systemd sandbox.
#
# Designed for an unprivileged Incus/LXC container on a hardened host:
#   * No build toolchains. Smallest possible attack surface.
#   * Service unit is heavily sandboxed (Read*Only*Paths, NoNewPrivileges, …).
#   * Outbound network restriction is the LXC HOST's responsibility — the
#     container itself can't enforce it because nothing inside the namespace
#     is privileged enough to load nf_tables. Configure on the host.
#
# Usage:
#   sudo ./provision.sh \
#     --gitea-url https://gitea.example.com \
#     --runner-token <token>

set -euo pipefail

# ---- pinned versions (mirror .gitea/workflows/build-windows.yml where they overlap) ----
# NODE_MAJOR is the Node.js major release that act_runner launches for
# JavaScript actions (upload/download-artifact).
NODE_MAJOR='20'
RUNNER_VERSION='0.2.11'

# ---- defaults ----
# The first five can be overridden from the command line; GITEA_URL and
# RUNNER_TOKEN have no default and must be supplied.
GITEA_URL=''
RUNNER_TOKEN=''
SERVICE_USER='hello-signer'
PKI_DIR='/etc/pki/hello-agent'
RUNNER_NAME="$(hostname)-helloagent-sign"
# Each label carries a `:host` suffix, which makes act_runner execute jobs
# directly on this machine instead of inside a Docker container (the Linux
# runner default). There is deliberately no Docker daemon here: the runner
# only calls osslsigncode and uploads, and the LXC + systemd sandbox already
# provides the isolation. Workflows match on the label name alone
# (`runs-on: [self-hosted, linux, signing]`), so workflow authors never see
# the `:host` qualifier.
RUNNER_LABELS='self-hosted:host,linux:host,signing:host'

# ---- arg parse ----
#######################################
# Parses the command line into the global settings.
# Globals:   GITEA_URL, RUNNER_TOKEN, RUNNER_NAME, RUNNER_LABELS,
#            SERVICE_USER (written)
# Arguments: the script's command line ("$@")
# Outputs:   usage text on -h/--help (stdout); diagnostics on stderr
# Returns:   0 when every argument was consumed. Does not return on error:
#            exits 2 for an unknown option or an option without a value,
#            exits 0 after printing help.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --gitea-url | --runner-token | --runner-name | --runner-labels | --service-user)
        # Every option takes exactly one value. Check for it explicitly:
        # under `set -u` a bare "$2" would otherwise abort the script with
        # an "unbound variable" message that says nothing about usage.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for $1" >&2
          exit 2
        fi
        case "$1" in
          --gitea-url) GITEA_URL="$2" ;;
          --runner-token) RUNNER_TOKEN="$2" ;;
          --runner-name) RUNNER_NAME="$2" ;;
          --runner-labels) RUNNER_LABELS="$2" ;;
          --service-user) SERVICE_USER="$2" ;;
        esac
        shift 2
        ;;
      -h | --help)
        # Usage text is lines 2-20 of this script (the header comment).
        sed -n '2,20p' "$0"
        exit 0
        ;;
      *)
        echo "Unknown arg: $1" >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"

# ---- preflight checks ----
# Refuse to continue unless running as root and both mandatory options
# were supplied.
[[ "$EUID" -eq 0 ]] || { echo "Run as root (use sudo)." >&2; exit 1; }
[[ -n "$GITEA_URL" && -n "$RUNNER_TOKEN" ]] \
  || { echo "Missing --gitea-url or --runner-token" >&2; exit 2; }

# Warn (but carry on after a short pause) on anything other than Debian 13.
# The release is recognised either by VERSION_ID=13 or by
# VERSION_CODENAME=trixie; the codename has to be matched against
# VERSION_CODENAME because VERSION_ID never contains it. Every os-release
# field is read with a fallback so a sparse file cannot trip `set -u`.
# shellcheck source=/dev/null
. /etc/os-release
case "${ID:-}:${VERSION_ID:-}:${VERSION_CODENAME:-}" in
  debian:13:* | debian:*:trixie) ;;
  *)
    echo "WARNING: tested only on Debian 13 (Trixie). You're on ${PRETTY_NAME:-an unidentified OS}." >&2
    sleep 3
    ;;
esac

# Prints a section banner to stdout: an empty line, then "==> " followed by
# all arguments joined into a single string.
log() {
  local banner="$*"
  printf '\n==> %s\n' "$banner"
}

# ---- 1. apt packages (deliberately minimal — no compilers on a signing host) ----
log "Installing apt packages"
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y --no-install-recommends \
  osslsigncode openssl ca-certificates \
  curl wget git \
  sudo gnupg

# Node.js (act_runner spawns node for JS actions like actions/download-artifact).
# Skipped when any `node` is already on PATH.
if ! command -v node >/dev/null; then
  log "Installing Node.js ${NODE_MAJOR} LTS"
  curl -fsSL "https://deb.nodesource.com/setup_${NODE_MAJOR}.x" | bash -
  apt-get install -y --no-install-recommends nodejs
fi

# Sanity-check osslsigncode. Debian 13 ships 2.9, which has -ts (RFC 3161).
# Only the presence of a parsable version string is verified here.
#
# The `|| true` matters: with `set -o pipefail` a non-zero status from
# `osslsigncode --version` (or a missing binary, whose error text is
# swallowed by the 2>&1 into awk) would otherwise abort the script silently
# inside the command substitution, before the explicit diagnostic below
# could be printed.
ver="$( { osslsigncode --version 2>&1 || true; } | awk '/^osslsigncode/ {print $2; exit}')"
if [[ -z "$ver" ]]; then
  echo "could not parse osslsigncode version" >&2
  exit 1
fi
log "osslsigncode $ver OK"

# ---- 2. dedicated runner user ----
# We pin the user's home to RUNNER_DIR (defined below in section 4) rather
# than letting useradd default to /home/$SERVICE_USER. Two reasons:
#
#   1. The systemd unit sets ProtectHome=yes, which masks /home, /root,
#      /run/user with empty tmpfs. If HOME points into /home, anything
#      act_runner spawns (Node for JS actions, etc.) inherits a HOME path
#      that doesn't exist from the sandbox's view, and crashes on first
#      cache write with "mkdir /home/<user>: permission denied".
#   2. The runner user has no real "home" — it's a system account that
#      exists only to run a daemon. Pointing HOME at /var/lib/gitea-runner
#      reflects what's actually true.
#
# RUNNER_DIR is hardcoded here (mirrors the section-4 value) because user
# creation has to happen before we know we'll need to mkdir the dir, but
# we need the path baked into /etc/passwd up front. Keep these two in sync.
RUNNER_DIR=/var/lib/gitea-runner
mkdir -p "$RUNNER_DIR"

if ! id -u "$SERVICE_USER" >/dev/null 2>&1; then
  log "Creating system user $SERVICE_USER (home=$RUNNER_DIR)"
  # No login shell on purpose: this user only runs systemd's exec, never logs in.
  # --no-create-home: we already mkdir'd RUNNER_DIR; useradd would fail
  # trying to copy /etc/skel into a non-empty dir.
  useradd --system \
    --home-dir "$RUNNER_DIR" \
    --no-create-home \
    --shell /usr/sbin/nologin \
    "$SERVICE_USER"
else
  # Existing user from a pre-fix provision run: re-point home to
  # RUNNER_DIR if it isn't already. Fixes deployments that hit the
  # ProtectHome=yes / HOME=/home/<user> mismatch.
  current_home="$(getent passwd "$SERVICE_USER" | cut -d: -f6)"
  if [[ "$current_home" != "$RUNNER_DIR" ]]; then
    log "Re-pointing $SERVICE_USER home: $current_home -> $RUNNER_DIR"
    # usermod refuses to change the home directory of an account that still
    # has running processes, and on an already-provisioned host the runner
    # service may be up (or crash-looping) as this user. Stop it first; the
    # unit is restarted at the end of this script. Best-effort on purpose:
    # the unit may not exist yet.
    systemctl stop gitea-act-runner.service >/dev/null 2>&1 || true
    usermod --home "$RUNNER_DIR" "$SERVICE_USER"
    # If the legacy home is empty (the common case — runner state lives
    # under RUNNER_DIR, not under /home), remove it. rmdir only ever removes
    # an empty directory, so a home that has content for some reason is
    # left alone for the operator to inspect.
    if [[ -d "$current_home" ]]; then
      rmdir -- "$current_home" 2>/dev/null || true
    fi
  fi
fi
# Home directory as recorded in the passwd database after the steps above.
# NOTE(review): nothing in this script reads RUNNER_HOME afterwards.
RUNNER_HOME="$(getent passwd "$SERVICE_USER" | cut -d: -f6)"

# ---- 3. PKI directory ----
# Layout:
#   /etc/pki/hello-agent/
#     chain.pem      leaf || intermediate || root       0444 root:root
#     codesign.key   PEM private key (or PKCS#11 stub)  0440 root:hello-signer
#
# Why root owns the key file but hello-signer can read it: prevents the
# runner user from rewriting / deleting the key (rotate operations require
# root), while still letting osslsigncode open it for signing.
#
# The key mode has to be 0440, not 0400: the file's owner is root, so the
# service user only gets access through the GROUP permission bits. With 0400
# the group has no bits at all and the runner could not open the key.
log "Preparing PKI directory at $PKI_DIR"
install -d -m 0755 -o root -g root "$PKI_DIR"

# Touch stub files if they don't exist yet so systemd's ReadOnlyPaths
# resolves cleanly on first start. Operator overwrites these post-provision.
[[ -f "$PKI_DIR/chain.pem" ]] \
  || install -m 0444 -o root -g root /dev/null "$PKI_DIR/chain.pem"
[[ -f "$PKI_DIR/codesign.key" ]] \
  || install -m 0440 -o root -g "$SERVICE_USER" /dev/null "$PKI_DIR/codesign.key"

# Re-assert ownership and modes unconditionally — defends against an
# operator copying files in with overly-permissive umask.
chmod 0755 "$PKI_DIR"
chown root:root "$PKI_DIR/chain.pem"
chmod 0444 "$PKI_DIR/chain.pem"
chown root:"$SERVICE_USER" "$PKI_DIR/codesign.key"
chmod 0440 "$PKI_DIR/codesign.key"

# ---- 4. act_runner ----
# RUNNER_DIR already exists (section 2 created it ahead of useradd so it
# could serve as the account's home). Now that the account exists, hand the
# whole tree over to it.
chown -R "$SERVICE_USER:$SERVICE_USER" "$RUNNER_DIR"

# Fetch the pinned act_runner release unless an executable copy is already
# in place.
if [[ ! -x "$RUNNER_DIR/act_runner" ]]; then
  log "Downloading act_runner $RUNNER_VERSION"

  # Translate the kernel's machine name into the release asset's arch label.
  machine="$(uname -m)"
  if [[ "$machine" == "x86_64" ]]; then
    arch_label="amd64"
  elif [[ "$machine" == "aarch64" ]]; then
    arch_label="arm64"
  else
    echo "Unsupported arch: $machine" >&2
    exit 1
  fi

  runner_bin="$RUNNER_DIR/act_runner"
  release_url="https://gitea.com/gitea/act_runner/releases/download/v${RUNNER_VERSION}/act_runner-${RUNNER_VERSION}-linux-${arch_label}"
  curl -fsSL -o "$runner_bin" "$release_url"
  chmod +x "$runner_bin"
  chown "$SERVICE_USER:$SERVICE_USER" "$runner_bin"
fi

# Register with Gitea once; the presence of $RUNNER_DIR/.runner is treated
# as "already registered".
if [[ ! -f "$RUNNER_DIR/.runner" ]]; then
  log "Registering runner with $GITEA_URL (labels: $RUNNER_LABELS)"
  # The inline script is a fixed, single-quoted string and every value is
  # handed over as a positional parameter ($1..$5). Nothing operator-supplied
  # is spliced into shell source, so a quote or other metacharacter in the
  # URL, token, name or labels cannot break or alter the command. The word
  # after the script ("bash") only fills $0.
  sudo -u "$SERVICE_USER" -H bash -c '
    cd "$1" &&
      ./act_runner register --no-interactive \
        --instance "$2" \
        --token "$3" \
        --name "$4" \
        --labels "$5"
  ' bash "$RUNNER_DIR" "$GITEA_URL" "$RUNNER_TOKEN" "$RUNNER_NAME" "$RUNNER_LABELS"
fi

# act_runner config.yaml: keep the host-mode working directory inside
# RUNNER_DIR.
#
# Left unset, host-mode jobs use /workspace/<owner>/<repo> as
# $GITHUB_WORKSPACE. That path doesn't exist, and the systemd sandbox below
# (ProtectSystem=strict + ReadWritePaths=$RUNNER_DIR) prevents it from being
# created. The first JS action that writes there (e.g. actions/download-
# artifact populating ./incoming) then fails with EROFS, killing the job
# before osslsigncode is ever invoked.
WORKDIR_PARENT="$RUNNER_DIR/workspace"
install -d -m 0755 -o "$SERVICE_USER" -g "$SERVICE_USER" "$WORKDIR_PARENT"

# An existing config file is never overwritten.
CONFIG_FILE="$RUNNER_DIR/config.yaml"
if [[ ! -f "$CONFIG_FILE" ]]; then
  log "Writing act_runner config at $CONFIG_FILE"
  printf '%s\n' \
    'log:' \
    '  level: info' \
    'runner:' \
    '  capacity: 1' \
    'host:' \
    "  workdir_parent: $WORKDIR_PARENT" \
    >"$CONFIG_FILE"
  chmod 0644 "$CONFIG_FILE"
  chown "$SERVICE_USER:$SERVICE_USER" "$CONFIG_FILE"
fi

# ---- 5. systemd unit (heavily sandboxed) ----
#
# Rationale for the flags: this runner has a tiny job — fetch a PE file, run
# osslsigncode, upload the result — so it can be confined much more tightly
# than the rustdesk build runner.
#
# Deliberately left out / worth knowing:
#   * MemoryDenyWriteExecute=yes is NOT set — Node.js (V8 JIT) needs w+x
#     mappings, so runners that invoke JS actions (download-artifact etc.)
#     break under MDWX. The remaining sandbox flags still cover the
#     realistic post-exploitation paths.
#   * PrivateDevices=yes is fine for software-key signing. When moving to a
#     USB hardware token (YubiKey via opensc-pkcs11), switch to
#     PrivateDevices=no and add a DeviceAllow= line for
#     /dev/bus/usb/<bus>/<dev>.
log "Installing systemd unit"
unit_name="gitea-act-runner.service"
cat > "/etc/systemd/system/${unit_name}" <<EOF
[Unit]
Description=Gitea Actions runner (hello-agent code signing)
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=${SERVICE_USER}
WorkingDirectory=${RUNNER_DIR}
ExecStart=${RUNNER_DIR}/act_runner daemon --config ${CONFIG_FILE}
Restart=on-failure
RestartSec=5

# --- sandbox ---
NoNewPrivileges=yes
PrivateTmp=yes
PrivateDevices=yes
ProtectSystem=strict
ProtectHome=yes
ProtectKernelTunables=yes
ProtectKernelModules=yes
ProtectKernelLogs=yes
ProtectControlGroups=yes
ProtectClock=yes
ProtectHostname=yes
RestrictNamespaces=yes
RestrictRealtime=yes
RestrictSUIDSGID=yes
LockPersonality=yes
SystemCallArchitectures=native
# No SystemCallFilter=. We tried @system-service with various exclusions and
# Node 20 (spawned by act_runner for JS actions) hits a syscall outside the
# allowed set, getting killed with SIGSYS ("signal: bad system call") before
# producing any stderr — a silent kill that's miserable to diagnose. The
# other sandbox flags above (NoNewPrivileges, ProtectSystem=strict,
# ProtectHome, RestrictNamespaces, RestrictSUIDSGID, LockPersonality, plus
# the LXC and host-firewall layers) already cover the realistic threats for
# a signing-only service. Re-enable a tightened seccomp policy here only
# after auditing the exact syscalls Node + osslsigncode use end-to-end.

# --- filesystem access ---
ReadWritePaths=${RUNNER_DIR}
ReadOnlyPaths=${PKI_DIR}

# --- network ---
# Pull-mode runner: never binds. Disallow listening implicitly via
# RestrictAddressFamilies (no AF_PACKET, etc.). The host firewall enforces
# *which* outbound destinations are reachable.
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX

LimitNOFILE=65535
TasksMax=512

[Install]
WantedBy=multi-user.target
EOF

# Pick up the (re)written unit, start it at boot, and (re)start it now.
systemctl daemon-reload
for action in enable restart; do
  systemctl "$action" "$unit_name"
done

log "Done."

# Operator hand-off: the manual steps that remain after provisioning.
# The heredoc is unquoted so the ${...} settings expand into the text; each
# `\\` prints as a single backslash (a shell line continuation in the
# commands shown to the operator).
#
# The private-key install line uses mode 0440, not 0400: the key is owned by
# root with group ${SERVICE_USER}, so the service user can only read it
# through the group permission bits. A 0400 key would be unreadable to the
# runner.
cat <<EOF

----------------------------------------------------------------
Next steps (manual, on this host):

  1. Import the signing key + cert chain.

     The provisioning intentionally does NOT pull these from anywhere —
     keys must move under operator control. Once you have them locally:

       # Public chain (leaf || intermediate || root):
       install -m 0444 -o root -g root \\
         /path/to/chain.pem ${PKI_DIR}/chain.pem

       # Private key:
       install -m 0440 -o root -g ${SERVICE_USER} \\
         /path/to/codesign.key ${PKI_DIR}/codesign.key

       # Sanity-check the cert subject, EKU, and expiry:
       openssl x509 -in ${PKI_DIR}/chain.pem -noout \\
         -subject -enddate -ext extendedKeyUsage

     Required: extendedKeyUsage MUST contain "Code Signing" and NOTHING ELSE.

  2. Smoke-test signing as the runner user (uses an empty PE — fails fast
     but proves osslsigncode can read the key):

       sudo -u ${SERVICE_USER} osslsigncode sign \\
         -certs ${PKI_DIR}/chain.pem \\
         -key ${PKI_DIR}/codesign.key \\
         -h sha256 \\
         -in /usr/bin/osslsigncode -out /tmp/signtest.exe \\
         && echo "OK: signing key reachable" \\
         || echo "FAIL: check perms and PEM format"

  3. Confirm the runner came online:

       systemctl status gitea-act-runner
       journalctl -u gitea-act-runner -n 50 --no-pager
       # Then check ${GITEA_URL} > Site Admin > Actions > Runners
       # for "${RUNNER_NAME}" with labels "${RUNNER_LABELS}"

  4. Lock the LXC HOST firewall down. Outbound from the container should
     reach ONLY:
       - your Gitea instance (HTTPS, your Gitea host)
       - the RFC 3161 timestamp authority (HTTP, e.g. timestamp.digicert.com)
       - apt + node mirrors (HTTPS, only during provisioning;
         revoke after first successful run)
     Drop all inbound. Configure on the host (nftables / Proxmox firewall /
     Incus proxy device) — the container can't enforce this on itself.

----------------------------------------------------------------
EOF