diff --git a/.gitea/workflows/build-windows.yml b/.gitea/workflows/build-windows.yml index 35d061c..665f8b1 100644 --- a/.gitea/workflows/build-windows.yml +++ b/.gitea/workflows/build-windows.yml @@ -10,20 +10,32 @@ on: type: string default: "" +# Workflow-level env is visible to every job. Runner-specific paths +# (VCPKG_ROOT, LLVM_HOME, …) live on the build-x64 job instead, since the +# Linux signing runner has no use for them and shouldn't see them. env: - RUST_VERSION: "1.75" - LLVM_VERSION: "15.0.6" - # bindgen (pulled in via scrap → libvpx-sys) reads LIBCLANG_PATH; the runner - # provisioner installs LLVM here. - LLVM_HOME: 'C:\tools\llvm-15.0.6' - VCPKG_COMMIT_ID: "120deac3062162151622ca4860575a33844ba10b" + TIMESTAMP_URL: "http://timestamp.digicert.com" + SIGN_DESCRIPTION: "HelloAgent Remote Support" + SIGN_INFO_URL: "https://cstudio.ch/hello-agent" jobs: + # ---- 1. BUILD on Windows --------------------------------------------------- + # Produces an unsigned hello-agent.exe and a SHA-256 of those bytes that + # the sign job re-checks before signing. The pre-sign hash is the audit + # link between "what the build runner produced" and "what the signer was + # asked to sign" — a transit corruption or tampered artifact gets caught + # at sign time, not at the customer. 
build-x64: name: build-hello-agent-x64 runs-on: [self-hosted, windows-10] timeout-minutes: 90 + outputs: + version_tag: ${{ steps.version.outputs.tag }} env: + RUST_VERSION: "1.75" + LLVM_VERSION: "15.0.6" + LLVM_HOME: 'C:\tools\llvm-15.0.6' + VCPKG_COMMIT_ID: "120deac3062162151622ca4860575a33844ba10b" VCPKG_ROOT: C:\vcpkg VCPKG_BINARY_SOURCES: "clear;files,C:\\vcpkg-cache,readwrite" LIBCLANG_PATH: 'C:\tools\llvm-15.0.6\bin' @@ -109,54 +121,239 @@ jobs: throw "target\release\hello-agent.exe missing after cargo build" } - - name: Compute version suffix and stage artifact + - name: Compute version tag + id: version shell: pwsh - run: | - $suffix = "${env:VERSION_SUFFIX}" - if ($suffix) { $tag = "0.1.0-$suffix" } else { $tag = "0.1.0" } - New-Item -ItemType Directory -Force -Path .\SignOutput | Out-Null - Copy-Item -Force ` - target\release\hello-agent.exe ` - ".\SignOutput\hello-agent-$tag-x86_64.exe" - Write-Host "staged: SignOutput\hello-agent-$tag-x86_64.exe" env: VERSION_SUFFIX: ${{ inputs.version_suffix }} + run: | + $base = (Select-String -Path Cargo.toml -Pattern '^version = "([^"]+)"').Matches[0].Groups[1].Value + if (-not $base) { throw "could not parse version from Cargo.toml" } + if ($env:VERSION_SUFFIX) { $tag = "$base-$env:VERSION_SUFFIX" } else { $tag = $base } + "tag=$tag" | Out-File -FilePath $env:GITHUB_OUTPUT -Append + Write-Host "Version tag: $tag" - - name: Report signing status of build artifacts + - name: Stage unsigned artifact + record pre-sign hash + shell: pwsh + # The pre-sign hash is the chain-of-custody primitive: build runner + # publishes it with the binary, signing runner re-hashes the binary + # it actually received and refuses to sign on mismatch. Catches + # corruption in transit and tampering between jobs. 
+ run: | + New-Item -ItemType Directory -Force -Path .\BuildOutput | Out-Null + Copy-Item -Force target\release\hello-agent.exe ` + .\BuildOutput\hello-agent.exe + # Lowercase hex + LF-only line ending is the canonical sha256 + # format (matches sha256sum, openssl, etc.). PowerShell's + # Get-FileHash returns uppercase, and Out-File writes CRLF on + # Windows — both bite the Linux signer's hash compare. We use + # WriteAllText with an explicit "`n" so the file ends up exactly + # 65 bytes: 64 hex chars + one LF. + $h = Get-FileHash .\BuildOutput\hello-agent.exe -Algorithm SHA256 + $hash = $h.Hash.ToLower() + [IO.File]::WriteAllText( + "$PWD\BuildOutput\hello-agent.exe.presig.sha256", + "$hash`n") + Write-Host "::notice title=Pre-sign SHA-256::$hash" + + - name: Upload unsigned artifact + uses: actions/upload-artifact@v3 + with: + name: hello-agent-unsigned-${{ github.sha }} + path: | + BuildOutput/hello-agent.exe + BuildOutput/hello-agent.exe.presig.sha256 + if-no-files-found: error + # Short retention — superseded by the signed artifact within a few + # minutes. Keeps the artifact store from filling up with + # unsigned-and-therefore-not-shippable binaries. + retention-days: 1 + + # ---- 2. SIGN on the Linux LXC --------------------------------------------- + # Runs on the dedicated signing host provisioned by ci/runners/linux/provision.sh. + # Has access to /etc/pki/hello-agent/{chain.pem,codesign.key}; doesn't have + # a build toolchain. The runner can sign — it can't produce a binary from + # source. + sign-x64: + name: sign-hello-agent-x64 + needs: build-x64 + runs-on: [self-hosted, linux, signing] + timeout-minutes: 10 + env: + PKI_DIR: /etc/pki/hello-agent + steps: + - name: Download unsigned artifact + uses: actions/download-artifact@v3 + with: + name: hello-agent-unsigned-${{ github.sha }} + path: ./incoming + + - name: Check signing-cert expiry + # Fail HARD if the cert expires within 30 days; warn at 60. 
Without + # this, an expired cert silently produces a signature that's invalid + # at customer install time (and lifetimeSigning isn't set, but + # signtool still rejects expired-at-sign-time leaves). + run: | + end="$(openssl x509 -in "$PKI_DIR/chain.pem" -noout -enddate | cut -d= -f2)" + end_epoch="$(date -d "$end" +%s)" + now_epoch="$(date +%s)" + days_left=$(( (end_epoch - now_epoch) / 86400 )) + echo "Signing cert NotAfter: $end ($days_left days)" + if [ "$days_left" -lt 30 ]; then + echo "::error title=Signing cert expiring::$days_left days left — rotate before next sign" + exit 1 + elif [ "$days_left" -lt 60 ]; then + echo "::warning title=Signing cert::$days_left days left — schedule rotation" + fi + + - name: Verify pre-sign hash matches received bytes + # Pre-sign hash check: confirms the file we're about to sign matches + # what the build runner emitted. A mismatch means corruption or + # tampering between jobs — abort, do not sign. + # + # `tr -d '\r' | tr A-Z a-z` defensively normalizes the expected + # value: `awk '{print $1}'` strips the trailing LF (its RS) but NOT + # CR, and PowerShell's default Out-File writes CRLF on Windows — + # so without -d '\r' a CRLF-encoded sha256 file ends up with $1 = + # "<hex>\r", which compares unequal to the actual sha256sum output + # and renders as visually identical in logs (the \r is a no-op in + # most terminals). Lowercasing covers a similar case-mismatch trap. + # The build job already writes LF + lowercase, but the verifier + # shouldn't trust that contract.
+ run: | + expected="$(tr -d '\r' < incoming/hello-agent.exe.presig.sha256 | awk '{print $1}' | tr 'A-Z' 'a-z')" + actual="$(sha256sum incoming/hello-agent.exe | awk '{print $1}')" + if [ "$expected" != "$actual" ]; then + echo "::error title=Pre-sign hash mismatch::artifact corrupted or tampered" + echo "expected=$expected" + echo "actual=$actual" + exit 1 + fi + echo "Pre-sign SHA-256 OK: $actual" + + - name: Authenticode sign (osslsigncode, SHA-256, RFC 3161) + # -h sha256: file digest. -ts: RFC 3161 timestamp (NOT -t which is + # legacy Authenticode timestamp). -n / -i: cosmetic, shown in the + # UAC prompt and "signtool verify /v" output. + run: | + mkdir -p signed + osslsigncode sign \ + -certs "$PKI_DIR/chain.pem" \ + -key "$PKI_DIR/codesign.key" \ + -h sha256 \ + -ts "$TIMESTAMP_URL" \ + -n "$SIGN_DESCRIPTION" \ + -i "$SIGN_INFO_URL" \ + -in incoming/hello-agent.exe \ + -out signed/hello-agent.exe + + - name: Verify signature (osslsigncode self-check) + run: osslsigncode verify -in signed/hello-agent.exe + + - name: Record post-sign hash + run: | + h="$(sha256sum signed/hello-agent.exe | awk '{print $1}')" + echo "$h" > signed/hello-agent.exe.signed.sha256 + echo "::notice title=Post-sign SHA-256::$h" + + - name: Stage signed bundle for upload + # upload-artifact@v3 stores files using paths relative to the + # *least common ancestor* directory of all matched paths. Mixing + # `signed/...` and `incoming/...` makes that common directory the + # workflow root, so the artifact ends up containing `signed/file` + # and `incoming/file` — and `download-artifact@v3 path: ./signed` + # then double-nests it as `./signed/signed/file`. Flatten into a + # single staging dir so the artifact has a flat layout that + # extracts to `./signed/file` cleanly on the validate runner.
+ run: | + mkdir -p bundle + cp signed/hello-agent.exe bundle/ + cp signed/hello-agent.exe.signed.sha256 bundle/ + cp incoming/hello-agent.exe.presig.sha256 bundle/ + + - name: Upload signed artifact + uses: actions/upload-artifact@v3 + with: + name: hello-agent-signed-${{ github.sha }} + path: bundle/ + if-no-files-found: error + retention-days: 90 + + # ---- 3. VALIDATE on Windows ----------------------------------------------- + # Trust-but-verify: osslsigncode produces signatures that should validate + # on Windows, but "should" isn't "did". This job is the cross-OS smoke + # test — a Windows runner running CryptoAPI's verifier against the same + # bytes the customer will see. Catches the rare cases where Linux thinks + # a sig is fine but Windows rejects it. + validate-x64: + name: validate-hello-agent-x64 + needs: [build-x64, sign-x64] + runs-on: [self-hosted, windows-10] + timeout-minutes: 10 + steps: + - name: Download signed artifact + uses: actions/download-artifact@v3 + with: + name: hello-agent-signed-${{ github.sha }} + path: ./signed + + - name: Verify Authenticode signature (Windows CryptoAPI) shell: pwsh run: | - $artifacts = Get-ChildItem .\SignOutput -Include *.exe -File - if (-not $artifacts) { - Write-Warning "No artifacts found in SignOutput\" - return + # signtool ships with Windows Kits; version dir varies. Find the + # newest x64 build present rather than hardcoding 10.0.22621.0. 
+ $signtool = Get-ChildItem "${env:ProgramFiles(x86)}\Windows Kits\10\bin" ` + -Recurse -Filter signtool.exe -ErrorAction SilentlyContinue ` + | Where-Object { $_.FullName -match '\\x64\\' } ` + | Sort-Object FullName -Descending ` + | Select-Object -First 1 -ExpandProperty FullName + if (-not $signtool) { throw "signtool.exe not found in any Windows Kits 10 bin\*\x64\" } + Write-Host "Using signtool: $signtool" + + & $signtool verify /pa /v signed\hello-agent.exe + if ($LASTEXITCODE -ne 0) { throw "signtool verify failed ($LASTEXITCODE)" } + + $sig = Get-AuthenticodeSignature signed\hello-agent.exe + if ($sig.Status -ne 'Valid') { + throw "AuthenticodeSignature.Status = $($sig.Status); $($sig.StatusMessage)" } - $unsigned = @() - foreach ($f in $artifacts) { - $sig = Get-AuthenticodeSignature -FilePath $f.FullName - $size = '{0,8:N0}' -f $f.Length - switch ($sig.Status) { - 'Valid' { - Write-Host ("[ SIGNED ] {0} ({1} bytes) signed by: {2}" -f $f.Name, $size, $sig.SignerCertificate.Subject) - } - 'NotSigned' { - Write-Host ("[UNSIGNED] {0} ({1} bytes)" -f $f.Name, $size) - $unsigned += $f.Name - } - default { - Write-Host ("[ {0,-7} ] {1} ({2} bytes) -- {3}" -f $sig.Status, $f.Name, $size, $sig.StatusMessage) - $unsigned += $f.Name - } - } - } - if ($unsigned.Count -gt 0) { - $list = $unsigned -join ', ' - Write-Host "::warning title=Unsigned artifacts::$list -- SmartScreen will warn end users. Wire up signing before distributing." 
+ if (-not $sig.TimeStamperCertificate) { + throw "no RFC 3161 timestamp present — would expire with the cert" } - - name: Upload artifacts + Write-Host "" + Write-Host "Signed by: $($sig.SignerCertificate.Subject)" + Write-Host "Cert expires: $($sig.SignerCertificate.NotAfter.ToString('u'))" + Write-Host "Timestamp: $($sig.TimeStamperCertificate.Subject)" + Write-Host "TS expires: $($sig.TimeStamperCertificate.NotAfter.ToString('u'))" + + - name: Stage final release artifact + shell: pwsh + env: + VERSION_TAG: ${{ needs.build-x64.outputs.version_tag }} + # Final artifact is named with the canonical "hello-agent-<version_tag>-x86_64.exe" + # form so it lands in MDM / release storage with a stable, versioned + # filename rather than a generic "hello-agent.exe". + run: | + if (-not $env:VERSION_TAG) { throw "build-x64 did not export version_tag" } + New-Item -ItemType Directory -Force -Path .\SignOutput | Out-Null + $final = "hello-agent-$env:VERSION_TAG-x86_64.exe" + Copy-Item -Force signed\hello-agent.exe ".\SignOutput\$final" + $h = Get-FileHash ".\SignOutput\$final" -Algorithm SHA256 + $hash = $h.Hash.ToLower() + # `<hash>  <filename>\n` is the standard `sha256sum -c` format — + # lowercase hex, two spaces, filename, LF terminator. Out-File + # would write CRLF; WriteAllText with explicit "`n" matches what + # GNU coreutils emits, so `sha256sum -c` works without complaint.
+ [IO.File]::WriteAllText( + "$PWD\SignOutput\$final.sha256", + "$hash $final`n") + Write-Host "::notice title=Release SHA-256::$hash $final" + + - name: Upload final release artifact uses: actions/upload-artifact@v3 with: name: hello-agent-windows-x64-${{ github.sha }} - path: SignOutput/hello-agent-*.exe + path: SignOutput/hello-agent-*.exe* if-no-files-found: error - retention-days: 14 + retention-days: 90 diff --git a/ci/runners/linux/provision.sh b/ci/runners/linux/provision.sh index aa7d06c..f7ac6d2 100755 --- a/ci/runners/linux/provision.sh +++ b/ci/runners/linux/provision.sh @@ -26,7 +26,14 @@ NODE_MAJOR="20" # act_runner spawns Node for JS actions (upload/downl # ---- defaults ---- RUNNER_NAME="$(hostname)-helloagent-sign" -RUNNER_LABELS="self-hosted,linux,signing" +# Label suffix `:host` tells act_runner to run jobs directly on this host +# rather than inside a Docker container (the Linux runner default). The +# signing runner deliberately has no Docker daemon — its only job is to +# call osslsigncode and upload, which doesn't need container isolation +# beyond the LXC + systemd sandbox we already enforce. The workflow's +# `runs-on: [self-hosted, linux, signing]` matches on label name, so the +# `:host` qualifier is invisible to workflow authors. +RUNNER_LABELS="self-hosted:host,linux:host,signing:host" SERVICE_USER="hello-signer" PKI_DIR="/etc/pki/hello-agent" GITEA_URL="" @@ -85,10 +92,49 @@ fi log "osslsigncode $ver OK" # ---- 2. dedicated runner user ---- +# We pin the user's home to RUNNER_DIR (defined just below) rather +# than letting useradd default to /home/$SERVICE_USER. Two reasons: +# +# 1. The systemd unit sets ProtectHome=yes, which makes /home, /root, +# and /run/user inaccessible and empty. If HOME points into /home, anything +# act_runner spawns (Node for JS actions, etc.) inherits a HOME path +# that doesn't exist from the sandbox's view, and crashes on first +# cache write with "mkdir /home/<user>: permission denied". +# 2.
The runner user has no real "home" — it's a system account that +# exists only to run a daemon. Pointing HOME at /var/lib/gitea-runner +# reflects what's actually true. +# +# RUNNER_DIR is defined here rather than in section 4 because the path +# has to be baked into /etc/passwd when the user is created. Section 4 +# reuses this value, so this is the only place to change it. +RUNNER_DIR=/var/lib/gitea-runner +mkdir -p "$RUNNER_DIR" + if ! id -u "$SERVICE_USER" >/dev/null 2>&1; then - log "Creating system user $SERVICE_USER" + log "Creating system user $SERVICE_USER (home=$RUNNER_DIR)" # No login shell on purpose: this user only runs systemd's exec, never logs in. - useradd --system --create-home --shell /usr/sbin/nologin "$SERVICE_USER" + # --no-create-home: we already mkdir'd RUNNER_DIR, so useradd has nothing + # to create and we don't want /etc/skel copied into it. + useradd --system \ + --home-dir "$RUNNER_DIR" \ + --no-create-home \ + --shell /usr/sbin/nologin \ + "$SERVICE_USER" +else + # Existing user from a pre-fix provision run: re-point home to + # RUNNER_DIR if it isn't already. Fixes deployments that hit the + # ProtectHome=yes / HOME=/home/<user> mismatch. + current_home="$(getent passwd "$SERVICE_USER" | cut -d: -f6)" + if [[ "$current_home" != "$RUNNER_DIR" ]]; then + log "Re-pointing $SERVICE_USER home: $current_home -> $RUNNER_DIR" + usermod --home "$RUNNER_DIR" "$SERVICE_USER" + # If the legacy home is empty (the common case — runner state lives + # under RUNNER_DIR, not under /home), remove it. If it has content + # for some reason, leave it alone for the operator to inspect. + if [[ -d "$current_home" && -z "$(ls -A "$current_home" 2>/dev/null)" ]]; then + rmdir "$current_home" || true + fi + fi fi RUNNER_HOME="$(getent passwd "$SERVICE_USER" | cut -d: -f6)" @@ -116,8 +162,9 @@ chown root:root "$PKI_DIR/chain.pem"; chmod 0444 "$PKI_DIR/chain.pem" chown root:"$SERVICE_USER" "$PKI_DIR/codesign.key"; chmod 0400 "$PKI_DIR/codesign.key" # ---- 4.
act_runner ---- -RUNNER_DIR=/var/lib/gitea-runner -mkdir -p "$RUNNER_DIR" +# RUNNER_DIR was already defined and mkdir'd in section 2 (we needed it +# before useradd to set the user's home). Just re-assert ownership now +# that the user exists. chown -R "$SERVICE_USER:$SERVICE_USER" "$RUNNER_DIR" if [[ ! -x "$RUNNER_DIR/act_runner" ]]; then @@ -145,6 +192,32 @@ if [[ ! -f "$RUNNER_DIR/.runner" ]]; then " fi +# act_runner config.yaml: pin host-mode workdir under RUNNER_DIR. +# +# Without this, host-mode jobs default to /workspace/<owner>/<repo> as +# $GITHUB_WORKSPACE — a path that doesn't exist and, under the systemd +# ProtectSystem=strict + ReadWritePaths=$RUNNER_DIR sandbox below, can't +# be created. The first JS action that writes there (e.g. actions/download- +# artifact populating ./incoming) fails with EROFS and the job dies before +# osslsigncode is ever invoked. +WORKDIR_PARENT="$RUNNER_DIR/workspace" +install -d -m 0755 -o "$SERVICE_USER" -g "$SERVICE_USER" "$WORKDIR_PARENT" + +CONFIG_FILE="$RUNNER_DIR/config.yaml" +if [[ ! -f "$CONFIG_FILE" ]]; then + log "Writing act_runner config at $CONFIG_FILE" + cat > "$CONFIG_FILE" <