9 Commits

Author SHA1 Message Date
88cfad2a69 Update flake inputs (nixpkgs, home-manager, claude-code-nix)
All checks were successful
Check Flake / check-flake (push) Successful in 2m12s
Auto Update Flake / auto-update (push) Successful in 7m5s
Remove obsolete libreoffice-noto-fonts-subset.patch — upstream nixpkgs
removed the noto-fonts-subset code from the libreoffice derivation.
2026-03-03 22:54:45 -08:00
86a9f777ad Use the hosts overlays in gitea container (for attic patches)
All checks were successful
Check Flake / check-flake (push) Successful in 3m42s
2026-03-03 22:54:14 -08:00
b29e80f3e9 Patch attic-client to retry on push failure
Some checks failed
Check Flake / check-flake (push) Failing after 4m5s
Backport zhaofengli/attic#246 to work around a hyper connection pool
race condition that causes spurious "connection closed before message
completed" errors during cache uploads in CI.
2026-03-03 22:40:27 -08:00
e32834ff7f Prevent ntfy-failure from calling itself
Some checks failed
Check Flake / check-flake (push) Failing after 4m13s
2026-03-03 22:36:58 -08:00
bb39587292 Fix unifi service taking 5+ minutes to shut down
Some checks failed
Check Flake / check-flake (push) Failing after 4m8s
UniFi's Java process crashes during shutdown (Spring context race
condition) leaving mongod orphaned in the cgroup. The upstream module
sets KillSignal=SIGCONT so systemd won't interrupt the graceful
shutdown, but with the default KillMode=control-group this means
mongod also only gets SIGCONT (a no-op) and sits there until the
5-minute timeout triggers SIGKILL.

Switch to KillMode=mixed so the main Java process still gets the
harmless SIGCONT while mongod gets a proper SIGTERM for a clean
database shutdown.
2026-03-03 22:02:21 -08:00
712b52a48d Capture full systemd unit name for ntfy error alerts
2026-03-03 21:46:45 -08:00
c6eeea982e Add ignoredUnits option; skip logrotate failures on s0 because they are spurious
2026-03-03 21:46:19 -08:00
6bd1b4466e Update claude.md
2026-03-03 21:43:36 -08:00
d806d4df0a Increase tinyproxy wait-online timeout to 180s
Some checks failed
Check Flake / check-flake (push) Failing after 5m29s
The bridge takes ~62s to come up on s0, exceeding the 60s timeout
and causing tinyproxy to fail on first start.
2026-03-03 21:04:40 -08:00
12 changed files with 189 additions and 42 deletions

View File

@@ -85,17 +85,3 @@ When adding or removing a web-facing service, update both:
 - Always use `--no-link` when running `nix build`
 - Don't use `nix build --dry-run` unless you only need evaluation — it skips the actual build
 - Avoid `2>&1` on nix commands — it can cause error output to be missed
-## Git Worktrees
-When the user asks you to "start a worktree" or work in a worktree, **do not create one manually** with `git worktree add`. Instead, tell the user to start a new session with:
-```bash
-claude --worktree <name>
-```
-This is the built-in Claude Code worktree workflow. It creates the worktree at `.claude/worktrees/<name>/` with a branch `worktree-<name>` and starts a new Claude session inside it. Cleanup is handled automatically on exit.
-When instructed to work in a git worktree (e.g., via `isolation: "worktree"` on a subagent), you **MUST** do so. If you are unable to create or use a git worktree, you **MUST** stop work immediately and report the failure to the user. Do not fall back to working in the main working tree.
-When applying work from a git worktree back to the main branch, commit in the worktree first, then use `git cherry-pick` from the main working tree to bring the commit over. Do not use `git checkout` or `git apply` to copy files directly. Do **not** automatically apply worktree work to the main branch — always ask the user for approval first.

View File

@@ -235,7 +235,7 @@ in
after = [ "systemd-networkd.service" ]; after = [ "systemd-networkd.service" ];
requires = [ "systemd-networkd.service" ]; requires = [ "systemd-networkd.service" ];
serviceConfig.ExecStartPre = [ serviceConfig.ExecStartPre = [
"+${pkgs.systemd}/lib/systemd/systemd-networkd-wait-online --interface=${cfg.bridgeName}:no-carrier --timeout=60" "+${pkgs.systemd}/lib/systemd/systemd-networkd-wait-online --interface=${cfg.bridgeName}:no-carrier --timeout=180"
]; ];
}; };
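Expressed as plain systemd directives rather than Nix, the change amounts to roughly the following sketch (br0 stands in for `${cfg.bridgeName}`; the binary path is resolved from the systemd package in the real module):

```ini
[Service]
# "+" prefix: run the pre-start check with full privileges even if the
# service itself is sandboxed. Wait up to 180s (previously 60s) for the
# bridge to reach at least the "no-carrier" operational state.
ExecStartPre=+/usr/lib/systemd/systemd-networkd-wait-online --interface=br0:no-carrier --timeout=180
```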

View File

@@ -19,6 +19,12 @@
default = ""; default = "";
description = "Extra arguments to pass to curl (e.g. --proxy http://host:port)."; description = "Extra arguments to pass to curl (e.g. --proxy http://host:port).";
}; };
ignoredUnits = lib.mkOption {
type = lib.types.listOf lib.types.str;
default = [ ];
description = "Unit names to skip failure notifications for.";
};
}; };
config = lib.mkIf config.thisMachine.hasRole."ntfy" { config = lib.mkIf config.thisMachine.hasRole."ntfy" {

View File

@@ -14,6 +14,14 @@ in
       EnvironmentFile = "/run/agenix/ntfy-token";
       ExecStart = "${pkgs.writeShellScript "ntfy-failure-notify" ''
         unit="$1"
+        # Prevent infinite recursion if this service itself fails
+        [[ "$unit" == ntfy-failure@* ]] && exit 0
+        ignored_units=(${lib.concatMapStringsSep " " (u: lib.escapeShellArg u) cfg.ignoredUnits})
+        for ignored in "''${ignored_units[@]}"; do
+          if [[ "$unit" == "$ignored" ]]; then
+            exit 0
+          fi
+        done
         logfile=$(mktemp)
         trap 'rm -f "$logfile"' EXIT
         ${pkgs.systemd}/bin/journalctl -u "$unit" -n 50 --no-pager -o short > "$logfile" 2>/dev/null \
@@ -40,7 +48,7 @@ in
       mkdir -p $out/lib/systemd/system/service.d
       cat > $out/lib/systemd/system/service.d/ntfy-on-failure.conf <<'EOF'
       [Unit]
-      OnFailure=ntfy-failure@%p.service
+      OnFailure=ntfy-failure@%N.service
       EOF
     '')
   ];
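Context for the `%p` to `%N` change in the drop-in: `%p` expands to only the template prefix (the part before the first `@`), so failure alerts for instantiated units dropped the instance name; `%N` is the full unit name minus the type suffix. A sketch of the difference, using the repo's runner container as the failing unit:

```ini
# Drop-in applied to every service. If container@gitea-runner.service fails:
#   OnFailure=ntfy-failure@%p.service  expands to  ntfy-failure@container.service
#   OnFailure=ntfy-failure@%N.service  expands to  ntfy-failure@container@gitea-runner.service
# For plain units such as logrotate.service, %p and %N are both "logrotate".
[Unit]
OnFailure=ntfy-failure@%N.service
```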

View File

@@ -8,6 +8,7 @@
 let
   thisMachineIsARunner = config.thisMachine.hasRole."gitea-actions-runner";
+  hostOverlays = config.nixpkgs.overlays;
   containerName = "gitea-runner";
   giteaRunnerUid = 991;
   giteaRunnerGid = 989;
@@ -32,6 +33,7 @@ in
   config = { config, lib, pkgs, ... }: {
     system.stateVersion = "25.11";
+    nixpkgs.overlays = hostOverlays;
     services.gitea-actions-runner.instances.inst = {
       enable = true;

View File

@@ -13,6 +13,15 @@ in
   services.unifi.unifiPackage = pkgs.unifi;
   services.unifi.mongodbPackage = pkgs.mongodb-7_0;
+
+  # The upstream module sets KillSignal=SIGCONT so systemd doesn't interfere
+  # with UniFi's self-managed shutdown. But UniFi's Java process crashes during
+  # shutdown (Spring context already closed) leaving mongod orphaned in the
+  # cgroup. With the default KillMode=control-group, mongod only gets SIGCONT
+  # (a no-op) and runs until the 5min timeout triggers SIGKILL.
+  # KillMode=mixed sends SIGCONT to the main process but SIGTERM to remaining
+  # children, giving mongod a clean shutdown instead of SIGKILL.
+  systemd.services.unifi.serviceConfig.KillMode = "mixed";
   networking.firewall = lib.mkIf cfg.openMinimalFirewall {
     allowedUDPPorts = [
       3478 # STUN
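For reference, a minimal sketch of the kill-related settings this yields as plain unit directives (the KillSignal line is what the upstream module already sets, assumed unchanged):

```ini
[Service]
# From the upstream NixOS module: don't interrupt UniFi's self-managed shutdown
KillSignal=SIGCONT
# Added by this commit: KillSignal now goes only to the main Java process,
# so the rest of the cgroup (mongod) is no longer limited to a no-op SIGCONT
KillMode=mixed
```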

flake.lock generated — 18 changed lines
View File

@@ -53,11 +53,11 @@
       ]
     },
     "locked": {
-      "lastModified": 1772252645,
-      "narHash": "sha256-SVP3BYv/tY19P7mh0aG2Pgq4M/CynQEnV4y+57Ed91g=",
+      "lastModified": 1772587858,
+      "narHash": "sha256-w0/XBU20BdBeEIJ9i3ecr9Lc6c8uQaXUn/ri+aOsyJk=",
       "owner": "sadjow",
       "repo": "claude-code-nix",
-      "rev": "42c9207e79f1e6b8b95b54a64c10452275717466",
+      "rev": "0a5fc14be38fabfcfff18db749b63c9c15726765",
       "type": "github"
     },
     "original": {
@@ -228,11 +228,11 @@
       ]
     },
     "locked": {
-      "lastModified": 1772380461,
-      "narHash": "sha256-O3ukj3Bb3V0Tiy/4LUfLlBpWypJ9P0JeUgsKl2nmZZY=",
+      "lastModified": 1772569491,
+      "narHash": "sha256-bdr6ueeXO1Xg91sFkuvaysYF0mVdwHBpdyhTjBEWv+s=",
       "owner": "nix-community",
       "repo": "home-manager",
-      "rev": "f140aa04d7d14f8a50ab27f3691b5766b17ae961",
+      "rev": "924e61f5c2aeab38504028078d7091077744ab17",
       "type": "github"
     },
     "original": {
@@ -301,11 +301,11 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1772198003,
-        "narHash": "sha256-I45esRSssFtJ8p/gLHUZ1OUaaTaVLluNkABkk6arQwE=",
+        "lastModified": 1772542754,
+        "narHash": "sha256-WGV2hy+VIeQsYXpsLjdr4GvHv5eECMISX1zKLTedhdg=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "dd9b079222d43e1943b6ebd802f04fd959dc8e61",
+        "rev": "8c809a146a140c5c8806f13399592dbcb1bb5dc4",
         "type": "github"
       },
       "original": {

View File

@@ -139,7 +139,6 @@
     src = nixpkgs;
     patches = [
       ./patches/dont-break-nix-serve.patch
-      ./patches/libreoffice-noto-fonts-subset.patch
     ];
   };
   patchedNixpkgs = nixpkgs.lib.fix (self: (import "${patchedNixpkgsSrc}/flake.nix").outputs { self = nixpkgs; });

View File

@@ -9,6 +9,8 @@
   networking.hostName = "s0";
+
+  ntfy-alerts.ignoredUnits = [ "logrotate" ];
   # system.autoUpgrade.enable = true;
   nix.gc.automatic = lib.mkForce false; # allow the nix store to serve as a build cache

View File

@@ -16,6 +16,14 @@ in
     nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ prev.writableTmpDirAsHomeHook ];
   });
+  # Retry on push failure to work around hyper connection pool race condition.
+  # https://github.com/zhaofengli/attic/pull/246
+  attic-client = prev.attic-client.overrideAttrs (old: {
+    patches = (old.patches or [ ]) ++ [
+      ../patches/attic-client-push-retry.patch
+    ];
+  });
   # Add --zeroconf-port support to Spotify Connect plugin so librespot
   # binds to a fixed port that can be opened in the firewall.
   music-assistant = prev.music-assistant.overrideAttrs (old: {

View File

@@ -0,0 +1,143 @@
diff --git a/attic/src/api/v1/upload_path.rs b/attic/src/api/v1/upload_path.rs
index 5b1231e5..cb90928c 100644
--- a/attic/src/api/v1/upload_path.rs
+++ b/attic/src/api/v1/upload_path.rs
@@ -25,7 +25,7 @@ pub const ATTIC_NAR_INFO_PREAMBLE_SIZE: &str = "X-Attic-Nar-Info-Preamble-Size";
/// Regardless of client compression, the server will always decompress
/// the NAR to validate the NAR hash before applying the server-configured
/// compression again.
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct UploadPathNarInfo {
/// The name of the binary cache to upload to.
pub cache: CacheName,
diff --git a/client/src/push.rs b/client/src/push.rs
index 309bd4b6..f3951d2b 100644
--- a/client/src/push.rs
+++ b/client/src/push.rs
@@ -560,57 +560,83 @@ pub async fn upload_path(
);
let bar = mp.add(ProgressBar::new(path_info.nar_size));
bar.set_style(style);
- let nar_stream = NarStreamProgress::new(store.nar_from_path(path.to_owned()), bar.clone())
- .map_ok(Bytes::from);
- let start = Instant::now();
- match api
- .upload_path(upload_info, nar_stream, force_preamble)
- .await
- {
- Ok(r) => {
- let r = r.unwrap_or(UploadPathResult {
- kind: UploadPathResultKind::Uploaded,
- file_size: None,
- frac_deduplicated: None,
- });
-
- let info_string: String = match r.kind {
- UploadPathResultKind::Deduplicated => "deduplicated".to_string(),
- _ => {
- let elapsed = start.elapsed();
- let seconds = elapsed.as_secs_f64();
- let speed = (path_info.nar_size as f64 / seconds) as u64;
+ // Create a new stream for each retry attempt
+ let bar_ref = &bar;
+ let nar_stream = move || {
+ NarStreamProgress::new(store.nar_from_path(path.to_owned()), bar_ref.clone())
+ .map_ok(Bytes::from)
+ };
- let mut s = format!("{}/s", HumanBytes(speed));
+ let start = Instant::now();
+ let mut retries = 0;
+ const MAX_RETRIES: u32 = 3;
+ const RETRY_DELAY: Duration = Duration::from_millis(250);
- if let Some(frac_deduplicated) = r.frac_deduplicated {
- if frac_deduplicated > 0.01f64 {
- s += &format!(", {:.1}% deduplicated", frac_deduplicated * 100.0);
+ loop {
+ let result = api
+ .upload_path(upload_info.clone(), nar_stream(), force_preamble)
+ .await;
+ match result {
+ Ok(r) => {
+ let r = r.unwrap_or(UploadPathResult {
+ kind: UploadPathResultKind::Uploaded,
+ file_size: None,
+ frac_deduplicated: None,
+ });
+
+ let info_string: String = match r.kind {
+ UploadPathResultKind::Deduplicated => "deduplicated".to_string(),
+ _ => {
+ let elapsed = start.elapsed();
+ let seconds = elapsed.as_secs_f64();
+ let speed = (path_info.nar_size as f64 / seconds) as u64;
+
+ let mut s = format!("{}/s", HumanBytes(speed));
+
+ if let Some(frac_deduplicated) = r.frac_deduplicated {
+ if frac_deduplicated > 0.01f64 {
+ s += &format!(", {:.1}% deduplicated", frac_deduplicated * 100.0);
+ }
}
+
+ s
}
+ };
- s
+ mp.suspend(|| {
+ eprintln!(
+ "✅ {} ({})",
+ path.as_os_str().to_string_lossy(),
+ info_string
+ );
+ });
+ bar.finish_and_clear();
+
+ return Ok(());
+ }
+ Err(e) => {
+ if retries < MAX_RETRIES {
+ retries += 1;
+ mp.suspend(|| {
+ eprintln!(
+ "❕ {}: Upload failed, retrying ({}/{})...",
+ path.as_os_str().to_string_lossy(),
+ retries,
+ MAX_RETRIES
+ );
+ });
+ tokio::time::sleep(RETRY_DELAY).await;
+ continue;
}
- };
- mp.suspend(|| {
- eprintln!(
- "✅ {} ({})",
- path.as_os_str().to_string_lossy(),
- info_string
- );
- });
- bar.finish_and_clear();
+ mp.suspend(|| {
+ eprintln!("❌ {}: {}", path.as_os_str().to_string_lossy(), e);
+ });
+ bar.finish_and_clear();
- Ok(())
- }
- Err(e) => {
- mp.suspend(|| {
- eprintln!("❌ {}: {}", path.as_os_str().to_string_lossy(), e);
- });
- bar.finish_and_clear();
- Err(e)
+ return Err(e);
+ }
}
}
}

View File

@@ -1,16 +0,0 @@
Fix notoSubset glob for noto-fonts >= 2026.02.01.
noto-fonts switched from variable fonts (NotoSansArabic[wdth,wght].ttf)
to static fonts (NotoSansArabic.ttf). The old glob pattern only matched
files with brackets in the name, causing the cp to fail.
--- a/pkgs/applications/office/libreoffice/default.nix
+++ b/pkgs/applications/office/libreoffice/default.nix
@@ -191,7 +191,7 @@
runCommand "noto-fonts-subset" { } ''
mkdir -p "$out/share/fonts/noto/"
${concatMapStrings (x: ''
- cp "${noto-fonts}/share/fonts/noto/NotoSans${x}["*.[ot]tf "$out/share/fonts/noto/"
+ cp "${noto-fonts}/share/fonts/noto/NotoSans${x}"*.[ot]tf "$out/share/fonts/noto/"
'') suffixes}
'';