Files
nix-config/common/ntfy-alerts.nix
googlebot a697ea10ad Add daily ZFS health check with ntfy alerts and introduce ntfy role
Add a zfs-alerts module that runs a daily health check on ZFS machines,
sending detailed ntfy notifications for degraded pools, data errors, or
drive errors. Introduce an "ntfy" system role to decouple ntfy alerting
from the server/personal roles, and assign it to all machines.
2026-02-22 17:17:40 -08:00

58 lines
1.7 KiB
Nix

{ config, lib, pkgs, ... }:
let
cfg = config.ntfy-alerts;
in
{
options.ntfy-alerts = {
serverUrl = lib.mkOption {
type = lib.types.str;
default = "https://ntfy.neet.dev";
description = "Base URL of the ntfy server.";
};
topic = lib.mkOption {
type = lib.types.str;
default = "service-failures";
description = "ntfy topic to publish alerts to.";
};
};
config = lib.mkIf config.thisMachine.hasRole."ntfy" {
age.secrets.ntfy-token.file = ../secrets/ntfy-token.age;
systemd.services."ntfy-failure@" = {
description = "Send ntfy alert for failed unit %i";
wants = [ "network-online.target" ];
after = [ "network-online.target" ];
serviceConfig = {
Type = "oneshot";
EnvironmentFile = "/run/agenix/ntfy-token";
ExecStart = "${pkgs.writeShellScript "ntfy-failure-notify" ''
unit="$1"
${lib.getExe pkgs.curl} \
--fail --silent --show-error \
--max-time 30 --retry 3 \
-H "Authorization: Bearer $NTFY_TOKEN" \
-H "Title: Service failure on ${config.networking.hostName}" \
-H "Priority: high" \
-H "Tags: rotating_light" \
-d "Unit $unit failed at $(date +%c)" \
"${cfg.serverUrl}/${cfg.topic}"
''} %i";
};
};
# Apply OnFailure to all services via a systemd drop-in
systemd.packages = [
(pkgs.runCommand "ntfy-on-failure-dropin" { } ''
mkdir -p $out/lib/systemd/system/service.d
cat > $out/lib/systemd/system/service.d/ntfy-on-failure.conf <<'EOF'
[Unit]
OnFailure=ntfy-failure@%p.service
EOF
'')
];
};
}