diff --git a/modules/nixos/monitor/exporters.nix b/modules/nixos/monitor/exporters.nix
index 0c9b95d..b48209e 100644
--- a/modules/nixos/monitor/exporters.nix
+++ b/modules/nixos/monitor/exporters.nix
@@ -71,7 +71,7 @@ in
 
       services.restic.server.prometheus = true;
 
-    # miniflux
+      # miniflux
       sops.templates."miniflux_metrics_env" = {
         content = ''
           METRICS_COLLECTOR=1
diff --git a/modules/nixos/monitor/loki.nix b/modules/nixos/monitor/loki.nix
index 324235f..c3e0afd 100644
--- a/modules/nixos/monitor/loki.nix
+++ b/modules/nixos/monitor/loki.nix
@@ -1,68 +1,174 @@
 {
+  pkgs,
   config,
   lib,
+  my-lib,
   ...
 }:
 let
   inherit (lib)
+    mkOption
     mkEnableOption
     mkIf
     mkMerge
+    types
+    literalExpression
+    ;
+  inherit (my-lib.settings)
+    alertmanagerPort
     ;
   cfg = config.custom.monitoring;
-  port-loki = 3100;
+  lokiPort = 3100;
 in
 {
   options = {
     custom.monitoring = {
-      loki.enable = mkEnableOption "loki";
+      loki = {
+        enable = mkEnableOption "loki";
+        # Alerting rules evaluated by the Loki ruler, keyed by alert name.
+        rules = mkOption {
+          type = types.attrsOf (
+            types.submodule {
+              options = {
+                # Required (no default): null is not a valid `types.str` value.
+                condition = mkOption {
+                  type = types.str;
+                  description = ''
+                    Loki alert expression.
+                  '';
+                  example = ''count_over_time({job=~"secure"} |="sshd[" |~": Failed|: Invalid|: Connection closed by authenticating user" | __error__="" [15m]) > 15'';
+                };
+                # Required (no default); becomes the alert's `description` annotation.
+                description = mkOption {
+                  type = types.str;
+                  description = ''
+                    Loki alert message.
+                  '';
+                  example = "Prometheus encountered value {{ $value }} with {{ $labels }}";
+                };
+                labels = mkOption {
+                  type = types.nullOr (types.attrsOf types.str);
+                  description = ''
+                    Additional alert labels.
+                  '';
+                  example = literalExpression ''
+                    { severity = "page"; }
+                  '';
+                  default = { };
+                };
+                time = mkOption {
+                  type = types.str;
+                  description = ''
+                    Time until the alert is fired.
+                  '';
+                  example = "5m";
+                  default = "2m";
+                };
+              };
+            }
+          );
+          description = ''
+            Defines the loki rules.
+          '';
+          default = { };
+        };
+      };
       promtail.enable = mkEnableOption "promtail";
     };
   };
 
   config = mkMerge [
-    (mkIf cfg.loki.enable {
-      services.loki = {
-        enable = true;
-        configuration = {
-          auth_enabled = false;
-          server.http_listen_address = "${config.networking.hostName}.coho-tet.ts.net";
-          server.http_listen_port = port-loki;
-
-          common = {
-            ring = {
-              instance_addr = "${config.networking.hostName}.coho-tet.ts.net";
-              kvstore.store = "inmemory";
-            };
-            replication_factor = 1;
-            path_prefix = "/var/lib/loki";
-          };
-
-          schema_config.configs = [
+    (
+      let
+        inherit (config.services.loki) dataDir;
+        # Root of the ruler's "local" rule storage.
+        rulerStorageDir = "${dataDir}/ruler";
+        # Local storage is laid out as <directory>/<tenant>/<file>; with
+        # auth_enabled = false Loki uses the single built-in tenant "fake".
+        tenantRulesDir = "${rulerStorageDir}/fake";
+        # Prometheus-style rule group generated from cfg.loki.rules.
+        rulerConfig = {
+          groups = [
             {
-              from = "2024-12-01";
-              store = "boltdb-shipper";
-              object_store = "filesystem";
-              schema = "v13";
-              index = {
-                prefix = "index_";
-                period = "24h";
-              };
+              name = "alerting-rules";
+              rules = lib.mapAttrsToList (name: opts: {
+                alert = name;
+                # The rule schema names the query `expr`, not `condition`.
+                expr = opts.condition;
+                inherit (opts) labels;
+                for = opts.time;
+                annotations.description = opts.description;
+              }) cfg.loki.rules;
             }
           ];
+        };
+        # JSON is valid YAML, so the generated file can be used as a rules file.
+        rulerFile = pkgs.writeText "ruler.yml" (builtins.toJSON rulerConfig);
+      in
+      mkIf cfg.loki.enable {
+        services.loki = {
+          enable = true;
+          configuration = {
+            auth_enabled = false;
+            server.http_listen_address = "${config.networking.hostName}.coho-tet.ts.net";
+            server.http_listen_port = lokiPort;
 
-          storage_config = {
-            filesystem.directory = "/var/lib/loki/chunks";
-          };
+            common = {
+              ring = {
+                instance_addr = "${config.networking.hostName}.coho-tet.ts.net";
+                kvstore.store = "inmemory";
+              };
+              replication_factor = 1;
+              path_prefix = "/var/lib/loki";
+            };
 
-          limits_config = {
-            reject_old_samples = true;
-            reject_old_samples_max_age = "168h";
-            allow_structured_metadata = false;
+            schema_config.configs = [
+              {
+                from = "2024-12-01";
+                store = "boltdb-shipper";
+                object_store = "filesystem";
+                schema = "v13";
+                index = {
+                  prefix = "index_";
+                  period = "24h";
+                };
+              }
+            ];
+
+            storage_config = {
+              filesystem.directory = "/var/lib/loki/chunks";
+            };
+
+            limits_config = {
+              reject_old_samples = true;
+              reject_old_samples_max_age = "168h";
+              allow_structured_metadata = false;
+            };
+
+            ruler = {
+              storage = {
+                type = "local";
+                local.directory = rulerStorageDir;
+              };
+              # Directory for the ruler's temporary rule files.
+              rule_path = "${dataDir}/rules";
+              alertmanager_url = "http://127.0.0.1:${toString alertmanagerPort}";
+            };
           };
         };
-      };
-    })
+        systemd.tmpfiles.rules = [
+          "d ${dataDir} 0700 loki loki - -"
+          "d ${rulerStorageDir} 0700 loki loki - -"
+          "d ${tenantRulesDir} 0700 loki loki - -"
+          "d ${dataDir}/rules 0700 loki loki - -"
+          # L+ replaces a stale link, so a rebuilt rules file is picked up.
+          "L+ ${tenantRulesDir}/ruler.yml - - - - ${rulerFile}"
+        ];
+        # Restart (not reload) on rule changes: the nixpkgs loki unit does not
+        # appear to define ExecReload -- NOTE(review): confirm.
+        systemd.services.loki.restartTriggers = [ rulerFile ];
+      }
+    )
     (mkIf cfg.promtail.enable {
       services.promtail = {
         enable = true;
@@ -78,7 +184,7 @@ in
 
           clients = [
             {
-              url = "http://thorite.coho-tet.ts.net:${toString port-loki}/loki/api/v1/push";
+              url = "http://thorite.coho-tet.ts.net:${toString lokiPort}/loki/api/v1/push";
             }
           ];
 
diff --git a/modules/nixos/restic.nix b/modules/nixos/restic.nix
index bef9c44..f07bdfb 100644
--- a/modules/nixos/restic.nix
+++ b/modules/nixos/restic.nix
@@ -39,7 +39,7 @@ let
       echo "Creating snapshot for ${rootDir}"
       subvolumes=$(${pkgs.btrfs-progs}/bin/btrfs subvolume list -o "${rootDir}" | ${awk} '{print $NF}')
       mkdir -p "${backupDir}"
-      ${pkgs.btrfs-progs}/bin/btrfs subvolume snapshot -r "${rootDir}" "${backupDir}/rootfs"
+      ${pkgs.btrfs-progs}/bin/btrfs subvolume snapshot -r "${rootDir}" "${backupDir}/rootDirectory"
       for subvol in $subvolumes; do
         ${continueIfInExclude}
         [[ /"$subvol" == "${backupDir}"* ]] && continue