# NixOS module providing `custom.monitoring.loki` (Loki server with an
# alerting ruler fed from declarative rules) and `custom.monitoring.promtail`
# (Promtail shipping the systemd journal to the Loki server).
{
  pkgs,
  config,
  lib,
  ...
}:
let
  inherit (lib)
    mkOption
    mkEnableOption
    mkIf
    mkMerge
    types
    literalExpression
    ;
  inherit (config.my-lib.settings)
    alertmanagerPort
    ;

  cfg = config.custom.monitoring;
  lokiPort = 3100;

  # DNS suffix shared by every host name used in this module.
  domainSuffix = "coho-tet.ts.net";
  # Address this machine's Loki/Promtail HTTP servers bind to.
  localAddress = "${config.networking.hostName}.${domainSuffix}";
  # Host that Promtail pushes logs to.
  lokiServerAddress = "thorite.${domainSuffix}";
in
{
  options = {
    custom.monitoring = {
      loki = {
        enable = mkEnableOption "loki";

        rules = mkOption {
          type = types.attrsOf (
            types.submodule {
              options = {
                # Required: no default, because `null` is not a valid `types.str`
                # value and would only surface as a confusing type error.
                expr = mkOption {
                  type = types.str;
                  description = ''
                    Loki alert expression.
                  '';
                  example = ''count_over_time({job=~"secure"} |="sshd[" |~": Failed|: Invalid|: Connection closed by authenticating user" | __error__="" [15m]) > 15'';
                };

                # Required, for the same reason as `expr`.
                description = mkOption {
                  type = types.str;
                  description = ''
                    Loki alert message.
                  '';
                  example = "Prometheus encountered value {{ $value }} with {{ $labels }}";
                };

                labels = mkOption {
                  type = types.nullOr (types.attrsOf types.str);
                  description = ''
                    Additional alert labels.
                  '';
                  example = literalExpression ''
                    { severity = "page"; }
                  '';
                  default = { };
                };

                time = mkOption {
                  type = types.str;
                  description = ''
                    Time until the alert is fired.
                  '';
                  example = "5m";
                  default = "2m";
                };
              };
            }
          );
          description = ''
            Defines the loki rules.
          '';
          default = { };
        };
      };

      promtail.enable = mkEnableOption "promtail";
    };
  };

  config = mkMerge [
    (
      let
        # Single source of truth for Loki's state directory so the ruler
        # configuration and the tmpfiles rules below cannot drift apart.
        lokiDir = config.services.loki.dataDir;

        # One rule group holding every entry of `custom.monitoring.loki.rules`;
        # the attribute name becomes the alert name.
        rulerConfig = {
          groups = [
            {
              name = "alerting-rules";
              rules = lib.mapAttrsToList (name: opts: {
                alert = name;
                inherit (opts) expr labels;
                for = opts.time;
                annotations.description = opts.description;
              }) cfg.loki.rules;
            }
          ];
        };

        # Serialized as JSON, which is written to a file named ruler.yml.
        rulerFile = pkgs.writeText "ruler.yml" (builtins.toJSON rulerConfig);
      in
      mkIf cfg.loki.enable {
        services.loki = {
          enable = true;
          configuration = {
            auth_enabled = false;

            server.http_listen_address = localAddress;
            server.http_listen_port = lokiPort;

            common = {
              ring = {
                instance_addr = localAddress;
                kvstore.store = "inmemory";
              };
              replication_factor = 1;
              path_prefix = lokiDir;
            };

            schema_config.configs = [
              {
                from = "2024-12-01";
                store = "boltdb-shipper";
                object_store = "filesystem";
                schema = "v13";
                index = {
                  prefix = "index_";
                  period = "24h";
                };
              }
            ];

            storage_config = {
              filesystem.directory = "${lokiDir}/chunks";
            };

            limits_config = {
              reject_old_samples = true;
              reject_old_samples_max_age = "168h";
              allow_structured_metadata = false;
            };

            ruler = {
              storage = {
                type = "local";
                local.directory = "${lokiDir}/rules";
              };
              rule_path = "${lokiDir}/rules-temp";
              enable_api = true;
              alertmanager_url = "http://127.0.0.1:${toString alertmanagerPort}";
            };
          };
        };

        systemd.tmpfiles.rules = [
          "d ${lokiDir} 0700 loki loki - -"
          "d ${lokiDir}/rules-temp 0700 loki loki - -"
          "d ${lokiDir}/rules 0700 loki loki - -"
          # NOTE(review): "fake" is presumably the tenant directory Loki uses
          # when auth_enabled = false — confirm against the Loki ruler docs.
          "d ${lokiDir}/rules/fake 0700 loki loki - -"
          # `L+` (not `L`): replace an existing symlink, otherwise a changed
          # rule set (new store path) would never become visible to Loki.
          "L+ ${lokiDir}/rules/fake/ruler.yml - - - - ${rulerFile}"
        ];

        # Restart Loki whenever the generated rule file changes.
        systemd.services.loki.restartTriggers = [ rulerFile ];
      }
    )

    (mkIf cfg.promtail.enable {
      services.promtail = {
        enable = true;
        configuration = {
          server = {
            http_listen_address = localAddress;
            http_listen_port = 28183;
            grpc_listen_port = 0;
          };

          positions.filename = "/tmp/positions.yml";

          clients = [
            {
              url = "http://${lokiServerAddress}:${toString lokiPort}/loki/api/v1/push";
            }
          ];

          scrape_configs = [
            {
              # Copied from Mic92's config
              job_name = "journal";
              journal = {
                max_age = "12h";
                json = true;
                labels.job = "systemd-journal";
              };
              pipeline_stages = [
                {
                  json.expressions = {
                    transport = "_TRANSPORT";
                    unit = "_SYSTEMD_UNIT";
                    msg = "MESSAGE";
                    coredump_cgroup = "COREDUMP_CGROUP";
                    coredump_exe = "COREDUMP_EXE";
                    coredump_cmdline = "COREDUMP_CMDLINE";
                    coredump_uid = "COREDUMP_UID";
                    coredump_gid = "COREDUMP_GID";
                  };
                }
                {
                  # Set the unit (defaulting to the transport like audit and kernel)
                  template = {
                    source = "unit";
                    template = "{{if .unit}}{{.unit}}{{else}}{{.transport}}{{end}}";
                  };
                }
                {
                  # Extract the last cgroup path component into `coredump_unit`;
                  # the named group is what the `labels.coredump_unit` stage
                  # below picks up.
                  regex = {
                    expression = "(?P<coredump_unit>[^/]+)$";
                    source = "coredump_cgroup";
                  };
                }
                {
                  template = {
                    source = "msg";
                    # FIXME would be cleaner to have this in a match block, but could not get it to work
                    template = "{{if .coredump_exe}}{{.coredump_exe}} core dumped (user: {{.coredump_uid}}/{{.coredump_gid}}, command: {{.coredump_cmdline}}){{else}}{{.msg}}{{end}}";
                  };
                }
                {
                  labels.coredump_unit = "coredump_unit";
                }
                {
                  # Normalize session IDs (session-1234.scope -> session.scope) to limit number of label values
                  replace = {
                    source = "unit";
                    # The dot is escaped so it only matches a literal ".".
                    expression = "^(session-\\d+\\.scope)$";
                    replace = "session.scope";
                  };
                }
                {
                  labels.unit = "unit";
                }
                {
                  # Write the proper message instead of JSON
                  output.source = "msg";
                }
                # silence nscd:
                # ignore random portscans on the internet
                {
                  drop.expression = "refused connection: IN=";
                }
              ];
              relabel_configs = [
                {
                  source_labels = [ "__journal__hostname" ];
                  target_label = "host";
                }
              ];
            }
            # {
            #   job_name = "caddy";
            #   static_configs = [
            #     {
            #       targets = [ "localhost" ];
            #       labels = {
            #         job = "caddy";
            #         __path__ = "/var/log/caddy/*log";
            #         agent = "caddy-promtail";
            #       };
            #     }
            #   ];
            #   pipeline_stages = [
            #     {
            #       json = {
            #         expressions = {
            #           duration = "duration";
            #           status = "status";
            #         };
            #       };
            #     }
            #     {
            #       labels = {
            #         duration = null;
            #         status = null;
            #       };
            #     }
            #   ];
            # }
          ];
        };
      };

      # Applied whenever Promtail is enabled: Caddy logs as JSON at INFO level.
      services.caddy.logFormat = ''
        format json
        level INFO
      '';
    })
  ];
}