thorite: fix loki alerts

This commit is contained in:
xinyangli 2024-12-03 19:35:48 +08:00
parent d9a7b3d48c
commit 947e97ce4e
Signed by: xin
SSH key fingerprint: SHA256:UU5pRTl7NiLFJbWJZa+snLylZSXIz5rgHmwjzv8v4oE
2 changed files with 12 additions and 10 deletions

View file

@ -18,11 +18,11 @@ with my-lib;
enable = true; enable = true;
rules = { rules = {
sshd_closed = { sshd_closed = {
condition = ''count_over_time({unit="sshd.service"} |~ "Connection closed by authenticating user" [15m]) > 25''; expr = ''count_over_time({unit="sshd.service"} |~ "Connection closed by authenticating user" [15m]) > 25'';
description = "More then 25 users have tried logging in the last 15 min without success"; description = "More then 25 login attemps in last 15 min without success";
}; };
unusual_log_volume = { unusual_log_volume = {
condition = ''sum by (unit) (rate({unit=~".+"}[5m])) > 80''; expr = ''sum by (unit) (rate({unit=~".+"}[5m])) > 80'';
description = "Unit {{ $labels.unit }} is logging at an unusually high rate"; description = "Unit {{ $labels.unit }} is logging at an unusually high rate";
}; };
}; };

View file

@ -29,7 +29,7 @@ in
type = types.attrsOf ( type = types.attrsOf (
types.submodule { types.submodule {
options = { options = {
condition = mkOption { expr = mkOption {
type = types.str; type = types.str;
description = '' description = ''
Loki alert expression. Loki alert expression.
@ -85,7 +85,7 @@ in
name = "alerting-rules"; name = "alerting-rules";
rules = lib.mapAttrsToList (name: opts: { rules = lib.mapAttrsToList (name: opts: {
alert = name; alert = name;
inherit (opts) condition labels; inherit (opts) expr labels;
for = opts.time; for = opts.time;
annotations.description = opts.description; annotations.description = opts.description;
}) cfg.loki.rules; }) cfg.loki.rules;
@ -137,20 +137,22 @@ in
ruler = { ruler = {
storage = { storage = {
type = "local"; type = "local";
local.directory = "${config.services.loki.dataDir}/ruler"; local.directory = "${config.services.loki.dataDir}/rules";
}; };
rule_path = "${config.services.loki.dataDir}/rules"; rule_path = "${config.services.loki.dataDir}/rules-temp";
enable_api = true;
alertmanager_url = "http://127.0.0.1:${toString alertmanagerPort}"; alertmanager_url = "http://127.0.0.1:${toString alertmanagerPort}";
}; };
}; };
}; };
systemd.tmpfiles.rules = [ systemd.tmpfiles.rules = [
"d /var/lib/loki 0700 loki loki - -" "d /var/lib/loki 0700 loki loki - -"
"d /var/lib/loki/ruler 0700 loki loki - -" "d /var/lib/loki/rules-temp 0700 loki loki - -"
"d /var/lib/loki/rules 0700 loki loki - -" "d /var/lib/loki/rules 0700 loki loki - -"
"L /var/lib/loki/ruler/ruler.yml - - - - ${rulerFile}" "d /var/lib/loki/rules/fake 0700 loki loki - -"
"L /var/lib/loki/rules/fake/ruler.yml - - - - ${rulerFile}"
]; ];
systemd.services.loki.reloadTriggers = [ rulerFile ]; systemd.services.loki.restartTriggers = [ rulerFile ];
} }
) )
(mkIf cfg.promtail.enable { (mkIf cfg.promtail.enable {