modules/monitoring: add alert rules to loki
This commit is contained in:
parent
5b6f6ce735
commit
83f7700949
3 changed files with 130 additions and 40 deletions
modules/nixos
|
@ -71,7 +71,7 @@ in
|
|||
|
||||
services.restic.server.prometheus = true;
|
||||
|
||||
# miniflux
|
||||
# miniflux
|
||||
sops.templates."miniflux_metrics_env" = {
|
||||
content = ''
|
||||
METRICS_COLLECTOR=1
|
||||
|
|
|
@ -1,68 +1,158 @@
|
|||
{
|
||||
pkgs,
|
||||
config,
|
||||
lib,
|
||||
my-lib,
|
||||
...
|
||||
}:
|
||||
let
|
||||
inherit (lib)
|
||||
mkOption
|
||||
mkEnableOption
|
||||
mkIf
|
||||
mkMerge
|
||||
types
|
||||
literalExpression
|
||||
;
|
||||
inherit (my-lib.settings)
|
||||
alertmanagerPort
|
||||
;
|
||||
cfg = config.custom.monitoring;
|
||||
port-loki = 3100;
|
||||
lokiPort = 3100;
|
||||
in
|
||||
{
|
||||
options = {
|
||||
custom.monitoring = {
|
||||
loki.enable = mkEnableOption "loki";
|
||||
loki = {
|
||||
enable = mkEnableOption "loki";
|
||||
# Alert rules for the Loki ruler; each attribute name is used as the alert name.
rules = mkOption {
  type = types.attrsOf (
    types.submodule {
      options = {
        # Required field: no default is declared, because `null` is not a
        # valid `types.str` value and only produced a confusing type error
        # when the field was left unset.
        condition = mkOption {
          type = types.str;
          description = ''
            Loki alert expression.
          '';
          example = ''count_over_time({job=~"secure"} |="sshd[" |~": Failed|: Invalid|: Connection closed by authenticating user" | __error__="" [15m]) > 15'';
        };
        # Required field, for the same reason as `condition`.
        description = mkOption {
          type = types.str;
          description = ''
            Loki alert message.
          '';
          example = "Loki encountered value {{ $value }} with {{ $labels }}";
        };
        labels = mkOption {
          type = types.nullOr (types.attrsOf types.str);
          description = ''
            Additional alert labels.
          '';
          example = literalExpression ''
            { severity = "page"; }
          '';
          default = { };
        };
        # Emitted as the rule's `for` duration.
        time = mkOption {
          type = types.str;
          description = ''
            How long the condition must hold before the alert is fired.
          '';
          example = "5m";
          default = "2m";
        };
      };
    }
  );
  description = ''
    Defines the loki rules.
  '';
  default = { };
};
|
||||
};
|
||||
promtail.enable = mkEnableOption "promtail";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkMerge [
|
||||
(mkIf cfg.loki.enable {
|
||||
services.loki = {
|
||||
enable = true;
|
||||
configuration = {
|
||||
auth_enabled = false;
|
||||
server.http_listen_address = "${config.networking.hostName}.coho-tet.ts.net";
|
||||
server.http_listen_port = port-loki;
|
||||
|
||||
common = {
|
||||
ring = {
|
||||
instance_addr = "${config.networking.hostName}.coho-tet.ts.net";
|
||||
kvstore.store = "inmemory";
|
||||
};
|
||||
replication_factor = 1;
|
||||
path_prefix = "/var/lib/loki";
|
||||
};
|
||||
|
||||
schema_config.configs = [
|
||||
(
|
||||
let
|
||||
rulerConfig = {
|
||||
groups = [
|
||||
{
|
||||
from = "2024-12-01";
|
||||
store = "boltdb-shipper";
|
||||
object_store = "filesystem";
|
||||
schema = "v13";
|
||||
index = {
|
||||
prefix = "index_";
|
||||
period = "24h";
|
||||
};
|
||||
name = "alerting-rules";
|
||||
# One alerting rule per entry of `cfg.loki.rules`, named after its attribute.
rules = lib.mapAttrsToList (name: opts: {
  alert = name;
  # The rule schema calls the query field `expr`; the module option that
  # carries it is named `condition`.
  expr = opts.condition;
  for = opts.time;
  inherit (opts) labels;
  annotations.description = opts.description;
}) cfg.loki.rules;
|
||||
}
|
||||
];
|
||||
};
|
||||
rulerFile = pkgs.writeText "ruler.yml" (builtins.toJSON rulerConfig);
|
||||
in
|
||||
mkIf cfg.loki.enable {
|
||||
services.loki = {
|
||||
enable = true;
|
||||
configuration = {
|
||||
auth_enabled = false;
|
||||
server.http_listen_address = "${config.networking.hostName}.coho-tet.ts.net";
|
||||
server.http_listen_port = lokiPort;
|
||||
|
||||
storage_config = {
|
||||
filesystem.directory = "/var/lib/loki/chunks";
|
||||
};
|
||||
common = {
|
||||
ring = {
|
||||
instance_addr = "${config.networking.hostName}.coho-tet.ts.net";
|
||||
kvstore.store = "inmemory";
|
||||
};
|
||||
replication_factor = 1;
|
||||
path_prefix = "/var/lib/loki";
|
||||
};
|
||||
|
||||
limits_config = {
|
||||
reject_old_samples = true;
|
||||
reject_old_samples_max_age = "168h";
|
||||
allow_structured_metadata = false;
|
||||
schema_config.configs = [
|
||||
{
|
||||
from = "2024-12-01";
|
||||
store = "boltdb-shipper";
|
||||
object_store = "filesystem";
|
||||
schema = "v13";
|
||||
index = {
|
||||
prefix = "index_";
|
||||
period = "24h";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
storage_config = {
|
||||
filesystem.directory = "/var/lib/loki/chunks";
|
||||
};
|
||||
|
||||
limits_config = {
|
||||
reject_old_samples = true;
|
||||
reject_old_samples_max_age = "168h";
|
||||
allow_structured_metadata = false;
|
||||
};
|
||||
|
||||
ruler = {
|
||||
storage = {
|
||||
type = "local";
|
||||
local.directory = "${config.services.loki.dataDir}/ruler";
|
||||
};
|
||||
rule_path = "${config.services.loki.dataDir}/rules";
|
||||
alertmanager_url = "http://127.0.0.1:${toString alertmanagerPort}";
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
})
|
||||
# Directories used by the ruler plus the link to the generated rule file.
# The local ruler store is laid out as <directory>/<tenant>/<file>; with
# `auth_enabled = false` Loki uses the tenant id "fake".
systemd.tmpfiles.rules = [
  "d /var/lib/loki 0700 loki loki - -"
  "d /var/lib/loki/ruler 0700 loki loki - -"
  "d /var/lib/loki/ruler/fake 0700 loki loki - -"
  "d /var/lib/loki/rules 0700 loki loki - -"
  # `L+` replaces an existing link, so a regenerated rule file (new store
  # path) does not leave a stale symlink behind.
  "L+ /var/lib/loki/ruler/fake/ruler.yml - - - - ${rulerFile}"
];
|
||||
systemd.services.loki.reloadTriggers = [ rulerFile ];
|
||||
}
|
||||
)
|
||||
(mkIf cfg.promtail.enable {
|
||||
services.promtail = {
|
||||
enable = true;
|
||||
|
@ -78,7 +168,7 @@ in
|
|||
|
||||
clients = [
|
||||
{
|
||||
url = "http://thorite.coho-tet.ts.net:${toString port-loki}/loki/api/v1/push";
|
||||
url = "http://thorite.coho-tet.ts.net:${toString lokiPort}/loki/api/v1/push";
|
||||
}
|
||||
];
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ let
|
|||
echo "Creating snapshot for ${rootDir}"
|
||||
subvolumes=$(${pkgs.btrfs-progs}/bin/btrfs subvolume list -o "${rootDir}" | ${awk} '{print $NF}')
|
||||
mkdir -p "${backupDir}"
|
||||
${pkgs.btrfs-progs}/bin/btrfs subvolume snapshot -r "${rootDir}" "${backupDir}/rootfs"
|
||||
${pkgs.btrfs-progs}/bin/btrfs subvolume snapshot -r "${rootDir}" "${backupDir}/rootDirectory"
|
||||
for subvol in $subvolumes; do
|
||||
${continueIfInExclude}
|
||||
[[ /"$subvol" == "${backupDir}"* ]] && continue
|
||||
|
|
Loading…
Add table
Reference in a new issue