From d9a7b3d48ccae1144ba8d03a8608e8c0c1639459 Mon Sep 17 00:00:00 2001 From: xinyangli Date: Tue, 3 Dec 2024 16:43:39 +0800 Subject: [PATCH] my-lib/settings: manage settings shared globally --- .sops.yaml | 1 + machines/secrets.yaml | 89 ++++++++++++++++++--------------- overlays/my-lib/default.nix | 8 +++ overlays/my-lib/prometheus.nix | 91 +++++++++++++++++++++++++++------- overlays/my-lib/settings.nix | 5 ++ 5 files changed, 135 insertions(+), 59 deletions(-) create mode 100644 overlays/my-lib/settings.nix diff --git a/.sops.yaml b/.sops.yaml index c092203..ad2d8e4 100644 --- a/.sops.yaml +++ b/.sops.yaml @@ -15,6 +15,7 @@ creation_rules: - age: - *xin - *host-calcite + - *host-weilite - *host-massicot - *host-thorite - *host-biotite diff --git a/machines/secrets.yaml b/machines/secrets.yaml index 69456c4..6d94d7e 100644 --- a/machines/secrets.yaml +++ b/machines/secrets.yaml @@ -10,74 +10,83 @@ sops: - recipient: age1uw059wcwfvd9xuj0hpqzqpeg7qemecspjrsatg37wc7rs2pumfdsgken0c enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSAwMHB1bFQ3dWJIU3NiOVVP - Yi9LZE1PTVdMY1BqS1JHV3VPLzZIY0hGK0NZClNlclVXKzBvNTBrTlhiR0VsaVoz - RlVLNVBEVDgzSXB5ZGxDd3hqNDh2V2MKLS0tIEhBZHFUY3c2VXJBVEVKamZ6TzBa - MlFsNnVEV0xCdlJoRnBhUHF2MmswUEUKNYD9zssGBy9SaKeOMvTz71B6KMPW87cM - tFJzgnQceEQF658lVa5cCzG1gzraCgBtQU15XzC7e8zWI9CHquRRlQ== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSA5SjAzOEozUzh1bzVvaHgr + T2xsVUszTHVSdWIyM3B5TFhtUEFMeVZlYzNrCk5IOWFNbTErbTVkQnNlVllMZWlV + Q2lHZXRIdzBiRFRSZnNUVWd2NXVXVGcKLS0tIERhcjh3VVlqSGxHUHpnc1JzVksv + VXpQVVVCUC9xR3crWm9rTk13LzVhK1EKwiuvwx3ZhcDE+9w7/dR4PrZSSoJMvklT + m7I32dMRk0o9zcl5KYU5L9Hwb+z+EBE34raoGKBF5K4aQcbZQUX3Cw== -----END AGE ENCRYPTED FILE----- - recipient: age1ytwfqfeez3dqtazyjltn7mznccwx3ua8djhned7n8mxqhw4p6e5s97skfa enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBTTnZLTlZQRzc1enVEa1BN - SHdoSi9oOXk4UTV0SlRZS2tLS2FFL3VjNzNNClVWTTNKekF6T0RTUzdEeWhLbHoz - WFZKaHJEaVBWa04zRWRiVnJZRjU0YVEKLS0tIFJVL0FEemowS3V6MmsxbWJMU2I1 - U2NnUnVKdFlRSGVzUFQ4ZFcwL0lWTlkKz1t3yqjgIdMWS/Nsy2nq3oCjOhGDP+UT - L+LAuFExJPV0qlsOG/kCGB/WtCJfnBvcp6vPDBLqjK8NllIX/iPI5g== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSA5R1ZIRlN2b3M2OUQ0T2cw + eE5DTm9KY1NUY1p5eDhLNG4xMDVkVjRyWDNRClp3MTRWeGJMYTczcC9YQTNZdkxx + ejJ3QnhjcUcyUldUNEVqVUh6Z2grd00KLS0tIDVvbDZWbmZPZVhDNHM1K1kzaE95 + aHJqSU16dlJiRGl0VWNMVXVYMmhPb2MKMboq9ShGIJMFVENgLPlQdwdtTOjVb0CC + 4ttM3xWnYkf8416a0OYFrda5l1kfJJzQakbk/tbGcTu1yTcd+6lOtA== + -----END AGE ENCRYPTED FILE----- + - recipient: age17r3fxfmt6hgwe984w4lds9u0cnkf5ttq8hnqt800ayfmx7t8t5gqjddyml + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBVby8wYS9pa0szTlVUS3FI + VWhjaCtyUzNLbkw2VXRlWkVMZlRkeXJMZGlRCnBTWklnZ0Uzd2lTMGt1M2wxZ0px + NFl2RW5hSUZVdHI0aVFRMHJtMFQ3ODAKLS0tIFlYOHVRYVFGbkcvUWRmQitQQnI5 + bG5vemMvcWdpOEtxNGRpS0doQmtuUFkK8Hxl//kOtbEw3jf96ZZ4G1Yb94f4Jeb4 + TfPs7O/ESJY8ovNsoXRQEt99vOR5D1wBzyZBY9E3f2ZzY/uBmup0cw== -----END AGE ENCRYPTED FILE----- - recipient: age1jle2auermhswqtehww9gqada8car5aczrx43ztzqf9wtcld0sfmqzaecta enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBETWpkcjhINktqeGxjdWxz - UTVVNC9kalorcVJOdHpJSkZJNXlGUHZ2VUdrCjRCclBTZnJEZ3JGOVpqS1Y0b0dt - eldFMS91WUc2Y1FnWWZoN0grc01pT0UKLS0tIC96TjlEaVBGRkZhZ0hac2lmbEdI - eHMzTFhsQ0FqY05uUEZSbExCcmdscEkKdxITlc0V5ayq+9fmj77SnEMFxKJhOOta - RfJhOQUv8g3nCN+SsuaOy0TitUCiDWh5XoB0DufEQPcS/kzGZN1Inw== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBPSmRYMkNIdERJZVBxV1p1 + emlqOTBpN3l2WXkzNjRRcFI5NUZDZnQ1WXdnCkRVbm8xais5aGVCTmtSTGxaTXlT + L2ZWQ0p5WFZNRWl5SWVkRUYwc2R3b1UKLS0tIEZEck4yMmJUQWVvNHRJQnpCQTBo + cDJsaG83MTdXWVd2NUpLczhjWTBBZVUK5BxBIYVqkqVLw9LTbnJ8SQWN2i4USdI8 + 8m/hZFXTJ4GI0f795DEmbcZq9xET14aQqta0wSASqwP/5Ld1mo0a0w== -----END AGE ENCRYPTED FILE----- - recipient: age12ng08vjx5jde5ncqutwkd5vm4ygfwy33mzhzwe0lkxzglulgpqusc89r96 enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBydlQ4S1duQU53Wk1nd21K - d2RqM1F0VDFJVXB2aGRTZ2hxczI2V1lndVdrCjArVlE2N0RGZ0htUEZYdVlQMlU5 - SWIwWHVCaWxaQTJMNzg3WC8xRS9IYzgKLS0tIDRvSS8ybVlrSy9zYjQ2NXBaMlZk - Ulg4cUFBejRoS3VEWkRaZEUxMExUeWMKNeq6TN1gaBNU9vAitGttcU+8HmFQipdm - LPwo4/toyf27emb4KGs0AV0Dm4Sxj9S3Xvrv1B+qvhfT638/RIUm2w== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSAwSkhjRTdBWklZUEpUanM0 + Wjl4b2c3K0g0ZUxxMlRrUFhhZzhNRXhPVnpvCmpNWVBNTXNYczV3aWhCd05FOGJ0 + YlNobFhWdStGbDRZV2NlUWV6ZFRVNEkKLS0tIGd1RUR4K21GOEQ0aWtqRi9RREpE + RXBXcXFYUDVXVzN4Q25zSklFU21wbFkKQuTHkgFC5HRPO7/PuVhJzbbHOTPaFXvN + +Y31AK3OAVdUETMEuJ2mk50Bi5BiiUeOnnv1bZ6O+iX0o20ysUseTg== -----END AGE ENCRYPTED FILE----- - recipient: age1v5h946jfke6ae8pcgz52mhj26cacqcpl9dmmrrkf37x55rnq2v3szqctvv enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSB4YXpyOXE3MFovWEQvMVRr - TGVST3U0N2dCVDJGT1A3eUtlRis3bFEvTHlFClZHQ2xRWklMMCtER01QNEVHaVYr - MC94V3R4MVdNdUU3eXQ2RGFFVGo4VFEKLS0tIDQ4b2ZuMy9URUswWUZqNHlxandU - OFducVVzdGZGY0tnbFFBZDdjVzVkaUEKN8qAbbrd4pAHRGIN8O64fl7bQ6hx6Isr - Qx0xKeuhJCVXgtE8xc7xmnEhqrcONlflJ/XUnYV9jOkB71zSBJxruA== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBnc3NOZFRYT1VnaVZSaTRi + WnluSEk4d1U5TWx2REZRZ3VCRVp2ZzlKY0NvCjNlUnIwdWVqSnlQOWp1dlJ5THlW + c2xTNHhnaE94a2ZTeXJjQTVxeGRLTmsKLS0tIFV4c2NZK1ZnL2xtUlVvSksxNi9o + L3dodkJXVjZrekVldTVsRFRxSFlrTmMKiokjgIRIsI8D2aFP/Qem4iGzC4yr5lm2 + ZwggC/UfD56ysTEqrVaDnR7f5fSqZLWdstPJn7I/vr5CwKRMbMPYSA== -----END AGE ENCRYPTED FILE----- - recipient: age1p2dlc8gfgyrvtta6mty2pezjycn244gmvh456qd3wvkfwesp253qnwyta9 enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSAzczdPMDdWU1ZtckJRQm5j - UWJub0Yzd3NzOEh4YWdId01nYWI1YVY3dng0ClpEYXBJV2cvWEdjdXcwUFI3Y0NG - MDgvTmNZOXRQQndyVmRHamNRbzVaVU0KLS0tIGFKVTI4TkE2UjhDUSsxQTlNQ0Vk - QmFMNnlqbnhScC90T012K1QxRnRUOHcKAV7NxUn0CMcjKwK8zrocoLO1P9jc22uG - eG+vdJ6xzA99UX51aPxQOeEJgdFPEd3y1QJszQmRzThvid7y4lv0Cw== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBpN0llOTBJU1pNNVFxVWxt + aFdKdStKL1ZlZ0p6WFRQbHpGNnpmdlJXdG1FCkx5eDhZWWJvQ2xSWEJqWnZ6NmNt + Y0MzNDg5QzVSbEZteW1LNlFyRFg5Q0EKLS0tIDBrT0dEZlBoTExYcGRNZjZ5Znpz + cnE4YWRTMmRsTENhOTl5R2dYSzQwazAKvnTvZz842Mg5AVlIoYHI2BG+0/hO5zIv + jRVJri98fgGterXADTPmeoY3p+fFQggTPhs/5s5GSQxd5aiX8vvvrA== -----END AGE ENCRYPTED FILE----- - recipient: age18u4mqrhqkrpcytxfxfex6aeap04u38emhy6u4wrp5k62sz2vae4qm5jj7s enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBsVmpzenRvWE5EK2wzRFkx - SERZV0s1Rkt0ZnZ1U3JQSFNhdGVvaWhWcTA4CjVxK0Z0MHI0ZnMrUS9YYWhTTG1z - L2lVS1Q2UkVQd2x5b1E1eWpQVGp2ZHMKLS0tIHNLOGhTYjkzWkFEM05wYkRZeXFQ - SXNTSGZZSFE2bFhybXdIc1FUb1ZBd0kKkYzflPRk6GrE6t9oVGOzc8xcyZDxiIw8 - 9SVXIgV0WVpY4lnFKYKH2i4+1sIm6tKOpizlQxTg5VgmmrTtfazWAA== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBPQWljdGg4VTlDdGhoblpk + LytxK2FnQVI1dzB2bnFaWUtoUVNGS3lpU3prCnRwUTNnZVVXTnZ6eCtScTk5YzI3 + TGM2MmNhaHQ3NXAzMk0rcnJoTlp5STQKLS0tIEp2U3YvUUhXTkt3VFczY3J1LzMv + ZzM0VHpqamRIZVROS2lQdXFhQTNBekEKEySldC+VvZvPY398ZVkB5s73bT3QbuLh + IqTv+wbkbjlvZJUavVyycY5SwMXkSX3ge9W/64mt/RDs88gSXFS+Sw== -----END AGE ENCRYPTED FILE----- - recipient: age1fw2sqaa5s9c8ml6ncsexkj8ar4288387ju92ytjys4awf9aw6smqqz94dh enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSB0NHpkOTFHaXRhVGNua0dV - alRieWJ6WG5ZNzlvcTR2aTVUeWFBVGVVUUNZCnY2VUZUOWVlNGY1ZldyVGE2bkpi - VXVtQ3IyK0kyV1cyMU5nN1lYaW1oOUkKLS0tIFRVRGFCNWlGendSVEhHY0w0QTl6 - emJEQkQ3QlU0TFVWaW1uQytaUndmQlEKKahqJpX8vI+PASOzzod/sFvXSkQFnJ9O - YmnmiFxm5WZDPLHwkgVx8FgCq9RfAad4HybhsMjYPKXJ/fNa/WVZRA== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSA4bGppem15NlVod2hCRkM5 + MzY1aUZOdEVzRzdEYTRNakdMQWJlRkk0eEZzClRLSnRrQUoreU5MVG40KzRKSGcw + bUU4ZnpLU0VtOWxXVllrSW5lN0NWb0kKLS0tIE1iemRlVVpieEhxRnlIb2dFUHZr + am04NVRtU2N6SThYZWdXVE5RZ1B2aE0KVcHvB5k2Gcu/St0P8WPFzlCtuZthZTKo + hwVc0lC6Xxt25hriaUFinwnyvcjxrLCx0Nq7f9Zn16nJcza5kev1nQ== -----END AGE ENCRYPTED FILE----- lastmodified: "2024-11-30T06:31:42Z" mac: ENC[AES256_GCM,data:xh8x9IrQ01ZzdcCTIfBrifIGduMYVmSSP52BkTyr/bx7AgQAz2WeA7LFrccxIayCGHrQKfMQDLUKJ/EBamG/6p8AX6QqZBTfqFD688ZhmRfxgpj7fYR9jPYnhb/9XHI9R2jTaJWwrorXvu3pa+Gy/hWB3Kb+WZc3fslmIuKuLH0=,iv:GDrHSFZxPbpACdusVDPHXEjeEusYfk53N/KGHtdvrYo=,tag:ap38sCSTZVDQ0ZazXM3vlg==,type:str] diff --git a/overlays/my-lib/default.nix b/overlays/my-lib/default.nix index 8d07bc1..c684e36 100644 --- a/overlays/my-lib/default.nix +++ b/overlays/my-lib/default.nix @@ -1,3 +1,11 @@ { + mkSystemdDebug = + { lib, pkgs }: + { + ExecStart = lib.mkForce "${pkgs.tmux}/bin/tmux -S /tmp/tmux.socket new-session -s my-session -d"; + ExecStop = lib.mkForce "${pkgs.tmux}/bin/tmux -S /tmp/tmux.socket kill-session -t my-session"; + Type = "forking"; + }; } // (import ./prometheus.nix) +// (import ./settings.nix) diff --git a/overlays/my-lib/prometheus.nix b/overlays/my-lib/prometheus.nix index da43f77..5143c71 100644 --- a/overlays/my-lib/prometheus.nix +++ b/overlays/my-lib/prometheus.nix @@ -108,22 +108,10 @@ in description = "The 1-minute load average ({{ $value }}) exceeds 80% the number of CPUs."; }; } - { - alert = "HighTransmitTraffic"; - expr = "rate(node_network_transmit_bytes_total{device!=\"lo\"}[5m]) > 100000000"; - for = "1m"; - labels = { - severity = "warning"; - }; - annotations = { - summary = "High network transmit traffic on {{ $labels.instance }} ({{ $labels.device }})"; - description = "The network interface {{ $labels.device }} on {{ $labels.instance }} is transmitting data at a rate exceeding 100 MB/s for the last 1 minute."; - }; - } { alert = "NetworkTrafficExceedLimit"; - expr = ''increase(node_network_transmit_bytes_total{device!="lo",device!~"tailscale.*",device!~"wg.*",device!~"br.*"}[30d]) > 322122547200''; - for = "0m"; + expr = ''sum by(instance) (increase(node_network_transmit_bytes_total{device!="lo", device!~"tailscale.*", device!~"wg.*", device!~"br.*"}[30d])) > 322122547200''; + for = "1m"; labels = { severity = "critical"; }; @@ -131,6 +119,66 @@ in summary = "Outbound network traffic exceed 300GB for last 30 day"; }; } + { + alert = "HighDiskUsage"; + expr = ''(1 - node_filesystem_free_bytes{fstype!~"vfat|ramfs"} / node_filesystem_size_bytes) * 100 > 85''; + for = "5m"; + labels = { + severity = "warning"; + }; + annotations = { + summary = "High disk usage on {{ $labels.instance }}"; + }; + } + { + alert = "DiskWillFull"; + expr = ''predict_linear(node_filesystem_free_bytes{fstype!~"vfat|ramfs"}[1h], 12 * 3600) < (node_filesystem_size_bytes * 0.05)''; + + for = "3m"; + labels = { + severity = "critical"; + }; + annotations = { + summary = "Disk usage will exceed 95% in 12 hours on {{ $labels.instance }}"; + description = "Disk {{ $labels.mountpoint }} is predicted to exceed 92% usage within 12 hours at current growth rate"; + }; + } + { + alert = "HighSwapUsage"; + expr = ''(1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80''; + for = "5m"; + labels = { + severity = "warning"; + }; + annotations = { + summary = "High swap usage on {{ $labels.instance }}"; + description = "Swap usage is above 80% for 5 minutes\n Current value: {{ $value }}%"; + }; + } + { + alert = "OOMKillDetected"; + expr = ''increase(node_vmstat_oom_kill[5m]) > 0''; + for = "1m"; + labels = { + severity = "critical"; + }; + annotations = { + summary = "OOM kill detected on {{ $labels.instance }}"; + description = "Out of memory killer was triggered in the last 5 minutes"; + }; + } + { + alert = "HighMemoryUsage"; + expr = ''(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 90''; + for = "5m"; + labels = { + severity = "warning"; + }; + annotations = { + summary = "High memory usage on {{ $labels.instance }}"; + description = "Memory usage is above 90% for 5 minutes\n Current value: {{ $value }}%"; + }; + } ]; } ); @@ -152,6 +200,9 @@ in static_configs = [ { targets = targetAddresses; + labels = { + from = hostAddress; + }; } ]; relabel_configs = [ @@ -187,23 +238,25 @@ in severity = "warning"; }; annotations = { - summary = "High request latency on {{ $labels.instance }}"; - description = "Request latency is above 0.5 seconds for the last 3 minutes."; + summary = "High request latency from {{ $labels.from }} to {{ $labels.instance }}"; + description = "Request latency is above 0.5 seconds for the last 2 minutes."; }; } { alert = "VeryHighProbeLatency"; - expr = "probe_duration_seconds > 1"; + expr = "probe_duration_seconds > 2"; for = "3m"; labels = { severity = "critical"; }; annotations = { - summary = "High request latency on {{ $labels.instance }}"; - description = "Request latency is above 0.5 seconds for the last 3 minutes."; + summary = "Very high request latency from {{ $labels.from }} to {{ $labels.instance }}"; + description = "Request latency is above 2 seconds for the last 2 minutes."; }; } ]; } ); + + # mkResticScrapes = mkFunction () ; } diff --git a/overlays/my-lib/settings.nix b/overlays/my-lib/settings.nix new file mode 100644 index 0000000..b0cc0eb --- /dev/null +++ b/overlays/my-lib/settings.nix @@ -0,0 +1,5 @@ +{ + settings = { + alertmanagerPort = 9093; + }; +}