diff --git a/flake.nix b/flake.nix
index ca5d07c..b9b3f61 100644
--- a/flake.nix
+++ b/flake.nix
@@ -28,6 +28,7 @@
       ./modules/users.nix
       ./modules/ssh.nix
       ./modules/monitoring.nix
+      ./modules/influxdb.nix
     ];
   in {
     nixosConfigurations = {
diff --git a/hosts/skydick/datapool.nix b/hosts/skydick/datapool.nix
index c1bdebf..13bbb5d 100644
--- a/hosts/skydick/datapool.nix
+++ b/hosts/skydick/datapool.nix
@@ -94,6 +94,7 @@
 # SYSTEM
 #   dick/system            (canmount=off)          namespace root
 #   dick/system/backup     /srv/system/backup      1M    zstd-3  archival backups
+#   dick/system/influxdb   /srv/system/influxdb    128K  zstd    InfluxDB v2 time-series data (fleet monitoring)
 #   dick/system/vm         /srv/system/vm          64K   zstd    central VM filesystem root / parent for zvol children
 #   dick/templates/vm      /srv/templates/vm       64K   zstd    shared read-only VM base images
 #
@@ -142,6 +143,8 @@
 # System:
 #   zfs create -o mountpoint=none -o canmount=off dick/system
 #   zfs create -o recordsize=1M -o compression=zstd-3 -o mountpoint=/srv/system/backup dick/system/backup
+#   zfs create -o recordsize=128K -o mountpoint=/srv/system/influxdb -o quota=500G dick/system/influxdb
+#   chown influxdb2:influxdb2 /srv/system/influxdb
 #   zfs create -o recordsize=64K -o mountpoint=/srv/system/vm dick/system/vm
 #   mkdir -p /srv/system/vm/files
 #   zfs create -o recordsize=64K -o readonly=on -o mountpoint=/srv/templates/vm dick/templates/vm
diff --git a/hosts/skydick/default.nix b/hosts/skydick/default.nix
index 24579da..0e91e0b 100644
--- a/hosts/skydick/default.nix
+++ b/hosts/skydick/default.nix
@@ -317,10 +317,13 @@
   ];
 
   # ==========================================================================
-  # TELEGRAF MONITORING
+  # INFLUXDB + TELEGRAF MONITORING
   # ==========================================================================
+  skyworks.influxdb.enable = true;
+
   skyworks.monitoring = {
     enable = true;
+    influxUrl = "http://127.0.0.1:8086";
     bucket = "skydick";
     netInterfaces = [ "bond0" ];
   };
diff --git a/modules/influxdb.nix b/modules/influxdb.nix
new file mode 100644
index 0000000..be01679
--- /dev/null
+++ b/modules/influxdb.nix
@@ -0,0 +1,71 @@
+# InfluxDB v2 time-series database for infrastructure monitoring.
+#
+# Data lives on ZFS (dick/system/influxdb) for compression, snapshots,
+# and infinite retention. All Telegraf instances across the fleet write
+# here; Grafana on door1 queries the HTTP API.
+#
+# ZFS dataset (create once, manually):
+#   zfs create -o recordsize=128K -o mountpoint=/srv/system/influxdb \
+#     -o quota=500G dick/system/influxdb
+#   chown influxdb2:influxdb2 /srv/system/influxdb
+#
+{ config, lib, pkgs, ... }:
+
+let
+  cfg = config.skyworks.influxdb;
+in {
+  options.skyworks.influxdb = {
+    enable = lib.mkEnableOption "InfluxDB v2 on ZFS-backed storage";
+
+    dataDir = lib.mkOption {
+      type = lib.types.path;
+      default = "/srv/system/influxdb";
+      description = "InfluxDB data directory (should be a ZFS dataset)";
+    };
+
+    port = lib.mkOption {
+      type = lib.types.port;
+      default = 8086;
+      description = "InfluxDB HTTP API listen port";
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    services.influxdb2 = {
+      enable = true;
+      settings = {
+        http-bind-address = ":${toString cfg.port}";
+      };
+    };
+
+    # Bind-mount the ZFS dataset over the default StateDirectory so
+    # NixOS's influxdb2 module manages bolt/engine/sqlite paths as
+    # usual, but the actual data lands on ZFS.
+    #
+    # The bind mount is ordered after zfs-mount.service: the dataset is
+    # mounted by ZFS rather than via fstab, so without this ordering the
+    # bind could capture the empty directory underneath the mountpoint.
+    fileSystems."/var/lib/influxdb2" = {
+      device = cfg.dataDir;
+      fsType = "none";
+      options = [ "bind" "x-systemd.after=zfs-mount.service" ];
+    };
+
+    # Ensure the ZFS mountpoint directory exists. Ownership matches the
+    # influxdb2 user/group the NixOS services.influxdb2 module runs as.
+    systemd.tmpfiles.rules = [
+      "d ${cfg.dataDir} 0700 influxdb2 influxdb2 -"
+    ];
+
+    # Wait for ZFS to mount the dataset before starting InfluxDB
+    systemd.services.influxdb2 = {
+      after = [ "zfs-mount.service" ];
+      wants = [ "zfs-mount.service" ];
+    };
+
+    # Open the API port in the firewall. allowedTCPPorts applies to every
+    # interface; use networking.firewall.interfaces.<name>.allowedTCPPorts
+    # instead if the port must be reachable from the internal network only.
+    networking.firewall.allowedTCPPorts = [ cfg.port ];
+  };
+}
diff --git a/modules/monitoring.nix b/modules/monitoring.nix
index f12c4b2..1723107 100644
--- a/modules/monitoring.nix
+++ b/modules/monitoring.nix
@@ -17,11 +17,16 @@
   '';
 in {
   options.skyworks.monitoring = {
-    enable = lib.mkEnableOption "Telegraf monitoring to door1 InfluxDB";
+    enable = lib.mkEnableOption "Telegraf monitoring to InfluxDB";
     bucket = lib.mkOption {
       type = lib.types.str;
       description = "InfluxDB bucket name";
     };
+    influxUrl = lib.mkOption {
+      type = lib.types.str;
+      default = "http://10.0.1.1:8086";
+      description = "InfluxDB v2 HTTP API URL";
+    };
     netInterfaces = lib.mkOption {
       type = lib.types.listOf lib.types.str;
       default = [ "*" ];
@@ -54,7 +59,7 @@
       };
 
       outputs.influxdb_v2 = [{
-        urls = [ "http://10.0.91.30:8086" ];
+        urls = [ cfg.influxUrl ];
         token = "$INFLUX_TOKEN";
         organization = "door1";
         bucket = cfg.bucket;