From 16a204faa732e2e2f1c3cf8f809ff1e1485158ac Mon Sep 17 00:00:00 2001 From: Maximilian Schlenz Date: Wed, 16 Jul 2025 11:07:08 +0200 Subject: [PATCH] add remaining observability resources --- example/main.tf | 52 ++++-- example/terraform.tfvars | 364 ++++++++++++++++++++++++------------- example/variables.tf | 137 ++++++++++++-- network/main.tf | 28 +-- network/variables.tf | 6 +- observability/main.tf | 51 ++++++ observability/output.tf | 0 observability/outputs.tf | 50 +++++ observability/providers.tf | 2 +- observability/variables.tf | 113 +++++++++++- project/output.tf | 4 +- 11 files changed, 616 insertions(+), 191 deletions(-) delete mode 100644 observability/output.tf create mode 100644 observability/outputs.tf diff --git a/example/main.tf b/example/main.tf index f5c620a..526054e 100644 --- a/example/main.tf +++ b/example/main.tf @@ -1,7 +1,7 @@ module "project" { source = "../project" - name = "project-123" + name = "project-123" labels = { "example" = "test" } @@ -53,7 +53,7 @@ module "net" { labels = each.value.labels # NIC options - nics = each.value.nics + nics = each.value.nics security_group_ids_by_name = local.security_group_ids_by_name } @@ -85,25 +85,37 @@ module "postgres" { # node_pools = each.value.node_pools # } -# module "observability" { -# source = "../observability" # path to the new module -# for_each = var.observability_instances +module "observability" { + source = "../observability" + for_each = var.observability_instances -# project_id = module.project.project_id + project_id = module.project.project_id -# # required -# name = each.value.name -# plan_name = each.value.plan_name + name = each.value.name + plan_name = each.value.plan_name -# # optionals -# acl = each.value.acl -# metrics_retention_days = each.value.metrics_retention_days -# metrics_retention_days_5m_downsampling = each.value.metrics_retention_days_5m_downsampling -# metrics_retention_days_1h_downsampling = each.value.metrics_retention_days_1h_downsampling -# alert_config = each.value.alert_config -# parameters = each.value.parameters + acl = each.value.acl + metrics_retention_days = each.value.metrics_retention_days + metrics_retention_days_5m_downsampling = each.value.metrics_retention_days_5m_downsampling + metrics_retention_days_1h_downsampling = each.value.metrics_retention_days_1h_downsampling + alert_config = each.value.alert_config + parameters = each.value.parameters -# # credentials -# create_credentials = each.value.create_credentials -# credentials_count = each.value.credentials_count -# } + # Credentials + create_credentials = each.value.create_credentials + credentials_count = each.value.credentials_count + + alertgroups = each.value.alertgroups + + logalertgroups = each.value.logalertgroups + + scrapeconfigs = each.value.scrapeconfigs +} + + +output "obs_url" { + value = { + for key, instance in module.observability : + key => instance.observability_urls + } +} \ No newline at end of file diff --git a/example/terraform.tfvars b/example/terraform.tfvars index d4e2d8f..718230f 100644 --- a/example/terraform.tfvars +++ b/example/terraform.tfvars @@ -5,24 +5,24 @@ organization_id = "03a34540-3c1a-4794-b2c6-7111ecf824ef" service_account_key_path = "/Users/schlenz/sa-key-dd5fa2c9-1651-4da7-8404-9ac4fe9bc3d5.json" security_groups = { - ssh_ingress_group = { - name = "ssh-ingress-group" - description = "ALLOW SSH ingress" - rules = [ - { description = "SSH RULE 1" - direction = "ingress" - ether_type = "IPv4" - ip_range = "0.0.0.0/0" - protocol = { - name = "tcp" - } - port_range = { - min = 22 - max = 22 - } - }, - ] - }, + # ssh_ingress_group = { + # name = "ssh-ingress-group" + # description = "ALLOW SSH ingress" + # rules = [ + # { description = "SSH RULE 1" + # direction = "ingress" + # ether_type = "IPv4" + # ip_range = "0.0.0.0/0" + # protocol = { + # name = "tcp" + # } + # port_range = { + # min = 22 + # max = 22 + # } + # }, + # ] + # }, # web_traffic_group = { # name = "web-traffic-group" @@ -57,126 +57,234 @@ security_groups = { } postgres_instances = { - dev = { - name = "pg-test-instance" - version = 17 - flavor = { - cpu = 2, - ram = 4 - } - storage = { - class = "premium-perf6-stackit", - size = 20 - } - replicas = 1 - acl = ["0.0.0.0/0"] - backup_schedule = "00 00 * * *" + # dev = { + # name = "pg-test-instance" + # version = 17 + # flavor = { + # cpu = 2, + # ram = 4 + # } + # storage = { + # class = "premium-perf6-stackit", + # size = 20 + # } + # replicas = 1 + # acl = ["0.0.0.0/0"] + # backup_schedule = "00 00 * * *" - users = [ - { username = "adminusr", - roles = ["login", "createdb"] - }, - { username = "testusr", - roles = ["login"] - } - ] + # users = [ + # { username = "adminusr", + # roles = ["login", "createdb"] + # }, + # { username = "testusr", + # roles = ["login"] + # } + # ] - databases = [ - { - name = "testdb", - owner = "admin" - } - ] - } + # databases = [ + # { + # name = "testdb", + # owner = "admin" + # } + # ] + # } } networks = { - wan_network = { - name = "wan_network" - ipv4_nameservers = ["1.1.1.1", "8.8.8.8"] - ipv4_prefix_length = 24 - ipv4_prefix = "10.219.0.0/24" - routed = true - } - lan_network1 = { - name = "lan_network1" - ipv4_prefix_length = 24 - ipv4_prefix = "10.220.1.0/24" - routed = true - nics = { - p2_lan1 = { - nic_name = "P2LAN1" - nic_ipv4 = "10.220.1.32" - nic_security = true - nic_security_group_names = ["ssh-ingress-group"] + # wan_network = { + # name = "wan_network" + # ipv4_nameservers = ["1.1.1.1", "8.8.8.8"] + # ipv4_prefix_length = 24 + # ipv4_prefix = "10.219.0.0/24" + # routed = true + # } + # lan_network1 = { + # name = "lan_network1" + # ipv4_prefix_length = 24 + # ipv4_prefix = "10.220.1.0/24" + # routed = true + # nics = { + # p2_lan1 = { + # nic_name = "P2LAN1" + # nic_ipv4 = "10.220.1.32" + # nic_security = true + # nic_security_group_names = ["ssh-ingress-group"] + # } + # } + # } + # lan_network2 = { + # name = "lan_network2" + # ipv4_prefix_length = 24 + # ipv4_prefix = "10.221.0.0/24" + # routed = true + # } + # lan_network3 = { + # name = "lan_network3" + # ipv4_nameservers = ["1.1.1.1", "8.8.8.8"] + # ipv4_prefix_length = 24 + # ipv4_prefix = "10.223.3.0/24" + # routed = true + # } + # wan = { + # name = "MGMT" + # ipv4_nameservers = ["1.1.1.1", "8.8.8.8"] + # ipv4_prefix_length = 24 + # nic_ipv4 = "10.224.0.254" + # } + + # db = { + # name = "db-net" + # nic_ipv4 = "10.0.0.126" + # nic_security = true + # } +} + +observability_instances = { + test = { + # Required + name = "test-observability" + plan_name = "Observability-Large-EU01" + + # Optional instance settings + acl = ["192.168.100.10/32", "203.0.113.5/32"] + metrics_retention_days = 30 + metrics_retention_days_5m_downsampling = 10 + metrics_retention_days_1h_downsampling = 5 + # parameters = { + # "custom_param" = "value" + # } + + # Credentials + create_credentials = true + credentials_count = 2 + + # alert‑groups + alertgroups = { + test_group = { + name = "example-alert-group" + interval = "60s" + rules = [ + { + alert = "example-alert-name" + expression = "kube_node_status_condition{condition=\"Ready\", status=\"false\"} > 0" + for = "60s" + labels = { + severity = "critical" + } + annotations = { + summary = "example summary" + description = "example description" + } + }, + { + alert = "example-alert-name-2" + expression = "kube_node_status_condition{condition=\"Ready\", status=\"false\"} > 0" + for = "1m" + labels = { + severity = "critical" + } + annotations = { + summary = "example summary" + description = "example description" + } + }, + ] + } + } + + # log‑alert‑groups + logalertgroups = { + example_log = { + name = "example-log-alert-group" + interval = "60m" + rules = [ + { + alert = "example-log-alert-name" + expression = "sum(rate({namespace=\"example\", pod=\"logger\"} |= \"Simulated error message\" [1m])) > 0" + for = "60s" + labels = { + severity = "critical" + } + annotations = { + summary = "example summary" + description = "example description" + } + }, + { + alert = "example-log-alert-name-2" + expression = "sum(rate({namespace=\"example\", pod=\"logger\"} |= \"Another error message\" [1m])) > 0" + for = "60s" + labels = { + severity = "critical" + } + annotations = { + summary = "example summary" + description = "example description" + } + }, + ] + } + } + + # scrapeconfigs + scrapeconfigs = { + example_job = { + name = "example-job" + metrics_path = "/my-metrics" + saml2 = { + enable_url_parameters = true + } + targets = [ + { + urls = ["url1", "urls2"] + labels = { + "url1" = "dev" + } + } + ] } } } - lan_network2 = { - name = "lan_network2" - ipv4_prefix_length = 24 - ipv4_prefix = "10.221.0.0/24" - routed = true - } - lan_network3 = { - name = "lan_network3" - ipv4_nameservers = ["1.1.1.1", "8.8.8.8"] - ipv4_prefix_length = 24 - ipv4_prefix = "10.223.3.0/24" - routed = true - } - wan = { - name = "MGMT" - ipv4_nameservers = ["1.1.1.1", "8.8.8.8"] - ipv4_prefix_length = 24 - nic_ipv4 = "10.224.0.254" - } - - db = { - name = "db-net" - nic_ipv4 = "10.0.0.126" - nic_security = true - } } -# ske_clusters = { -# dev = { -# name = "dev-cluster" -# kubernetes_version_min = "1.31" -# node_pools = [ -# { name = "default" -# machine_type = "c2.1" -# availability_zones = ["eu01-1", "eu01-2"] -# volume_size = 40 -# minimum = 1 -# maximum = 3 -# } -# ] -# } +ske_clusters = { + # dev = { + # name = "dev-cluster" + # kubernetes_version_min = "1.31" + # node_pools = [ + # { name = "default" + # machine_type = "c2.1" + # availability_zones = ["eu01-1", "eu01-2"] + # volume_size = 40 + # minimum = 1 + # maximum = 3 + # } + # ] + # } -# staging = { -# name = "staging-cluster" -# kubernetes_version_min = "1.31" -# node_pools = [ -# { name = "general" -# machine_type = "c2.2" -# availability_zones = ["eu03-1", "eu03-2"] -# volume_size = 80 -# minimum = 2 -# maximum = 4 -# } -# ] -# } -# } + # staging = { + # name = "staging-cluster" + # kubernetes_version_min = "1.31" + # node_pools = [ + # { name = "general" + # machine_type = "c2.2" + # availability_zones = ["eu03-1", "eu03-2"] + # volume_size = 80 + # minimum = 2 + # maximum = 4 + # } + # ] + # } + # } -# observability_instances = { -# starter = { -# name = "Observability-1" -# plan_name = "Observability-Starter-EU01" -# } + # observability_instances = { + # starter = { + # name = "Observability-1" + # plan_name = "Observability-Starter-EU01" + # } -# prod = { -# name = "Observability-2" -# plan_name = "Observability-Large-EU01" -# } -# } + # prod = { + # name = "Observability-2" + # plan_name = "Observability-Large-EU01" + # } +} \ No newline at end of file diff --git a/example/variables.tf b/example/variables.tf index d17e836..71ee2ea 100644 --- a/example/variables.tf +++ b/example/variables.tf @@ -116,24 +116,121 @@ variable "networks" { } -# variable "ske_clusters" { -# type = map(object({ -# name = string -# kubernetes_version_min = string -# node_pools = list(object({ -# name = string -# machine_type = string -# availability_zones = list(string) -# volume_size = number -# minimum = number -# maximum = number -# })) -# })) -# } +variable "ske_clusters" { + type = map(object({ + name = string + kubernetes_version_min = string + node_pools = list(object({ + name = string + machine_type = string + availability_zones = list(string) + volume_size = number + minimum = number + maximum = number + })) + })) +} -# variable "observability_instances" { -# type = map(object({ -# name = string -# plan_name = string -# })) -# } +variable "observability_instances" { + description = "Map of Observability instances to create" + type = map(object({ + # Required + name = string + plan_name = string + + # Optional instance settings + acl = optional(list(string)) + metrics_retention_days = optional(number) + metrics_retention_days_5m_downsampling = optional(number) + metrics_retention_days_1h_downsampling = optional(number) + alert_config = optional(any) + parameters = optional(map(string)) + + # Control credential creation + create_credentials = optional(bool, true) + credentials_count = optional(number, 1) + + # alert‑groups + alertgroups = optional(map(object({ + name = string + interval = optional(string) + rules = list(object({ + alert = string + expression = string + for = optional(string) + labels = optional(map(string)) + annotations = optional(map(string)) + })) + })), {}) + + # log‑alert‑groups + logalertgroups = optional(map(object({ + name = string + interval = optional(string) + rules = list(object({ + alert = string + expression = string + for = optional(string) + labels = optional(map(string)) + annotations = optional(map(string)) + })) + })), {}) + + # scrape‑configs + scrapeconfigs = optional(map(object({ + name = string + metrics_path = string + targets = list(object({ + urls = list(string) + labels = optional(map(string)) + })) + basic_auth = optional(object({ + username = string + password = string + })) + saml2 = optional(object({ + enable_url_parameters = optional(bool) + })) + sample_limit = optional(number) + scheme = optional(string) + scrape_interval = optional(string) + scrape_timeout = optional(string) + })), {}) + })) + default = {} + validation { + condition = alltrue([ + for k, v in var.observability_instances : + contains([ + "Observability-Medium-EU01", + "Observability-Monitoring-XL-EU01", + "Observability-Large-EU01", + "Observability-Monitoring-Basic-EU01", + "Observability-Monitoring-Large-EU01", + "Observability-Basic-EU01", + "Observability-Monitoring-Medium-EU01", + "Observability-Monitoring-XXL-EU01", + "Observability-Metrics-Endpoint-100k-EU01", + "Observability-Frontend-Starter-EU01", + "Observability-Monitoring-Starter-EU01", + "Observability-Starter-EU01", + ], v.plan_name) + ]) + error_message = <<-EOM +One or more observability_instances specify an invalid plan_name. +See the provider error output for the list of supported plans. Allowed values: + Observability-Medium-EU01 + Observability-Monitoring-XL-EU01 + Observability-Large-EU01 + Observability-Monitoring-Basic-EU01 + Observability-Monitoring-Large-EU01 + Observability-Basic-EU01 + Observability-Monitoring-Medium-EU01 + Observability-Monitoring-XXL-EU01 + Observability-Metrics-Endpoint-100k-EU01 + Observability-Frontend-Starter-EU01 + Observability-Monitoring-Starter-EU01 + Observability-Starter-EU01 + EOM + } +} diff --git a/network/main.tf b/network/main.tf index 0f7b638..fd6691d 100644 --- a/network/main.tf +++ b/network/main.tf @@ -1,7 +1,7 @@ resource "stackit_network" "this" { - project_id = var.project_id - name = var.name - labels = var.labels + project_id = var.project_id + name = var.name + labels = var.labels # IPv4 settings ipv4_gateway = var.ipv4_gateway @@ -15,9 +15,9 @@ resource "stackit_network" "this" { ipv6_prefix = var.ipv6_prefix ipv6_prefix_length = var.ipv6_prefix_length - no_ipv4_gateway = var.no_ipv4_gateway - no_ipv6_gateway = var.no_ipv6_gateway - routed = var.routed + no_ipv4_gateway = var.no_ipv4_gateway + no_ipv6_gateway = var.no_ipv6_gateway + routed = var.routed } resource "stackit_network_interface" "nics" { @@ -25,16 +25,16 @@ resource "stackit_network_interface" "nics" { project_id = var.project_id network_id = stackit_network.this.network_id - - name = each.value.nic_name - ipv4 = each.value.nic_ipv4 - allowed_addresses = each.value.nic_allowed_addresses - labels = each.value.nic_labels - security = each.value.nic_security + + name = each.value.nic_name + ipv4 = each.value.nic_ipv4 + allowed_addresses = each.value.nic_allowed_addresses + labels = each.value.nic_labels + security = each.value.nic_security security_group_ids = ( each.value.nic_security_group_ids != null ? each.value.nic_security_group_ids : - each.value.nic_security_group_names != null ? - [for name in each.value.nic_security_group_names : var.security_group_ids_by_name[name]] + each.value.nic_security_group_names != null ? + [for name in each.value.nic_security_group_names : var.security_group_ids_by_name[name]] : [] ) } diff --git a/network/variables.tf b/network/variables.tf index 68eb3a6..35787b4 100644 --- a/network/variables.tf +++ b/network/variables.tf @@ -17,12 +17,12 @@ variable "ipv4_nameservers" { } variable "ipv4_prefix" { - type = string # CIDR, only for NON-routed nets + type = string default = null } variable "ipv4_prefix_length" { - type = number # e.g. 24 + type = number default = null } @@ -62,7 +62,7 @@ variable "no_ipv6_gateway" { } variable "routed" { - type = bool + type = bool # default = true } diff --git a/observability/main.tf b/observability/main.tf index 1268066..080d9be 100644 --- a/observability/main.tf +++ b/observability/main.tf @@ -2,4 +2,55 @@ resource "stackit_observability_instance" "this" { project_id = var.project_id name = var.name plan_name = var.plan_name + + acl = var.acl + metrics_retention_days = var.metrics_retention_days + metrics_retention_days_5m_downsampling = var.metrics_retention_days_5m_downsampling + metrics_retention_days_1h_downsampling = var.metrics_retention_days_1h_downsampling + alert_config = var.alert_config + parameters = var.parameters +} + +resource "stackit_observability_credential" "this" { + count = var.create_credentials ? var.credentials_count : 0 + project_id = var.project_id + instance_id = stackit_observability_instance.this.instance_id +} + +resource "stackit_observability_alertgroup" "this" { + for_each = var.alertgroups + project_id = var.project_id + instance_id = stackit_observability_instance.this.instance_id + + name = each.value.name + interval = each.value.interval + rules = each.value.rules +} + +resource "stackit_observability_logalertgroup" "this" { + for_each = var.logalertgroups + project_id = var.project_id + instance_id = stackit_observability_instance.this.instance_id + + name = each.value.name + interval = each.value.interval + rules = each.value.rules +} + +resource "stackit_observability_scrapeconfig" "this" { + for_each = var.scrapeconfigs + project_id = var.project_id + instance_id = stackit_observability_instance.this.instance_id + name = each.value.name + metrics_path = each.value.metrics_path + + targets = each.value.targets + + basic_auth = each.value.basic_auth + saml2 = each.value.saml2 + + sample_limit = each.value.sample_limit + scheme = each.value.scheme + scrape_interval = each.value.scrape_interval + scrape_timeout = each.value.scrape_timeout } diff --git a/observability/output.tf b/observability/output.tf deleted file mode 100644 index e69de29..0000000 diff --git a/observability/outputs.tf b/observability/outputs.tf new file mode 100644 index 0000000..a097640 --- /dev/null +++ b/observability/outputs.tf @@ -0,0 +1,50 @@ +output "observability_id" { + description = "Observability instance ID" + value = stackit_observability_instance.this.instance_id +} + +output "observability_urls" { + description = "Key Observability URLs" + value = { + grafana = stackit_observability_instance.this.grafana_url + dashboard = stackit_observability_instance.this.dashboard_url + metrics = stackit_observability_instance.this.metrics_url + logs = stackit_observability_instance.this.logs_url + } +} + +output "observability_credentials" { + description = "List of credential objects (username & password)" + value = [ + for cred in stackit_observability_credential.this : + { + username = cred.username + password = cred.password + } + ] + sensitive = true +} + +output "observability_alertgroups" { + description = "Map of created Thanos alert-group IDs" + value = { + for key, alertgrp in stackit_observability_alertgroup.this : + key => alertgrp.id + } +} + +output "observability_logalertgroups" { + description = "Map of created Loki log-alert-group IDs" + value = { + for key, logalertgrp in stackit_observability_logalertgroup.this : + key => logalertgrp.id + } +} + +output "observability_scrapeconfigs" { + description = "Map of created scrape-config IDs" + value = { + for key, sconfig in stackit_observability_scrapeconfig.this : + key => sconfig.id + } +} diff --git a/observability/providers.tf b/observability/providers.tf index 8962cf6..a2af181 100644 --- a/observability/providers.tf +++ b/observability/providers.tf @@ -4,7 +4,7 @@ terraform { required_providers { stackit = { source = "stackitcloud/stackit" - version = "0.56.0" + version = "0.54.0" } } } diff --git a/observability/variables.tf b/observability/variables.tf index f3be80f..fb14907 100644 --- a/observability/variables.tf +++ b/observability/variables.tf @@ -1,11 +1,118 @@ variable "project_id" { - type = string + description = "STACKIT project ID" + type = string } variable "name" { - type = string + description = "The name of the Observability instance." + type = string } variable "plan_name" { - type = string + description = "Specifies the Observability plan." + type = string +} + +variable "acl" { + description = "Access control list (CIDR blocks) permitted to access this instance." + type = list(string) + default = null +} + +variable "metrics_retention_days" { + description = "How many days raw metrics are kept." + type = number + default = null +} + +variable "metrics_retention_days_5m_downsampling" { + description = "How many days 5m-downsampled metrics are kept." + type = number + default = null +} + +variable "metrics_retention_days_1h_downsampling" { + description = "How many days 1h-downsampled metrics are kept." + type = number + default = null +} + +variable "alert_config" { + description = "Complex Alertmanager configuration." + type = any + default = null +} + +variable "parameters" { + description = "Additional key/value parameters for the instance." + type = map(string) + default = null +} + +variable "create_credentials" { + description = "Whether to create credentials for this instance." + type = bool + default = true +} + +variable "credentials_count" { + description = "How many credentials to create when enabled." + type = number + default = 1 +} + +variable "alertgroups" { + description = "Map of alert-groups to create." + type = map(object({ + name = string + interval = optional(string) + rules = list(object({ + alert = string + expression = string + for = optional(string) + labels = optional(map(string)) + annotations = optional(map(string)) + })) + })) + default = {} +} + +variable "logalertgroups" { + description = "Map of log-alert-groups to create." + type = map(object({ + name = string + interval = optional(string) + rules = list(object({ + alert = string + expression = string + for = optional(string) + labels = optional(map(string)) + annotations = optional(map(string)) + })) + })) + default = {} +} + +variable "scrapeconfigs" { + description = "Map of scrape-configs to create." + type = map(object({ + name = string + metrics_path = string + targets = list(object({ + urls = list(string) + labels = optional(map(string)) + })) + basic_auth = optional(object({ + username = string + password = string + })) + saml2 = optional(object({ + enable_url_parameters = optional(bool) + })) + sample_limit = optional(number) + scheme = optional(string) + scrape_interval = optional(string) + scrape_timeout = optional(string) + })) + default = {} } diff --git a/project/output.tf b/project/output.tf index 09573a9..957559e 100644 --- a/project/output.tf +++ b/project/output.tf @@ -1,4 +1,4 @@ output "project_id" { value = stackit_resourcemanager_project.this.project_id - description = "ID of the project" -} \ No newline at end of file + description = "ID of the project" +}