add remaining observability resources

This commit is contained in:
Maximilian_Schlenz 2025-07-16 11:07:08 +02:00
parent 370b15a328
commit 16a204faa7
11 changed files with 616 additions and 191 deletions

View file

@ -85,25 +85,37 @@ module "postgres" {
# node_pools = each.value.node_pools # node_pools = each.value.node_pools
# } # }
# module "observability" { module "observability" {
# source = "../observability" # path to the new module source = "../observability"
# for_each = var.observability_instances for_each = var.observability_instances
# project_id = module.project.project_id project_id = module.project.project_id
# # required name = each.value.name
# name = each.value.name plan_name = each.value.plan_name
# plan_name = each.value.plan_name
# # optionals acl = each.value.acl
# acl = each.value.acl metrics_retention_days = each.value.metrics_retention_days
# metrics_retention_days = each.value.metrics_retention_days metrics_retention_days_5m_downsampling = each.value.metrics_retention_days_5m_downsampling
# metrics_retention_days_5m_downsampling = each.value.metrics_retention_days_5m_downsampling metrics_retention_days_1h_downsampling = each.value.metrics_retention_days_1h_downsampling
# metrics_retention_days_1h_downsampling = each.value.metrics_retention_days_1h_downsampling alert_config = each.value.alert_config
# alert_config = each.value.alert_config parameters = each.value.parameters
# parameters = each.value.parameters
# # credentials # Credentials
# create_credentials = each.value.create_credentials create_credentials = each.value.create_credentials
# credentials_count = each.value.credentials_count credentials_count = each.value.credentials_count
# }
alertgroups = each.value.alertgroups
logalertgroups = each.value.logalertgroups
scrapeconfigs = each.value.scrapeconfigs
}
output "obs_url" {
value = {
for key, instance in module.observability :
key => instance.observability_urls
}
}

View file

@ -5,24 +5,24 @@ organization_id = "03a34540-3c1a-4794-b2c6-7111ecf824ef"
service_account_key_path = "/Users/schlenz/sa-key-dd5fa2c9-1651-4da7-8404-9ac4fe9bc3d5.json" service_account_key_path = "/Users/schlenz/sa-key-dd5fa2c9-1651-4da7-8404-9ac4fe9bc3d5.json"
security_groups = { security_groups = {
ssh_ingress_group = { # ssh_ingress_group = {
name = "ssh-ingress-group" # name = "ssh-ingress-group"
description = "ALLOW SSH ingress" # description = "ALLOW SSH ingress"
rules = [ # rules = [
{ description = "SSH RULE 1" # { description = "SSH RULE 1"
direction = "ingress" # direction = "ingress"
ether_type = "IPv4" # ether_type = "IPv4"
ip_range = "0.0.0.0/0" # ip_range = "0.0.0.0/0"
protocol = { # protocol = {
name = "tcp" # name = "tcp"
} # }
port_range = { # port_range = {
min = 22 # min = 22
max = 22 # max = 22
} # }
}, # },
] # ]
}, # },
# web_traffic_group = { # web_traffic_group = {
# name = "web-traffic-group" # name = "web-traffic-group"
@ -57,89 +57,197 @@ security_groups = {
} }
postgres_instances = { postgres_instances = {
dev = { # dev = {
name = "pg-test-instance" # name = "pg-test-instance"
version = 17 # version = 17
flavor = { # flavor = {
cpu = 2, # cpu = 2,
ram = 4 # ram = 4
} # }
storage = { # storage = {
class = "premium-perf6-stackit", # class = "premium-perf6-stackit",
size = 20 # size = 20
} # }
replicas = 1 # replicas = 1
acl = ["0.0.0.0/0"] # acl = ["0.0.0.0/0"]
backup_schedule = "00 00 * * *" # backup_schedule = "00 00 * * *"
users = [ # users = [
{ username = "adminusr", # { username = "adminusr",
roles = ["login", "createdb"] # roles = ["login", "createdb"]
}, # },
{ username = "testusr", # { username = "testusr",
roles = ["login"] # roles = ["login"]
} # }
] # ]
databases = [ # databases = [
{ # {
name = "testdb", # name = "testdb",
owner = "admin" # owner = "admin"
} # }
] # ]
} # }
} }
networks = { networks = {
wan_network = { # wan_network = {
name = "wan_network" # name = "wan_network"
ipv4_nameservers = ["1.1.1.1", "8.8.8.8"] # ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
ipv4_prefix_length = 24 # ipv4_prefix_length = 24
ipv4_prefix = "10.219.0.0/24" # ipv4_prefix = "10.219.0.0/24"
routed = true # routed = true
} # }
lan_network1 = { # lan_network1 = {
name = "lan_network1" # name = "lan_network1"
ipv4_prefix_length = 24 # ipv4_prefix_length = 24
ipv4_prefix = "10.220.1.0/24" # ipv4_prefix = "10.220.1.0/24"
routed = true # routed = true
nics = { # nics = {
p2_lan1 = { # p2_lan1 = {
nic_name = "P2LAN1" # nic_name = "P2LAN1"
nic_ipv4 = "10.220.1.32" # nic_ipv4 = "10.220.1.32"
nic_security = true # nic_security = true
nic_security_group_names = ["ssh-ingress-group"] # nic_security_group_names = ["ssh-ingress-group"]
} # }
} # }
} # }
lan_network2 = { # lan_network2 = {
name = "lan_network2" # name = "lan_network2"
ipv4_prefix_length = 24 # ipv4_prefix_length = 24
ipv4_prefix = "10.221.0.0/24" # ipv4_prefix = "10.221.0.0/24"
routed = true # routed = true
} # }
lan_network3 = { # lan_network3 = {
name = "lan_network3" # name = "lan_network3"
ipv4_nameservers = ["1.1.1.1", "8.8.8.8"] # ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
ipv4_prefix_length = 24 # ipv4_prefix_length = 24
ipv4_prefix = "10.223.3.0/24" # ipv4_prefix = "10.223.3.0/24"
routed = true # routed = true
} # }
wan = { # wan = {
name = "MGMT" # name = "MGMT"
ipv4_nameservers = ["1.1.1.1", "8.8.8.8"] # ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
ipv4_prefix_length = 24 # ipv4_prefix_length = 24
nic_ipv4 = "10.224.0.254" # nic_ipv4 = "10.224.0.254"
# }
# db = {
# name = "db-net"
# nic_ipv4 = "10.0.0.126"
# nic_security = true
# }
} }
db = { observability_instances = {
name = "db-net" test = {
nic_ipv4 = "10.0.0.126" # Required
nic_security = true name = "test-observability"
plan_name = "Observability-Large-EU01"
# Optional instance settings
acl = ["192.168.100.10/32", "203.0.113.5/32"]
metrics_retention_days = 30
metrics_retention_days_5m_downsampling = 10
metrics_retention_days_1h_downsampling = 5
# parameters = {
# "custom_param" = "value"
# }
# Credentials
create_credentials = true
credentials_count = 2
# alertgroups
alertgroups = {
test_group = {
name = "example-alert-group"
interval = "60s"
rules = [
{
alert = "example-alert-name"
expression = "kube_node_status_condition{condition=\"Ready\", status=\"false\"} > 0"
for = "60s"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
{
alert = "example-alert-name-2"
expression = "kube_node_status_condition{condition=\"Ready\", status=\"false\"} > 0"
for = "1m"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
]
} }
} }
# ske_clusters = { # logalertgroups
logalertgroups = {
example_log = {
name = "example-log-alert-group"
interval = "60m"
rules = [
{
alert = "example-log-alert-name"
expression = "sum(rate({namespace=\"example\", pod=\"logger\"} |= \"Simulated error message\" [1m])) > 0"
for = "60s"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
{
alert = "example-log-alert-name-2"
expression = "sum(rate({namespace=\"example\", pod=\"logger\"} |= \"Another error message\" [1m])) > 0"
for = "60s"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
]
}
}
# scrapeconfigs
scrapeconfigs = {
example_job = {
name = "example-job"
metrics_path = "/my-metrics"
saml2 = {
enable_url_parameters = true
}
targets = [
{
urls = ["url1", "urls2"]
labels = {
"url1" = "dev"
}
}
]
}
}
}
}
ske_clusters = {
# dev = { # dev = {
# name = "dev-cluster" # name = "dev-cluster"
# kubernetes_version_min = "1.31" # kubernetes_version_min = "1.31"
@ -179,4 +287,4 @@ networks = {
# name = "Observability-2" # name = "Observability-2"
# plan_name = "Observability-Large-EU01" # plan_name = "Observability-Large-EU01"
# } # }
# } }

View file

@ -116,24 +116,121 @@ variable "networks" {
} }
# variable "ske_clusters" { variable "ske_clusters" {
# type = map(object({ type = map(object({
# name = string name = string
# kubernetes_version_min = string kubernetes_version_min = string
# node_pools = list(object({ node_pools = list(object({
# name = string name = string
# machine_type = string machine_type = string
# availability_zones = list(string) availability_zones = list(string)
# volume_size = number volume_size = number
# minimum = number minimum = number
# maximum = number maximum = number
# })) }))
# })) }))
# } }
# variable "observability_instances" { variable "observability_instances" {
# type = map(object({ description = "Map of Observability instances to create"
# name = string type = map(object({
# plan_name = string # Required
# })) name = string
# } plan_name = string
# Optional instance settings
acl = optional(list(string))
metrics_retention_days = optional(number)
metrics_retention_days_5m_downsampling = optional(number)
metrics_retention_days_1h_downsampling = optional(number)
alert_config = optional(any)
parameters = optional(map(string))
# Control credential creation
create_credentials = optional(bool, true)
credentials_count = optional(number, 1)
# alertgroups
alertgroups = optional(map(object({
name = string
interval = optional(string)
rules = list(object({
alert = string
expression = string
for = optional(string)
labels = optional(map(string))
annotations = optional(map(string))
}))
})), {})
# logalertgroups
logalertgroups = optional(map(object({
name = string
interval = optional(string)
rules = list(object({
alert = string
expression = string
for = optional(string)
labels = optional(map(string))
annotations = optional(map(string))
}))
})), {})
# scrapeconfigs
scrapeconfigs = optional(map(object({
name = string
metrics_path = string
targets = list(object({
urls = list(string)
labels = optional(map(string))
}))
basic_auth = optional(object({
username = string
password = string
}))
saml2 = optional(object({
enable_url_parameters = optional(bool)
}))
sample_limit = optional(number)
scheme = optional(string)
scrape_interval = optional(string)
scrape_timeout = optional(string)
})), {})
}))
default = {}
validation {
condition = alltrue([
for k, v in var.observability_instances :
contains([
"Observability-Medium-EU01",
"Observability-Monitoring-XL-EU01",
"Observability-Large-EU01",
"Observability-Monitoring-Basic-EU01",
"Observability-Monitoring-Large-EU01",
"Observability-Basic-EU01",
"Observability-Monitoring-Medium-EU01",
"Observability-Monitoring-XXL-EU01",
"Observability-Metrics-Endpoint-100k-EU01",
"Observability-Frontend-Starter-EU01",
"Observability-Monitoring-Starter-EU01",
"Observability-Starter-EU01",
], v.plan_name)
])
error_message = <<-EOM
One or more observability_instances specify an invalid plan_name.
See the provider error output for the list of supported plans. Allowed values:
Observability-Medium-EU01
Observability-Monitoring-XL-EU01
Observability-Large-EU01
Observability-Monitoring-Basic-EU01
Observability-Monitoring-Large-EU01
Observability-Basic-EU01
Observability-Monitoring-Medium-EU01
Observability-Monitoring-XXL-EU01
Observability-Metrics-Endpoint-100k-EU01
Observability-Frontend-Starter-EU01
Observability-Monitoring-Starter-EU01
Observability-Starter-EU01
EOM
}
}

View file

@ -17,12 +17,12 @@ variable "ipv4_nameservers" {
} }
variable "ipv4_prefix" { variable "ipv4_prefix" {
type = string # CIDR, only for NON-routed nets type = string
default = null default = null
} }
variable "ipv4_prefix_length" { variable "ipv4_prefix_length" {
type = number # e.g. 24 type = number
default = null default = null
} }

View file

@ -2,4 +2,55 @@ resource "stackit_observability_instance" "this" {
project_id = var.project_id project_id = var.project_id
name = var.name name = var.name
plan_name = var.plan_name plan_name = var.plan_name
acl = var.acl
metrics_retention_days = var.metrics_retention_days
metrics_retention_days_5m_downsampling = var.metrics_retention_days_5m_downsampling
metrics_retention_days_1h_downsampling = var.metrics_retention_days_1h_downsampling
alert_config = var.alert_config
parameters = var.parameters
}
# Credentials attached to the Observability instance managed by this module.
# Yields `var.credentials_count` credentials when `var.create_credentials` is
# true and none when it is false.
resource "stackit_observability_credential" "this" {
  count = !var.create_credentials ? 0 : var.credentials_count

  instance_id = stackit_observability_instance.this.instance_id
  project_id  = var.project_id
}
# One alert group per entry of `var.alertgroups`, keyed by the map key and
# bound to the Observability instance managed by this module.
resource "stackit_observability_alertgroup" "this" {
  for_each = var.alertgroups

  # Parent references
  instance_id = stackit_observability_instance.this.instance_id
  project_id  = var.project_id

  # Group definition, taken verbatim from the map entry
  name     = each.value.name
  rules    = each.value.rules
  interval = each.value.interval
}
# One log alert group per entry of `var.logalertgroups`, keyed by the map key
# and bound to the Observability instance managed by this module.
resource "stackit_observability_logalertgroup" "this" {
  for_each = var.logalertgroups

  # Parent references
  instance_id = stackit_observability_instance.this.instance_id
  project_id  = var.project_id

  # Group definition, taken verbatim from the map entry
  name     = each.value.name
  rules    = each.value.rules
  interval = each.value.interval
}
# One scrape config per entry of `var.scrapeconfigs`, keyed by the map key and
# bound to the Observability instance managed by this module.
resource "stackit_observability_scrapeconfig" "this" {
for_each = var.scrapeconfigs
# Parent references
project_id = var.project_id
instance_id = stackit_observability_instance.this.instance_id
# Fields declared as required in the `scrapeconfigs` variable type
name = each.value.name
metrics_path = each.value.metrics_path
targets = each.value.targets
# Fields declared as optional(...) in the variable type; each is passed
# through exactly as given in the map entry
basic_auth = each.value.basic_auth
saml2 = each.value.saml2
sample_limit = each.value.sample_limit
scheme = each.value.scheme
scrape_interval = each.value.scrape_interval
scrape_timeout = each.value.scrape_timeout
} }

50
observability/outputs.tf Normal file
View file

@ -0,0 +1,50 @@
# Exposes the `instance_id` attribute of the instance managed by this module.
output "observability_id" {
  value       = stackit_observability_instance.this.instance_id
  description = "Observability instance ID"
}
# Bundles the instance's URL attributes into a single object with the keys
# `dashboard`, `grafana`, `logs` and `metrics`.
output "observability_urls" {
  description = "Key Observability URLs"

  value = {
    dashboard = stackit_observability_instance.this.dashboard_url
    grafana   = stackit_observability_instance.this.grafana_url
    logs      = stackit_observability_instance.this.logs_url
    metrics   = stackit_observability_instance.this.metrics_url
  }
}
# One { username, password } object per created credential, in `count` order.
# Marked sensitive because it carries the passwords.
output "observability_credentials" {
  description = "List of credential objects (username & password)"
  sensitive   = true

  value = [
    for credential in stackit_observability_credential.this : {
      password = credential.password
      username = credential.username
    }
  ]
}
# Maps each `var.alertgroups` key to the `id` of the alert group created for it.
output "observability_alertgroups" {
  description = "Map of created Thanos alert-group IDs"

  value = {
    for group_key, group in stackit_observability_alertgroup.this :
    group_key => group.id
  }
}
# Maps each `var.logalertgroups` key to the `id` of the log alert group created
# for it.
output "observability_logalertgroups" {
  description = "Map of created Loki log-alert-group IDs"

  value = {
    for group_key, group in stackit_observability_logalertgroup.this :
    group_key => group.id
  }
}
# Maps each `var.scrapeconfigs` key to the `id` of the scrape config created
# for it.
output "observability_scrapeconfigs" {
  description = "Map of created scrape-config IDs"

  value = {
    for job_key, job in stackit_observability_scrapeconfig.this :
    job_key => job.id
  }
}

View file

@ -4,7 +4,7 @@ terraform {
required_providers { required_providers {
stackit = { stackit = {
source = "stackitcloud/stackit" source = "stackitcloud/stackit"
version = "0.56.0" version = "0.54.0"
} }
} }
} }

View file

@ -1,11 +1,118 @@
variable "project_id" { variable "project_id" {
description = "STACKIT project ID"
type = string type = string
} }
variable "name" { variable "name" {
description = "The name of the Observability instance."
type = string type = string
} }
variable "plan_name" { variable "plan_name" {
description = "Specifies the Observability plan."
type = string type = string
} }
# Optional; defaults to null, so callers may omit it.
variable "acl" {
  type        = list(string)
  default     = null
  description = "Access control list (CIDR blocks) permitted to access this instance."
}
# Optional; defaults to null, so callers may omit it.
variable "metrics_retention_days" {
  type        = number
  default     = null
  description = "How many days raw metrics are kept."
}
# Optional; defaults to null, so callers may omit it.
variable "metrics_retention_days_5m_downsampling" {
  type        = number
  default     = null
  description = "How many days 5m-downsampled metrics are kept."
}
# Optional; defaults to null, so callers may omit it.
variable "metrics_retention_days_1h_downsampling" {
  type        = number
  default     = null
  description = "How many days 1h-downsampled metrics are kept."
}
# Optional; typed `any`, so no structure is enforced at the module boundary.
# Defaults to null.
variable "alert_config" {
  type        = any
  default     = null
  description = "Complex Alertmanager configuration."
}
# Optional string-to-string map; defaults to null, so callers may omit it.
variable "parameters" {
  type        = map(string)
  default     = null
  description = "Additional key/value parameters for the instance."
}
# Switch for credential creation; enabled unless the caller sets it to false.
variable "create_credentials" {
  type        = bool
  default     = true
  description = "Whether to create credentials for this instance."
}
# Number of credentials to create when `create_credentials` is true.
# Defaults to 1.
variable "credentials_count" {
  description = "How many credentials to create when enabled."
  type        = number
  default     = 1

  # This value is used as a resource `count`, which only accepts non-negative
  # whole numbers. Validating here reports a bad value against this variable
  # instead of surfacing later as a generic "invalid count" error.
  # A null value is passed through unchanged so behavior for null is not
  # altered by this check.
  validation {
    condition = (
      var.credentials_count == null
      ? true
      : (var.credentials_count >= 0 && floor(var.credentials_count) == var.credentials_count)
    )
    error_message = "The credentials_count value must be a non-negative whole number."
  }
}
# Alert groups keyed by an arbitrary caller-chosen identifier.
# Each group needs a `name` and a list of `rules`; `interval` is optional.
# Each rule needs `alert` and `expression`; `for`, `labels` and `annotations`
# are optional. An empty map (the default) creates no groups.
variable "alertgroups" {
  description = "Map of alert-groups to create."
  default     = {}

  type = map(object({
    name     = string
    interval = optional(string)

    rules = list(object({
      alert       = string
      expression  = string
      for         = optional(string)
      annotations = optional(map(string))
      labels      = optional(map(string))
    }))
  }))
}
# Log alert groups keyed by an arbitrary caller-chosen identifier.
# Each group needs a `name` and a list of `rules`; `interval` is optional.
# Each rule needs `alert` and `expression`; `for`, `labels` and `annotations`
# are optional. An empty map (the default) creates no groups.
variable "logalertgroups" {
  description = "Map of log-alert-groups to create."
  default     = {}

  type = map(object({
    name     = string
    interval = optional(string)

    rules = list(object({
      alert       = string
      expression  = string
      for         = optional(string)
      annotations = optional(map(string))
      labels      = optional(map(string))
    }))
  }))
}
# Scrape configs keyed by an arbitrary caller-chosen identifier.
# `name`, `metrics_path` and `targets` are required per entry; every other
# attribute is optional. An empty map (the default) creates no scrape configs.
variable "scrapeconfigs" {
  description = "Map of scrape-configs to create."
  default     = {}

  type = map(object({
    # Required
    name         = string
    metrics_path = string
    targets = list(object({
      urls   = list(string)
      labels = optional(map(string))
    }))

    # Optional authentication settings
    basic_auth = optional(object({
      username = string
      password = string
    }))
    saml2 = optional(object({
      enable_url_parameters = optional(bool)
    }))

    # Optional scrape tuning
    scheme          = optional(string)
    scrape_interval = optional(string)
    scrape_timeout  = optional(string)
    sample_limit    = optional(number)
  }))
}