add remaining observability resources

This commit is contained in:
Maximilian_Schlenz 2025-07-16 11:07:08 +02:00
parent 370b15a328
commit 16a204faa7
11 changed files with 616 additions and 191 deletions

View file

@ -1,7 +1,7 @@
module "project" { module "project" {
source = "../project" source = "../project"
name = "project-123" name = "project-123"
labels = { labels = {
"example" = "test" "example" = "test"
} }
@ -53,7 +53,7 @@ module "net" {
labels = each.value.labels labels = each.value.labels
# NIC options # NIC options
nics = each.value.nics nics = each.value.nics
security_group_ids_by_name = local.security_group_ids_by_name security_group_ids_by_name = local.security_group_ids_by_name
} }
@ -85,25 +85,37 @@ module "postgres" {
# node_pools = each.value.node_pools # node_pools = each.value.node_pools
# } # }
# module "observability" { module "observability" {
# source = "../observability" # path to the new module source = "../observability"
# for_each = var.observability_instances for_each = var.observability_instances
# project_id = module.project.project_id project_id = module.project.project_id
# # required name = each.value.name
# name = each.value.name plan_name = each.value.plan_name
# plan_name = each.value.plan_name
# # optionals acl = each.value.acl
# acl = each.value.acl metrics_retention_days = each.value.metrics_retention_days
# metrics_retention_days = each.value.metrics_retention_days metrics_retention_days_5m_downsampling = each.value.metrics_retention_days_5m_downsampling
# metrics_retention_days_5m_downsampling = each.value.metrics_retention_days_5m_downsampling metrics_retention_days_1h_downsampling = each.value.metrics_retention_days_1h_downsampling
# metrics_retention_days_1h_downsampling = each.value.metrics_retention_days_1h_downsampling alert_config = each.value.alert_config
# alert_config = each.value.alert_config parameters = each.value.parameters
# parameters = each.value.parameters
# # credentials # Credentials
# create_credentials = each.value.create_credentials create_credentials = each.value.create_credentials
# credentials_count = each.value.credentials_count credentials_count = each.value.credentials_count
# }
alertgroups = each.value.alertgroups
logalertgroups = each.value.logalertgroups
scrapeconfigs = each.value.scrapeconfigs
}
# Collects the `observability_urls` output of every instance of
# module.observability, keyed by that module's for_each key.
output "obs_url" {
  description = "Observability endpoint URLs per instance, keyed by the module.observability instance key."
  value = {
    for key, instance in module.observability :
    key => instance.observability_urls
  }
}

View file

@ -5,24 +5,24 @@ organization_id = "03a34540-3c1a-4794-b2c6-7111ecf824ef"
service_account_key_path = "/Users/schlenz/sa-key-dd5fa2c9-1651-4da7-8404-9ac4fe9bc3d5.json" service_account_key_path = "/Users/schlenz/sa-key-dd5fa2c9-1651-4da7-8404-9ac4fe9bc3d5.json"
security_groups = { security_groups = {
ssh_ingress_group = { # ssh_ingress_group = {
name = "ssh-ingress-group" # name = "ssh-ingress-group"
description = "ALLOW SSH ingress" # description = "ALLOW SSH ingress"
rules = [ # rules = [
{ description = "SSH RULE 1" # { description = "SSH RULE 1"
direction = "ingress" # direction = "ingress"
ether_type = "IPv4" # ether_type = "IPv4"
ip_range = "0.0.0.0/0" # ip_range = "0.0.0.0/0"
protocol = { # protocol = {
name = "tcp" # name = "tcp"
} # }
port_range = { # port_range = {
min = 22 # min = 22
max = 22 # max = 22
} # }
}, # },
] # ]
}, # },
# web_traffic_group = { # web_traffic_group = {
# name = "web-traffic-group" # name = "web-traffic-group"
@ -57,126 +57,234 @@ security_groups = {
} }
postgres_instances = { postgres_instances = {
dev = { # dev = {
name = "pg-test-instance" # name = "pg-test-instance"
version = 17 # version = 17
flavor = { # flavor = {
cpu = 2, # cpu = 2,
ram = 4 # ram = 4
} # }
storage = { # storage = {
class = "premium-perf6-stackit", # class = "premium-perf6-stackit",
size = 20 # size = 20
} # }
replicas = 1 # replicas = 1
acl = ["0.0.0.0/0"] # acl = ["0.0.0.0/0"]
backup_schedule = "00 00 * * *" # backup_schedule = "00 00 * * *"
users = [ # users = [
{ username = "adminusr", # { username = "adminusr",
roles = ["login", "createdb"] # roles = ["login", "createdb"]
}, # },
{ username = "testusr", # { username = "testusr",
roles = ["login"] # roles = ["login"]
} # }
] # ]
databases = [ # databases = [
{ # {
name = "testdb", # name = "testdb",
owner = "admin" # owner = "admin"
} # }
] # ]
} # }
} }
networks = { networks = {
wan_network = { # wan_network = {
name = "wan_network" # name = "wan_network"
ipv4_nameservers = ["1.1.1.1", "8.8.8.8"] # ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
ipv4_prefix_length = 24 # ipv4_prefix_length = 24
ipv4_prefix = "10.219.0.0/24" # ipv4_prefix = "10.219.0.0/24"
routed = true # routed = true
} # }
lan_network1 = { # lan_network1 = {
name = "lan_network1" # name = "lan_network1"
ipv4_prefix_length = 24 # ipv4_prefix_length = 24
ipv4_prefix = "10.220.1.0/24" # ipv4_prefix = "10.220.1.0/24"
routed = true # routed = true
nics = { # nics = {
p2_lan1 = { # p2_lan1 = {
nic_name = "P2LAN1" # nic_name = "P2LAN1"
nic_ipv4 = "10.220.1.32" # nic_ipv4 = "10.220.1.32"
nic_security = true # nic_security = true
nic_security_group_names = ["ssh-ingress-group"] # nic_security_group_names = ["ssh-ingress-group"]
# }
# }
# }
# lan_network2 = {
# name = "lan_network2"
# ipv4_prefix_length = 24
# ipv4_prefix = "10.221.0.0/24"
# routed = true
# }
# lan_network3 = {
# name = "lan_network3"
# ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
# ipv4_prefix_length = 24
# ipv4_prefix = "10.223.3.0/24"
# routed = true
# }
# wan = {
# name = "MGMT"
# ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
# ipv4_prefix_length = 24
# nic_ipv4 = "10.224.0.254"
# }
# db = {
# name = "db-net"
# nic_ipv4 = "10.0.0.126"
# nic_security = true
# }
}
observability_instances = {
test = {
# Required
name = "test-observability"
plan_name = "Observability-Large-EU01"
# Optional instance settings
acl = ["192.168.100.10/32", "203.0.113.5/32"]
metrics_retention_days = 30
metrics_retention_days_5m_downsampling = 10
metrics_retention_days_1h_downsampling = 5
# parameters = {
# "custom_param" = "value"
# }
# Credentials
create_credentials = true
credentials_count = 2
# alertgroups
alertgroups = {
test_group = {
name = "example-alert-group"
interval = "60s"
rules = [
{
alert = "example-alert-name"
expression = "kube_node_status_condition{condition=\"Ready\", status=\"false\"} > 0"
for = "60s"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
{
alert = "example-alert-name-2"
expression = "kube_node_status_condition{condition=\"Ready\", status=\"false\"} > 0"
for = "1m"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
]
}
}
# logalertgroups
logalertgroups = {
example_log = {
name = "example-log-alert-group"
interval = "60m"
rules = [
{
alert = "example-log-alert-name"
expression = "sum(rate({namespace=\"example\", pod=\"logger\"} |= \"Simulated error message\" [1m])) > 0"
for = "60s"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
{
alert = "example-log-alert-name-2"
expression = "sum(rate({namespace=\"example\", pod=\"logger\"} |= \"Another error message\" [1m])) > 0"
for = "60s"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
]
}
}
# scrapeconfigs
scrapeconfigs = {
example_job = {
name = "example-job"
metrics_path = "/my-metrics"
saml2 = {
enable_url_parameters = true
}
targets = [
{
urls = ["url1", "urls2"]
labels = {
"url1" = "dev"
}
}
]
} }
} }
} }
lan_network2 = {
name = "lan_network2"
ipv4_prefix_length = 24
ipv4_prefix = "10.221.0.0/24"
routed = true
}
lan_network3 = {
name = "lan_network3"
ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
ipv4_prefix_length = 24
ipv4_prefix = "10.223.3.0/24"
routed = true
}
wan = {
name = "MGMT"
ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
ipv4_prefix_length = 24
nic_ipv4 = "10.224.0.254"
}
db = {
name = "db-net"
nic_ipv4 = "10.0.0.126"
nic_security = true
}
} }
# ske_clusters = { ske_clusters = {
# dev = { # dev = {
# name = "dev-cluster" # name = "dev-cluster"
# kubernetes_version_min = "1.31" # kubernetes_version_min = "1.31"
# node_pools = [ # node_pools = [
# { name = "default" # { name = "default"
# machine_type = "c2.1" # machine_type = "c2.1"
# availability_zones = ["eu01-1", "eu01-2"] # availability_zones = ["eu01-1", "eu01-2"]
# volume_size = 40 # volume_size = 40
# minimum = 1 # minimum = 1
# maximum = 3 # maximum = 3
# } # }
# ] # ]
# } # }
# staging = { # staging = {
# name = "staging-cluster" # name = "staging-cluster"
# kubernetes_version_min = "1.31" # kubernetes_version_min = "1.31"
# node_pools = [ # node_pools = [
# { name = "general" # { name = "general"
# machine_type = "c2.2" # machine_type = "c2.2"
# availability_zones = ["eu03-1", "eu03-2"] # availability_zones = ["eu03-1", "eu03-2"]
# volume_size = 80 # volume_size = 80
# minimum = 2 # minimum = 2
# maximum = 4 # maximum = 4
# } # }
# ] # ]
# } # }
# } # }
# observability_instances = { # observability_instances = {
# starter = { # starter = {
# name = "Observability-1" # name = "Observability-1"
# plan_name = "Observability-Starter-EU01" # plan_name = "Observability-Starter-EU01"
# } # }
# prod = { # prod = {
# name = "Observability-2" # name = "Observability-2"
# plan_name = "Observability-Large-EU01" # plan_name = "Observability-Large-EU01"
# } # }
# } }

View file

@ -116,24 +116,121 @@ variable "networks" {
} }
# variable "ske_clusters" { variable "ske_clusters" {
# type = map(object({ type = map(object({
# name = string name = string
# kubernetes_version_min = string kubernetes_version_min = string
# node_pools = list(object({ node_pools = list(object({
# name = string name = string
# machine_type = string machine_type = string
# availability_zones = list(string) availability_zones = list(string)
# volume_size = number volume_size = number
# minimum = number minimum = number
# maximum = number maximum = number
# })) }))
# })) }))
# } }
# variable "observability_instances" { variable "observability_instances" {
# type = map(object({ description = "Map of Observability instances to create"
# name = string type = map(object({
# plan_name = string # Required
# })) name = string
# } plan_name = string
# Optional instance settings
acl = optional(list(string))
metrics_retention_days = optional(number)
metrics_retention_days_5m_downsampling = optional(number)
metrics_retention_days_1h_downsampling = optional(number)
alert_config = optional(any)
parameters = optional(map(string))
# Control credential creation
create_credentials = optional(bool, true)
credentials_count = optional(number, 1)
# alertgroups
alertgroups = optional(map(object({
name = string
interval = optional(string)
rules = list(object({
alert = string
expression = string
for = optional(string)
labels = optional(map(string))
annotations = optional(map(string))
}))
})), {})
# logalertgroups
logalertgroups = optional(map(object({
name = string
interval = optional(string)
rules = list(object({
alert = string
expression = string
for = optional(string)
labels = optional(map(string))
annotations = optional(map(string))
}))
})), {})
# scrapeconfigs
scrapeconfigs = optional(map(object({
name = string
metrics_path = string
targets = list(object({
urls = list(string)
labels = optional(map(string))
}))
basic_auth = optional(object({
username = string
password = string
}))
saml2 = optional(object({
enable_url_parameters = optional(bool)
}))
sample_limit = optional(number)
scheme = optional(string)
scrape_interval = optional(string)
scrape_timeout = optional(string)
})), {})
}))
default = {}
validation {
condition = alltrue([
for k, v in var.observability_instances :
contains([
"Observability-Medium-EU01",
"Observability-Monitoring-XL-EU01",
"Observability-Large-EU01",
"Observability-Monitoring-Basic-EU01",
"Observability-Monitoring-Large-EU01",
"Observability-Basic-EU01",
"Observability-Monitoring-Medium-EU01",
"Observability-Monitoring-XXL-EU01",
"Observability-Metrics-Endpoint-100k-EU01",
"Observability-Frontend-Starter-EU01",
"Observability-Monitoring-Starter-EU01",
"Observability-Starter-EU01",
], v.plan_name)
])
error_message = <<-EOM
One or more observability_instances specify an invalid plan_name.
See the provider error output for the list of supported plans. Allowed values:
Observability-Medium-EU01
Observability-Monitoring-XL-EU01
Observability-Large-EU01
Observability-Monitoring-Basic-EU01
Observability-Monitoring-Large-EU01
Observability-Basic-EU01
Observability-Monitoring-Medium-EU01
Observability-Monitoring-XXL-EU01
Observability-Metrics-Endpoint-100k-EU01
Observability-Frontend-Starter-EU01
Observability-Monitoring-Starter-EU01
Observability-Starter-EU01
EOM
}
}

View file

@ -1,7 +1,7 @@
resource "stackit_network" "this" { resource "stackit_network" "this" {
project_id = var.project_id project_id = var.project_id
name = var.name name = var.name
labels = var.labels labels = var.labels
# IPv4 settings # IPv4 settings
ipv4_gateway = var.ipv4_gateway ipv4_gateway = var.ipv4_gateway
@ -15,9 +15,9 @@ resource "stackit_network" "this" {
ipv6_prefix = var.ipv6_prefix ipv6_prefix = var.ipv6_prefix
ipv6_prefix_length = var.ipv6_prefix_length ipv6_prefix_length = var.ipv6_prefix_length
no_ipv4_gateway = var.no_ipv4_gateway no_ipv4_gateway = var.no_ipv4_gateway
no_ipv6_gateway = var.no_ipv6_gateway no_ipv6_gateway = var.no_ipv6_gateway
routed = var.routed routed = var.routed
} }
resource "stackit_network_interface" "nics" { resource "stackit_network_interface" "nics" {
@ -26,11 +26,11 @@ resource "stackit_network_interface" "nics" {
project_id = var.project_id project_id = var.project_id
network_id = stackit_network.this.network_id network_id = stackit_network.this.network_id
name = each.value.nic_name name = each.value.nic_name
ipv4 = each.value.nic_ipv4 ipv4 = each.value.nic_ipv4
allowed_addresses = each.value.nic_allowed_addresses allowed_addresses = each.value.nic_allowed_addresses
labels = each.value.nic_labels labels = each.value.nic_labels
security = each.value.nic_security security = each.value.nic_security
security_group_ids = ( security_group_ids = (
each.value.nic_security_group_ids != null ? each.value.nic_security_group_ids : each.value.nic_security_group_ids != null ? each.value.nic_security_group_ids :
each.value.nic_security_group_names != null ? each.value.nic_security_group_names != null ?

View file

@ -17,12 +17,12 @@ variable "ipv4_nameservers" {
} }
variable "ipv4_prefix" { variable "ipv4_prefix" {
type = string # CIDR, only for NON-routed nets type = string
default = null default = null
} }
variable "ipv4_prefix_length" { variable "ipv4_prefix_length" {
type = number # e.g. 24 type = number
default = null default = null
} }
@ -62,7 +62,7 @@ variable "no_ipv6_gateway" {
} }
variable "routed" { variable "routed" {
type = bool type = bool
# default = true # default = true
} }

View file

@ -2,4 +2,55 @@ resource "stackit_observability_instance" "this" {
project_id = var.project_id project_id = var.project_id
name = var.name name = var.name
plan_name = var.plan_name plan_name = var.plan_name
acl = var.acl
metrics_retention_days = var.metrics_retention_days
metrics_retention_days_5m_downsampling = var.metrics_retention_days_5m_downsampling
metrics_retention_days_1h_downsampling = var.metrics_retention_days_1h_downsampling
alert_config = var.alert_config
parameters = var.parameters
}
# Credentials bound to stackit_observability_instance.this.
# `count` is var.credentials_count when var.create_credentials is true and 0
# otherwise, so disabling creation yields no credential resources at all.
resource "stackit_observability_credential" "this" {
  count = var.create_credentials ? var.credentials_count : 0

  instance_id = stackit_observability_instance.this.instance_id
  project_id  = var.project_id
}
# One alert group per entry of var.alertgroups, attached to
# stackit_observability_instance.this.
resource "stackit_observability_alertgroup" "this" {
  for_each = var.alertgroups

  # Where the group lives
  instance_id = stackit_observability_instance.this.instance_id
  project_id  = var.project_id

  # Group definition, passed through unchanged from the map entry
  name     = each.value.name
  interval = each.value.interval
  rules    = each.value.rules
}
# One log alert group per entry of var.logalertgroups, attached to
# stackit_observability_instance.this.
resource "stackit_observability_logalertgroup" "this" {
  for_each = var.logalertgroups

  # Where the group lives
  instance_id = stackit_observability_instance.this.instance_id
  project_id  = var.project_id

  # Group definition, passed through unchanged from the map entry
  name     = each.value.name
  interval = each.value.interval
  rules    = each.value.rules
}
resource "stackit_observability_scrapeconfig" "this" {
for_each = var.scrapeconfigs
project_id = var.project_id
instance_id = stackit_observability_instance.this.instance_id
name = each.value.name
metrics_path = each.value.metrics_path
targets = each.value.targets
basic_auth = each.value.basic_auth
saml2 = each.value.saml2
sample_limit = each.value.sample_limit
scheme = each.value.scheme
scrape_interval = each.value.scrape_interval
scrape_timeout = each.value.scrape_timeout
} }

50
observability/outputs.tf Normal file
View file

@ -0,0 +1,50 @@
# Exposes the instance_id attribute of stackit_observability_instance.this.
output "observability_id" {
  value       = stackit_observability_instance.this.instance_id
  description = "Observability instance ID"
}
# Bundles the four URL attributes of stackit_observability_instance.this into
# a single object with the keys dashboard, grafana, logs and metrics.
output "observability_urls" {
  description = "Key Observability URLs"

  value = {
    dashboard = stackit_observability_instance.this.dashboard_url
    grafana   = stackit_observability_instance.this.grafana_url
    logs      = stackit_observability_instance.this.logs_url
    metrics   = stackit_observability_instance.this.metrics_url
  }
}
# Username/password pairs of every stackit_observability_credential.this
# instance. Marked sensitive so the values are redacted in CLI output.
output "observability_credentials" {
  description = "List of credential objects (username & password)"
  sensitive   = true

  value = [
    for credential in stackit_observability_credential.this : {
      username = credential.username
      password = credential.password
    }
  ]
}
# Maps each for_each key of stackit_observability_alertgroup.this to the `id`
# of the resource created for it.
output "observability_alertgroups" {
  description = "Map of created Thanos alert-group IDs"

  value = {
    for group_key, group in stackit_observability_alertgroup.this :
    group_key => group.id
  }
}
# Maps each for_each key of stackit_observability_logalertgroup.this to the
# `id` of the resource created for it.
output "observability_logalertgroups" {
  description = "Map of created Loki log-alert-group IDs"

  value = {
    for group_key, group in stackit_observability_logalertgroup.this :
    group_key => group.id
  }
}
# Maps each for_each key of stackit_observability_scrapeconfig.this to the
# `id` of the resource created for it.
output "observability_scrapeconfigs" {
  description = "Map of created scrape-config IDs"

  value = {
    for job_key, job in stackit_observability_scrapeconfig.this :
    job_key => job.id
  }
}

View file

@ -4,7 +4,7 @@ terraform {
required_providers { required_providers {
stackit = { stackit = {
source = "stackitcloud/stackit" source = "stackitcloud/stackit"
version = "0.56.0" version = "0.54.0"
} }
} }
} }

View file

@ -1,11 +1,118 @@
variable "project_id" { variable "project_id" {
type = string description = "STACKIT project ID"
type = string
} }
variable "name" { variable "name" {
type = string description = "The name of the Observability instance."
type = string
} }
variable "plan_name" { variable "plan_name" {
type = string description = "Specifies the Observability plan."
type = string
}
# Optional allow-list forwarded to the instance's `acl` argument.
# Defaults to null, i.e. the argument is left unset.
variable "acl" {
  type        = list(string)
  default     = null
  description = "Access control list (CIDR blocks) permitted to access this instance."
}
# Optional retention for raw metrics, forwarded to the instance.
# Defaults to null, i.e. the argument is left unset.
variable "metrics_retention_days" {
  type        = number
  default     = null
  description = "How many days raw metrics are kept."
}
# Optional retention for 5m-downsampled metrics, forwarded to the instance.
# Defaults to null, i.e. the argument is left unset.
variable "metrics_retention_days_5m_downsampling" {
  type        = number
  default     = null
  description = "How many days 5m-downsampled metrics are kept."
}
# Optional retention for 1h-downsampled metrics, forwarded to the instance.
# Defaults to null, i.e. the argument is left unset.
variable "metrics_retention_days_1h_downsampling" {
  type        = number
  default     = null
  description = "How many days 1h-downsampled metrics are kept."
}
# Optional Alertmanager configuration, forwarded to the instance as-is.
# Typed `any`, so this module performs no structural checking on it.
variable "alert_config" {
  type        = any
  default     = null
  description = "Complex Alertmanager configuration."
}
# Optional string-to-string parameters forwarded to the instance.
# Defaults to null, i.e. the argument is left unset.
variable "parameters" {
  type        = map(string)
  default     = null
  description = "Additional key/value parameters for the instance."
}
# Switch for credential creation. It is the condition of the `count`
# expression on stackit_observability_credential.this.
variable "create_credentials" {
  description = "Whether to create credentials for this instance."
  type        = bool
  default     = true
  # A null condition is an error in a conditional expression; with
  # nullable = false an explicit null resolves to the default instead.
  nullable = false
}
# Number of stackit_observability_credential.this instances to create when
# var.create_credentials is true. The value is used directly as a resource
# `count`, so it has to be a non-negative whole number.
variable "credentials_count" {
  description = "How many credentials to create when enabled."
  type        = number
  default     = 1
  # An explicit null resolves to the default instead of reaching `count`.
  nullable = false

  validation {
    # Reject negative and fractional values here, with a message that names
    # the variable, rather than failing later on the resource's count.
    condition     = var.credentials_count >= 0 && floor(var.credentials_count) == var.credentials_count
    error_message = "The credentials_count value must be a non-negative whole number."
  }
}
# Alert groups to create on the instance; used as the for_each map of
# stackit_observability_alertgroup.this. Each entry carries the group name,
# an optional evaluation interval and a list of rules.
variable "alertgroups" {
  description = "Map of alert-groups to create."
  type = map(object({
    name     = string
    interval = optional(string)
    rules = list(object({
      alert       = string
      expression  = string
      for         = optional(string)
      labels      = optional(map(string))
      annotations = optional(map(string))
    }))
  }))
  default = {}
  # for_each cannot iterate null; with nullable = false an explicit null
  # resolves to the empty default and simply creates no groups.
  nullable = false
}
# Log alert groups to create on the instance; used as the for_each map of
# stackit_observability_logalertgroup.this. Each entry carries the group name,
# an optional evaluation interval and a list of rules.
variable "logalertgroups" {
  description = "Map of log-alert-groups to create."
  type = map(object({
    name     = string
    interval = optional(string)
    rules = list(object({
      alert       = string
      expression  = string
      for         = optional(string)
      labels      = optional(map(string))
      annotations = optional(map(string))
    }))
  }))
  default = {}
  # for_each cannot iterate null; with nullable = false an explicit null
  # resolves to the empty default and simply creates no groups.
  nullable = false
}
variable "scrapeconfigs" {
description = "Map of scrape-configs to create."
type = map(object({
name = string
metrics_path = string
targets = list(object({
urls = list(string)
labels = optional(map(string))
}))
basic_auth = optional(object({
username = string
password = string
}))
saml2 = optional(object({
enable_url_parameters = optional(bool)
}))
sample_limit = optional(number)
scheme = optional(string)
scrape_interval = optional(string)
scrape_timeout = optional(string)
}))
default = {}
} }