add remaining observability resources

This commit is contained in:
Maximilian_Schlenz 2025-07-16 11:07:08 +02:00
parent 370b15a328
commit 16a204faa7
11 changed files with 616 additions and 191 deletions

View file

@ -1,7 +1,7 @@
module "project" {
source = "../project"
name = "project-123"
name = "project-123"
labels = {
"example" = "test"
}
@ -53,7 +53,7 @@ module "net" {
labels = each.value.labels
# NIC options
nics = each.value.nics
nics = each.value.nics
security_group_ids_by_name = local.security_group_ids_by_name
}
@ -85,25 +85,37 @@ module "postgres" {
# node_pools = each.value.node_pools
# }
# module "observability" {
# source = "../observability" # path to the new module
# for_each = var.observability_instances
module "observability" {
source = "../observability"
for_each = var.observability_instances
# project_id = module.project.project_id
project_id = module.project.project_id
# # required
# name = each.value.name
# plan_name = each.value.plan_name
name = each.value.name
plan_name = each.value.plan_name
# # optionals
# acl = each.value.acl
# metrics_retention_days = each.value.metrics_retention_days
# metrics_retention_days_5m_downsampling = each.value.metrics_retention_days_5m_downsampling
# metrics_retention_days_1h_downsampling = each.value.metrics_retention_days_1h_downsampling
# alert_config = each.value.alert_config
# parameters = each.value.parameters
acl = each.value.acl
metrics_retention_days = each.value.metrics_retention_days
metrics_retention_days_5m_downsampling = each.value.metrics_retention_days_5m_downsampling
metrics_retention_days_1h_downsampling = each.value.metrics_retention_days_1h_downsampling
alert_config = each.value.alert_config
parameters = each.value.parameters
# # credentials
# create_credentials = each.value.create_credentials
# credentials_count = each.value.credentials_count
# }
# Credentials
create_credentials = each.value.create_credentials
credentials_count = each.value.credentials_count
alertgroups = each.value.alertgroups
logalertgroups = each.value.logalertgroups
scrapeconfigs = each.value.scrapeconfigs
}
# Collects the `observability_urls` output of every instantiated
# observability module into one map, keyed by the for_each key of the module
# (i.e. the key used in var.observability_instances).
output "obs_url" {
  description = "Observability endpoint URLs per instance, keyed by the observability_instances map key."
  value = {
    for instance_key, obs in module.observability :
    instance_key => obs.observability_urls
  }
}

View file

@ -5,24 +5,24 @@ organization_id = "03a34540-3c1a-4794-b2c6-7111ecf824ef"
service_account_key_path = "/Users/schlenz/sa-key-dd5fa2c9-1651-4da7-8404-9ac4fe9bc3d5.json"
security_groups = {
ssh_ingress_group = {
name = "ssh-ingress-group"
description = "ALLOW SSH ingress"
rules = [
{ description = "SSH RULE 1"
direction = "ingress"
ether_type = "IPv4"
ip_range = "0.0.0.0/0"
protocol = {
name = "tcp"
}
port_range = {
min = 22
max = 22
}
},
]
},
# ssh_ingress_group = {
# name = "ssh-ingress-group"
# description = "ALLOW SSH ingress"
# rules = [
# { description = "SSH RULE 1"
# direction = "ingress"
# ether_type = "IPv4"
# ip_range = "0.0.0.0/0"
# protocol = {
# name = "tcp"
# }
# port_range = {
# min = 22
# max = 22
# }
# },
# ]
# },
# web_traffic_group = {
# name = "web-traffic-group"
@ -57,126 +57,234 @@ security_groups = {
}
postgres_instances = {
dev = {
name = "pg-test-instance"
version = 17
flavor = {
cpu = 2,
ram = 4
}
storage = {
class = "premium-perf6-stackit",
size = 20
}
replicas = 1
acl = ["0.0.0.0/0"]
backup_schedule = "00 00 * * *"
# dev = {
# name = "pg-test-instance"
# version = 17
# flavor = {
# cpu = 2,
# ram = 4
# }
# storage = {
# class = "premium-perf6-stackit",
# size = 20
# }
# replicas = 1
# acl = ["0.0.0.0/0"]
# backup_schedule = "00 00 * * *"
users = [
{ username = "adminusr",
roles = ["login", "createdb"]
},
{ username = "testusr",
roles = ["login"]
}
]
# users = [
# { username = "adminusr",
# roles = ["login", "createdb"]
# },
# { username = "testusr",
# roles = ["login"]
# }
# ]
databases = [
{
name = "testdb",
owner = "admin"
}
]
}
# databases = [
# {
# name = "testdb",
# owner = "admin"
# }
# ]
# }
}
networks = {
wan_network = {
name = "wan_network"
ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
ipv4_prefix_length = 24
ipv4_prefix = "10.219.0.0/24"
routed = true
}
lan_network1 = {
name = "lan_network1"
ipv4_prefix_length = 24
ipv4_prefix = "10.220.1.0/24"
routed = true
nics = {
p2_lan1 = {
nic_name = "P2LAN1"
nic_ipv4 = "10.220.1.32"
nic_security = true
nic_security_group_names = ["ssh-ingress-group"]
# wan_network = {
# name = "wan_network"
# ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
# ipv4_prefix_length = 24
# ipv4_prefix = "10.219.0.0/24"
# routed = true
# }
# lan_network1 = {
# name = "lan_network1"
# ipv4_prefix_length = 24
# ipv4_prefix = "10.220.1.0/24"
# routed = true
# nics = {
# p2_lan1 = {
# nic_name = "P2LAN1"
# nic_ipv4 = "10.220.1.32"
# nic_security = true
# nic_security_group_names = ["ssh-ingress-group"]
# }
# }
# }
# lan_network2 = {
# name = "lan_network2"
# ipv4_prefix_length = 24
# ipv4_prefix = "10.221.0.0/24"
# routed = true
# }
# lan_network3 = {
# name = "lan_network3"
# ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
# ipv4_prefix_length = 24
# ipv4_prefix = "10.223.3.0/24"
# routed = true
# }
# wan = {
# name = "MGMT"
# ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
# ipv4_prefix_length = 24
# nic_ipv4 = "10.224.0.254"
# }
# db = {
# name = "db-net"
# nic_ipv4 = "10.0.0.126"
# nic_security = true
# }
}
observability_instances = {
test = {
# Required
name = "test-observability"
plan_name = "Observability-Large-EU01"
# Optional instance settings
acl = ["192.168.100.10/32", "203.0.113.5/32"]
metrics_retention_days = 30
metrics_retention_days_5m_downsampling = 10
metrics_retention_days_1h_downsampling = 5
# parameters = {
# "custom_param" = "value"
# }
# Credentials
create_credentials = true
credentials_count = 2
# alertgroups
alertgroups = {
test_group = {
name = "example-alert-group"
interval = "60s"
rules = [
{
alert = "example-alert-name"
expression = "kube_node_status_condition{condition=\"Ready\", status=\"false\"} > 0"
for = "60s"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
{
alert = "example-alert-name-2"
expression = "kube_node_status_condition{condition=\"Ready\", status=\"false\"} > 0"
for = "1m"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
]
}
}
# logalertgroups
logalertgroups = {
example_log = {
name = "example-log-alert-group"
interval = "60m"
rules = [
{
alert = "example-log-alert-name"
expression = "sum(rate({namespace=\"example\", pod=\"logger\"} |= \"Simulated error message\" [1m])) > 0"
for = "60s"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
{
alert = "example-log-alert-name-2"
expression = "sum(rate({namespace=\"example\", pod=\"logger\"} |= \"Another error message\" [1m])) > 0"
for = "60s"
labels = {
severity = "critical"
}
annotations = {
summary = "example summary"
description = "example description"
}
},
]
}
}
# scrapeconfigs
scrapeconfigs = {
example_job = {
name = "example-job"
metrics_path = "/my-metrics"
saml2 = {
enable_url_parameters = true
}
targets = [
{
urls = ["url1", "urls2"]
labels = {
"url1" = "dev"
}
}
]
}
}
}
lan_network2 = {
name = "lan_network2"
ipv4_prefix_length = 24
ipv4_prefix = "10.221.0.0/24"
routed = true
}
lan_network3 = {
name = "lan_network3"
ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
ipv4_prefix_length = 24
ipv4_prefix = "10.223.3.0/24"
routed = true
}
wan = {
name = "MGMT"
ipv4_nameservers = ["1.1.1.1", "8.8.8.8"]
ipv4_prefix_length = 24
nic_ipv4 = "10.224.0.254"
}
db = {
name = "db-net"
nic_ipv4 = "10.0.0.126"
nic_security = true
}
}
# ske_clusters = {
# dev = {
# name = "dev-cluster"
# kubernetes_version_min = "1.31"
# node_pools = [
# { name = "default"
# machine_type = "c2.1"
# availability_zones = ["eu01-1", "eu01-2"]
# volume_size = 40
# minimum = 1
# maximum = 3
# }
# ]
# }
ske_clusters = {
# dev = {
# name = "dev-cluster"
# kubernetes_version_min = "1.31"
# node_pools = [
# { name = "default"
# machine_type = "c2.1"
# availability_zones = ["eu01-1", "eu01-2"]
# volume_size = 40
# minimum = 1
# maximum = 3
# }
# ]
# }
# staging = {
# name = "staging-cluster"
# kubernetes_version_min = "1.31"
# node_pools = [
# { name = "general"
# machine_type = "c2.2"
# availability_zones = ["eu03-1", "eu03-2"]
# volume_size = 80
# minimum = 2
# maximum = 4
# }
# ]
# }
# }
# staging = {
# name = "staging-cluster"
# kubernetes_version_min = "1.31"
# node_pools = [
# { name = "general"
# machine_type = "c2.2"
# availability_zones = ["eu03-1", "eu03-2"]
# volume_size = 80
# minimum = 2
# maximum = 4
# }
# ]
# }
# }
# observability_instances = {
# starter = {
# name = "Observability-1"
# plan_name = "Observability-Starter-EU01"
# }
# observability_instances = {
# starter = {
# name = "Observability-1"
# plan_name = "Observability-Starter-EU01"
# }
# prod = {
# name = "Observability-2"
# plan_name = "Observability-Large-EU01"
# }
# }
# prod = {
# name = "Observability-2"
# plan_name = "Observability-Large-EU01"
# }
}

View file

@ -116,24 +116,121 @@ variable "networks" {
}
# variable "ske_clusters" {
# type = map(object({
# name = string
# kubernetes_version_min = string
# node_pools = list(object({
# name = string
# machine_type = string
# availability_zones = list(string)
# volume_size = number
# minimum = number
# maximum = number
# }))
# }))
# }
# SKE cluster definitions, keyed by an arbitrary map key.
# No default is declared, so callers have to supply a value (an empty map is
# accepted by the type).
variable "ske_clusters" {
  type = map(object({
    # Cluster-level settings
    name                   = string
    kubernetes_version_min = string

    # One object per node pool of the cluster
    node_pools = list(object({
      name               = string
      machine_type       = string
      availability_zones = list(string)
      minimum            = number
      maximum            = number
      volume_size        = number
    }))
  }))
}
# variable "observability_instances" {
# type = map(object({
# name = string
# plan_name = string
# }))
# }
# Root-level description of all Observability instances. Every map entry is
# passed to the "../observability" module through for_each, so the object
# shape below mirrors that module's input variables.
variable "observability_instances" {
  description = "Map of Observability instances to create"
  default     = {}

  type = map(object({
    # --- Required -----------------------------------------------------------
    name      = string
    plan_name = string

    # --- Optional instance settings (null when omitted) ----------------------
    acl                                    = optional(list(string))
    metrics_retention_days                 = optional(number)
    metrics_retention_days_5m_downsampling = optional(number)
    metrics_retention_days_1h_downsampling = optional(number)
    alert_config                           = optional(any)
    parameters                             = optional(map(string))

    # --- Credential creation (enabled, one credential, unless overridden) ----
    create_credentials = optional(bool, true)
    credentials_count  = optional(number, 1)

    # --- Alert groups; defaults to an empty map -----------------------------
    alertgroups = optional(map(object({
      name     = string
      interval = optional(string)
      rules = list(object({
        alert       = string
        expression  = string
        for         = optional(string)
        labels      = optional(map(string))
        annotations = optional(map(string))
      }))
    })), {})

    # --- Log alert groups; same shape as alertgroups, defaults to {} --------
    logalertgroups = optional(map(object({
      name     = string
      interval = optional(string)
      rules = list(object({
        alert       = string
        expression  = string
        for         = optional(string)
        labels      = optional(map(string))
        annotations = optional(map(string))
      }))
    })), {})

    # --- Scrape configs; defaults to an empty map ---------------------------
    scrapeconfigs = optional(map(object({
      name         = string
      metrics_path = string
      targets = list(object({
        urls   = list(string)
        labels = optional(map(string))
      }))
      basic_auth = optional(object({
        username = string
        password = string
      }))
      saml2 = optional(object({
        enable_url_parameters = optional(bool)
      }))
      sample_limit    = optional(number)
      scheme          = optional(string)
      scrape_interval = optional(string)
      scrape_timeout  = optional(string)
    })), {})
  }))

  # Every instance must reference one of the plan names listed here; the same
  # list is repeated in the error message for the user's benefit.
  validation {
    condition = alltrue([
      for instance_key, instance in var.observability_instances :
      contains([
        "Observability-Medium-EU01",
        "Observability-Monitoring-XL-EU01",
        "Observability-Large-EU01",
        "Observability-Monitoring-Basic-EU01",
        "Observability-Monitoring-Large-EU01",
        "Observability-Basic-EU01",
        "Observability-Monitoring-Medium-EU01",
        "Observability-Monitoring-XXL-EU01",
        "Observability-Metrics-Endpoint-100k-EU01",
        "Observability-Frontend-Starter-EU01",
        "Observability-Monitoring-Starter-EU01",
        "Observability-Starter-EU01",
      ], instance.plan_name)
    ])
    error_message = <<-EOM
    One or more observability_instances specify an invalid plan_name.
    See the provider error output for the list of supported plans. Allowed values:
    Observability-Medium-EU01
    Observability-Monitoring-XL-EU01
    Observability-Large-EU01
    Observability-Monitoring-Basic-EU01
    Observability-Monitoring-Large-EU01
    Observability-Basic-EU01
    Observability-Monitoring-Medium-EU01
    Observability-Monitoring-XXL-EU01
    Observability-Metrics-Endpoint-100k-EU01
    Observability-Frontend-Starter-EU01
    Observability-Monitoring-Starter-EU01
    Observability-Starter-EU01
    EOM
  }
}

View file

@ -1,7 +1,7 @@
resource "stackit_network" "this" {
project_id = var.project_id
name = var.name
labels = var.labels
project_id = var.project_id
name = var.name
labels = var.labels
# IPv4 settings
ipv4_gateway = var.ipv4_gateway
@ -15,9 +15,9 @@ resource "stackit_network" "this" {
ipv6_prefix = var.ipv6_prefix
ipv6_prefix_length = var.ipv6_prefix_length
no_ipv4_gateway = var.no_ipv4_gateway
no_ipv6_gateway = var.no_ipv6_gateway
routed = var.routed
no_ipv4_gateway = var.no_ipv4_gateway
no_ipv6_gateway = var.no_ipv6_gateway
routed = var.routed
}
resource "stackit_network_interface" "nics" {
@ -25,16 +25,16 @@ resource "stackit_network_interface" "nics" {
project_id = var.project_id
network_id = stackit_network.this.network_id
name = each.value.nic_name
ipv4 = each.value.nic_ipv4
allowed_addresses = each.value.nic_allowed_addresses
labels = each.value.nic_labels
security = each.value.nic_security
name = each.value.nic_name
ipv4 = each.value.nic_ipv4
allowed_addresses = each.value.nic_allowed_addresses
labels = each.value.nic_labels
security = each.value.nic_security
security_group_ids = (
each.value.nic_security_group_ids != null ? each.value.nic_security_group_ids :
each.value.nic_security_group_names != null ?
[for name in each.value.nic_security_group_names : var.security_group_ids_by_name[name]]
each.value.nic_security_group_names != null ?
[for name in each.value.nic_security_group_names : var.security_group_ids_by_name[name]]
: []
)
}

View file

@ -17,12 +17,12 @@ variable "ipv4_nameservers" {
}
variable "ipv4_prefix" {
type = string # CIDR, only for NON-routed nets
type = string
default = null
}
variable "ipv4_prefix_length" {
type = number # e.g. 24
type = number
default = null
}
@ -62,7 +62,7 @@ variable "no_ipv6_gateway" {
}
variable "routed" {
type = bool
type = bool
# default = true
}

View file

@ -2,4 +2,55 @@ resource "stackit_observability_instance" "this" {
project_id = var.project_id
name = var.name
plan_name = var.plan_name
acl = var.acl
metrics_retention_days = var.metrics_retention_days
metrics_retention_days_5m_downsampling = var.metrics_retention_days_5m_downsampling
metrics_retention_days_1h_downsampling = var.metrics_retention_days_1h_downsampling
alert_config = var.alert_config
parameters = var.parameters
}
# Credentials for the instance: var.credentials_count of them when
# var.create_credentials is true, none otherwise.
resource "stackit_observability_credential" "this" {
  count = !var.create_credentials ? 0 : var.credentials_count

  instance_id = stackit_observability_instance.this.instance_id
  project_id  = var.project_id
}
# One alert group per entry of var.alertgroups, attached to the instance
# resource of this module.
resource "stackit_observability_alertgroup" "this" {
  for_each = var.alertgroups

  # Where the group lives
  instance_id = stackit_observability_instance.this.instance_id
  project_id  = var.project_id

  # Group definition, taken verbatim from the map entry
  name     = each.value.name
  rules    = each.value.rules
  interval = each.value.interval
}
# One log alert group per entry of var.logalertgroups, attached to the
# instance resource of this module.
resource "stackit_observability_logalertgroup" "this" {
  for_each = var.logalertgroups

  # Where the group lives
  instance_id = stackit_observability_instance.this.instance_id
  project_id  = var.project_id

  # Group definition, taken verbatim from the map entry
  name     = each.value.name
  rules    = each.value.rules
  interval = each.value.interval
}
# One scrape config per entry of var.scrapeconfigs, attached to the instance
# resource of this module. Optional fields are passed through as-is and are
# null when the map entry omits them.
resource "stackit_observability_scrapeconfig" "this" {
  for_each = var.scrapeconfigs

  # Where the scrape config lives
  instance_id = stackit_observability_instance.this.instance_id
  project_id  = var.project_id

  # Required job settings
  name         = each.value.name
  metrics_path = each.value.metrics_path
  targets      = each.value.targets

  # Optional authentication blocks
  basic_auth = each.value.basic_auth
  saml2      = each.value.saml2

  # Optional scrape tuning
  scheme          = each.value.scheme
  scrape_interval = each.value.scrape_interval
  scrape_timeout  = each.value.scrape_timeout
  sample_limit    = each.value.sample_limit
}

50
observability/outputs.tf Normal file
View file

@ -0,0 +1,50 @@
# Exposes the instance_id attribute of the instance resource.
output "observability_id" {
  value       = stackit_observability_instance.this.instance_id
  description = "Observability instance ID"
}
# Bundles the four URL attributes of the instance resource into one object
# with the keys grafana, dashboard, metrics and logs.
output "observability_urls" {
  description = "Key Observability URLs"

  value = {
    dashboard = stackit_observability_instance.this.dashboard_url
    grafana   = stackit_observability_instance.this.grafana_url
    logs      = stackit_observability_instance.this.logs_url
    metrics   = stackit_observability_instance.this.metrics_url
  }
}
# Username/password pair of every created credential, in count order.
# Marked sensitive so the values are redacted in CLI output.
output "observability_credentials" {
  description = "List of credential objects (username & password)"
  sensitive   = true

  value = [
    for credential in stackit_observability_credential.this : {
      username = credential.username
      password = credential.password
    }
  ]
}
# Maps each var.alertgroups key to the id of the alert group created for it.
output "observability_alertgroups" {
  description = "Map of created Thanos alert-group IDs"

  value = {
    for group_key, group in stackit_observability_alertgroup.this :
    group_key => group.id
  }
}
# Maps each var.logalertgroups key to the id of the log alert group created
# for it.
output "observability_logalertgroups" {
  description = "Map of created Loki log-alert-group IDs"

  value = {
    for group_key, group in stackit_observability_logalertgroup.this :
    group_key => group.id
  }
}
# Maps each var.scrapeconfigs key to the id of the scrape config created for
# it.
output "observability_scrapeconfigs" {
  description = "Map of created scrape-config IDs"

  value = {
    for job_key, job in stackit_observability_scrapeconfig.this :
    job_key => job.id
  }
}

View file

@ -4,7 +4,7 @@ terraform {
required_providers {
stackit = {
source = "stackitcloud/stackit"
version = "0.56.0"
version = "0.54.0"
}
}
}

View file

@ -1,11 +1,118 @@
variable "project_id" {
type = string
description = "STACKIT project ID"
type = string
}
variable "name" {
type = string
description = "The name of the Observability instance."
type = string
}
variable "plan_name" {
type = string
description = "Specifies the Observability plan."
type = string
}
# Optional; null (the default) leaves the instance's acl argument unset.
variable "acl" {
  type        = list(string)
  default     = null
  description = "Access control list (CIDR blocks) permitted to access this instance."
}
# Optional; null (the default) leaves the corresponding instance argument
# unset.
variable "metrics_retention_days" {
  type        = number
  default     = null
  description = "How many days raw metrics are kept."
}
# Optional; null (the default) leaves the corresponding instance argument
# unset.
variable "metrics_retention_days_5m_downsampling" {
  type        = number
  default     = null
  description = "How many days 5m-downsampled metrics are kept."
}
# Optional; null (the default) leaves the corresponding instance argument
# unset.
variable "metrics_retention_days_1h_downsampling" {
  type        = number
  default     = null
  description = "How many days 1h-downsampled metrics are kept."
}
# Optional and untyped (`any`): the value is forwarded unchanged to the
# instance's alert_config argument; null (the default) leaves it unset.
variable "alert_config" {
  type        = any
  default     = null
  description = "Complex Alertmanager configuration."
}
# Optional; null (the default) leaves the instance's parameters argument
# unset.
variable "parameters" {
  type        = map(string)
  default     = null
  description = "Additional key/value parameters for the instance."
}
# Switch for the credential resource: when false its count is forced to 0.
variable "create_credentials" {
  type        = bool
  default     = true
  description = "Whether to create credentials for this instance."
}
# Number of credentials to create. The value is used directly as the `count`
# of the credential resource when var.create_credentials is true, so it has to
# be a non-negative whole number. The validation below reports a bad value at
# the variable, with a clear message, instead of as a count error inside the
# module.
variable "credentials_count" {
  description = "How many credentials to create when enabled."
  type        = number
  default     = 1

  validation {
    # An explicit null is not judged here; only concrete numbers are checked.
    condition     = var.credentials_count == null ? true : (var.credentials_count >= 0 && floor(var.credentials_count) == var.credentials_count)
    error_message = "The credentials_count value must be a whole number greater than or equal to 0."
  }
}
# Input for the alert group resource's for_each: one group is created per map
# entry. Defaults to an empty map, i.e. no groups.
variable "alertgroups" {
  description = "Map of alert-groups to create."
  default     = {}

  type = map(object({
    name     = string
    interval = optional(string)

    # Rules of the group; only alert and expression are mandatory.
    rules = list(object({
      alert       = string
      expression  = string
      for         = optional(string)
      annotations = optional(map(string))
      labels      = optional(map(string))
    }))
  }))
}
# Input for the log alert group resource's for_each: one group is created per
# map entry. Defaults to an empty map, i.e. no groups.
variable "logalertgroups" {
  description = "Map of log-alert-groups to create."
  default     = {}

  type = map(object({
    name     = string
    interval = optional(string)

    # Rules of the group; only alert and expression are mandatory.
    rules = list(object({
      alert       = string
      expression  = string
      for         = optional(string)
      annotations = optional(map(string))
      labels      = optional(map(string))
    }))
  }))
}
# Input for the scrape config resource's for_each: one scrape config is
# created per map entry. Defaults to an empty map, i.e. no scrape configs.
variable "scrapeconfigs" {
  description = "Map of scrape-configs to create."
  default     = {}

  type = map(object({
    # Required job settings
    name         = string
    metrics_path = string
    targets = list(object({
      urls   = list(string)
      labels = optional(map(string))
    }))

    # Optional authentication blocks
    basic_auth = optional(object({
      username = string
      password = string
    }))
    saml2 = optional(object({
      enable_url_parameters = optional(bool)
    }))

    # Optional scrape tuning
    scheme          = optional(string)
    scrape_interval = optional(string)
    scrape_timeout  = optional(string)
    sample_limit    = optional(number)
  }))
}

View file

@ -1,4 +1,4 @@
output "project_id" {
value = stackit_resourcemanager_project.this.project_id
description = "ID of the project"
}
description = "ID of the project"
}