- status-code-slo.tf
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# ---------------------------------------------------------------------------
# Status-code SLO for the service's ALB, plus the two SLO alerts built on it.
# ---------------------------------------------------------------------------

# Metric-based SLO over a 30-day window with a 99% target.
#   good  = 2xx responses returned by the ALB targets
#   total = all ALB requests minus 4xx responses from the targets
# NOTE(review): 3xx target responses look like they stay in the denominator
# without being in the numerator, so they would count against the SLO —
# confirm this is intended.
resource "datadog_service_level_objective" "statuscode_slo" {
  type        = "metric"
  name        = "[${var.service_name}][SLO]Status Code"
  description = "[${var.service_name}][SLO]Status Code"
  tags        = local.combined_tags

  query {
    numerator   = "sum:aws.applicationelb.httpcode_target_2xx{name:${var.service_name}-alb}.as_count()"
    denominator = "sum:aws.applicationelb.request_count{name:${var.service_name}-alb}.as_count() - sum:aws.applicationelb.httpcode_target_4xx{name:${var.service_name}-alb}.as_count()"
  }

  thresholds {
    timeframe = "30d"
    target    = 99.0
  }
}

# Alerts when more than 100% of the 30-day error budget has been consumed.
# The value in the query and the critical threshold are both 100.
resource "datadog_monitor" "statuscode_error_budget" {
  type = "slo alert"
  name = "[${var.service_name}][SLO]Status Code Breach Error Budget"
  tags = local.combined_tags

  query = <<-QUERY
    error_budget("${datadog_service_level_objective.statuscode_slo.id}").over("30d") > 100
  QUERY

  # Notification body: only a mention of the configured channel handle.
  message = <<-EOT
    @${var.slack_channel}
  EOT

  monitor_thresholds {
    critical = 100
  }
}

# Alerts when the burn rate exceeds 6, evaluated with a 6h long window and a
# 30m short window. The value in the query and the critical threshold are both 6.
resource "datadog_monitor" "statuscode_burn_rate" {
  type = "slo alert"
  name = "[${var.service_name}][SLO]Status Code Breach Burn Rate"
  tags = local.combined_tags

  query = <<-QUERY
    burn_rate("${datadog_service_level_objective.statuscode_slo.id}").over("30d").long_window("6h").short_window("30m") > 6
  QUERY

  # Notification body: only a mention of the configured channel handle.
  message = <<-EOT
    @${var.slack_channel}
  EOT

  monitor_thresholds {
    critical = 6
  }
}
- response-slo.tf
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# ---------------------------------------------------------------------------
# Response-time SLO over Synthetics measurements, plus the two SLO alerts
# built on it.
# ---------------------------------------------------------------------------

# Time-slice SLO over a 30-day window with a 99% target. A slice is good when
# the queried value is <= 2000.
# NOTE(review): the threshold is presumably in milliseconds — confirm against
# the unit of synthetics.http.response.time.
resource "datadog_service_level_objective" "responsetime_slo" {
  type        = "time_slice"
  name        = "[${var.service_name}][SLO]Response Time"
  description = "[${var.service_name}][SLO]Response Time"
  tags        = local.combined_tags

  sli_specification {
    time_slice {
      comparator = "<="
      threshold  = 2000

      query {
        formula {
          formula_expression = "query1"
        }

        query {
          metric_query {
            name = "query1"
            # Every entry of var.slo_urls becomes one "url:<value>" term; the
            # terms are OR-ed together and the series is grouped by url.
            query = "avg:synthetics.http.response.time{url:${join(" OR url:", var.slo_urls)}} by {url}"
          }
        }
      }
    }
  }

  thresholds {
    timeframe = "30d"
    target    = 99.0
  }
}

# Alerts when more than 100% of the 30-day error budget has been consumed.
# The value in the query and the critical threshold are both 100.
resource "datadog_monitor" "responsetime_error_budget" {
  type = "slo alert"
  name = "[${var.service_name}][SLO]Response Time Breach Error Budget"
  tags = local.combined_tags

  query = <<-QUERY
    error_budget("${datadog_service_level_objective.responsetime_slo.id}").over("30d") > 100
  QUERY

  # Notification body: only a mention of the configured channel handle.
  message = <<-EOT
    @${var.slack_channel}
  EOT

  monitor_thresholds {
    critical = 100
  }
}

# Alerts when the burn rate exceeds 6, evaluated with a 6h long window and a
# 30m short window. The value in the query and the critical threshold are both 6.
resource "datadog_monitor" "responsetime_burn_rate" {
  type = "slo alert"
  name = "[${var.service_name}][SLO]Response Time Breach Burn Rate"
  tags = local.combined_tags

  query = <<-QUERY
    burn_rate("${datadog_service_level_objective.responsetime_slo.id}").over("30d").long_window("6h").short_window("30m") > 6
  QUERY

  # Notification body: only a mention of the configured channel handle.
  message = <<-EOT
    @${var.slack_channel}
  EOT

  monitor_thresholds {
    critical = 6
  }
}
- variables.tf
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# ---------------------------------------------------------------------------
# Input variables for the SLO resources and their alert monitors.
# ---------------------------------------------------------------------------

# Used as the lookup key into var.tags and rendered into the "env:<value>" tag.
variable "environment" {
  description = "Deployment environment name"
  type        = string
}

# Interpolated after "@" in every monitor message.
variable "slack_channel" {
  description = "Slack channel for alerts"
  type        = string
  default     = "hoge"
}

# Used in SLO/monitor names and in the ALB name filter ("<service_name>-alb").
variable "service_name" {
  description = "Service name for the Datadog monitor"
  type        = string
  default     = "hoge"
}

# Each entry becomes one "url:<value>" term of the response-time SLO query.
variable "slo_urls" {
  description = "List of URLs for hoge"
  type        = list(string)
  default = [
    "https://hoge.com"
  ]

  # An empty list would render the metric filter as "{url:}", which is not a
  # usable query, so reject it at plan time instead.
  validation {
    condition     = length(var.slo_urls) > 0
    error_message = "The slo_urls list must contain at least one URL."
  }
}

# Read by locals.tf via lookup(var.tags, var.environment, []); it was
# referenced there without being declared. The empty default keeps the
# lookup's own fallback (no extra tags) in effect.
variable "tags" {
  description = "Additional Datadog tags per environment, keyed by environment name"
  type        = map(list(string))
  default     = {}
}
- locals.tf
1 2 3 4 |
# Tag sets shared by every SLO and monitor in this module.
locals {
  # Extra tags configured for the current environment; an empty list when
  # var.tags has no entry for var.environment.
  environment_tags = lookup(var.tags, var.environment, [])

  # Environment-specific tags followed by the fixed env/service/product tags.
  # The service tag is derived from var.service_name so it stays in step with
  # the names and ALB filters used by the resources; with the default
  # service name the rendered value is unchanged ("service:hoge").
  combined_tags = concat(
    local.environment_tags,
    [
      "env:${var.environment}",
      "service:${var.service_name}",
      "product:hoge",
    ],
  )
}
Was this helpful?
0 / 0
1989年生まれのFindy/SRE。ホスティングから大規模なアドテクまで、インフラエンジニアとして携わる。現在はサービスの信頼性向上、DevOps、可用性、レイテンシ、パフォーマンス、モニタリング、オブザーバビリティ、緊急対応、AWSでのインフラ構築、Docker開発環境の提供、IaC、新技術の検証、リファクタリング、セキュリティ強化、分析基盤の運用などを担当している。個人事業主としては、数社のサーバー保守やベンチャー企業のSRE/インフラコンサルティングを行うほか、MENTA/TechBullで未経験者へのインフラのコーチングや、コミュニティマネージャーとしての立ち上げと運営をしている。また、過去には「脆弱性スキャナVuls」のOSS活動もしており、自称エバンジェリスト/技術広報/テクニカルサポート/コントリビュータでもある。