From 9b95ecc89f3ef112de28c6561a523759803b3626 Mon Sep 17 00:00:00 2001
From: "Trez.One"
Date: Sun, 27 Jul 2025 16:43:47 -0400
Subject: [PATCH] OTEL config overhaul.

---
Review notes (text between the "---" marker and the first "diff --git"
line is ignored by `git am`):

* The nginx regex_parser expressions use named capture groups. The names
  ts, status, log_level and message are the ones the timestamp, severity,
  remove and move operators read from attributes; the remaining names
  follow the nginx variables listed in the format comments.
* resourcedetection/docker and resourcedetection/system are processors, so
  the metrics pipeline lists them under processors, not exporters.
* metrics/hostmetrics references only components visible in this patch:
  resourcedetection/env (instead of resource/env) and the ClickHouse
  metrics exporters (instead of otlp). If resource/env or an otlp exporter
  is defined elsewhere in the template, the original names can be restored.
* Leading whitespace in the hunks assumes two-space YAML indentation;
  verify the context lines against the target file before applying.

 .../signoz/otel/otel-collector-config.yaml.j2 | 122 +++++++++++++++++-
 1 file changed, 120 insertions(+), 2 deletions(-)

diff --git a/ansible/app-configs/signoz/otel/otel-collector-config.yaml.j2 b/ansible/app-configs/signoz/otel/otel-collector-config.yaml.j2
index f792d080..c3804694 100644
--- a/ansible/app-configs/signoz/otel/otel-collector-config.yaml.j2
+++ b/ansible/app-configs/signoz/otel/otel-collector-config.yaml.j2
@@ -8,6 +8,104 @@ receivers:
         endpoint: 0.0.0.0:4317
       http:
         endpoint: 0.0.0.0:4318
+  hostmetrics:
+    collection_interval: 60s  # Frequency of metrics collection.
+    scrapers:
+      cpu: {}
+      load: {}
+      memory: {}
+      disk: {}
+      filesystem: {}
+      network: {}
+  docker_stats:
+    endpoint: unix:///var/run/docker.sock
+    collection_interval: 30s
+    timeout: 10s
+    api_version: "1.51"
+    metrics:
+      container.uptime:
+        enabled: true
+      container.restarts:
+        enabled: true
+      container.network.io.usage.rx_errors:
+        enabled: true
+      container.network.io.usage.tx_errors:
+        enabled: true
+      container.network.io.usage.rx_packets:
+        enabled: true
+      container.network.io.usage.tx_packets:
+        enabled: true
+  filelog/nginx-access-logs:
+    include: ["${env:NGINX_ACCESS_LOG_FILE}"]
+    operators:
+      # Parse the default nginx access log format. Nginx defaults to the "combined" log format
+      # $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"
+      # For more details, see https://nginx.org/en/docs/http/ngx_http_log_module.html
+      - type: regex_parser
+        if: body matches '^(?P<remote_addr>[0-9\\.]+) - (?P<remote_user>[^\\s]+) \\[(?P<ts>.+)\\] "(?P<request_method>\\w+?) (?P<request_path>.+?)" (?P<status>[0-9]+) (?P<body_bytes_sent>[0-9]+) "(?P<http_referrer>.+?)" "(?P<http_user_agent>.+?)"$'
+        parse_from: body
+        parse_to: attributes
+        regex: '^(?P<remote_addr>[0-9\.]+) - (?P<remote_user>[^\s]+) \[(?P<ts>.+)\] "(?P<request_method>\w+?) (?P<request_path>.+?)" (?P<status>[0-9]+) (?P<body_bytes_sent>[0-9]+) "(?P<http_referrer>.+?)" "(?P<http_user_agent>.+?)"$'
+        timestamp:
+          parse_from: attributes.ts
+          layout: "02/Jan/2006:15:04:05 -0700"
+          layout_type: gotime
+        severity:
+          parse_from: attributes.status
+          overwrite_text: true
+          mapping:
+            debug: "1xx"
+            info:
+              - "2xx"
+              - "3xx"
+            warn: "4xx"
+            error: "5xx"
+      - type: remove
+        if: attributes.ts != nil
+        field: attributes.ts
+      - type: add
+        field: attributes.source
+        value: nginx
+
+  filelog/nginx-error-logs:
+    include: ["${env:NGINX_ERROR_LOG_FILE}"]
+    operators:
+      # Parse the default nginx error log format.
+      # YYYY/MM/DD HH:MM:SS [LEVEL] PID#TID: *CID MESSAGE
+      # For more details, see https://github.com/phusion/nginx/blob/master/src/core/ngx_log.c
+      - type: regex_parser
+        if: body matches '^(?P<ts>.+?) \\[(?P<log_level>\\w+)\\] (?P<pid>\\d+)#(?P<tid>\\d+). \\*(?P<cid>\\d+) (?P<message>.+)$'
+        parse_from: body
+        parse_to: attributes
+        regex: '^(?P<ts>.+?) \[(?P<log_level>\w+)\] (?P<pid>\d+)#(?P<tid>\d+). \*(?P<cid>\d+) (?P<message>.+)$'
+        timestamp:
+          parse_from: attributes.ts
+          layout: "2006/01/02 15:04:05"
+          layout_type: gotime
+        severity:
+          parse_from: attributes.log_level
+          overwrite_text: true
+          mapping:
+            debug: "debug"
+            info:
+              - "info"
+              - "notice"
+            warn: "warn"
+            error:
+              - "error"
+              - "crit"
+              - "alert"
+            fatal: "emerg"
+      - type: remove
+        if: attributes.ts != nil
+        field: attributes.ts
+      - type: move
+        if: attributes.message != nil
+        from: attributes.message
+        to: body
+      - type: add
+        field: attributes.source
+        value: nginx
   prometheus:
     config:
       global:
@@ -25,9 +123,21 @@ processors:
     send_batch_max_size: 11000
     timeout: 10s
   resourcedetection:
-    # Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels.
     detectors: [env, system]
+    system:
+      hostname_sources: [os]
+  resourcedetection/env:
+    detectors: [env]
     timeout: 2s
+    override: false
+  resourcedetection/system:
+    detectors: ["system"]
+    system:
+      hostname_sources: ["dns", "os"]
+  resourcedetection/docker:
+    detectors: [env, docker]
+    timeout: 2s
+    override: false
   signozspanmetrics/delta:
     metrics_exporter: clickhousemetricswrite, signozclickhousemetrics
     metrics_flush_interval: 60s
@@ -78,6 +188,10 @@ exporters:
     timeout: 10s
     use_new_schema: true
   # debug: {}
+  otlp/nginx-logs:
+    endpoint: "localhost:4317"
+    tls:
+      insecure: true
 service:
   telemetry:
     logs:
@@ -93,7 +207,11 @@ service:
     metrics:
       receivers: [otlp]
-      processors: [batch]
+      processors: [resourcedetection/docker, resourcedetection/system, batch]
       exporters: [clickhousemetricswrite, signozclickhousemetrics]
+    metrics/hostmetrics:
+      receivers: [hostmetrics]
+      processors: [resourcedetection, resourcedetection/env]
+      exporters: [clickhousemetricswrite, signozclickhousemetrics]
     metrics/prometheus:
       receivers: [prometheus]
       processors: [batch]