Renaming Grafana alloy config.

2025-05-21 09:16:28 -04:00
parent 6245e88edc
commit 77bb59f594
1 changed files with 404 additions and 0 deletions
@@ -0,0 +1,404 @@
+{% set vault_addr = 'https://vault.trez.wtf' %}
+{% set secrets_path = 'rinoa-docker/env' %}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Agent globals
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////
+local.file "endpoints" {
+    // The endpoints file is used to define the endpoints, credentials and options
+    // for the Agent export to.
+    filename = "/etc/alloy/endpoints.json"
+}
+
+discovery.docker "rinoadocker" {
+        host = env("DOCKER_HOST")
+}
+
+tracing {
+    write_to = [otelcol.exporter.otlp.tempo.input]
+}
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Metrics
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////
+prometheus.remote_write "mimir" {
+  endpoint {
+    url = json_path(local.file.endpoints.content, ".metrics.url")[0]
+    basic_auth {
+        username = json_path(local.file.endpoints.content, ".metrics.basicAuth.username")[0]
+        password = json_path(local.file.endpoints.content, ".metrics.basicAuth.password")[0]
+    }
+  }
+}
+
+prometheus.scrape "prometheus" {
+        targets = [{
+                __address__ = "localhost:12345",
+        }]
+        forward_to = [prometheus.remote_write.mimir.receiver]
+        job_name   = "prometheus"
+}
+
+prometheus.exporter.unix "rinoa" {
+        procfs_path = "/host/proc"
+        sysfs_path  = "/host/sys"
+        rootfs_path = "/rootfs"
+}
+
+prometheus.scrape "rinoa" {
+        targets    = prometheus.exporter.unix.rinoa.targets
+        forward_to = [prometheus.remote_write.mimir.receiver]
+        job_name   = "rinoa_host"
+}
+
+prometheus.exporter.cadvisor "docker" {
+        docker_host      = env("DOCKER_HOST")
+        storage_duration = "5m"
+}
+
+prometheus.scrape "docker" {
+        targets    = prometheus.exporter.cadvisor.docker.targets
+        forward_to = [prometheus.remote_write.mimir.receiver]
+        job_name   = "docker_stats"
+}
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Logging
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////
+loki.write "loki" {
+        endpoint {
+                url = json_path(local.file.endpoints.content, ".logs.url")[0]
+                basic_auth {
+                        username = json_path(local.file.endpoints.content, ".logs.basicAuth.username")[0]
+                        password = json_path(local.file.endpoints.content, ".logs.basicAuth.password")[0]
+                }
+        }
+        external_labels = {}
+}
+
+loki.source.journal "hostjournal" {
+        forward_to = [loki.write.loki.receiver]
+        max_age = "24h"
+        path = "/rootfs/var/log/journal/"
+        labels = {
+                job = "host-journal",
+        }
+}
+
+local.file_match "system" {
+        path_targets = [{
+                __address__ = "localhost",
+                __path__    = "/rootfs/var/log/*log",
+                job         = "varlogs",
+        }]
+}
+
+loki.source.file "system" {
+        targets    = local.file_match.system.targets
+        forward_to = [loki.write.loki.receiver]
+}
+
+loki.source.docker "containers" {
+        host       = env("DOCKER_HOST")
+        targets    = discovery.docker.rinoadocker.targets
+        forward_to = [loki.write.loki.receiver]
+        labels     = {
+                job = "containerlogs",
+        }
+}
+
+loki.process "containers" {
+        forward_to = [loki.write.loki.receiver]
+        // stage.docker {}
+        stage.json {
+		expressions = {
+			attrs  = "",
+			output = "log",
+			stream = "stream",
+		}
+	}
+
+	stage.json {
+		expressions = {
+			tag = "",
+		}
+		source = "attrs"
+	}
+
+	stage.regex {
+		expression = "(?P<image_name>(?:[^|]*[^|])).(?P<container_name>(?:[^|]*[^|])).(?P<image_id>(?:[^|]*[^|])).(?P<container_id>(?:[^|]*[^|]))"
+		source     = "tag"
+	}
+
+	stage.timestamp {
+		source = "time"
+		format = "RFC3339Nano"
+	}
+
+	stage.labels {
+		values = {
+			container_id   = null,
+			container_name = null,
+			image_id       = null,
+			image_name     = null,
+			stream         = null,
+			tag            = null,
+		}
+	}
+
+	stage.output {
+		source = "output"
+	}
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Traces
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////
+beyla.ebpf "rinoadocker" {
+    open_port = "80-65535"
+    routes {
+        unmatched = "heauristic"
+    }
+    output {
+        traces = [
+            otelcol.connector.servicegraph.tracemetrics.input,
+            otelcol.connector.spanmetrics.tracemetrics.input,
+            otelcol.processor.batch.default.input,
+            otelcol.connector.spanlogs.autologging.input,
+        ]
+    }
+}
+
+prometheus.scrape "beyla" { 
+    targets = beyla.ebpf.rinoadocker.targets
+    forward_to = [prometheus.remote_write.mimir.receiver]
+}
+
+otelcol.auth.headers "tempo" {
+    header {
+        key = "Authorization"
+        value = join(["Basic ", json_path(local.file.endpoints.content, ".traces.basicAuthToken")[0]], "")
+    }
+}
+
+otelcol.processor.batch "default" {
+    // Wait until we've received 16K of data.
+    send_batch_size = 16384
+    send_batch_max_size = 16384
+    // Or until 2 seconds have elapsed.
+    timeout = "2s"
+    // When the Agent has enough batched data, send it to the OpenTelemetry exporter named 'tempo'.
+    output {
+        traces = [otelcol.exporter.otlp.tempo.input]
+    }
+}
+
+otelcol.exporter.otlp "tempo" {
+    // Define the client for exporting.
+    client {
+        // Authentication block.
+        auth = otelcol.auth.headers.tempo.handler
+
+        // Send to the locally running Tempo instance, on port 4317 (OTLP gRPC).
+        endpoint = json_path(local.file.endpoints.content, ".traces.url")[0]
+
+        // Configure TLS settings for communicating with the endpoint.
+        tls {
+            // The connection is insecure.
+            insecure = json_path(local.file.endpoints.content, ".traces.tls.insecure")[0]
+            // Do not verify TLS certificates when connecting.
+            insecure_skip_verify = json_path(local.file.endpoints.content, ".traces.tls.insecureSkipVerify")[0]
+        }
+    }
+}
+
+otelcol.connector.spanlogs "autologging" {
+    // We only want to output a line for each root span (ie. every single trace), and not for every
+    // process or span (outputting a line for every span would be extremely verbose).
+    spans = false
+    roots = true
+    processes = false
+    // We want to ensure that the following three span attributes are included in the log line, if
+    // present.
+    span_attributes = [ "http.method", "http.target", "http.status_code" ]
+
+    // Overrides the default key in the log line to be `traceId`, which is then used by Grafana to
+    // identify the trace ID for correlation with the Tempo datasource.
+    overrides {
+        trace_id_key = "traceId"
+    }
+    // Send to the OpenTelemetry Loki exporter.
+    output {
+        logs = [otelcol.exporter.loki.autologging.input]
+    }
+}
+
+// Simply forwards the incoming OpenTelemetry log format out as a Loki log.
+// We need this stage to ensure we can then process the logline as a Loki object.
+otelcol.exporter.loki "autologging" {
+    forward_to = [loki.process.autologging.receiver]
+}
+
+// The Loki processor allows us to accept a correctly formatted Loki log and mutate it into
+// a set of fields for output.
+loki.process "autologging" {
+    // The JSON stage simply extracts the `body` (the actual logline) from the Loki log, ignoring
+    // all other fields.
+    stage.json {
+        expressions = { "body" = "" }
+    }
+    // The output stage takes the body (the main logline) and uses this as the source for the output
+    // logline. In this case, it essentially turns it into logfmt.
+    stage.output {
+        source = "body"
+    }
+
+    // Finally send the processed logline onto the Loki exporter.
+    forward_to = [loki.write.autologging.receiver]
+}
+
+// The Loki writer receives a processed Loki log and then writes it to a Loki instance.
+loki.write "autologging" {
+    // Add the `agent` value to the `job` label, so we can identify it as having been generated
+    // by Grafana Agent when querying.
+    external_labels = {
+        job = "agent",
+    }
+
+    // Output the Loki log to the local Loki instance.
+    endpoint {
+        url = json_path(local.file.endpoints.content, ".logs.url")[0]
+
+        // The basic auth credentials for the Loki instance.
+        basic_auth {
+            username = json_path(local.file.endpoints.content, ".logs.basicAuth.username")[0]
+            password = json_path(local.file.endpoints.content, ".logs.basicAuth.password")[0]
+        }
+    }
+}
+
+// The Tail Sampling processor will use a set of policies to determine which received traces to keep
+// and send to Tempo.
+otelcol.processor.tail_sampling "errors" {
+    // Total wait time from the start of a trace before making a sampling decision. Note that smaller time
+    // periods can potentially cause a decision to be made before the end of a trace has occurred.
+    decision_wait = "30s"
+
+    // The following policies follow a logical OR pattern, meaning that if any of the policies match,
+    // the trace will be kept. For logical AND, you can use the `and` policy. Every span of a trace is
+    // examined by each policy in turn. A match will cause a short-circuit.
+
+    // This policy defines that traces that contain errors should be kept.
+    policy {
+        // The name of the policy can be used for logging purposes.
+        name = "sample-erroring-traces"
+        // The type must match the type of policy to be used, in this case examing the status code
+        // of every span in the trace.
+        type = "status_code"
+        // This block determines the error codes that should match in order to keep the trace,
+        // in this case the OpenTelemetry 'ERROR' code.
+        status_code {
+            status_codes = [ "ERROR" ]
+        }
+    }
+
+    // This policy defines that only traces that are longer than 200ms in total should be kept.
+    policy {
+        // The name of the policy can be used for logging purposes.
+        name = "sample-long-traces"
+        // The type must match the policy to be used, in this case the total latency of the trace.
+        type = "latency"
+        // This block determines the total length of the trace in milliseconds.
+        latency {
+            threshold_ms = 200
+        }
+    }
+
+    // The output block forwards the kept traces onto the batch processor, which will marshall them
+    // for exporting to Tempo.
+    output {
+        traces = [otelcol.processor.batch.default.input]
+    }
+}
+
+// The Spanmetrics Connector will generate RED metrics based on the incoming trace span data.
+otelcol.connector.spanmetrics "tracemetrics" {
+    // The namespace explicit adds a prefix to all the generated span metrics names.
+    // In this case, we'll ensure they match as closely as possible those generated by Tempo.
+    namespace = "traces.spanmetrics"
+
+    // Each extra dimension (metrics label) to be added to the generated metrics from matching span attributes. These
+    // need to be defined with a name and optionally a default value (in the following cases, we do not want a default
+    // value if the span attribute is not present).
+    dimension {
+        name = "http.method"
+    }
+    dimension {
+        name = "http.target"
+    }
+    dimension {
+        name = "http.status_code"
+    }
+    dimension {
+        name = "service.version"
+    }
+
+    // A histogram block must be present, either explicitly defining bucket values or via an exponential block.
+    // We do the latter here.
+    histogram {
+        explicit {
+        }
+    }
+
+    // The exemplar block is added to ensure we generate exemplars for traces on relevant metric values.
+    exemplars {
+        enabled = true
+    }
+
+    // Generated metrics data is in OTLP format. We send this data to the OpenTelemetry Prometheus exporter to ensure
+    // it gets transformed into Prometheus format data.
+    output {
+        metrics = [otelcol.exporter.prometheus.tracemetrics.input]
+    }
+}
+
+// The Servicegraph Connector will generate service graph metrics (edges and nodes) based on incoming trace spans.
+otelcol.connector.servicegraph "tracemetrics" {
+    // Extra dimensions (metrics labels) to be added to the generated metrics from matching span attributes.
+    // For this component, this is defined as an array. There are no default values and the labels will not be generated
+    // for missing span attributes.
+    dimensions = [
+        "http.method",
+        "http.target",
+        "http.status_code",
+        "service.version",
+    ]
+
+    // Generated metrics data is in OTLP format. We send this data to the OpenTelemetry Prometheus exporter to ensure
+    // it gets transformed into Prometheus format data.
+    output {
+        metrics = [otelcol.exporter.prometheus.tracemetrics.input]
+    }
+}
+
+otelcol.exporter.prometheus "tracemetrics" {
+    // Forward to our local Prometheus remote writer which will send the metrics to Mimir.
+    forward_to = [prometheus.remote_write.mimir.receiver]
+}
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Profiling
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+pyroscope.write "pyroscope" {
+        endpoint {
+                url = json_path(local.file.endpoints.content, ".profiles.url")[0]
+                basic_auth {
+                        username = json_path(local.file.endpoints.content, ".profiles.basicAuth.username")[0]
+                        password = json_path(local.file.endpoints.content, ".profiles.basicAuth.password")[0]
+                }
+        }
+        external_labels = {}
+}
+
+pyroscope.ebpf "rinoadocker" {
+        forward_to = [pyroscope.write.pyroscope.receiver]
+        targets = discovery.docker.rinoadocker.targets
+}