kroki_rs/server/
metrics.rs

1use metrics::{counter, describe_counter, describe_gauge, describe_histogram, gauge, histogram};
2use metrics_exporter_prometheus::PrometheusBuilder;
3pub use metrics_exporter_prometheus::PrometheusHandle;
4
5use std::sync::OnceLock;
6
7/// A global handle to the Prometheus exporter.
8static METRICS_HANDLE: OnceLock<PrometheusHandle> = OnceLock::new();
9
10/// Initialize the Prometheus metrics exporter.
11///
12/// This registers the recorder and defines all core metrics (counters, histograms, gauges)
13/// according to ADR 0006.
14///
15/// Returns a handle that can be used to scrape metrics if the endpoint is enabled.
16/// This function is idempotent and safe to call multiple times (e.g. in integration tests).
17pub fn init_metrics() -> PrometheusHandle {
18    METRICS_HANDLE
19        .get_or_init(|| {
20            let builder = PrometheusBuilder::new();
21
22            // Configure histogram buckets as per ADR 0006
23            let buckets = [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0];
24            let handle = builder
25                .set_buckets(&buckets)
26                .expect("Failed to set metrics buckets")
27                .install_recorder()
28                .expect("Failed to install Prometheus recorder");
29
30            describe_metrics();
31
32            // Record cold start to ensure metrics page has data immediately
33            counter!("kroki_server_starts_total").increment(1);
34
35            handle
36        })
37        .clone()
38}
39
40fn describe_metrics() {
41    describe_counter!("kroki_requests_total", "Total number of diagram requests");
42    describe_histogram!(
43        "kroki_request_duration_seconds",
44        "Total duration of the diagram request in seconds"
45    );
46    describe_counter!(
47        "kroki_rendering_errors_total",
48        "Total number of rendering errors"
49    );
50    describe_histogram!(
51        "kroki_payload_size_bytes",
52        "Input payload size distribution in bytes"
53    );
54    describe_histogram!(
55        "kroki_conversion_time_seconds",
56        "Time spent in the diagram provider rendering"
57    );
58    describe_gauge!(
59        "kroki_active_connections",
60        "Number of currently active concurrent requests"
61    );
62    describe_gauge!(
63        "kroki_circuit_breaker_state",
64        "Current state of the circuit breaker (0=closed, 1=open, 2=half-open)"
65    );
66    describe_counter!(
67        "kroki_server_starts_total",
68        "Total number of server cold starts"
69    );
70}
71
72/// Helper for recording common metrics in API and Admin handlers.
73///
74/// This provides a static interface to record requests, durations, errors,
75/// and circuit breaker states without needing to pass around recorder handles.
76pub struct Metrics;
77
78impl Metrics {
79    /// Increments the total request count for a given provider and format.
80    pub fn increment_requests(provider: &str, format: &str) {
81        counter!("kroki_requests_total", "provider" => provider.to_string(), "format" => format.to_string()).increment(1);
82    }
83
84    /// Records the end-to-end duration of a request.
85    pub fn record_duration(provider: &str, format: &str, seconds: f64) {
86        histogram!("kroki_request_duration_seconds", "provider" => provider.to_string(), "format" => format.to_string()).record(seconds);
87    }
88
89    /// Records a rendering error, categorized by error kind.
90    pub fn increment_errors(provider: &str, format: &str, error_kind: &str) {
91        counter!("kroki_rendering_errors_total",
92            "provider" => provider.to_string(),
93            "format" => format.to_string(),
94            "error_kind" => error_kind.to_string()
95        )
96        .increment(1);
97    }
98
99    /// Records the size of the input payload (compressed/encoded source).
100    pub fn record_payload_size(provider: &str, format: &str, bytes: f64) {
101        histogram!("kroki_payload_size_bytes", "provider" => provider.to_string(), "format" => format.to_string()).record(bytes);
102    }
103
104    /// Records the pure conversion time (excluding server overhead).
105    pub fn record_conversion_time(provider: &str, format: &str, seconds: f64) {
106        histogram!("kroki_conversion_time_seconds", "provider" => provider.to_string(), "format" => format.to_string()).record(seconds);
107    }
108
109    /// Updates the gauge for currently active concurrent requests.
110    pub fn set_active_connections(provider: &str, format: &str, count: f64) {
111        gauge!("kroki_active_connections", "provider" => provider.to_string(), "format" => format.to_string()).set(count);
112    }
113
114    /// Updates the gauge for a provider's circuit breaker state (0=Closed, 1=Open, 2=HalfOpen).
115    pub fn set_circuit_breaker_state(provider: &str, state: f64) {
116        gauge!("kroki_circuit_breaker_state", "provider" => provider.to_string()).set(state);
117    }
118}