Prometheus自定义指标实战一、Prometheus概述Prometheus是一个开源的监控系统支持多维数据模型和灵活的查询语言。1.1 指标类型类型说明示例Counter单调递增计数器请求总数Gauge可增可减指标当前内存使用量Histogram直方图请求延迟分布Summary摘要统计请求延迟分位数1.2 指标命名规范namespace_subsystem_name_unit二、自定义指标实现2.1 Counter指标import io.prometheus.client.Counter; public class RequestCounter { private static final Counter requestCounter Counter.build() .name(http_requests_total) .help(Total number of HTTP requests) .labelNames(method, endpoint, status) .register(); public static void increment(String method, String endpoint, int status) { requestCounter.labels(method, endpoint, String.valueOf(status)).inc(); } }2.2 Gauge指标import io.prometheus.client.Gauge; public class MemoryGauge { private static final Gauge memoryUsage Gauge.build() .name(jvm_memory_usage_bytes) .help(JVM memory usage in bytes) .labelNames(type) .register(); public static void update(String type, double value) { memoryUsage.labels(type).set(value); } public static void increment(String type, double amount) { memoryUsage.labels(type).inc(amount); } public static void decrement(String type, double amount) { memoryUsage.labels(type).dec(amount); } }2.3 Histogram指标import io.prometheus.client.Histogram; public class RequestLatencyHistogram { private static final Histogram requestLatency Histogram.build() .name(http_request_duration_seconds) .help(HTTP request latency in seconds) .labelNames(endpoint) .buckets(0.1, 0.5, 1.0, 2.0, 5.0) .register(); public static Histogram.Timer startTimer(String endpoint) { return requestLatency.labels(endpoint).startTimer(); } }2.4 Summary指标import io.prometheus.client.Summary; public class ResponseTimeSummary { private static final Summary responseTime Summary.build() .name(http_response_time_seconds) .help(HTTP response time summary) .labelNames(method) .quantile(0.5, 0.05) .quantile(0.9, 0.01) .quantile(0.99, 0.001) .register(); public static void observe(String method, double seconds) { responseTime.labels(method).observe(seconds); } }三、指标注册与暴露3.1 Spring Boot集成Configuration 
public class PrometheusConfig { Bean public ServletRegistrationBeanMetricsServlet metricsServlet() { return new ServletRegistrationBean(new MetricsServlet(), /actuator/prometheus); } Bean public FilterRegistrationBeanMetricsFilter metricsFilter() { FilterRegistrationBeanMetricsFilter registration new FilterRegistrationBean(); registration.setFilter(new MetricsFilter()); registration.addUrlPatterns(/*); return registration; } }3.2 自定义端点RestController public class MetricsController { GetMapping(/metrics) public ResponseEntityString getMetrics() { ByteArrayOutputStream outputStream new ByteArrayOutputStream(); try { CollectorRegistry.defaultRegistry.scrape(outputStream); return ResponseEntity.ok(outputStream.toString()); } catch (IOException e) { return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build(); } } }四、业务指标实战4.1 订单指标public class OrderMetrics { private static final Counter orderCreated Counter.build() .name(orders_created_total) .help(Total number of orders created) .labelNames(status, payment_method) .register(); private static final Gauge activeOrders Gauge.build() .name(orders_active_count) .help(Number of active orders) .labelNames(status) .register(); private static final Histogram orderProcessingTime Histogram.build() .name(order_processing_duration_seconds) .help(Order processing time in seconds) .buckets(0.1, 0.5, 1.0, 2.0, 5.0) .register(); public static void onOrderCreated(String status, String paymentMethod) { orderCreated.labels(status, paymentMethod).inc(); } public static void updateActiveOrders(String status, double count) { activeOrders.labels(status).set(count); } public static Histogram.Timer startProcessingTimer() { return orderProcessingTime.startTimer(); } }4.2 缓存指标public class CacheMetrics { private static final Counter cacheHits Counter.build() .name(cache_hits_total) .help(Total cache hits) .labelNames(cache_name) .register(); private static final Counter cacheMisses Counter.build() .name(cache_misses_total) .help(Total 
cache misses) .labelNames(cache_name) .register(); private static final Gauge cacheSize Gauge.build() .name(cache_size_bytes) .help(Cache size in bytes) .labelNames(cache_name) .register(); public static void recordHit(String cacheName) { cacheHits.labels(cacheName).inc(); } public static void recordMiss(String cacheName) { cacheMisses.labels(cacheName).inc(); } public static void updateSize(String cacheName, long size) { cacheSize.labels(cacheName).set(size); } }

五、指标采集配置

5.1 Prometheus配置

scrape_configs:
  - job_name: spring-app
    scrape_interval: 15s
    metrics_path: /actuator/prometheus
    static_configs:
      - targets: ['localhost:8080']
  - job_name: custom-app
    scrape_interval: 10s
    metrics_path: /metrics
    static_configs:
      - targets: ['localhost:9090']

5.2 自动发现配置

scrape_configs:
  - job_name: kubernetes-pods
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels: [__meta_kubernetes_pod_label_app]
        action: keep
        regex: my-app
      - source_labels: [__meta_kubernetes_pod_container_port_number]
        action: keep
        regex: "8080"

六、指标查询示例

6.1 查询请求总数

sum(http_requests_total)

6.2 查询请求速率

rate(http_requests_total[5m])

6.3 查询P95延迟

histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))

6.4 查询缓存命中率

sum(cache_hits_total) / (sum(cache_hits_total) + sum(cache_misses_total))

七、最佳实践

7.1 指标设计原则

- 命名规范:使用snake_case命名
- 标签适度:标签不宜过多,建议不超过5个
- 单位统一:使用标准单位(seconds、bytes等)
- 文档完善:为每个指标添加help描述

7.2 性能优化

// 使用批量指标更新
public class BatchMetrics { private static final Counter batchCounter Counter.build() .name(batch_processed_total) .help(Total batch processed) .register(); public static void recordBatch(int count) { batchCounter.inc(count); } }

7.3 监控告警

groups:
  - name: application.rules
    rules:
      - alert: HighErrorRate
        expr: rate(http_requests_total{status=~"5.."}[5m]) / rate(http_requests_total[5m]) > 0.1
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: High error rate detected
          description: Error rate is {{ $value 
}}%

八、总结

Prometheus自定义指标是监控系统的核心:

- 选择合适类型:Counter用于计数,Gauge用于状态,Histogram/Summary用于分布
- 合理设计标签:标签用于维度分析,但不宜过多
- 定期采集:配置合适的采集间隔
- 结合告警:设置合理的告警规则

通过有效的指标设计,可以全面监控系统运行状态。
Prometheus自定义指标实战
发布时间:2026/5/24 20:53:26
Prometheus自定义指标实战一、Prometheus概述Prometheus是一个开源的监控系统支持多维数据模型和灵活的查询语言。1.1 指标类型类型说明示例Counter单调递增计数器请求总数Gauge可增可减指标当前内存使用量Histogram直方图请求延迟分布Summary摘要统计请求延迟分位数1.2 指标命名规范namespace_subsystem_name_unit二、自定义指标实现2.1 Counter指标import io.prometheus.client.Counter; public class RequestCounter { private static final Counter requestCounter Counter.build() .name(http_requests_total) .help(Total number of HTTP requests) .labelNames(method, endpoint, status) .register(); public static void increment(String method, String endpoint, int status) { requestCounter.labels(method, endpoint, String.valueOf(status)).inc(); } }2.2 Gauge指标import io.prometheus.client.Gauge; public class MemoryGauge { private static final Gauge memoryUsage Gauge.build() .name(jvm_memory_usage_bytes) .help(JVM memory usage in bytes) .labelNames(type) .register(); public static void update(String type, double value) { memoryUsage.labels(type).set(value); } public static void increment(String type, double amount) { memoryUsage.labels(type).inc(amount); } public static void decrement(String type, double amount) { memoryUsage.labels(type).dec(amount); } }2.3 Histogram指标import io.prometheus.client.Histogram; public class RequestLatencyHistogram { private static final Histogram requestLatency Histogram.build() .name(http_request_duration_seconds) .help(HTTP request latency in seconds) .labelNames(endpoint) .buckets(0.1, 0.5, 1.0, 2.0, 5.0) .register(); public static Histogram.Timer startTimer(String endpoint) { return requestLatency.labels(endpoint).startTimer(); } }2.4 Summary指标import io.prometheus.client.Summary; public class ResponseTimeSummary { private static final Summary responseTime Summary.build() .name(http_response_time_seconds) .help(HTTP response time summary) .labelNames(method) .quantile(0.5, 0.05) .quantile(0.9, 0.01) .quantile(0.99, 0.001) .register(); public static void observe(String method, double seconds) { responseTime.labels(method).observe(seconds); } }三、指标注册与暴露3.1 Spring Boot集成Configuration 
public class PrometheusConfig { Bean public ServletRegistrationBeanMetricsServlet metricsServlet() { return new ServletRegistrationBean(new MetricsServlet(), /actuator/prometheus); } Bean public FilterRegistrationBeanMetricsFilter metricsFilter() { FilterRegistrationBeanMetricsFilter registration new FilterRegistrationBean(); registration.setFilter(new MetricsFilter()); registration.addUrlPatterns(/*); return registration; } }3.2 自定义端点RestController public class MetricsController { GetMapping(/metrics) public ResponseEntityString getMetrics() { ByteArrayOutputStream outputStream new ByteArrayOutputStream(); try { CollectorRegistry.defaultRegistry.scrape(outputStream); return ResponseEntity.ok(outputStream.toString()); } catch (IOException e) { return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build(); } } }四、业务指标实战4.1 订单指标public class OrderMetrics { private static final Counter orderCreated Counter.build() .name(orders_created_total) .help(Total number of orders created) .labelNames(status, payment_method) .register(); private static final Gauge activeOrders Gauge.build() .name(orders_active_count) .help(Number of active orders) .labelNames(status) .register(); private static final Histogram orderProcessingTime Histogram.build() .name(order_processing_duration_seconds) .help(Order processing time in seconds) .buckets(0.1, 0.5, 1.0, 2.0, 5.0) .register(); public static void onOrderCreated(String status, String paymentMethod) { orderCreated.labels(status, paymentMethod).inc(); } public static void updateActiveOrders(String status, double count) { activeOrders.labels(status).set(count); } public static Histogram.Timer startProcessingTimer() { return orderProcessingTime.startTimer(); } }4.2 缓存指标public class CacheMetrics { private static final Counter cacheHits Counter.build() .name(cache_hits_total) .help(Total cache hits) .labelNames(cache_name) .register(); private static final Counter cacheMisses Counter.build() .name(cache_misses_total) .help(Total 
cache misses) .labelNames(cache_name) .register(); private static final Gauge cacheSize Gauge.build() .name(cache_size_bytes) .help(Cache size in bytes) .labelNames(cache_name) .register(); public static void recordHit(String cacheName) { cacheHits.labels(cacheName).inc(); } public static void recordMiss(String cacheName) { cacheMisses.labels(cacheName).inc(); } public static void updateSize(String cacheName, long size) { cacheSize.labels(cacheName).set(size); } }

五、指标采集配置

5.1 Prometheus配置

scrape_configs:
  - job_name: spring-app
    scrape_interval: 15s
    metrics_path: /actuator/prometheus
    static_configs:
      - targets: ['localhost:8080']
  - job_name: custom-app
    scrape_interval: 10s
    metrics_path: /metrics
    static_configs:
      - targets: ['localhost:9090']

5.2 自动发现配置

scrape_configs:
  - job_name: kubernetes-pods
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels: [__meta_kubernetes_pod_label_app]
        action: keep
        regex: my-app
      - source_labels: [__meta_kubernetes_pod_container_port_number]
        action: keep
        regex: "8080"

六、指标查询示例

6.1 查询请求总数

sum(http_requests_total)

6.2 查询请求速率

rate(http_requests_total[5m])

6.3 查询P95延迟

histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))

6.4 查询缓存命中率

sum(cache_hits_total) / (sum(cache_hits_total) + sum(cache_misses_total))

七、最佳实践

7.1 指标设计原则

- 命名规范:使用snake_case命名
- 标签适度:标签不宜过多,建议不超过5个
- 单位统一:使用标准单位(seconds、bytes等)
- 文档完善:为每个指标添加help描述

7.2 性能优化

// 使用批量指标更新
public class BatchMetrics { private static final Counter batchCounter Counter.build() .name(batch_processed_total) .help(Total batch processed) .register(); public static void recordBatch(int count) { batchCounter.inc(count); } }

7.3 监控告警

groups:
  - name: application.rules
    rules:
      - alert: HighErrorRate
        expr: rate(http_requests_total{status=~"5.."}[5m]) / rate(http_requests_total[5m]) > 0.1
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: High error rate detected
          description: Error rate is {{ $value 
}}%

八、总结

Prometheus自定义指标是监控系统的核心:

- 选择合适类型:Counter用于计数,Gauge用于状态,Histogram/Summary用于分布
- 合理设计标签:标签用于维度分析,但不宜过多
- 定期采集:配置合适的采集间隔
- 结合告警:设置合理的告警规则

通过有效的指标设计,可以全面监控系统运行状态。