6 "datasource": "-- Grafana --",
9 "iconColor": "rgba(0, 211, 255, 1)",
10 "name": "Annotations & Alerts",
21 "description": "Kafka resource usage and throughput",
26 "iteration": 1647427255896,
40 "title": "Healthcheck",
45 "datasource": "Prometheus",
46 "description": "Number of active controllers in the cluster.",
93 "justifyMode": "auto",
94 "orientation": "vertical",
103 "textMode": "value_and_name"
105 "pluginVersion": "8.1.3",
108 "expr": "kafka_controller_kafkacontroller_activecontrollercount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"} > 0",
109 "format": "time_series",
113 "legendFormat": "{{instance}}",
117 "title": "Active Controllers",
121 "cacheTimeout": null,
122 "datasource": "Prometheus",
123 "description": "Number of Brokers Online",
148 "color": "rgba(237, 129, 40, 0.89)",
170 "maxDataPoints": 100,
172 "colorMode": "value",
174 "justifyMode": "auto",
175 "orientation": "horizontal",
186 "pluginVersion": "8.1.3",
188 "repeatDirection": "h",
191 "expr": "count(kafka_server_replicamanager_leadercount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})",
192 "format": "time_series",
200 "title": "Brokers Online",
204 "cacheTimeout": null,
205 "datasource": "Prometheus",
206 "description": "Partitions that are online",
231 "color": "rgba(237, 129, 40, 0.89)",
253 "maxDataPoints": 100,
255 "colorMode": "value",
257 "justifyMode": "auto",
258 "orientation": "horizontal",
269 "pluginVersion": "8.1.3",
272 "expr": "sum(kafka_server_replicamanager_partitioncount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})",
273 "format": "time_series",
281 "title": "Online Partitions",
285 "cacheTimeout": null,
286 "datasource": "Prometheus",
312 "color": "rgba(237, 129, 40, 0.89)",
333 "maxDataPoints": 100,
335 "colorMode": "value",
337 "justifyMode": "auto",
338 "orientation": "horizontal",
349 "pluginVersion": "8.1.3",
352 "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})",
353 "format": "time_series",
361 "title": "Preferred Replica Imbalance",
365 "datasource": "Prometheus",
369 "mode": "palette-classic"
372 "axisLabel": "Bytes/s",
373 "axisPlacement": "auto",
377 "gradientMode": "none",
383 "lineInterpolation": "linear",
386 "scaleDistribution": {
389 "showPoints": "never",
433 "displayMode": "list",
434 "placement": "bottom"
440 "pluginVersion": "8.1.3",
443 "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))",
444 "format": "time_series",
447 "legendFormat": "Bytes in",
448 "metric": "kafka_server_brokertopicmetrics_bytesinpersec",
453 "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))",
454 "format": "time_series",
458 "legendFormat": "Bytes out",
459 "metric": "kafka_server_brokertopicmetrics_bytesoutpersec",
466 "title": "Broker network throughput",
470 "cacheTimeout": null,
471 "datasource": "Prometheus",
472 "description": "Number of under-replicated partitions (| ISR | < | all replicas |).",
497 "color": "rgba(237, 129, 40, 0.89)",
519 "maxDataPoints": 100,
521 "colorMode": "value",
523 "justifyMode": "auto",
524 "orientation": "horizontal",
535 "pluginVersion": "8.1.3",
538 "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})",
539 "format": "time_series",
548 "title": "Under Replicated Partitions",
552 "cacheTimeout": null,
553 "datasource": "Prometheus",
554 "description": "Number of partitions under min insync replicas.",
579 "color": "rgba(237, 129, 40, 0.89)",
601 "maxDataPoints": 100,
603 "colorMode": "value",
605 "justifyMode": "auto",
606 "orientation": "horizontal",
617 "pluginVersion": "8.1.3",
620 "expr": "sum(kafka_cluster_partition_underminisr{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})",
621 "format": "time_series",
630 "title": "Under Min ISR Partitions",
634 "cacheTimeout": null,
635 "datasource": "Prometheus",
636 "description": "Number of partitions that don't have an active leader and are hence not writable or readable.",
683 "maxDataPoints": 100,
685 "colorMode": "value",
687 "justifyMode": "auto",
688 "orientation": "horizontal",
699 "pluginVersion": "8.1.3",
702 "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})",
703 "format": "time_series",
711 "title": "Offline Partitions Count",
715 "cacheTimeout": null,
716 "datasource": "Prometheus",
717 "description": "Unclean leader election rate",
742 "color": "rgba(237, 129, 40, 0.89)",
763 "maxDataPoints": 100,
765 "colorMode": "value",
767 "justifyMode": "auto",
768 "orientation": "horizontal",
779 "pluginVersion": "8.1.3",
782 "expr": "sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})",
783 "format": "time_series",
791 "title": "Unclean Leader Election Rate",
805 "title": "Request rate",
809 "cacheTimeout": null,
810 "datasource": "Prometheus",
811 "description": "Total request rate across all request types.",
847 "maxDataPoints": 100,
849 "colorMode": "value",
851 "justifyMode": "auto",
852 "orientation": "horizontal",
863 "pluginVersion": "8.1.3",
866 "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m]))",
874 "title": "All Request Per Sec",
878 "cacheTimeout": null,
879 "datasource": "Prometheus",
880 "description": "Produce request rate.",
916 "maxDataPoints": 100,
918 "colorMode": "value",
920 "justifyMode": "auto",
921 "orientation": "horizontal",
932 "pluginVersion": "8.1.3",
935 "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"Produce\"}[5m]))",
943 "title": "Produce Request Per Sec",
947 "cacheTimeout": null,
948 "datasource": "Prometheus",
949 "description": "Fetch request rate.",
985 "maxDataPoints": 100,
987 "colorMode": "value",
989 "justifyMode": "auto",
990 "orientation": "horizontal",
1001 "pluginVersion": "8.1.3",
1004 "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"FetchConsumer\"}[5m]))",
1012 "title": "Consumer Fetch Request Per Sec",
1020 "mode": "palette-classic"
1024 "axisPlacement": "auto",
1026 "drawStyle": "line",
1028 "gradientMode": "none",
1034 "lineInterpolation": "linear",
1037 "scaleDistribution": {
1040 "showPoints": "auto",
1046 "thresholdsStyle": {
1081 "displayMode": "table",
1082 "placement": "bottom"
1091 "expr": "rate(kafka_network_requestmetrics_errorspersec{error!=\"NONE\"}[5m])",
1093 "legendFormat": "{{error}} @ {{hostname}}",
1098 "type": "timeseries"
1101 "cacheTimeout": null,
1102 "datasource": "Prometheus",
1103 "description": "Fetch request rate.",
1139 "maxDataPoints": 100,
1141 "colorMode": "value",
1142 "graphMode": "area",
1143 "justifyMode": "auto",
1144 "orientation": "horizontal",
1155 "pluginVersion": "8.1.3",
1158 "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"Fetch\"}[5m]))",
1166 "title": "Broker Fetch Request Per Sec",
1170 "cacheTimeout": null,
1171 "datasource": "Prometheus",
1172 "description": "Offset Commit request rate.",
1208 "maxDataPoints": 100,
1210 "colorMode": "value",
1211 "graphMode": "area",
1212 "justifyMode": "auto",
1213 "orientation": "horizontal",
1224 "pluginVersion": "8.1.3",
1227 "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"OffsetCommit\"}[5m]))",
1235 "title": "Offset Commit Request Per Sec",
1239 "cacheTimeout": null,
1240 "datasource": "Prometheus",
1241 "description": "Metadata request rate.",
1277 "maxDataPoints": 100,
1279 "colorMode": "value",
1280 "graphMode": "area",
1281 "justifyMode": "auto",
1282 "orientation": "horizontal",
1293 "pluginVersion": "8.1.3",
1296 "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"Metadata\"}[5m]))",
1304 "title": "Metadata Request Per Sec",
1322 "datasource": "Prometheus",
1326 "mode": "palette-classic"
1329 "axisLabel": "Cores",
1330 "axisPlacement": "auto",
1332 "drawStyle": "line",
1334 "gradientMode": "none",
1340 "lineInterpolation": "linear",
1343 "scaleDistribution": {
1346 "showPoints": "never",
1352 "thresholdsStyle": {
1377 "options": "localhost:7071"
1383 "fixedColor": "#629E51",
1406 "displayMode": "table",
1407 "placement": "bottom"
1413 "pluginVersion": "8.1.3",
1416 "expr": "irate(process_cpu_seconds_total{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m])*100",
1417 "format": "time_series",
1419 "intervalFactor": 2,
1420 "legendFormat": "{{instance}}",
1421 "metric": "process_cpu_secondspersec",
1428 "title": "CPU Usage",
1429 "type": "timeseries"
1432 "datasource": "Prometheus",
1436 "mode": "palette-classic"
1439 "axisLabel": "Memory",
1440 "axisPlacement": "auto",
1442 "drawStyle": "line",
1444 "gradientMode": "none",
1450 "lineInterpolation": "linear",
1453 "scaleDistribution": {
1456 "showPoints": "never",
1462 "thresholdsStyle": {
1487 "options": "localhost:7071"
1493 "fixedColor": "#BA43A9",
1516 "displayMode": "table",
1517 "placement": "bottom"
1523 "pluginVersion": "8.1.3",
1526 "expr": "sum without(area)(jvm_memory_bytes_used{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})",
1528 "intervalFactor": 2,
1529 "legendFormat": "{{instance}}",
1530 "metric": "jvm_memory_bytes_used",
1535 "expr": "jvm_memory_bytes_max{job=\"kafka-broker\",area=\"heap\",env=\"$env\",instance=~\"$instance\"}",
1537 "legendFormat": "{{instance}}",
1543 "title": "JVM Memory Used",
1544 "type": "timeseries"
1547 "datasource": "Prometheus",
1551 "mode": "palette-classic"
1554 "axisLabel": "% time in GC",
1555 "axisPlacement": "auto",
1557 "drawStyle": "line",
1559 "gradientMode": "none",
1565 "lineInterpolation": "linear",
1568 "scaleDistribution": {
1571 "showPoints": "never",
1577 "thresholdsStyle": {
1596 "unit": "percentunit"
1602 "options": "localhost:7071"
1608 "fixedColor": "#890F02",
1631 "displayMode": "table",
1632 "placement": "bottom"
1638 "pluginVersion": "8.1.3",
1641 "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m]))",
1643 "intervalFactor": 2,
1644 "legendFormat": "{{instance}}",
1645 "metric": "jvm_gc_collection_seconds_sum",
1652 "title": "Time spent in GC",
1653 "type": "timeseries"
1667 "datasource": "Prometheus",
1671 "mode": "palette-classic"
1674 "axisLabel": "Messages/s",
1675 "axisPlacement": "auto",
1677 "drawStyle": "line",
1679 "gradientMode": "none",
1685 "lineInterpolation": "linear",
1688 "scaleDistribution": {
1691 "showPoints": "never",
1697 "thresholdsStyle": {
1732 "displayMode": "table",
1733 "placement": "bottom"
1739 "pluginVersion": "8.1.3",
1743 "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"kafka-broker\",env=\"$env\",topic!=\"\"}[5m]))",
1745 "intervalFactor": 2,
1746 "legendFormat": "messages/sec",
1747 "metric": "kafka_server_brokertopicmetrics_messagesinpersec",
1754 "title": "Messages In",
1755 "type": "timeseries"
1758 "datasource": "Prometheus",
1762 "mode": "palette-classic"
1765 "axisLabel": "Bytes/s",
1766 "axisPlacement": "auto",
1768 "drawStyle": "line",
1770 "gradientMode": "none",
1776 "lineInterpolation": "linear",
1779 "scaleDistribution": {
1782 "showPoints": "never",
1788 "thresholdsStyle": {
1827 "displayMode": "table",
1828 "placement": "bottom"
1834 "pluginVersion": "8.1.3",
1838 "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",env=\"$env\",topic!=\"\"}[5m]))",
1839 "format": "time_series",
1841 "intervalFactor": 2,
1842 "legendFormat": "bytes/sec",
1843 "metric": "kafka_server_brokertopicmetrics_bytesinpersec",
1850 "title": "Bytes In",
1851 "type": "timeseries"
1854 "datasource": "Prometheus",
1858 "mode": "palette-classic"
1861 "axisLabel": "Bytes/s",
1862 "axisPlacement": "auto",
1864 "drawStyle": "line",
1866 "gradientMode": "none",
1872 "lineInterpolation": "linear",
1875 "scaleDistribution": {
1878 "showPoints": "never",
1884 "thresholdsStyle": {
1923 "displayMode": "table",
1924 "placement": "bottom"
1930 "pluginVersion": "8.1.3",
1934 "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",env=\"$env\",topic!=\"\"}[5m]))",
1936 "intervalFactor": 2,
1937 "legendFormat": "bytes/sec",
1938 "metric": "kafka_server_brokertopicmetrics_bytesoutpersec",
1945 "title": "Bytes Out",
1946 "type": "timeseries"
1949 "datasource": "Prometheus",
1953 "mode": "palette-classic"
1956 "axisLabel": "Messages/s",
1957 "axisPlacement": "auto",
1959 "drawStyle": "line",
1961 "gradientMode": "none",
1967 "lineInterpolation": "linear",
1970 "scaleDistribution": {
1973 "showPoints": "never",
1979 "thresholdsStyle": {
2017 "displayMode": "table",
2018 "placement": "bottom"
2024 "pluginVersion": "8.1.3",
2027 "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))",
2028 "format": "time_series",
2030 "intervalFactor": 2,
2031 "legendFormat": "{{instance}}",
2032 "metric": "kafka_server_brokertopicmetrics_messagesinpersec",
2039 "title": "Messages In Per Broker",
2040 "type": "timeseries"
2043 "datasource": "Prometheus",
2047 "mode": "palette-classic"
2050 "axisLabel": "Bytes/s",
2051 "axisPlacement": "auto",
2053 "drawStyle": "line",
2055 "gradientMode": "none",
2061 "lineInterpolation": "linear",
2064 "scaleDistribution": {
2067 "showPoints": "never",
2073 "thresholdsStyle": {
2111 "displayMode": "table",
2112 "placement": "bottom"
2118 "pluginVersion": "8.1.3",
2121 "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))",
2122 "format": "time_series",
2124 "intervalFactor": 2,
2125 "legendFormat": "{{instance}}",
2126 "metric": "kafka_server_brokertopicmetrics_bytesinpersec",
2133 "title": "Bytes In Per Broker",
2134 "type": "timeseries"
2137 "datasource": "Prometheus",
2141 "mode": "palette-classic"
2145 "axisPlacement": "auto",
2147 "drawStyle": "line",
2149 "gradientMode": "none",
2155 "lineInterpolation": "linear",
2158 "scaleDistribution": {
2161 "showPoints": "never",
2167 "thresholdsStyle": {
2206 "displayMode": "table",
2207 "placement": "bottom"
2213 "pluginVersion": "8.1.3",
2216 "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))",
2217 "format": "time_series",
2219 "intervalFactor": 1,
2220 "legendFormat": "{{instance}}",
2226 "title": "Bytes Out Per Broker",
2227 "type": "timeseries"
2230 "title": "Throughput In/Out",
2245 "datasource": "Prometheus",
2246 "description": "Average fraction of time the network processor threads are busy, computed as 1 minus the average idle fraction. Values range from 0% (all threads idle) to 100% (all threads busy).\n",
2250 "mode": "palette-classic"
2254 "axisPlacement": "auto",
2256 "drawStyle": "line",
2258 "gradientMode": "none",
2264 "lineInterpolation": "linear",
2267 "scaleDistribution": {
2270 "showPoints": "never",
2276 "thresholdsStyle": {
2295 "unit": "percentunit"
2313 "displayMode": "table",
2314 "placement": "bottom"
2320 "pluginVersion": "8.1.3",
2323 "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}",
2325 "legendFormat": "{{instance}}",
2331 "title": "Network Processor Avg Usage Percent",
2332 "type": "timeseries"
2335 "datasource": "Prometheus",
2336 "description": "Average fraction of time the request handler threads are busy, computed as 1 minus the average idle fraction. Values range from 0% (all threads idle) to 100% (all threads busy).\n",
2340 "mode": "palette-classic"
2344 "axisPlacement": "auto",
2346 "drawStyle": "line",
2348 "gradientMode": "none",
2354 "lineInterpolation": "linear",
2357 "scaleDistribution": {
2360 "showPoints": "never",
2366 "thresholdsStyle": {
2386 "unit": "percentunit"
2404 "displayMode": "table",
2405 "placement": "bottom"
2411 "pluginVersion": "8.1.3",
2414 "expr": "1 - kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}",
2416 "legendFormat": "{{instance}}",
2422 "title": "Request Handler Avg Percent",
2423 "type": "timeseries"
2426 "title": "Thread utilization",
2441 "datasource": "Prometheus",
2442 "description": "Latency in milliseconds for ZooKeeper requests from broker.\n",
2446 "mode": "palette-classic"
2450 "axisPlacement": "auto",
2452 "drawStyle": "line",
2454 "gradientMode": "none",
2460 "lineInterpolation": "linear",
2463 "scaleDistribution": {
2466 "showPoints": "never",
2472 "thresholdsStyle": {
2509 "displayMode": "table",
2510 "placement": "bottom"
2516 "pluginVersion": "8.1.3",
2519 "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\"}",
2521 "legendFormat": "{{instance}} - {{quantile}}",
2527 "title": "Zookeeper Request Latency",
2528 "type": "timeseries"
2531 "datasource": "Prometheus",
2536 "mode": "palette-classic"
2540 "axisPlacement": "auto",
2542 "drawStyle": "line",
2544 "gradientMode": "none",
2550 "lineInterpolation": "linear",
2553 "scaleDistribution": {
2556 "showPoints": "never",
2562 "thresholdsStyle": {
2600 "displayMode": "table",
2601 "placement": "bottom"
2607 "pluginVersion": "8.1.3",
2610 "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}",
2614 "legendFormat": "{{instance}}",
2620 "title": "Zookeeper connections per sec",
2621 "type": "timeseries"
2624 "datasource": "Prometheus",
2629 "mode": "palette-classic"
2633 "axisPlacement": "auto",
2635 "drawStyle": "line",
2637 "gradientMode": "none",
2643 "lineInterpolation": "linear",
2646 "scaleDistribution": {
2649 "showPoints": "never",
2655 "thresholdsStyle": {
2693 "displayMode": "table",
2694 "placement": "bottom"
2700 "pluginVersion": "8.1.3",
2703 "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}",
2706 "legendFormat": "{{instance}}",
2712 "title": "Zookeeper expired connections per sec",
2713 "type": "timeseries"
2716 "datasource": "Prometheus",
2721 "mode": "palette-classic"
2725 "axisPlacement": "auto",
2727 "drawStyle": "line",
2729 "gradientMode": "none",
2735 "lineInterpolation": "linear",
2738 "scaleDistribution": {
2741 "showPoints": "never",
2747 "thresholdsStyle": {
2785 "displayMode": "table",
2786 "placement": "bottom"
2792 "pluginVersion": "8.1.3",
2795 "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}",
2798 "legendFormat": "{{instance}}",
2804 "title": "Zookeeper disconnect per sec",
2805 "type": "timeseries"
2808 "datasource": "Prometheus",
2813 "mode": "palette-classic"
2817 "axisPlacement": "auto",
2819 "drawStyle": "line",
2821 "gradientMode": "none",
2827 "lineInterpolation": "linear",
2830 "scaleDistribution": {
2833 "showPoints": "never",
2839 "thresholdsStyle": {
2877 "displayMode": "table",
2878 "placement": "bottom"
2884 "pluginVersion": "8.1.3",
2887 "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}",
2890 "legendFormat": "{{instance}}",
2896 "title": "Zookeeper auth failures per sec",
2897 "type": "timeseries"
2900 "title": "Zookeeper",
2915 "datasource": "Prometheus",
2916 "description": "The number of in-sync replicas (ISRs) for a particular partition should remain fairly static; the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leader's offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention. The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.",
2920 "mode": "palette-classic"
2924 "axisPlacement": "auto",
2926 "drawStyle": "line",
2928 "gradientMode": "none",
2934 "lineInterpolation": "linear",
2937 "scaleDistribution": {
2940 "showPoints": "never",
2946 "thresholdsStyle": {
2984 "displayMode": "table",
2985 "placement": "bottom"
2991 "pluginVersion": "8.1.3",
2994 "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m])",
2996 "legendFormat": "{{instance}}",
3002 "title": "IsrShrinks per Sec",
3003 "type": "timeseries"
3006 "datasource": "Prometheus",
3007 "description": "The number of in-sync replicas (ISRs) for a particular partition should remain fairly static; the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leader's offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention. The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.",
3011 "mode": "palette-classic"
3015 "axisPlacement": "auto",
3017 "drawStyle": "line",
3019 "gradientMode": "none",
3025 "lineInterpolation": "linear",
3028 "scaleDistribution": {
3031 "showPoints": "never",
3037 "thresholdsStyle": {
3075 "displayMode": "table",
3076 "placement": "bottom"
3082 "pluginVersion": "8.1.3",
3085 "expr": "rate(kafka_server_replicamanager_isrexpandspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m])",
3088 "legendFormat": "{{instance}}",
3094 "title": "IsrExpands per Sec",
3095 "type": "timeseries"
3098 "title": "Isr Shrinks / Expands",
3113 "datasource": "Prometheus",
3117 "mode": "palette-classic"
3121 "axisPlacement": "auto",
3123 "drawStyle": "line",
3125 "gradientMode": "none",
3131 "lineInterpolation": "linear",
3134 "scaleDistribution": {
3137 "showPoints": "never",
3143 "thresholdsStyle": {
3180 "displayMode": "table",
3181 "placement": "bottom"
3187 "pluginVersion": "8.1.3",
3190 "expr": "sum(kafka_log_log_size{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}) by (topic)",
3191 "legendFormat": "{{topic}}",
3197 "title": "Log size per Topic",
3198 "type": "timeseries"
3201 "datasource": "Prometheus",
3205 "mode": "palette-classic"
3209 "axisPlacement": "auto",
3211 "drawStyle": "line",
3213 "gradientMode": "none",
3219 "lineInterpolation": "linear",
3222 "scaleDistribution": {
3225 "showPoints": "never",
3231 "thresholdsStyle": {
3268 "displayMode": "table",
3269 "placement": "bottom"
3275 "pluginVersion": "8.1.3",
3278 "expr": "sum(kafka_log_log_size{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}) by (instance)",
3279 "legendFormat": "{{instance}}",
3285 "title": "Log size per Broker",
3286 "type": "timeseries"
3289 "title": "Logs size",
3304 "datasource": "Prometheus",
3305 "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isn't large enough. The request queue size should match the number of connections.",
3309 "mode": "palette-classic"
3313 "axisPlacement": "auto",
3315 "drawStyle": "line",
3317 "gradientMode": "none",
3323 "lineInterpolation": "linear",
3326 "scaleDistribution": {
3329 "showPoints": "never",
3335 "thresholdsStyle": {
3372 "displayMode": "list",
3373 "placement": "bottom"
3379 "pluginVersion": "8.1.3",
3382 "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}",
3384 "legendFormat": "{{instance}} - {{quantile}}",
3390 "title": "Producer - RequestQueueTimeMs",
3391 "type": "timeseries"
3394 "datasource": "Prometheus",
3395 "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.",
3399 "mode": "palette-classic"
3403 "axisPlacement": "auto",
3405 "drawStyle": "line",
3407 "gradientMode": "none",
3413 "lineInterpolation": "linear",
3416 "scaleDistribution": {
3419 "showPoints": "never",
3425 "thresholdsStyle": {
3462 "displayMode": "list",
3463 "placement": "bottom"
3469 "pluginVersion": "8.1.3",
3472 "expr": "kafka_network_requestmetrics_localtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}",
3474 "legendFormat": "{{instance}} - {{quantile}}",
3480 "title": "Producer - LocalTimeMs",
3481 "type": "timeseries"
3484 "datasource": "Prometheus",
3485 "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n",
3489 "mode": "palette-classic"
3493 "axisPlacement": "auto",
3495 "drawStyle": "line",
3497 "gradientMode": "none",
3503 "lineInterpolation": "linear",
3506 "scaleDistribution": {
3509 "showPoints": "never",
3515 "thresholdsStyle": {
3552 "displayMode": "table",
3553 "placement": "bottom"
3559 "pluginVersion": "8.1.3",
3562 "expr": "kafka_network_requestmetrics_remotetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}",
3564 "legendFormat": "{{instance}} - {{quantile}}",
3570 "title": "Producer - RemoteTimeMs",
3571 "type": "timeseries"
3574 "datasource": "Prometheus",
3575 "description": "A high value can imply there aren't enough network threads or the network can't dequeue responses quickly enough, causing back pressure in the response queue.\n",
3579 "mode": "palette-classic"
3583 "axisPlacement": "auto",
3585 "drawStyle": "line",
3587 "gradientMode": "none",
3593 "lineInterpolation": "linear",
3596 "scaleDistribution": {
3599 "showPoints": "never",
3605 "thresholdsStyle": {
3642 "displayMode": "table",
3643 "placement": "bottom"
3649 "pluginVersion": "8.1.3",
3652 "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}",
3654 "legendFormat": "{{instance}} - {{quantile}}",
3660 "title": "Producer - ResponseQueueTimeMs",
3661 "type": "timeseries"
3664 "datasource": "Prometheus",
3665 "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network can't dequeue responses of the TCP socket as quickly as they're being created. If the network buffer gets full, Kafka will block.\n",
3669 "mode": "palette-classic"
3673 "axisPlacement": "auto",
3675 "drawStyle": "line",
3677 "gradientMode": "none",
3683 "lineInterpolation": "linear",
3686 "scaleDistribution": {
3689 "showPoints": "never",
3695 "thresholdsStyle": {
3733 "displayMode": "table",
3734 "placement": "bottom"
3740 "pluginVersion": "8.1.3",
3743 "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}",
3745 "legendFormat": "{{instance}} - {{quantile}}",
3751 "title": "Producer - ResponseSendTimeMs",
3752 "type": "timeseries"
3755 "title": "Producer Performance",
3770 "datasource": "Prometheus",
3771 "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isn't large enough. The request queue size should match the number of connections.",
3775 "mode": "palette-classic"
3779 "axisPlacement": "auto",
3781 "drawStyle": "line",
3783 "gradientMode": "none",
3789 "lineInterpolation": "linear",
3792 "scaleDistribution": {
3795 "showPoints": "never",
3801 "thresholdsStyle": {
3839 "displayMode": "list",
3840 "placement": "bottom"
3846 "pluginVersion": "8.1.3",
3849 "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}",
3851 "legendFormat": "{{instance}} - {{quantile}}",
3857 "title": "Consumer - RequestQueueTimeMs",
3858 "type": "timeseries"
3861 "datasource": "Prometheus",
3862 "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.",
3866 "mode": "palette-classic"
3870 "axisPlacement": "auto",
3872 "drawStyle": "line",
3874 "gradientMode": "none",
3880 "lineInterpolation": "linear",
3883 "scaleDistribution": {
3886 "showPoints": "never",
3892 "thresholdsStyle": {
3929 "displayMode": "list",
3930 "placement": "bottom"
3936 "pluginVersion": "8.1.3",
3939 "expr": "kafka_network_requestmetrics_localtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}",
3941 "legendFormat": "{{instance}} - {{quantile}}",
3947 "title": "Consumer - LocalTimeMs",
3948 "type": "timeseries"
3951 "datasource": "Prometheus",
3952 "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n",
3956 "mode": "palette-classic"
3960 "axisPlacement": "auto",
3962 "drawStyle": "line",
3964 "gradientMode": "none",
3970 "lineInterpolation": "linear",
3973 "scaleDistribution": {
3976 "showPoints": "never",
3982 "thresholdsStyle": {
4019 "displayMode": "table",
4020 "placement": "bottom"
4026 "pluginVersion": "8.1.3",
4029 "expr": "kafka_network_requestmetrics_remotetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}",
4031 "legendFormat": "{{instance}} - {{quantile}}",
4037 "title": "Consumer - RemoteTimeMs",
4038 "type": "timeseries"
4041 "datasource": "Prometheus",
4042 "description": "A high value can imply there aren't enough network threads or the network can't dequeue responses quickly enough, causing back pressure in the response queue.\n",
4046 "mode": "palette-classic"
4050 "axisPlacement": "auto",
4052 "drawStyle": "line",
4054 "gradientMode": "none",
4060 "lineInterpolation": "linear",
4063 "scaleDistribution": {
4066 "showPoints": "never",
4072 "thresholdsStyle": {
4110 "displayMode": "table",
4111 "placement": "bottom"
4117 "pluginVersion": "8.1.3",
4120 "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}",
4122 "legendFormat": "{{instance}} - {{quantile}}",
4128 "title": "Consumer - ResponseQueueTimeMs",
4129 "type": "timeseries"
4132 "datasource": "Prometheus",
4133 "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network can't dequeue responses of the TCP socket as quickly as they're being created. If the network buffer gets full, Kafka will block.\n",
4137 "mode": "palette-classic"
4141 "axisPlacement": "auto",
4143 "drawStyle": "line",
4145 "gradientMode": "none",
4151 "lineInterpolation": "linear",
4154 "scaleDistribution": {
4157 "showPoints": "never",
4163 "thresholdsStyle": {
4201 "displayMode": "table",
4202 "placement": "bottom"
4208 "pluginVersion": "8.1.3",
4211 "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}",
4213 "legendFormat": "{{instance}} - {{quantile}}",
4219 "title": "Consumer - ResponseSendTimeMs",
4220 "type": "timeseries"
4223 "title": "Consumer Performance",
4238 "datasource": "Prometheus",
4239 "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isn't large enough. The request queue size should match the number of connections.",
4243 "mode": "palette-classic"
4247 "axisPlacement": "auto",
4249 "drawStyle": "line",
4251 "gradientMode": "none",
4257 "lineInterpolation": "linear",
4260 "scaleDistribution": {
4263 "showPoints": "never",
4269 "thresholdsStyle": {
4307 "displayMode": "list",
4308 "placement": "bottom"
4314 "pluginVersion": "8.1.3",
4317 "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}",
4319 "legendFormat": "{{instance}} - {{quantile}}",
4325 "title": "FetchFollower - RequestQueueTimeMs",
4326 "type": "timeseries"
4329 "datasource": "Prometheus",
4330 "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.",
4334 "mode": "palette-classic"
4338 "axisPlacement": "auto",
4340 "drawStyle": "line",
4342 "gradientMode": "none",
4348 "lineInterpolation": "linear",
4351 "scaleDistribution": {
4354 "showPoints": "never",
4360 "thresholdsStyle": {
4398 "displayMode": "list",
4399 "placement": "bottom"
4405 "pluginVersion": "8.1.3",
4408 "expr": "kafka_network_requestmetrics_localtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}",
4410 "legendFormat": "{{instance}} - {{quantile}}",
4416 "title": "FetchFollower - LocalTimeMs",
4417 "type": "timeseries"
4420 "datasource": "Prometheus",
4421 "description": "A high value can imply a slow network connection. For fetch requests, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n",
4425 "mode": "palette-classic"
4429 "axisPlacement": "auto",
4431 "drawStyle": "line",
4433 "gradientMode": "none",
4439 "lineInterpolation": "linear",
4442 "scaleDistribution": {
4445 "showPoints": "never",
4451 "thresholdsStyle": {
4488 "displayMode": "table",
4489 "placement": "bottom"
4495 "pluginVersion": "8.1.3",
4498 "expr": "kafka_network_requestmetrics_remotetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}",
4500 "legendFormat": "{{instance}} - {{quantile}}",
4506 "title": "FetchFollower - RemoteTimeMs",
4507 "type": "timeseries"
4510 "datasource": "Prometheus",
4511 "description": "A high value can imply there aren't enough network threads or the network can't dequeue responses quickly enough, causing back pressure in the response queue.\n",
4515 "mode": "palette-classic"
4519 "axisPlacement": "auto",
4521 "drawStyle": "line",
4523 "gradientMode": "none",
4529 "lineInterpolation": "linear",
4532 "scaleDistribution": {
4535 "showPoints": "never",
4541 "thresholdsStyle": {
4578 "displayMode": "table",
4579 "placement": "bottom"
4585 "pluginVersion": "8.1.3",
4588 "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}",
4590 "legendFormat": "{{instance}} - {{quantile}}",
4596 "title": "FetchFollower - ResponseQueueTimeMs",
4597 "type": "timeseries"
4600 "datasource": "Prometheus",
4601 "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network can't dequeue responses of the TCP socket as quickly as they're being created. If the network buffer gets full, Kafka will block.\n",
4605 "mode": "palette-classic"
4609 "axisPlacement": "auto",
4611 "drawStyle": "line",
4613 "gradientMode": "none",
4619 "lineInterpolation": "linear",
4622 "scaleDistribution": {
4625 "showPoints": "never",
4631 "thresholdsStyle": {
4669 "displayMode": "table",
4670 "placement": "bottom"
4676 "pluginVersion": "8.1.3",
4679 "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}",
4681 "legendFormat": "{{instance}} - {{quantile}}",
4687 "title": "FetchFollower - ResponseSendTimeMs",
4688 "type": "timeseries"
4691 "title": "Fetch Follower Performance",
4706 "datasource": "Prometheus",
4710 "mode": "palette-classic"
4714 "axisPlacement": "auto",
4716 "drawStyle": "line",
4718 "gradientMode": "none",
4724 "lineInterpolation": "linear",
4727 "scaleDistribution": {
4730 "showPoints": "never",
4736 "thresholdsStyle": {
4773 "displayMode": "table",
4774 "placement": "bottom"
4780 "pluginVersion": "8.1.3",
4783 "expr": "sum(kafka_server_socketservermetrics_connection_count{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (listener)",
4785 "legendFormat": "{{listener}}",
4791 "title": "Connections count per listener",
4792 "type": "timeseries"
4795 "datasource": "Prometheus",
4799 "mode": "palette-classic"
4803 "axisPlacement": "auto",
4805 "drawStyle": "line",
4807 "gradientMode": "none",
4813 "lineInterpolation": "linear",
4816 "scaleDistribution": {
4819 "showPoints": "never",
4825 "thresholdsStyle": {
4862 "displayMode": "table",
4863 "placement": "bottom"
4869 "pluginVersion": "8.1.3",
4872 "expr": "sum(kafka_server_socketservermetrics_connection_count{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (instance)",
4874 "legendFormat": "{{instance}}",
4880 "title": "Connections count per broker",
4881 "type": "timeseries"
4884 "datasource": "Prometheus",
4888 "mode": "palette-classic"
4892 "axisPlacement": "auto",
4894 "drawStyle": "line",
4896 "gradientMode": "none",
4902 "lineInterpolation": "linear",
4905 "scaleDistribution": {
4908 "showPoints": "never",
4914 "thresholdsStyle": {
4951 "displayMode": "table",
4952 "placement": "bottom"
4958 "pluginVersion": "8.1.3",
4961 "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (listener)",
4963 "legendFormat": "{{listener}}",
4969 "title": "Connections creation rate per listener",
4970 "type": "timeseries"
4973 "datasource": "Prometheus",
4977 "mode": "palette-classic"
4981 "axisPlacement": "auto",
4983 "drawStyle": "line",
4985 "gradientMode": "none",
4991 "lineInterpolation": "linear",
4994 "scaleDistribution": {
4997 "showPoints": "never",
5003 "thresholdsStyle": {
5040 "displayMode": "table",
5041 "placement": "bottom"
5047 "pluginVersion": "8.1.3",
5050 "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (instance)",
5052 "legendFormat": "{{instance}}",
5058 "title": "Connections creation rate per instance",
5059 "type": "timeseries"
5062 "datasource": "Prometheus",
5066 "mode": "palette-classic"
5070 "axisPlacement": "auto",
5072 "drawStyle": "line",
5074 "gradientMode": "none",
5080 "lineInterpolation": "linear",
5083 "scaleDistribution": {
5086 "showPoints": "never",
5092 "thresholdsStyle": {
5129 "displayMode": "table",
5130 "placement": "bottom"
5136 "pluginVersion": "8.1.3",
5139 "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (listener)",
5141 "legendFormat": "{{listener}}",
5147 "title": "Connections close rate per listener",
5148 "type": "timeseries"
5151 "datasource": "Prometheus",
5155 "mode": "palette-classic"
5159 "axisPlacement": "auto",
5161 "drawStyle": "line",
5163 "gradientMode": "none",
5169 "lineInterpolation": "linear",
5172 "scaleDistribution": {
5175 "showPoints": "never",
5181 "thresholdsStyle": {
5218 "displayMode": "table",
5219 "placement": "bottom"
5225 "pluginVersion": "8.1.3",
5228 "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (instance)",
5230 "legendFormat": "{{instance}}",
5236 "title": "Connections close rate per instance",
5237 "type": "timeseries"
5240 "datasource": "Prometheus",
5241 "description": "Tracks the amount of time Acceptor is blocked from accepting connections. See KIP-402 for more details.",
5245 "mode": "palette-classic"
5249 "axisPlacement": "auto",
5251 "drawStyle": "line",
5253 "gradientMode": "none",
5259 "lineInterpolation": "linear",
5262 "scaleDistribution": {
5265 "showPoints": "never",
5271 "thresholdsStyle": {
5309 "displayMode": "table",
5310 "placement": "bottom"
5316 "pluginVersion": "8.1.3",
5319 "expr": "kafka_network_acceptor_acceptorblockedpercent{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}",
5321 "legendFormat": "{{instance}} - {{listener}}",
5327 "title": "Acceptor Blocked Percentage",
5328 "type": "timeseries"
5331 "datasource": "Prometheus",
5335 "mode": "palette-classic"
5339 "axisPlacement": "auto",
5341 "drawStyle": "line",
5343 "gradientMode": "none",
5349 "lineInterpolation": "linear",
5352 "scaleDistribution": {
5355 "showPoints": "never",
5361 "thresholdsStyle": {
5398 "displayMode": "table",
5399 "placement": "bottom"
5405 "pluginVersion": "8.1.3",
5408 "expr": "sum(kafka_server_socketservermetrics_connections{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (client_software_name, client_software_version)",
5410 "legendFormat": "{{client_software_name}} {{client_software_version}}",
5416 "title": "Connections per client version",
5417 "type": "timeseries"
5420 "title": "Connections",
5435 "datasource": "Prometheus",
5436 "description": "Number of consumer groups per group coordinator",
5440 "mode": "palette-classic"
5444 "axisPlacement": "auto",
5446 "drawStyle": "line",
5448 "gradientMode": "none",
5454 "lineInterpolation": "linear",
5457 "scaleDistribution": {
5460 "showPoints": "never",
5466 "thresholdsStyle": {
5503 "displayMode": "table",
5504 "placement": "bottom"
5510 "pluginVersion": "8.1.3",
5513 "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}",
5516 "legendFormat": "{{instance}}",
5522 "title": "Consumer groups number per coordinator",
5523 "type": "timeseries"
5526 "datasource": "Prometheus",
5527 "description": "Number of consumer groups per state",
5531 "mode": "palette-classic"
5535 "axisPlacement": "auto",
5537 "drawStyle": "line",
5539 "gradientMode": "none",
5545 "lineInterpolation": "linear",
5548 "scaleDistribution": {
5551 "showPoints": "never",
5557 "thresholdsStyle": {
5594 "displayMode": "table",
5595 "placement": "bottom"
5601 "pluginVersion": "8.1.3",
5604 "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})",
5607 "legendFormat": "stable",
5611 "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})",
5613 "legendFormat": "preparing-rebalance",
5617 "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})",
5619 "legendFormat": "dead",
5623 "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})",
5625 "legendFormat": "completing-rebalance",
5629 "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})",
5631 "legendFormat": "empty",
5637 "title": "Nb consumer groups per state",
5638 "type": "timeseries"
5641 "title": "Group Coordinator",
5656 "datasource": "Prometheus",
5657 "description": "The number of messages produced converted to match the log.message.format.version.",
5661 "mode": "palette-classic"
5665 "axisPlacement": "auto",
5667 "drawStyle": "line",
5669 "gradientMode": "none",
5675 "lineInterpolation": "linear",
5678 "scaleDistribution": {
5681 "showPoints": "never",
5687 "thresholdsStyle": {
5725 "displayMode": "list",
5726 "placement": "bottom"
5732 "pluginVersion": "8.1.3",
5735 "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})",
5738 "legendFormat": "{{topic}}",
5744 "title": "Number of produced message conversion",
5745 "type": "timeseries"
5748 "datasource": "Prometheus",
5749 "description": "The number of messages consumed converted at consumer to match the log.message.format.version.",
5753 "mode": "palette-classic"
5757 "axisPlacement": "auto",
5759 "drawStyle": "line",
5761 "gradientMode": "none",
5767 "lineInterpolation": "linear",
5770 "scaleDistribution": {
5773 "showPoints": "never",
5779 "thresholdsStyle": {
5817 "displayMode": "list",
5818 "placement": "bottom"
5824 "pluginVersion": "8.1.3",
5827 "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})",
5830 "legendFormat": "{{topic}}",
5836 "title": "Number of consumed message conversion",
5837 "type": "timeseries"
5840 "cacheTimeout": null,
5842 "description": "Number of connections per client version",
5846 "mode": "palette-classic"
5873 "displayMode": "table",
5874 "placement": "bottom",
5893 "expr": "sum(kafka_server_socketservermetrics_connections{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (client_software_name, client_software_version) ",
5895 "legendFormat": "{{client_software_name}} - {{client_software_version}}",
5901 "title": "Client version repartition",
5905 "title": "Message Conversion",
5910 "schemaVersion": 30,
5922 "datasource": "Prometheus",
5923 "definition": "label_values(env)",
5924 "description": null,
5927 "includeAll": false,
5928 "label": "Environment",
5933 "query": "label_values(env)",
5934 "refId": "Prometheus-env-Variable-Query"
5938 "skipUrlSync": false,
5940 "tagValuesQuery": "",
5956 "datasource": "Prometheus",
5957 "definition": "label_values(kafka_server_kafkaserver_brokerstate{env=\"${env}\"}, instance)",
5958 "description": null,
5962 "label": "Instance",
5967 "query": "label_values(kafka_server_kafkaserver_brokerstate{env=\"${env}\"}, instance)",
5968 "refId": "Prometheus-instance-Variable-Query"
5972 "skipUrlSync": false,
5974 "tagValuesQuery": "",
5990 "datasource": "Prometheus",
5991 "definition": "label_values(quantile)",
5992 "description": null,
5996 "label": "Percentile",
5998 "name": "percentile",
6001 "query": "label_values(quantile)",
6002 "refId": "Prometheus-percentile-Variable-Query"
6006 "skipUrlSync": false,
6008 "tagValuesQuery": "",
6020 "refresh_intervals": [
6044 "timezone": "browser",
6045 "title": "Kafka cluster",