catalog/operators/cloudnative-pg/0.0.4/ix_values.yaml

821 lines
26 KiB
YAML

image:
repository: ghcr.io/cloudnative-pg/cloudnative-pg
tag: "1.20.0@sha256:e41f52ed914ce54499aaf7249b50b9b71acb28ca13f29aaaa54b43071674f2e2"
pullPolicy:
workload:
main:
podSpec:
containers:
main:
args:
- controller
- --leader-elect
- --config-map-name={{ include "tc.v1.common.lib.chart.names.fullname" $ }}-config
- --secret-name={{ include "tc.v1.common.lib.chart.names.fullname" $ }}-config
- --webhook-port=9443
command:
- /manager
probes:
liveness:
port: webhook
type: https
path: /readyz
readiness:
port: webhook
type: https
path: /readyz
startup:
port: webhook
type: tcp
env:
OPERATOR_IMAGE_NAME: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
OPERATOR_NAMESPACE:
fieldRef:
fieldPath: metadata.namespace
MONITORING_QUERIES_CONFIGMAP: '{{ include "tc.v1.common.lib.chart.names.fullname" $ }}-monitoring'
podOptions:
automountServiceAccountToken: true
service:
main:
ports:
main:
protocol: http
port: 8080
cnpg-webhook-service:
enabled: true
expandObjectName: false
ports:
webhook:
enabled: true
protocol: https
port: 443
targetPort: 9443
operator:
register: true
persistence:
scratch-data:
enabled: true
type: emptyDir
mountPath: /controller
webhook-certificates:
enabled: true
type: secret
objectName: cnpg-webhook-cert
expandObjectName: false
optional: true
defaultMode: "0420"
readOnly: true
targetSelector:
main:
main:
mountPath: "/run/secrets/cnpg.io/webhook"
portal:
open:
enabled: false
metrics:
main:
enabled: false
type: "podmonitor"
endpoints:
- port: main
interval: 5s
scrapeTimeout: 5s
path: /
honorLabels: false
rbac:
main:
enabled: true
primary: true
clusterWide: true
rules:
- apiGroups:
- ""
resources:
- configmaps
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- configmaps/status
verbs:
- get
- patch
- update
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- ""
resources:
- namespaces
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- persistentvolumeclaims
verbs:
- create
- delete
- get
- list
- patch
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- create
- delete
- get
- list
- patch
- watch
- apiGroups:
- ""
resources:
- pods/exec
verbs:
- create
- delete
- get
- list
- patch
- watch
- apiGroups:
- ""
resources:
- pods/status
verbs:
- get
- apiGroups:
- ""
resources:
- secrets
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- secrets/status
verbs:
- get
- patch
- update
- apiGroups:
- ""
resources:
- serviceaccounts
verbs:
- create
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- services
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
- mutatingwebhookconfigurations
verbs:
- get
- list
- patch
- update
- apiGroups:
- admissionregistration.k8s.io
resources:
- validatingwebhookconfigurations
verbs:
- get
- list
- patch
- update
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- get
- list
- update
- apiGroups:
- apps
resources:
- deployments
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- batch
resources:
- jobs
verbs:
- create
- delete
- get
- list
- patch
- watch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- create
- get
- update
- apiGroups:
- monitoring.coreos.com
resources:
- podmonitors
verbs:
- create
- delete
- get
- list
- patch
- watch
- apiGroups:
- policy
resources:
- poddisruptionbudgets
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- postgresql.cnpg.io
resources:
- backups
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- postgresql.cnpg.io
resources:
- backups/status
verbs:
- get
- patch
- update
- apiGroups:
- postgresql.cnpg.io
resources:
- clusters
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- postgresql.cnpg.io
resources:
- clusters/finalizers
verbs:
- update
- apiGroups:
- postgresql.cnpg.io
resources:
- clusters/status
verbs:
- get
- patch
- update
- watch
- apiGroups:
- postgresql.cnpg.io
resources:
- poolers
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- postgresql.cnpg.io
resources:
- poolers/finalizers
verbs:
- update
- apiGroups:
- postgresql.cnpg.io
resources:
- poolers/status
verbs:
- get
- patch
- update
- watch
- apiGroups:
- postgresql.cnpg.io
resources:
- scheduledbackups
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- postgresql.cnpg.io
resources:
- scheduledbackups/status
verbs:
- get
- patch
- update
- apiGroups:
- rbac.authorization.k8s.io
resources:
- rolebindings
verbs:
- create
- get
- list
- patch
- update
- watch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- roles
verbs:
- create
- get
- list
- patch
- update
- watch
serviceAccount:
main:
enabled: true
primary: true
webhook:
mutating:
create: true
failurePolicy: Fail
validating:
create: true
failurePolicy: Fail
manifestManager:
enabled: true
staging: false
install: false
check: false
delete: true
configmap:
config:
enabled: true
data:
CREATE_ANY_SERVICE: "true"
monitoring:
enabled: true
data:
queries: |
backends:
query: |
SELECT sa.datname
, sa.usename
, sa.application_name
, states.state
, COALESCE(sa.count, 0) AS total
, COALESCE(sa.max_tx_secs, 0) AS max_tx_duration_seconds
FROM ( VALUES ('active')
, ('idle')
, ('idle in transaction')
, ('idle in transaction (aborted)')
, ('fastpath function call')
, ('disabled')
) AS states(state)
LEFT JOIN (
SELECT datname
, state
, usename
, COALESCE(application_name, '') AS application_name
, COUNT(*)
, COALESCE(EXTRACT (EPOCH FROM (max(now() - xact_start))), 0) AS max_tx_secs
FROM pg_catalog.pg_stat_activity
GROUP BY datname, state, usename, application_name
) sa ON states.state = sa.state
WHERE sa.usename IS NOT NULL
metrics:
- datname:
usage: "LABEL"
description: "Name of the database"
- usename:
usage: "LABEL"
description: "Name of the user"
- application_name:
usage: "LABEL"
description: "Name of the application"
- state:
usage: "LABEL"
description: "State of the backend"
- total:
usage: "GAUGE"
description: "Number of backends"
- max_tx_duration_seconds:
usage: "GAUGE"
description: "Maximum duration of a transaction in seconds"
backends_waiting:
query: |
SELECT count(*) AS total
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_locks blocking_locks
ON blocking_locks.locktype = blocked_locks.locktype
AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database
AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted
metrics:
- total:
usage: "GAUGE"
description: "Total number of backends that are currently waiting on other queries"
pg_database:
query: |
SELECT datname
, pg_catalog.pg_database_size(datname) AS size_bytes
, pg_catalog.age(datfrozenxid) AS xid_age
, pg_catalog.mxid_age(datminmxid) AS mxid_age
FROM pg_catalog.pg_database
metrics:
- datname:
usage: "LABEL"
description: "Name of the database"
- size_bytes:
usage: "GAUGE"
description: "Disk space used by the database"
- xid_age:
usage: "GAUGE"
description: "Number of transactions from the frozen XID to the current one"
- mxid_age:
usage: "GAUGE"
description: "Number of multiple transactions (Multixact) from the frozen XID to the current one"
pg_postmaster:
query: |
SELECT EXTRACT(EPOCH FROM pg_postmaster_start_time) AS start_time
FROM pg_catalog.pg_postmaster_start_time()
metrics:
- start_time:
usage: "GAUGE"
description: "Time at which postgres started (based on epoch)"
pg_replication:
query: "SELECT CASE WHEN NOT pg_catalog.pg_is_in_recovery()
THEN 0
ELSE GREATEST (0,
EXTRACT(EPOCH FROM (now() - pg_catalog.pg_last_xact_replay_timestamp())))
END AS lag,
pg_catalog.pg_is_in_recovery() AS in_recovery,
EXISTS (TABLE pg_stat_wal_receiver) AS is_wal_receiver_up,
(SELECT count(*) FROM pg_stat_replication) AS streaming_replicas"
metrics:
- lag:
usage: "GAUGE"
description: "Replication lag behind primary in seconds"
- in_recovery:
usage: "GAUGE"
description: "Whether the instance is in recovery"
- is_wal_receiver_up:
usage: "GAUGE"
description: "Whether the instance wal_receiver is up"
- streaming_replicas:
usage: "GAUGE"
description: "Number of streaming replicas connected to the instance"
pg_replication_slots:
query: |
SELECT slot_name,
slot_type,
database,
active,
pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), restart_lsn)
FROM pg_catalog.pg_replication_slots
WHERE NOT temporary
metrics:
- slot_name:
usage: "LABEL"
description: "Name of the replication slot"
- slot_type:
usage: "LABEL"
description: "Type of the replication slot"
- database:
usage: "LABEL"
description: "Name of the database"
- active:
usage: "GAUGE"
description: "Flag indicating whether the slot is active"
- pg_wal_lsn_diff:
usage: "GAUGE"
description: "Replication lag in bytes"
pg_stat_archiver:
query: |
SELECT archived_count
, failed_count
, COALESCE(EXTRACT(EPOCH FROM (now() - last_archived_time)), -1) AS seconds_since_last_archival
, COALESCE(EXTRACT(EPOCH FROM (now() - last_failed_time)), -1) AS seconds_since_last_failure
, COALESCE(EXTRACT(EPOCH FROM last_archived_time), -1) AS last_archived_time
, COALESCE(EXTRACT(EPOCH FROM last_failed_time), -1) AS last_failed_time
, COALESCE(CAST(CAST('x'||pg_catalog.right(pg_catalog.split_part(last_archived_wal, '.', 1), 16) AS pg_catalog.bit(64)) AS pg_catalog.int8), -1) AS last_archived_wal_start_lsn
, COALESCE(CAST(CAST('x'||pg_catalog.right(pg_catalog.split_part(last_failed_wal, '.', 1), 16) AS pg_catalog.bit(64)) AS pg_catalog.int8), -1) AS last_failed_wal_start_lsn
, EXTRACT(EPOCH FROM stats_reset) AS stats_reset_time
FROM pg_catalog.pg_stat_archiver
metrics:
- archived_count:
usage: "COUNTER"
description: "Number of WAL files that have been successfully archived"
- failed_count:
usage: "COUNTER"
description: "Number of failed attempts for archiving WAL files"
- seconds_since_last_archival:
usage: "GAUGE"
description: "Seconds since the last successful archival operation"
- seconds_since_last_failure:
usage: "GAUGE"
description: "Seconds since the last failed archival operation"
- last_archived_time:
usage: "GAUGE"
description: "Epoch of the last time WAL archiving succeeded"
- last_failed_time:
usage: "GAUGE"
description: "Epoch of the last time WAL archiving failed"
- last_archived_wal_start_lsn:
usage: "GAUGE"
description: "Archived WAL start LSN"
- last_failed_wal_start_lsn:
usage: "GAUGE"
description: "Last failed WAL LSN"
- stats_reset_time:
usage: "GAUGE"
description: "Time at which these statistics were last reset"
pg_stat_bgwriter:
query: |
SELECT checkpoints_timed
, checkpoints_req
, checkpoint_write_time
, checkpoint_sync_time
, buffers_checkpoint
, buffers_clean
, maxwritten_clean
, buffers_backend
, buffers_backend_fsync
, buffers_alloc
FROM pg_catalog.pg_stat_bgwriter
metrics:
- checkpoints_timed:
usage: "COUNTER"
description: "Number of scheduled checkpoints that have been performed"
- checkpoints_req:
usage: "COUNTER"
description: "Number of requested checkpoints that have been performed"
- checkpoint_write_time:
usage: "COUNTER"
description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds"
- checkpoint_sync_time:
usage: "COUNTER"
description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds"
- buffers_checkpoint:
usage: "COUNTER"
description: "Number of buffers written during checkpoints"
- buffers_clean:
usage: "COUNTER"
description: "Number of buffers written by the background writer"
- maxwritten_clean:
usage: "COUNTER"
description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers"
- buffers_backend:
usage: "COUNTER"
description: "Number of buffers written directly by a backend"
- buffers_backend_fsync:
usage: "COUNTER"
description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)"
- buffers_alloc:
usage: "COUNTER"
description: "Number of buffers allocated"
pg_stat_database:
query: |
SELECT datname
, xact_commit
, xact_rollback
, blks_read
, blks_hit
, tup_returned
, tup_fetched
, tup_inserted
, tup_updated
, tup_deleted
, conflicts
, temp_files
, temp_bytes
, deadlocks
, blk_read_time
, blk_write_time
FROM pg_catalog.pg_stat_database
metrics:
- datname:
usage: "LABEL"
description: "Name of this database"
- xact_commit:
usage: "COUNTER"
description: "Number of transactions in this database that have been committed"
- xact_rollback:
usage: "COUNTER"
description: "Number of transactions in this database that have been rolled back"
- blks_read:
usage: "COUNTER"
description: "Number of disk blocks read in this database"
- blks_hit:
usage: "COUNTER"
description: "Number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the PostgreSQL buffer cache, not the operating system's file system cache)"
- tup_returned:
usage: "COUNTER"
description: "Number of rows returned by queries in this database"
- tup_fetched:
usage: "COUNTER"
description: "Number of rows fetched by queries in this database"
- tup_inserted:
usage: "COUNTER"
description: "Number of rows inserted by queries in this database"
- tup_updated:
usage: "COUNTER"
description: "Number of rows updated by queries in this database"
- tup_deleted:
usage: "COUNTER"
description: "Number of rows deleted by queries in this database"
- conflicts:
usage: "COUNTER"
description: "Number of queries canceled due to conflicts with recovery in this database"
- temp_files:
usage: "COUNTER"
description: "Number of temporary files created by queries in this database"
- temp_bytes:
usage: "COUNTER"
description: "Total amount of data written to temporary files by queries in this database"
- deadlocks:
usage: "COUNTER"
description: "Number of deadlocks detected in this database"
- blk_read_time:
usage: "COUNTER"
description: "Time spent reading data file blocks by backends in this database, in milliseconds"
- blk_write_time:
usage: "COUNTER"
description: "Time spent writing data file blocks by backends in this database, in milliseconds"
pg_stat_replication:
primary: true
query: |
SELECT usename
, COALESCE(application_name, '') AS application_name
, COALESCE(client_addr::text, '') AS client_addr
, EXTRACT(EPOCH FROM backend_start) AS backend_start
, COALESCE(pg_catalog.age(backend_xmin), 0) AS backend_xmin_age
, pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), sent_lsn) AS sent_diff_bytes
, pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), write_lsn) AS write_diff_bytes
, pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), flush_lsn) AS flush_diff_bytes
, COALESCE(pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), replay_lsn),0) AS replay_diff_bytes
, COALESCE((EXTRACT(EPOCH FROM write_lag)),0)::float AS write_lag_seconds
, COALESCE((EXTRACT(EPOCH FROM flush_lag)),0)::float AS flush_lag_seconds
, COALESCE((EXTRACT(EPOCH FROM replay_lag)),0)::float AS replay_lag_seconds
FROM pg_catalog.pg_stat_replication
metrics:
- usename:
usage: "LABEL"
description: "Name of the replication user"
- application_name:
usage: "LABEL"
description: "Name of the application"
- client_addr:
usage: "LABEL"
description: "Client IP address"
- backend_start:
usage: "COUNTER"
description: "Time when this process was started"
- backend_xmin_age:
usage: "COUNTER"
description: "The age of this standby's xmin horizon"
- sent_diff_bytes:
usage: "GAUGE"
description: "Difference in bytes from the last write-ahead log location sent on this connection"
- write_diff_bytes:
usage: "GAUGE"
description: "Difference in bytes from the last write-ahead log location written to disk by this standby server"
- flush_diff_bytes:
usage: "GAUGE"
description: "Difference in bytes from the last write-ahead log location flushed to disk by this standby server"
- replay_diff_bytes:
usage: "GAUGE"
description: "Difference in bytes from the last write-ahead log location replayed into the database on this standby server"
- write_lag_seconds:
usage: "GAUGE"
description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it"
- flush_lag_seconds:
usage: "GAUGE"
description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it"
- replay_lag_seconds:
usage: "GAUGE"
description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it"
pg_settings:
query: |
SELECT name,
CASE setting WHEN 'on' THEN '1' WHEN 'off' THEN '0' ELSE setting END AS setting
FROM pg_catalog.pg_settings
WHERE vartype IN ('integer', 'real', 'bool')
ORDER BY 1
metrics:
- name:
usage: "LABEL"
description: "Name of the setting"
- setting:
usage: "GAUGE"
description: "Setting value"