tux is on call

1 overview.tux 2 alerts.log 3 pods.yaml buffers: 3

┤ latency p99 (ms) — OK ├

10s ago   now   42ms

┤ cluster ├

availability 100.000% · pods ██████████ 42/42 · pages resolved 0 · coffee ██████████ 100%

┤ journal ├

┤ pods.yaml — READ-ONLY ├

# Pod list document: `items` holds one mapping per pod.
apiVersion: v1
kind: PodList
metadata:
  # resourceVersion is a lie kubectl tells itself
  # Quoted so the value stays a string instead of being read as the integer 1337.
  resourceVersion: "1337"
items:
  # Pod tux-prod-7f9: reported as Running, but its last recorded termination
  # was an OOM kill and it has restarted 137 times.
  - metadata:
      name: tux-prod-7f9
      namespace: prod
      labels:
        app: tux
        mood: fine-probably
    spec:
      containers:
        - name: tux
          image: tux:v1.4.2   # DO NOT roll back. (we always roll back.)
          resources:
            # NOTE(review): `coffee` and `patience` are not built-in resource
            # names; presumably `100m` is Kubernetes milli-unit notation (0.1) —
            # confirm against whatever consumes this file.
            requests:
              coffee: 100m
              patience: 0
    # NOTE(review): in the Kubernetes Pod API, restartCount and lastState are
    # per-container fields under status.containerStatuses[]; here they sit
    # directly under status — confirm the reader expects this flattened shape.
    status:
      phase: Running
      # ^ aspirational. see restartCount.
      restartCount: 137        # same number as the exit code. no, not a coincidence.
      lastState:
        terminated:
          reason: OOMKilled    # it dreamed too big
          # 137 is conventionally 128 + 9 (SIGKILL), consistent with OOMKilled.
          exitCode: 137

  # Pod tux-prod-2b1: Running with zero restarts.
  - metadata:
      name: tux-prod-2b1
      namespace: prod
    status:
      phase: Running
      restartCount: 0
      # NOTE(review): `note` is not a standard pod status field — presumably
      # free-form text for human readers; confirm nothing parses it.
      note: "the reliable one. nobody thanks it."

  # Pod tux-canary-9k2: crash-looping canary whose Ready condition is "False".
  - metadata:
      name: tux-canary-9k2
      namespace: prod
      annotations:
        deployed-by: "intern"
        deployed-on: "friday"   # brave. foolish. brave.
    status:
      # NOTE(review): in the Kubernetes API, CrashLoopBackOff is a container
      # waiting reason; pod phases are Pending/Running/Succeeded/Failed/Unknown.
      # Presumably this mirrors kubectl's STATUS column — confirm.
      phase: CrashLoopBackOff
      restartCount: 9001
      conditions:
        - type: Ready
          # Quoted so the value stays the string "False" rather than a YAML boolean.
          status: "False"   # narrator: it was not ready

  # Pod coffee-machine-sidecar: Running, zero restarts, criticality set to maximum.
  - metadata:
      name: coffee-machine-sidecar
      namespace: prod
      # not in the architecture diagram. load-bearing anyway.
    status:
      phase: Running
      restartCount: 0
      # NOTE(review): `criticality` is not a standard pod status field; confirm
      # what, if anything, reads it.
      criticality: maximum

  # Pod dns-resolver-4x1: Running, with three restarts recorded.
  - metadata:
      name: dns-resolver-4x1
      namespace: prod
    status:
      phase: Running
      # it's fine. it's always fine. until it's DNS.
      restartCount: 3

# kubectl get pods -o yaml
# do not edit. regenerates itself from the cluster's trauma every 30s.

NORMAL pods.yaml utf-8 yaml 00:00

:checkhealth prod