Environment: MacBook (Intel, macOS)
% uname -a
Darwin xxx 22.5.0 Darwin Kernel Version 22.5.0: Mon Apr 24 20:51:50 PDT 2023; root:xnu-8796.121.2~5/RELEASE_X86_64 x86_64
Cluster: kind (Kubernetes in Docker), node image v1.27
% kubectl version
Client Version: version.Info{Major:"1", Minor:"21", GitVersion:"v1.21.1", GitCommit:"5e58841cce77d4bc13713ad2b91fa0d961e69192", GitTreeState:"clean", BuildDate:"2021-05-12T14:18:45Z", GoVersion:"go1.16.4", Compiler:"gc", Platform:"darwin/amd64"}
Server Version: version.Info{Major:"1", Minor:"27", GitVersion:"v1.27.1", GitCommit:"4c9411232e10168d7b050c49a1b59f6df9d7ea4b", GitTreeState:"clean", BuildDate:"2023-05-12T19:03:40Z", GoVersion:"go1.20.3", Compiler:"gc", Platform:"linux/amd64"}
% kubectl logs -n kepler daemonset/kepler-exporter
I0614 13:41:10.014533 1 gpu.go:46] Failed to init nvml, err: could not init nvml: error opening libnvidia-ml.so.1: libnvidia-ml.so.1: cannot open shared object file: No such file or directory
I0614 13:41:10.034400 1 exporter.go:148] Kepler running on version: 221cb2a
I0614 13:41:10.034429 1 config.go:172] using gCgroup ID in the BPF program: true
I0614 13:41:10.034471 1 config.go:174] kernel version: 5.1
I0614 13:41:10.034488 1 exporter.go:161] EnabledBPFBatchDelete: true
I0614 13:41:10.035100 1 power.go:77] Not able to obtain power, use estimate method
I0614 13:41:10.652596 1 exporter.go:174] Initializing the GPU collector
modprobe: FATAL: Module kheaders not found in directory /lib/modules/5.10.25-linuxkit
chdir(/lib/modules/5.10.25-linuxkit/build): No such file or directory
I0614 13:41:10.671321 1 bcc_attacher.go:73] failed to attach the bpf program: <nil>
I0614 13:41:10.671356 1 bcc_attacher.go:142] failed to attach perf module with options [-DMAP_SIZE=10240 -DNUM_CPUS=8]: failed to attach the bpf program: <nil>, not able to load eBPF modules
I0614 13:41:10.671373 1 exporter.go:191] failed to start : failed to attach bpf assets: failed to attach the bpf program: <nil>
I0614 13:41:10.671514 1 exporter.go:218] Started Kepler in 637.129155ms
% kubectl logs -n kepler daemonset/kepler-exporter -f
I0614 13:47:09.269344 1 gpu.go:46] Failed to init nvml, err: could not init nvml: error opening libnvidia-ml.so.1: libnvidia-ml.so.1: cannot open shared object file: No such file or directory
I0614 13:47:09.276243 1 exporter.go:149] Kepler running on version: v0.5-46-g8a3cfa3-dirty
I0614 13:47:09.276349 1 config.go:197] using gCgroup ID in the BPF program: true
I0614 13:47:09.276381 1 config.go:199] kernel version: 5.1
I0614 13:47:09.276401 1 config.go:159] kernel source dir is set to /usr/share/kepler/kernel_sources
I0614 13:47:09.276495 1 exporter.go:163] EnabledBPFBatchDelete: true
I0614 13:47:09.277574 1 power.go:77] Not able to obtain power, use estimate method
I0614 13:47:09.734056 1 exporter.go:176] Initializing the GPU collector
modprobe: FATAL: Module kheaders not found in directory /lib/modules/5.10.25-linuxkit
chdir(/lib/modules/5.10.25-linuxkit/build): No such file or directory
I0614 13:47:09.740528 1 bcc_attacher.go:74] failed to attach the bpf program: <nil>
I0614 13:47:09.740739 1 bcc_attacher.go:143] failed to attach perf module with options [-DMAP_SIZE=10240 -DNUM_CPUS=8]: failed to attach the bpf program: <nil>, from default kernel source.
I0614 13:47:09.740886 1 bcc_attacher.go:146] try to load eBPF module with kernel source dir /usr/share/kepler/kernel_sources/4.18.0-477.13.1.el8_8.x86_64
perf_event_open: No such file or directory
I0614 13:47:10.569284 1 bcc_attacher.go:108] failed to attach perf event cpu_cycles_hc_reader: failed to open bpf perf event: no such file or directory
perf_event_open: No such file or directory
I0614 13:47:10.569474 1 bcc_attacher.go:108] failed to attach perf event cpu_ref_cycles_hc_reader: failed to open bpf perf event: no such file or directory
perf_event_open: No such file or directory
I0614 13:47:10.569801 1 bcc_attacher.go:108] failed to attach perf event cpu_instr_hc_reader: failed to open bpf perf event: no such file or directory
perf_event_open: No such file or directory
I0614 13:47:10.570051 1 bcc_attacher.go:108] failed to attach perf event cache_miss_hc_reader: failed to open bpf perf event: no such file or directory
I0614 13:47:10.570137 1 bcc_attacher.go:152] Successfully load eBPF module with option: [-DMAP_SIZE=10240 -DNUM_CPUS=8] from kernel source "/usr/share/kepler/kernel_sources/4.18.0-477.13.1.el8_8.x86_64"
I0614 13:47:10.570222 1 bcc_attacher.go:171] Successfully load eBPF module with option: [-DMAP_SIZE=10240 -DNUM_CPUS=8]
% kubectl describe -n kepler daemonset kepler-exporter
Name: kepler-exporter
Selector: app.kubernetes.io/component=exporter,app.kubernetes.io/name=kepler-exporter,sustainable-computing.io/app=kepler
Node-Selector: <none>
Labels: sustainable-computing.io/app=kepler
Annotations: deprecated.daemonset.template.generation: 2
Desired Number of Nodes Scheduled: 1
Current Number of Nodes Scheduled: 1
Number of Nodes Scheduled with Up-to-date Pods: 1
Number of Nodes Scheduled with Available Pods: 1
Number of Nodes Misscheduled: 0
Pods Status: 1 Running / 0 Waiting / 0 Succeeded / 0 Failed
Pod Template:
Labels: app.kubernetes.io/component=exporter
app.kubernetes.io/name=kepler-exporter
sustainable-computing.io/app=kepler
Service Account: kepler-sa
Containers:
kepler-exporter:
Image: quay.io/sustainable_computing_io/kepler:pr733
Port: 9102/TCP
Host Port: 0/TCP
Command:
/bin/sh
-c
Args:
/usr/bin/kepler -v=1 --kernel-source-dir=/usr/share/kepler/kernel_sources
Requests:
cpu: 100m
memory: 400Mi
Liveness: http-get http://:9102/healthz delay=10s timeout=10s period=60s #success=1 #failure=5
Environment:
NODE_IP: (v1:status.hostIP)
Mounts:
/etc/kepler/kepler.config from cfm (ro)
/lib/modules from lib-modules (rw)
/proc from proc (rw)
/sys from tracing (rw)
Volumes:
lib-modules:
Type: HostPath (bare host directory volume)
Path: /lib/modules
HostPathType: Directory
tracing:
Type: HostPath (bare host directory volume)
Path: /sys
HostPathType: Directory
proc:
Type: HostPath (bare host directory volume)
Path: /proc
HostPathType: Directory
cfm:
Type: ConfigMap (a volume populated by a ConfigMap)
Name: kepler-cfm
Optional: false
% kubectl exec -ti -n kepler daemonset/kepler-exporter -- bash -c "curl localhost:9102/metrics" |grep kepler_container_joules |sort -k 2 -g |tail -10
kepler_container_joules_total{command="",container_id="0a5c450bfd02400d13d982d1feb922fc9f5b2a5f5e29508a42d94095b070029c",container_name="kube-apiserver",container_namespace="kube-system",mode="dynamic",pod_name="kube-apiserver-kind-control-plane"} 328.50800000000004
kepler_container_joules_total{command="",container_id="3ce3263a78f5667edd6013d3b43df12045456c5f3339665fc1039462a3d97434",container_name="kindnet-cni",container_namespace="kube-system",mode="dynamic",pod_name="kindnet-wtfng"} 328.50800000000004
kepler_container_joules_total{command="",container_id="6cbcd04f84d38a5d50e705520a1ebdc4c81441a0d94b3a5224bc4e3ae78a4864",container_name="kube-controller-manager",container_namespace="kube-system",mode="dynamic",pod_name="kube-controller-manager-kind-control-plane"} 328.50800000000004
kepler_container_joules_total{command="",container_id="7ab73310fa1535799f5ddc957bc695c6745a28e96780909972eb7ea0229ff16e",container_name="coredns",container_namespace="kube-system",mode="dynamic",pod_name="coredns-5d78c9869d-9qpnk"} 328.50800000000004
kepler_container_joules_total{command="",container_id="b2ea4fdb1034bb545cd4cc687ae43e340f001bd820996bc59aaf68a9b4a52153",container_name="kepler-exporter",container_namespace="kepler",mode="dynamic",pod_name="kepler-exporter-7xz58"} 328.50800000000004
kepler_container_joules_total{command="",container_id="system_processes",container_name="system_processes",container_namespace="system",mode="dynamic",pod_name="system_processes"} 328.50800000000004
kepler_container_joules_total{command="containerd",container_id="e7e940e1a3879022670295a836dff77af3969e4727a9460b396453ded7ac2b5b",container_name="kube-proxy",container_namespace="kube-system",mode="dynamic",pod_name="kube-proxy-86l4x"} 328.53200000000004
kepler_container_joules_total{command="vpnkit-for",container_id="344aa4e5dfff04b08934f603dae9dbed879e6c0dedde895d0e2453e8f001662b",container_name="local-path-provisioner",container_namespace="local-path-storage",mode="dynamic",pod_name="local-path-provisioner-6bc4bddd6b-kkhnv"} 328.579
kepler_container_joules_total{command="containerd",container_id="b820bc8299ef22538019d5e7959531913cfade2c7989643dfdd066f4f1a75bd5",container_name="coredns",container_namespace="kube-system",mode="dynamic",pod_name="coredns-5d78c9869d-qwpvr"} 328.694
kepler_container_joules_total{command="jbd2/vda1-",container_id="1a16001c73016dd6b6f7f313f882df65e7f6791237dd7b457e6d0caff3a378d8",container_name="kube-scheduler",container_namespace="kube-system",mode="dynamic",pod_name="kube-scheduler-kind-control-plane"} 328.98400000000004