rmm-scripts/nvgpuload.sh
2024-03-19 12:21:25 -07:00

70 lines
2.1 KiB
Bash

#!/bin/bash
#Zebra - 3/18/2024
#Uses nvidia-smi to query the GPU load of one or more installed NVIDIA GPUs on linux.
# Guard: without the nvidia-smi binary on PATH there is nothing to measure.
# Report that on stdout and finish with status 0.
command -v nvidia-smi >/dev/null 2>&1 || {
  echo "NVIDIA GPU driver not found"
  exit 0
}
# Ask nvidia-smi for the utilisation of every GPU in machine-readable form:
# one bare integer (percent, no unit) per line, one line per GPU.
# Why not scrape the default table: fan-speed percentages share the table
# with the load values, so on hosts with several fan-equipped GPUs the fan
# readings end up mixed into the load list, and the row layout is not stable.
# awk keeps only rows that are a plain integer, so a row where the driver
# cannot report utilisation is dropped instead of being passed on as a load.
gpu_loads=$(
  nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits |
    awk '/^[[:space:]]*[0-9]+[[:space:]]*$/ { print $1 }'
)
# If the command above returns with no results, a GPU must not be installed or detected.
if [[ -z "$gpu_loads" ]]; then
  echo "No GPUs found"
  exit 1
fi
#######################################
# Build the one-line GPU load summary.
# Arguments: one integer load value (without percent sign) per GPU.
# Outputs:   a single GPU is printed as "N%"; several GPUs are printed as
#            "gpu0: N% | gpu1: M% | ..." so every value carries its index
#            and its own percent sign. No arguments prints an empty line.
#######################################
format_gpu_info() {
  local -a loads=("$@")
  local summary=""
  local i
  if (( ${#loads[@]} == 1 )); then
    printf '%s%%\n' "${loads[0]}"
    return 0
  fi
  for i in "${!loads[@]}"; do
    summary+="gpu${i}: ${loads[i]}% | "
  done
  # Remove the trailing " | " left behind by the last iteration.
  printf '%s\n' "${summary% | }"
}
# gpu_loads is a whitespace-separated list of integers; it is left unquoted
# on purpose so that each load becomes its own argument.
# shellcheck disable=SC2086
gpu_info=$(format_gpu_info $gpu_loads)
# Output the concatenated GPU info
echo "$gpu_info"
#Build CURL variables - Specific to TacticalRMM
# The agent id is read from the "agentid" key of the agent's JSON config.
AGENTID=$(jq -r .agentid /etc/tacticalagent)
# NOTE(review): the agent id is appended so the PUT addresses this agent
# (/agents/<agentid>/) - confirm the path against your Tactical RMM instance.
URL="https://your.RMM.api.here/agents/$AGENTID/"
# gpu_info already ends with a percent sign, so it is passed through as-is.
# jq builds the JSON body so the value is escaped correctly.
DATA=$(jq -n --arg LOAD "$gpu_info" '{"custom_fields":[{"field":20,"string_value":$LOAD}]}')
# Make sure to declare an API key in a secure fashion.
if [[ -z "$AGENTID" || "$AGENTID" == "null" ]]; then
  # Without an agent id the request cannot target an agent; skip the upload
  # but keep going so the check itself still reports its result.
  echo "Could not read agentid from /etc/tacticalagent; skipping RMM update" >&2
else
  curl -s -X PUT -H "Content-Type: application/json" -H "X-API-KEY:$API" "$URL" --data "$DATA" >/dev/null
fi
# Check GPU load and exit with appropriate exit code:
#   3 - at least one GPU is at 100% or more
#   2 - at least one GPU is at 90% or more
#   0 - otherwise
# gpu_loads holds bare integers without a percent sign, so the values are
# compared numerically; a text match on "100%" or "90%" could never succeed.
peak_load=0
for load in $gpu_loads; do
  # Force base 10 so a value with a leading zero is not read as octal.
  if (( 10#$load > peak_load )); then
    peak_load=$(( 10#$load ))
  fi
done
if (( peak_load >= 100 )); then
  exit 3
elif (( peak_load >= 90 )); then
  exit 2
else
  exit 0
fi