blades-ssh-scraper/prom_servers.py

121 lines
6.6 KiB
Python

from datetime import datetime
from warnings import warn
def prom_header(desc, item, promtype):
    """Prepend Prometheus ``# HELP`` and ``# TYPE`` lines to a list of samples.

    The metric name is derived from the first sample in ``item``: it is
    everything before the first ``{`` (e.g. ``bladetest_blade_bladeonly_dc``).
    Deriving it here, instead of taking it as an argument, means the name
    cannot drift out of sync with the samples.

    Args:
        desc: Human-readable help text for the metric.
        item: List of sample lines belonging to one metric. It is modified
            IN PLACE: the two header lines are inserted at the front.
        promtype: Metric type, either ``"gauge"`` or ``"counter"``.

    Returns:
        The same ``item`` list object, now starting with the ``# HELP`` and
        ``# TYPE`` lines. An empty ``item`` is returned unchanged, because
        without a sample there is no metric name to build a header from.

    Raises:
        TypeError: If ``desc`` or ``promtype`` is not a string, or ``item`` is
            not a list.
        ValueError: If ``promtype`` is not a supported type. ``item`` is left
            untouched in that case.
    """
    # Input validation
    if not isinstance(desc, str):
        raise TypeError("promify_item: desc must be a string, '" + str(desc) + "' is not.")
    if not isinstance(item, list):
        raise TypeError("promify_item: item must be a list, '" + str(item) + "' is not.")
    if not isinstance(promtype, str):
        raise TypeError("promify_item: promtype must be a string, '" + str(promtype) + "' is not.")
    # Reject unsupported types BEFORE touching the list, so a failed call
    # never leaves the caller's list with a stray HELP line in it.
    if promtype not in ("gauge", "counter"):
        raise ValueError("promify_item: " + promtype + " promtype is either not implemented as a type, or invalid.")
    if not item:
        # No samples: nothing to name, nothing to describe.
        return item

    name = item[0].split('{')[0]
    # Slice assignment inserts both header lines at once, HELP first.
    item[:0] = [
        '# HELP ' + name + ' ' + desc,
        '# TYPE ' + name + ' ' + promtype,
    ]
    return item
# Numeric encodings of the textual states found in a blade row.
# Any state not listed here is exported as NaN and counted as an error.
_HEALTH_VALUES = {'OK': 1, 'Failed': 0}
# '*' = UID is blinking and a critical operation is being performed on the
# blade (firmware update in progress or remote console in use).
_UID_VALUES = {'On': 1, 'Off': 0, '*': 2}
_POWER_VALUES = {'On': 1, 'Off': 0}


def _escape_label_value(value):
    """Escape backslash, double quote and newline for use inside a label value."""
    return str(value).replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')


def _state_value(raw, known):
    """Return ``(value, recognised)``: the float for ``raw``, or NaN if unknown."""
    if raw in known:
        return float(known[raw]), True
    return float('nan'), False


def prom_servers(git_hash, PREFIX, baysInUseCount, servers):
    """Render blade data as Prometheus text-format lines.

    Each row of ``servers`` is indexed as follows:
    ``[0]`` bay number, ``[1]`` blade name, ``[2]`` serial number,
    ``[3]`` health (``'OK'`` / ``'Failed'``), ``[4]`` power (``'On'`` / ``'Off'``),
    ``[5]`` UID (``'On'`` / ``'Off'`` / ``'*'``), ``[6]`` blade-only DC watts,
    ``[7]`` relative AC watts, ``[8]`` enclosure name.

    Args:
        git_hash: Version identifier exported in the ``info`` metric.
        PREFIX: String prepended to every metric name.
        baysInUseCount: Number of leading rows of ``servers`` to export.
        servers: List of blade rows, laid out as described above.

    Returns:
        A list of strings, one per output line (``# HELP`` / ``# TYPE`` headers
        and samples). Per-blade metric families are omitted entirely when no
        blade was processed; the error counters and the ``info`` metric are
        always present.

    Raises:
        TypeError: If an argument has the wrong type.
        IndexError: If ``baysInUseCount`` exceeds ``len(servers)`` or a row has
            fewer than nine fields.

    Warns:
        UserWarning: For a blade without a serial number, or with an
            unrecognised health, UID or power state. Each warning, and each
            'Failed' health state, also increments ``errors_total``.
    """
    # Input validation
    if not isinstance(git_hash, str):
        raise TypeError("parseServers: git_hash must be a string, '" + str(git_hash) + "' is not.")
    if not isinstance(PREFIX, str):
        raise TypeError("parseServers: PREFIX must be a string, '" + str(PREFIX) + "' is not.")
    # bool is a subclass of int but is never a meaningful bay count.
    if not isinstance(baysInUseCount, int) or isinstance(baysInUseCount, bool):
        raise TypeError("parseServers: baysInUseCount must be an integer, '" + str(baysInUseCount) + "' is not.")
    if not isinstance(servers, list):
        raise TypeError("parseServers: servers must be a list, '" + str(servers) + "' is not.")

    rel_ac, blade_dc, health_lines, uid_lines, power_lines = [], [], [], [], []
    errors_health = 0  # blades reporting 'Failed'
    errors = 0  # every failure or warning, including the ones above

    for n in range(baysInUseCount):
        row = servers[n]
        bay, name, serial = row[0], row[1], row[2]
        status, power_raw, uid_raw = row[3], row[4], row[5]
        dc_watts, ac_watts, enc = row[6], row[7], row[8]
        # Shared tail of every warning about this blade.
        where = "'" + name + "' in enc " + enc + " bay " + str(bay) + "."

        if serial == '':  # If no serial
            warn("no serial on blade " + where)
            errors += 1

        # Label values are escaped so a quote or backslash in a blade name
        # cannot produce an unparseable sample line.
        labels = '{{enc="{}",bay="{}",name="{}",blade_serial="{}"}} '.format(
            _escape_label_value(enc),
            _escape_label_value(bay),
            _escape_label_value(name),
            _escape_label_value(serial),
        )

        rel_ac.append(PREFIX + 'blade_relative_ac_watts' + labels + str(float(ac_watts)))
        blade_dc.append(PREFIX + 'blade_bladeonly_dc_watts' + labels + str(float(dc_watts)))

        health, recognised = _state_value(status, _HEALTH_VALUES)
        if not recognised:
            warn("unknown health on blade " + where)
            errors += 1
        elif status == 'Failed':
            errors_health += 1
            errors += 1
        health_lines.append(PREFIX + 'blade_health_state' + labels + str(health))

        uid, recognised = _state_value(uid_raw, _UID_VALUES)
        if not recognised:
            warn("unknown UID status for blade " + where)
            errors += 1
        uid_lines.append(PREFIX + 'blade_uid_state' + labels + str(uid))

        power, recognised = _state_value(power_raw, _POWER_VALUES)
        if not recognised:
            warn("unknown power state for blade " + where)
            errors += 1
        power_lines.append(PREFIX + 'blade_power_state' + labels + str(power))

    # Add the different metric families together, in a fixed order.
    per_blade_families = (
        ('Relative usage of whole enc AC the blade is consuming in watts', rel_ac),
        ('Actual DC in watts what the blade uses directly', blade_dc),
        ('Blade health 1 for ok 0 for nok NaN for err', health_lines),
        ('Blade UID blinkyboy 1 for blinky blinky 0 for no blinky blinky 2 if critical fw update or remote console in progress NaN for err', uid_lines),
        ('Blade power 1 for on and everything in between 0 for off NaN for err', power_lines),
    )
    prom_disp_servers = []
    for desc, samples in per_blade_families:
        # prom_header derives the metric name from the first sample, so a
        # family without samples (no blades processed) is skipped.
        if samples:
            prom_disp_servers += prom_header(desc, samples, 'gauge')

    # Health failures get their own counter; it must not repeat errors_total.
    errors_health_prom = [PREFIX + 'errors_health_total{} ' + str(float(errors_health))]
    prom_disp_servers += prom_header('Number of blades with bad health.', errors_health_prom, 'counter')

    errors_prom = [PREFIX + 'errors_total{} ' + str(float(errors))]
    prom_disp_servers += prom_header('Number of errors or warnings encountered during the gathering of data see logs of promServers in blade-ssh-scraper', errors_prom, 'counter')

    prom_git_hash = [PREFIX + 'info{git_hash="' + _escape_label_value(git_hash) + '"} 1.0']
    prom_disp_servers += prom_header('Git hash of HEAD on current branch, version', prom_git_hash, 'gauge')

    return prom_disp_servers
# Test data
#git_hash = 'fakegithash'
#PREFIX = 'bladetest_'
#baysInUseCount = 11
# 0~ 1~ 2~ 3~ 4~ 5~ 6~ 7~ 8~
# [BayNumber, "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC, "encname"]
#servers = [[1, 'foo-lab-1', '', 'OK', 'On', 'Off', 142, 222, "k-space-blade-02"], [2, 'foo-lab-2', 'CZ302243P9', 'OK', 'On', 'Off', 87, 136, "k-space-blade-02"], [3, 'foo-lab-3', 'CZJI441OKP', 'Failed', 'On', 'Off', 127, 198, "k-space-blade-02"], [4, 'kspve1', 'CZJ18450FK', 'OK', 'On', 'Off', 86, 134, "k-space-blade-02"], [5, 'kspve2-2', '', 'OK', 'On', 'Off', 71, 111, "k-space-blade-02"], [6, 'kspve3', '', 'OK', 'On', 'Off', 80, 125, "k-space-blade-02"], [7, 'foo-blade', '', 'OK', 'On', 'Off', 81, 127, "k-space-blade-02"], [8, 'Bar-01', 'CZ241274CC', 'OK', 'On', 'Off', 126, 197, "k-space-blade-02"], [9, 'baz-sar', 'CZ3217FNYE', 'OK', 'On', 'Off', 129, 202, "k-space-blade-02"], [10, 'baz-sar2', 'CZ3217FFSS', 'OK', 'On', 'Off', 97, 152, "k-space-blade-02"], [12, 'bee-bar', '', 'OK', 'On', 'Off', 86, 134, "k-space-blade-02"]]
#print(prom_servers(git_hash, PREFIX, baysInUseCount, servers))