prom 2/2. Add prometheus output.

(also part 2/2 for the presentPower bugfix)
This commit is contained in:
rasmus 2020-12-23 06:01:21 +02:00
parent 896dced5eb
commit fd7c4d4458
4 changed files with 81 additions and 66 deletions

3
.gitignore vendored
View File

@ -1 +1,2 @@
__pycache__
__pycache__
scraperMain.out

View File

@ -24,6 +24,10 @@ app = Flask(__name__)
@app.route('/', methods=['GET'])
def parse_request(): # If somebody accesses us
data = scraperMain.scraperMain(hostname, enc, sshkeypath) # Gather up, we're going to wait a few minutes on the data.
#with open("prom_servers.out", "a") as f: # DO NOT UNCOMMENT IN PROD
# print(data, file=f)
response_list = prom_servers.prom_servers(PREFIX, data[0], data[1]) # Convert our python lists to prometheus format whatever.
response = '\n'.join(response_list) # Oh you still don't want a list? Fine, newlines!
return response # The pizza is already cold.
return '<pre>' + response + '</pre>' # The pizza is already cold.

View File

@ -94,7 +94,8 @@ def prom_servers(PREFIX, baysInUseCount, servers):
prom_disp_servers = prom_disp_servers + prom_header('Number of errors or warnings encountered during the gathering of data see logs of promServers in blade-ssh-scraper', [PREFIX + 'errors ' + str(errors)], 'counter')
#prom_disp_servers = '\n'.join(prom_disp_servers)
breakpoint()
#breakpoint()
return(prom_disp_servers)
# Test data

View File

@ -12,28 +12,11 @@ import listServers, relativeUsage
#logging.basicConfig()
#logging.getLogger("paramiko").setLevel(logging.DEBUG)
"""
ENV VARIABLES AVAILABLE:
- hostname
- sshkeylocation
- sshuser (optional)
"""
# Creds
hostname = str(os.environ['hostname'])
sshuser = str(os.getenv('sshuser', 'Administrator'))
sshkeylocation = os.getenv('sshkeypath')
paramiko.transport.Transport._preferred_keys += ('ssh-dss',) # Allow the insecurities
ssh = paramiko.SSHClient() # Alias
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # Ignore host key REMOVE ME (though it doesn't kind of matter anyways)
ssh.connect(hostname, username=sshuser, key_filename=sshkeylocation) # Attempt to connect
channel = ssh.invoke_shell() # Get a new shell(, and keep it open, since we need to exec multiple commands depending on the last command being executed successfully.)
def ssh_runcmd(command): # Some abstraction to run ssh commands.
# Input validation
if not type(command) is str:
raise TypeError("ssh_runcmd: command must be a string, '" + command + "' is not.")
#TODO: validate channel, though not really
channel.send(command + '\n') # Execute command.
@ -66,62 +49,88 @@ def ssh_runcmd(command): # Some abstraction to run ssh commands.
#cmdout = '\n'.join(cmdout) # DO NOT UNCOMMENT THIS, HERE FOR ONLY REFERENCE TO JOIN THE LINES BACK.
return(cmdout) # Return list of output stuff.
### MAIN ###
logging.debug(ssh_runcmd('show date')) # Get rid of motd, init for next cmds. This is better than indefinitely reading buffer before any command as to counter this.
def scraperMain(hostname, enc, sshkeypath):
global channel # needed for ssh_runcmd
# Input validation
if not type(hostname) is str:
raise TypeError("scraperMain: hostname must be a string, '" + str(hostname) + "' is not.")
if not type(enc) is str:
raise TypeError("scraperMain: enc must be a string, '" + str(enc) + "' is not.")
if not type(sshkeypath) is str:
raise TypeError("scraperMain: sshkeypath must be a string, '" + str(sshkeypath) + "' is not.")
## Get list of blades with some added info ##
serverName = ssh_runcmd('show server names') # Testing data: serverName = ['Bay Server Name Serial Number Status Power UID Partner\r', '--- ------------------------------------------------- --------------- -------- ------- --- -------\r', ' 1 tty-lab-1 OK On Off \r', ' 2 tty-lab-2 CZ320263P9 OK On Off \r', ' 3 tty-lab-3 CZJ14410KP Failed On Off \r', ' 4 kspve1 CZJ14410KK OK On Off \r', ' 5 kspve2-2 OK On Off \r', ' 6 kspve3 OK On Off \r', ' 7 plaes-blade OK On Off \r', ' 8 Ringly-01 CZ3402Y48C OK On Off \r', ' 9 toomas-lepik CZ3217FNYE OK On Off \r', ' 10 toomas-lepik2 CZ3217FFSS OK On Off \r', ' 11 [Absent] \r', ' 12 erki-naumanis OK On Off \r', ' 13 [Absent] \r', ' 14 [Absent] \r', ' 15 [Absent] \r', ' 16 [Absent] \r']
servers = listServers.listServers(serverName)
# ssh_init() # it takes more lines to put it in a function than to dump it here.
sshuser = 'Administrator' # Admin is hardcoded in to the enc.
baysInUse = [x[0] for x in servers] # List of blades in use.
baysInUseCount = len(baysInUse) # How many bays in use.
logging.info("There are " + str(baysInUseCount+1) + "servers presenet.") # Further optimizations could be made by not connecting to servers, what are turned off.
paramiko.transport.Transport._preferred_keys += ('ssh-dss',) # Allow the insecurities
ssh = paramiko.SSHClient() # Alias
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # Ignore host key REMOVE ME (though it doesn't kind of matter anyways)
ssh.connect(hostname, username=sshuser, key_filename=sshkeypath) # Attempt to connect
channel = ssh.invoke_shell() # Get a new shell(, and keep it open, since we need to exec multiple commands depending on the last command being executed successfully.)
## Get blade data ##
for n in range(baysInUseCount):
bay = servers[n][0] # We want the bay, not how many times we have looped over.
logging.info("Accessing server " + str(bay))
logging.debug( ssh_runcmd('connect server ' + str(bay)) ) # Use the enc as a jump host.
powerInfoTmp = ssh_runcmd('show system1/oemhp_power1') # Get the data
presentPower = [i for i in powerInfoTmp if i.startswith(' oemhp_PresentPower=')][0][23:-7] # Get the line with PresentPower, then remove first 23, and last 7 chars to end up with the Watts DC the blade is directly using.
if presentPower == 'Not Ava': # When the server is powered off, it's not zero (LIKE LITERALLY EVERYWHERE ELSE). It is NoT aVaILAbLe. 'Not Ava' because the previous command takes last 7 chars (normally 'Watts') off of it. Ah my fault for not checking all possible input from hpe.
presentPower = 0
elif not presentPower.isdigit(): # Just in case some other anomalities come up in the future, like negative power draw or something…
warn('presentPower for bay ' + bay + 'was corrected to 0 from it\'s abnormal state: ' + presentPower)
presentPower = 0
servers[n].append(int(presentPower)) # And push it to our miniDB of the servers list.
logging.info("UsageRawDC: " + presentPower)
## MAIN ##
logging.debug(ssh_runcmd('show date')) # Get rid of motd, init for next cmds. This is better than indefinitely reading buffer before any command as to counter this. Reading buffer before each command isn't a good idea and doesn't work either, trust me.
logging.debug(ssh_runcmd('exit')) # Exit the blade's iLO, return to the enc's iLO SSH interface.
## Get list of blades with some added info ##
serverName = ssh_runcmd('show server names') # Testing data: serverName = ['Bay Server Name Serial Number Status Power UID Partner\r', '--- ------------------------------------------------- --------------- -------- ------- --- -------\r', ' 1 tty-lab-1 OK On Off \r', ' 2 tty-lab-2 CZ320263P9 OK On Off \r', ' 3 tty-lab-3 CZJ14410KP Failed On Off \r', ' 4 kspve1 CZJ14410KK OK On Off \r', ' 5 kspve2-2 OK On Off \r', ' 6 kspve3 OK On Off \r', ' 7 plaes-blade OK On Off \r', ' 8 Ringly-01 CZ3402Y48C OK On Off \r', ' 9 toomas-lepik CZ3217FNYE OK On Off \r', ' 10 toomas-lepik2 CZ3217FFSS OK On Off \r', ' 11 [Absent] \r', ' 12 erki-naumanis OK On Off \r', ' 13 [Absent] \r', ' 14 [Absent] \r', ' 15 [Absent] \r', ' 16 [Absent] \r']
servers = listServers.listServers(serverName)
baysInUse = [x[0] for x in servers] # List of blades in use.
baysInUseCount = len(baysInUse) # How many bays in use.
logging.info("There are " + str(baysInUseCount+1) + "servers presenet.") # Further optimizations could be made by not connecting to servers, what are turned off.
## Get blade data ##
for n in range(baysInUseCount):
bay = servers[n][0] # We want the bay, not how many times we have looped over.
logging.info("Accessing server " + str(bay))
logging.debug( ssh_runcmd('connect server ' + str(bay)) ) # Use the enc as a jump host.
powerInfoTmp = ssh_runcmd('show system1/oemhp_power1') # Get the data
presentPower = [i for i in powerInfoTmp if i.startswith(' oemhp_PresentPower=')][0][23:-7] # Get the line with PresentPower, then remove first 23, and last 7 chars to end up with the Watts DC the blade is directly using.
if presentPower == 'Not Ava': # When the server is powered off, it's not zero (LIKE LITERALLY EVERYWHERE ELSE). It is NoT aVaILAbLe. 'Not Ava' because the previous command takes last 7 chars (normally 'Watts') off of it. Ah my fault for not checking all possible input from hpe.
presentPower = 0
elif not presentPower.isdigit(): # Just in case some other anomalities come up in the future, like negative power draw or something…
warn('presentPower for bay ' + bay + 'was corrected to 0 from it\'s abnormal state: ' + presentPower)
presentPower = 0
servers[n].append(int(presentPower)) # And push it to our miniDB of the servers list.
logging.info("UsageRawDC: " + str(presentPower))
logging.debug(ssh_runcmd('exit')) # Exit the blade's iLO, return to the enc's iLO SSH interface.
#breakpoint()
#print(servers)
#breakpoint()
#print(servers)
#breakpoint()
# Fields: [BayNumber, "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC]
# Fields: [BayNumber, "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC]
## Get enc's _AC_ usage. ##
encPowerUsageAC = int(str.strip( [i for i in ssh_runcmd('show power') if i.startswith('Present Power:')][0][14:-9] ))
logging.info("Server total usage AC: " + str(encPowerUsageAC))
## Get enc's _AC_ usage. ##
encPowerUsageAC = int(str.strip( [i for i in ssh_runcmd('show power') if i.startswith('Present Power:')][0][14:-9] ))
logging.info("Server total usage AC: " + str(encPowerUsageAC))
## End session with enc. ##
channel.send('exit' + '\n') # Using lower level cmd since we're not going to get a prompt back here.
ssh.close()
## End session with enc. ##
channel.send('exit' + '\n') # Using lower level cmd since we're not going to get a prompt back here.
ssh.close()
## Calculating blade usage by percentage. ##
# Keep in mind the querying of the data took a while, a minute or so.
serversAverageACAndServers = relativeUsage.relativeUsage(encPowerUsageAC, baysInUseCount, servers)
serversAverageAC = serversAverageACAndServers[0]
servers = serversAverageACAndServers[1]
## Calculating blade usage by percentage. ##
# Keep in mind the querying of the data took a while, a minute or so.
serversAverageACAndServers = relativeUsage.relativeUsage(encPowerUsageAC, baysInUseCount, servers)
#serversAverageAC = serversAverageACAndServers[0] # I guess you don't want any stonks by me, but prom instead…
servers = serversAverageACAndServers[1]
# Fields: [BayNumber, "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC]
# Fields: [BayNumber, "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC]
for n in range(baysInUseCount):
servers[n].append( enc ) # Add what enc was used.
# Fields: [BayNumber, "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC, "enc"]
return(baysInUseCount, servers) # we could return more, but nobody wants our data :/
print("Enclosure usage AC: " + str(encPowerUsageAC))
print("Average blade relative usage AC: " + str(serversAverageAC))
print('[BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC]')
print(servers)
#print("Enclosure usage AC: " + str(encPowerUsageAC))
#print("Average blade relative usage AC: " + str(serversAverageAC))
#print('[BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC, "enc"]')
#print(servers)