prom 2/2. Add prometheus output.
(also part 2/2 for the presentPower bugfix)
This commit is contained in:
parent
896dced5eb
commit
fd7c4d4458
|
@ -1 +1,2 @@
|
||||||
__pycache__
|
__pycache__
|
||||||
|
scraperMain.out
|
6
prom.py
6
prom.py
|
@ -24,6 +24,10 @@ app = Flask(__name__)
|
||||||
@app.route('/', methods=['GET'])
|
@app.route('/', methods=['GET'])
|
||||||
def parse_request(): # If somebody accesses us
|
def parse_request(): # If somebody accesses us
|
||||||
data = scraperMain.scraperMain(hostname, enc, sshkeypath) # Gather up, we're going to wait a few minutes on the data.
|
data = scraperMain.scraperMain(hostname, enc, sshkeypath) # Gather up, we're going to wait a few minutes on the data.
|
||||||
|
|
||||||
|
#with open("prom_servers.out", "a") as f: # DO NOT UNCOMMENT IN PROD
|
||||||
|
# print(data, file=f)
|
||||||
|
|
||||||
response_list = prom_servers.prom_servers(PREFIX, data[0], data[1]) # Convert our python lists to prometheus format whatever.
|
response_list = prom_servers.prom_servers(PREFIX, data[0], data[1]) # Convert our python lists to prometheus format whatever.
|
||||||
response = '\n'.join(response_list) # Oh you still don't want a list? Fine, newlines!
|
response = '\n'.join(response_list) # Oh you still don't want a list? Fine, newlines!
|
||||||
return response # The pizza is already cold.
|
return '<pre>' + response + '</pre>' # The pizza is already cold.
|
|
@ -94,7 +94,8 @@ def prom_servers(PREFIX, baysInUseCount, servers):
|
||||||
prom_disp_servers = prom_disp_servers + prom_header('Number of errors or warnings encountered during the gathering of data see logs of promServers in blade-ssh-scraper', [PREFIX + 'errors ' + str(errors)], 'counter')
|
prom_disp_servers = prom_disp_servers + prom_header('Number of errors or warnings encountered during the gathering of data see logs of promServers in blade-ssh-scraper', [PREFIX + 'errors ' + str(errors)], 'counter')
|
||||||
|
|
||||||
#prom_disp_servers = '\n'.join(prom_disp_servers)
|
#prom_disp_servers = '\n'.join(prom_disp_servers)
|
||||||
breakpoint()
|
#breakpoint()
|
||||||
|
|
||||||
return(prom_disp_servers)
|
return(prom_disp_servers)
|
||||||
|
|
||||||
# Test data
|
# Test data
|
||||||
|
|
133
scraperMain.py
133
scraperMain.py
|
@ -12,28 +12,11 @@ import listServers, relativeUsage
|
||||||
#logging.basicConfig()
|
#logging.basicConfig()
|
||||||
#logging.getLogger("paramiko").setLevel(logging.DEBUG)
|
#logging.getLogger("paramiko").setLevel(logging.DEBUG)
|
||||||
|
|
||||||
"""
|
|
||||||
ENV VARIABLES AVAILABLE:
|
|
||||||
- hostname
|
|
||||||
- sshkeylocation
|
|
||||||
- sshuser (optional)
|
|
||||||
"""
|
|
||||||
# Creds
|
|
||||||
hostname = str(os.environ['hostname'])
|
|
||||||
sshuser = str(os.getenv('sshuser', 'Administrator'))
|
|
||||||
sshkeylocation = os.getenv('sshkeypath')
|
|
||||||
|
|
||||||
paramiko.transport.Transport._preferred_keys += ('ssh-dss',) # Allow the insecurities
|
|
||||||
ssh = paramiko.SSHClient() # Alias
|
|
||||||
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # Ignore host key REMOVE ME (though it doesn't kind of matter anyways)
|
|
||||||
|
|
||||||
ssh.connect(hostname, username=sshuser, key_filename=sshkeylocation) # Attempt to connect
|
|
||||||
channel = ssh.invoke_shell() # Get a new shell(, and keep it open, since we need to exec multiple commands depending on the last command being executed successfully.)
|
|
||||||
|
|
||||||
def ssh_runcmd(command): # Some abstraction to run ssh commands.
|
def ssh_runcmd(command): # Some abstraction to run ssh commands.
|
||||||
# Input validation
|
# Input validation
|
||||||
if not type(command) is str:
|
if not type(command) is str:
|
||||||
raise TypeError("ssh_runcmd: command must be a string, '" + command + "' is not.")
|
raise TypeError("ssh_runcmd: command must be a string, '" + command + "' is not.")
|
||||||
|
#TODO: validate channel, though not really
|
||||||
|
|
||||||
channel.send(command + '\n') # Execute command.
|
channel.send(command + '\n') # Execute command.
|
||||||
|
|
||||||
|
@ -66,62 +49,88 @@ def ssh_runcmd(command): # Some abstraction to run ssh commands.
|
||||||
#cmdout = '\n'.join(cmdout) # DO NOT UNCOMMENT THIS, HERE FOR ONLY REFERENCE TO JOIN THE LINES BACK.
|
#cmdout = '\n'.join(cmdout) # DO NOT UNCOMMENT THIS, HERE FOR ONLY REFERENCE TO JOIN THE LINES BACK.
|
||||||
return(cmdout) # Return list of output stuff.
|
return(cmdout) # Return list of output stuff.
|
||||||
|
|
||||||
### MAIN ###
|
def scraperMain(hostname, enc, sshkeypath):
|
||||||
|
global channel # needed for ssh_runcmd
|
||||||
logging.debug(ssh_runcmd('show date')) # Get rid of motd, init for next cmds. This is better than indefinitely reading buffer before any command as to counter this.
|
# Input validation
|
||||||
|
if not type(hostname) is str:
|
||||||
|
raise TypeError("scraperMain: hostname must be a string, '" + str(hostname) + "' is not.")
|
||||||
|
if not type(enc) is str:
|
||||||
|
raise TypeError("scraperMain: enc must be a string, '" + str(enc) + "' is not.")
|
||||||
|
if not type(sshkeypath) is str:
|
||||||
|
raise TypeError("scraperMain: sshkeypath must be a string, '" + str(sshkeypath) + "' is not.")
|
||||||
|
|
||||||
|
|
||||||
## Get list of blades with some added info ##
|
# ssh_init() # it takes more lines to put it in a function than to dump it here.
|
||||||
serverName = ssh_runcmd('show server names') # Testing data: serverName = ['Bay Server Name Serial Number Status Power UID Partner\r', '--- ------------------------------------------------- --------------- -------- ------- --- -------\r', ' 1 tty-lab-1 OK On Off \r', ' 2 tty-lab-2 CZ320263P9 OK On Off \r', ' 3 tty-lab-3 CZJ14410KP Failed On Off \r', ' 4 kspve1 CZJ14410KK OK On Off \r', ' 5 kspve2-2 OK On Off \r', ' 6 kspve3 OK On Off \r', ' 7 plaes-blade OK On Off \r', ' 8 Ringly-01 CZ3402Y48C OK On Off \r', ' 9 toomas-lepik CZ3217FNYE OK On Off \r', ' 10 toomas-lepik2 CZ3217FFSS OK On Off \r', ' 11 [Absent] \r', ' 12 erki-naumanis OK On Off \r', ' 13 [Absent] \r', ' 14 [Absent] \r', ' 15 [Absent] \r', ' 16 [Absent] \r']
|
sshuser = 'Administrator' # Admin is hardcoded in to the enc.
|
||||||
servers = listServers.listServers(serverName)
|
|
||||||
|
|
||||||
baysInUse = [x[0] for x in servers] # List of blades in use.
|
paramiko.transport.Transport._preferred_keys += ('ssh-dss',) # Allow the insecurities
|
||||||
baysInUseCount = len(baysInUse) # How many bays in use.
|
ssh = paramiko.SSHClient() # Alias
|
||||||
logging.info("There are " + str(baysInUseCount+1) + "servers presenet.") # Further optimizations could be made by not connecting to servers, what are turned off.
|
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # Ignore host key REMOVE ME (though it doesn't kind of matter anyways)
|
||||||
|
|
||||||
|
ssh.connect(hostname, username=sshuser, key_filename=sshkeypath) # Attempt to connect
|
||||||
|
channel = ssh.invoke_shell() # Get a new shell(, and keep it open, since we need to exec multiple commands depending on the last command being executed successfully.)
|
||||||
|
|
||||||
|
|
||||||
## Get blade data ##
|
## MAIN ##
|
||||||
for n in range(baysInUseCount):
|
logging.debug(ssh_runcmd('show date')) # Get rid of motd, init for next cmds. This is better than indefinitely reading buffer before any command as to counter this. Reading buffer before each command isn't a good idea and doesn't work either, trust me.
|
||||||
bay = servers[n][0] # We want the bay, not how many times we have looped over.
|
|
||||||
logging.info("Accessing server " + str(bay))
|
|
||||||
logging.debug( ssh_runcmd('connect server ' + str(bay)) ) # Use the enc as a jump host.
|
|
||||||
powerInfoTmp = ssh_runcmd('show system1/oemhp_power1') # Get the data
|
|
||||||
|
|
||||||
presentPower = [i for i in powerInfoTmp if i.startswith(' oemhp_PresentPower=')][0][23:-7] # Get the line with PresentPower, then remove first 23, and last 7 chars to end up with the Watts DC the blade is directly using.
|
|
||||||
if presentPower == 'Not Ava': # When the server is powered off, it's not zero (LIKE LITERALLY EVERYWHERE ELSE). It is NoT aVaILAbLe. 'Not Ava' because the previous command takes last 7 chars (normally 'Watts') off of it. Ah my fault for not checking all possible input from hpe.
|
|
||||||
presentPower = 0
|
|
||||||
elif not presentPower.isdigit(): # Just in case some other anomalities come up in the future, like negative power draw or something…
|
|
||||||
warn('presentPower for bay ' + bay + 'was corrected to 0 from it\'s abnormal state: ' + presentPower)
|
|
||||||
presentPower = 0
|
|
||||||
servers[n].append(int(presentPower)) # And push it to our miniDB of the servers list.
|
|
||||||
logging.info("UsageRawDC: " + presentPower)
|
|
||||||
|
|
||||||
logging.debug(ssh_runcmd('exit')) # Exit the blade's iLO, return to the enc's iLO SSH interface.
|
## Get list of blades with some added info ##
|
||||||
|
serverName = ssh_runcmd('show server names') # Testing data: serverName = ['Bay Server Name Serial Number Status Power UID Partner\r', '--- ------------------------------------------------- --------------- -------- ------- --- -------\r', ' 1 tty-lab-1 OK On Off \r', ' 2 tty-lab-2 CZ320263P9 OK On Off \r', ' 3 tty-lab-3 CZJ14410KP Failed On Off \r', ' 4 kspve1 CZJ14410KK OK On Off \r', ' 5 kspve2-2 OK On Off \r', ' 6 kspve3 OK On Off \r', ' 7 plaes-blade OK On Off \r', ' 8 Ringly-01 CZ3402Y48C OK On Off \r', ' 9 toomas-lepik CZ3217FNYE OK On Off \r', ' 10 toomas-lepik2 CZ3217FFSS OK On Off \r', ' 11 [Absent] \r', ' 12 erki-naumanis OK On Off \r', ' 13 [Absent] \r', ' 14 [Absent] \r', ' 15 [Absent] \r', ' 16 [Absent] \r']
|
||||||
|
servers = listServers.listServers(serverName)
|
||||||
|
|
||||||
|
baysInUse = [x[0] for x in servers] # List of blades in use.
|
||||||
|
baysInUseCount = len(baysInUse) # How many bays in use.
|
||||||
|
logging.info("There are " + str(baysInUseCount+1) + "servers presenet.") # Further optimizations could be made by not connecting to servers, what are turned off.
|
||||||
|
|
||||||
|
|
||||||
|
## Get blade data ##
|
||||||
|
for n in range(baysInUseCount):
|
||||||
|
bay = servers[n][0] # We want the bay, not how many times we have looped over.
|
||||||
|
logging.info("Accessing server " + str(bay))
|
||||||
|
logging.debug( ssh_runcmd('connect server ' + str(bay)) ) # Use the enc as a jump host.
|
||||||
|
powerInfoTmp = ssh_runcmd('show system1/oemhp_power1') # Get the data
|
||||||
|
|
||||||
|
presentPower = [i for i in powerInfoTmp if i.startswith(' oemhp_PresentPower=')][0][23:-7] # Get the line with PresentPower, then remove first 23, and last 7 chars to end up with the Watts DC the blade is directly using.
|
||||||
|
if presentPower == 'Not Ava': # When the server is powered off, it's not zero (LIKE LITERALLY EVERYWHERE ELSE). It is NoT aVaILAbLe. 'Not Ava' because the previous command takes last 7 chars (normally 'Watts') off of it. Ah my fault for not checking all possible input from hpe.
|
||||||
|
presentPower = 0
|
||||||
|
elif not presentPower.isdigit(): # Just in case some other anomalities come up in the future, like negative power draw or something…
|
||||||
|
warn('presentPower for bay ' + bay + 'was corrected to 0 from it\'s abnormal state: ' + presentPower)
|
||||||
|
presentPower = 0
|
||||||
|
servers[n].append(int(presentPower)) # And push it to our miniDB of the servers list.
|
||||||
|
logging.info("UsageRawDC: " + str(presentPower))
|
||||||
|
|
||||||
|
logging.debug(ssh_runcmd('exit')) # Exit the blade's iLO, return to the enc's iLO SSH interface.
|
||||||
|
#breakpoint()
|
||||||
|
|
||||||
|
#print(servers)
|
||||||
#breakpoint()
|
#breakpoint()
|
||||||
|
# Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC]
|
||||||
#print(servers)
|
|
||||||
#breakpoint()
|
|
||||||
# Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC]
|
|
||||||
|
|
||||||
|
|
||||||
## Get enc's _AC_ usage. ##
|
## Get enc's _AC_ usage. ##
|
||||||
encPowerUsageAC = int(str.strip( [i for i in ssh_runcmd('show power') if i.startswith('Present Power:')][0][14:-9] ))
|
encPowerUsageAC = int(str.strip( [i for i in ssh_runcmd('show power') if i.startswith('Present Power:')][0][14:-9] ))
|
||||||
logging.info("Server total usage AC: " + str(encPowerUsageAC))
|
logging.info("Server total usage AC: " + str(encPowerUsageAC))
|
||||||
|
|
||||||
## End session with enc. ##
|
## End session with enc. ##
|
||||||
channel.send('exit' + '\n') # Using lower level cmd since we're not going to get a prompt back here.
|
channel.send('exit' + '\n') # Using lower level cmd since we're not going to get a prompt back here.
|
||||||
ssh.close()
|
ssh.close()
|
||||||
|
|
||||||
|
|
||||||
## Calculating blade usage by percentage. ##
|
## Calculating blade usage by percentage. ##
|
||||||
# Keep in mind the querying of the data took a while, a minute or so.
|
# Keep in mind the querying of the data took a while, a minute or so.
|
||||||
serversAverageACAndServers = relativeUsage.relativeUsage(encPowerUsageAC, baysInUseCount, servers)
|
serversAverageACAndServers = relativeUsage.relativeUsage(encPowerUsageAC, baysInUseCount, servers)
|
||||||
serversAverageAC = serversAverageACAndServers[0]
|
#serversAverageAC = serversAverageACAndServers[0] # I guess you don't want any stonks by me, but prom instead…
|
||||||
servers = serversAverageACAndServers[1]
|
servers = serversAverageACAndServers[1]
|
||||||
|
# Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC]
|
||||||
|
|
||||||
# Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC]
|
for n in range(baysInUseCount):
|
||||||
|
servers[n].append( enc ) # Add what enc was used.
|
||||||
|
# Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC, "enc"]
|
||||||
|
|
||||||
print("Enclosure usage AC: " + str(encPowerUsageAC))
|
return(baysInUseCount, servers) # we could return more, but nobody wants our data :/
|
||||||
print("Average blade relative usage AC: " + str(serversAverageAC))
|
|
||||||
print('[BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC]')
|
#print("Enclosure usage AC: " + str(encPowerUsageAC))
|
||||||
print(servers)
|
#print("Average blade relative usage AC: " + str(serversAverageAC))
|
||||||
|
#print('[BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC, "enc"]')
|
||||||
|
#print(servers)
|
||||||
|
|
Loading…
Reference in New Issue