From fd7c4d4458c77f735162931ae218eb282dbdc418 Mon Sep 17 00:00:00 2001 From: rasmus Date: Wed, 23 Dec 2020 06:01:21 +0200 Subject: [PATCH] prom 2/2. Add prometheus output. (also part 2/2 for the presentPower bugfix) --- .gitignore | 3 +- prom.py | 6 ++- prom_servers.py | 3 +- scraperMain.py | 135 ++++++++++++++++++++++++++---------------------- 4 files changed, 81 insertions(+), 66 deletions(-) diff --git a/.gitignore b/.gitignore index ed8ebf5..7dea42c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -__pycache__ \ No newline at end of file +__pycache__ +scraperMain.out \ No newline at end of file diff --git a/prom.py b/prom.py index 5886843..7189ff9 100644 --- a/prom.py +++ b/prom.py @@ -24,6 +24,10 @@ app = Flask(__name__) @app.route('/', methods=['GET']) def parse_request(): # If somebody accesses us data = scraperMain.scraperMain(hostname, enc, sshkeypath) # Gather up, we're going to wait a few minutes on the data. + + #with open("prom_servers.out", "a") as f: # DO NOT UNCOMMENT IN PROD + # print(data, file=f) + response_list = prom_servers.prom_servers(PREFIX, data[0], data[1]) # Convert our python lists to prometheus format whatever. response = '\n'.join(response_list) # Oh you still don't want a list? Fine, newlines! - return response # The pizza is already cold. \ No newline at end of file + return '
' + response + '
' # The pizza is already cold. \ No newline at end of file diff --git a/prom_servers.py b/prom_servers.py index 2360609..55a53d0 100644 --- a/prom_servers.py +++ b/prom_servers.py @@ -94,7 +94,8 @@ def prom_servers(PREFIX, baysInUseCount, servers): prom_disp_servers = prom_disp_servers + prom_header('Number of errors or warnings encountered during the gathering of data see logs of promServers in blade-ssh-scraper', [PREFIX + 'errors ' + str(errors)], 'counter') #prom_disp_servers = '\n'.join(prom_disp_servers) - breakpoint() + #breakpoint() + return(prom_disp_servers) # Test data diff --git a/scraperMain.py b/scraperMain.py index 0439bf0..5e67306 100644 --- a/scraperMain.py +++ b/scraperMain.py @@ -12,28 +12,11 @@ import listServers, relativeUsage #logging.basicConfig() #logging.getLogger("paramiko").setLevel(logging.DEBUG) -""" -ENV VARIABLES AVAILABLE: - - hostname - - sshkeylocation - - sshuser (optional) -""" -# Creds -hostname = str(os.environ['hostname']) -sshuser = str(os.getenv('sshuser', 'Administrator')) -sshkeylocation = os.getenv('sshkeypath') - -paramiko.transport.Transport._preferred_keys += ('ssh-dss',) # Allow the insecurities -ssh = paramiko.SSHClient() # Alias -ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # Ignore host key REMOVE ME (though it doesn't kind of matter anyways) - -ssh.connect(hostname, username=sshuser, key_filename=sshkeylocation) # Attempt to connect -channel = ssh.invoke_shell() # Get a new shell(, and keep it open, since we need to exec multiple commands depending on the last command being executed successfully.) - def ssh_runcmd(command): # Some abstraction to run ssh commands. # Input validation if not type(command) is str: raise TypeError("ssh_runcmd: command must be a string, '" + command + "' is not.") + #TODO: valiate channel, though not really channel.send(command + '\n') # Execute command. @@ -66,62 +49,88 @@ def ssh_runcmd(command): # Some abstraction to run ssh commands. #cmdout = '\n'.join(cmdout) # DO NOT UNCOMMENT THIS, HERE FOR ONLY REFERENCE TO JOIN THE LINES BACK. return(cmdout) # Return list of output stuff. -### MAIN ### - -logging.debug(ssh_runcmd('show date')) # Get rid of motd, init for next cmds. This is better than indefinitely reading buffer before any command as to counter this. +def scraperMain(hostname, enc, sshkeypath): + global channel # needed for ssh_runcmd + # Input validation + if not type(hostname) is str: + raise TypeError("scraperMain: hostname must be a string, '" + str(hostname) + "' is not.") + if not type(enc) is str: + raise TypeError("scraperMain: enc must be a string, '" + str(enc) + "' is not.") + if not type(sshkeypath) is str: + raise TypeError("scraperMain: sshkeypath must be a string, '" + str(sshkeypath) + "' is not.") -## Get list of blades with some added info ## -serverName = ssh_runcmd('show server names') # Testing data: serverName = ['Bay Server Name Serial Number Status Power UID Partner\r', '--- ------------------------------------------------- --------------- -------- ------- --- -------\r', ' 1 tty-lab-1 OK On Off \r', ' 2 tty-lab-2 CZ320263P9 OK On Off \r', ' 3 tty-lab-3 CZJ14410KP Failed On Off \r', ' 4 kspve1 CZJ14410KK OK On Off \r', ' 5 kspve2-2 OK On Off \r', ' 6 kspve3 OK On Off \r', ' 7 plaes-blade OK On Off \r', ' 8 Ringly-01 CZ3402Y48C OK On Off \r', ' 9 toomas-lepik CZ3217FNYE OK On Off \r', ' 10 toomas-lepik2 CZ3217FFSS OK On Off \r', ' 11 [Absent] \r', ' 12 erki-naumanis OK On Off \r', ' 13 [Absent] \r', ' 14 [Absent] \r', ' 15 [Absent] \r', ' 16 [Absent] \r'] -servers = listServers.listServers(serverName) + # ssh_init() # it takes more lines to put it in a function than to dump it here. + sshuser = 'Administrator' # Admin is hardcoded in to the enc. -baysInUse = [x[0] for x in servers] # List of blades in use. -baysInUseCount = len(baysInUse) # How many bays in use. -logging.info("There are " + str(baysInUseCount+1) + "servers presenet.") # Further optimizations could be made by not connecting to servers, what are turned off. + paramiko.transport.Transport._preferred_keys += ('ssh-dss',) # Allow the insecurities + ssh = paramiko.SSHClient() # Alias + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # Ignore host key REMOVE ME (though it doesn't kind of matter anyways) + + ssh.connect(hostname, username=sshuser, key_filename=sshkeypath) # Attempt to connect + channel = ssh.invoke_shell() # Get a new shell(, and keep it open, since we need to exec multiple commands depending on the last command being executed successfully.) -## Get blade data ## -for n in range(baysInUseCount): - bay = servers[n][0] # We want the bay, not how many times we have looped over. - logging.info("Accessing server " + str(bay)) - logging.debug( ssh_runcmd('connect server ' + str(bay)) ) # Use the enc as a jump host. - powerInfoTmp = ssh_runcmd('show system1/oemhp_power1') # Get the data - - presentPower = [i for i in powerInfoTmp if i.startswith(' oemhp_PresentPower=')][0][23:-7] # Get the line with PresentPower, then remove first 23, and last 7 chars to end up with the Watts DC the blade is directly using. - if presentPower == 'Not Ava': # When the server is powered off, it's not zero (LIKE LITERALLY EVERYWHERE ELSE). It is NoT aVaILAbLe. 'Not Ava' because the previous command takes last 7 chars (normally 'Watts') off of it. Ah my fault for not checking all possible input from hpe. - presentPower = 0 - elif not presentPower.isdigit(): # Just in case some other anomalities come up in the future, like negative power draw or something… - warn('presentPower for bay ' + bay + 'was corrected to 0 from it\'s abnormal state: ' + presentPower) - presentPower = 0 - servers[n].append(int(presentPower)) # And push it to our miniDB of the servers list. - logging.info("UsageRawDC: " + presentPower) + ## MAIN ## + logging.debug(ssh_runcmd('show date')) # Get rid of motd, init for next cmds. This is better than indefinitely reading buffer before any command as to counter this. Reading buffer before each command isn't a good idea and doesn't work either, trust me. - logging.debug(ssh_runcmd('exit')) # Exit the blade's iLO, return to the enc's iLO SSH interface. + + ## Get list of blades with some added info ## + serverName = ssh_runcmd('show server names') # Testing data: serverName = ['Bay Server Name Serial Number Status Power UID Partner\r', '--- ------------------------------------------------- --------------- -------- ------- --- -------\r', ' 1 tty-lab-1 OK On Off \r', ' 2 tty-lab-2 CZ320263P9 OK On Off \r', ' 3 tty-lab-3 CZJ14410KP Failed On Off \r', ' 4 kspve1 CZJ14410KK OK On Off \r', ' 5 kspve2-2 OK On Off \r', ' 6 kspve3 OK On Off \r', ' 7 plaes-blade OK On Off \r', ' 8 Ringly-01 CZ3402Y48C OK On Off \r', ' 9 toomas-lepik CZ3217FNYE OK On Off \r', ' 10 toomas-lepik2 CZ3217FFSS OK On Off \r', ' 11 [Absent] \r', ' 12 erki-naumanis OK On Off \r', ' 13 [Absent] \r', ' 14 [Absent] \r', ' 15 [Absent] \r', ' 16 [Absent] \r'] + servers = listServers.listServers(serverName) + + baysInUse = [x[0] for x in servers] # List of blades in use. + baysInUseCount = len(baysInUse) # How many bays in use. + logging.info("There are " + str(baysInUseCount+1) + "servers presenet.") # Further optimizations could be made by not connecting to servers, what are turned off. + + + ## Get blade data ## + for n in range(baysInUseCount): + bay = servers[n][0] # We want the bay, not how many times we have looped over. + logging.info("Accessing server " + str(bay)) + logging.debug( ssh_runcmd('connect server ' + str(bay)) ) # Use the enc as a jump host. + powerInfoTmp = ssh_runcmd('show system1/oemhp_power1') # Get the data + + presentPower = [i for i in powerInfoTmp if i.startswith(' oemhp_PresentPower=')][0][23:-7] # Get the line with PresentPower, then remove first 23, and last 7 chars to end up with the Watts DC the blade is directly using. + if presentPower == 'Not Ava': # When the server is powered off, it's not zero (LIKE LITERALLY EVERYWHERE ELSE). It is NoT aVaILAbLe. 'Not Ava' because the previous command takes last 7 chars (normally 'Watts') off of it. Ah my fault for not checking all possible input from hpe. + presentPower = 0 + elif not presentPower.isdigit(): # Just in case some other anomalities come up in the future, like negative power draw or something… + warn('presentPower for bay ' + bay + 'was corrected to 0 from it\'s abnormal state: ' + presentPower) + presentPower = 0 + servers[n].append(int(presentPower)) # And push it to our miniDB of the servers list. + logging.info("UsageRawDC: " + str(presentPower)) + + logging.debug(ssh_runcmd('exit')) # Exit the blade's iLO, return to the enc's iLO SSH interface. + #breakpoint() + + #print(servers) #breakpoint() - -#print(servers) -#breakpoint() -# Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC] + # Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC] -## Get enc's _AC_ usage. ## -encPowerUsageAC = int(str.strip( [i for i in ssh_runcmd('show power') if i.startswith('Present Power:')][0][14:-9] )) -logging.info("Server total usage AC: " + str(encPowerUsageAC)) + ## Get enc's _AC_ usage. ## + encPowerUsageAC = int(str.strip( [i for i in ssh_runcmd('show power') if i.startswith('Present Power:')][0][14:-9] )) + logging.info("Server total usage AC: " + str(encPowerUsageAC)) -## End sesion with enc. ## -channel.send('exit' + '\n') # Using lower level cmd since we're not going to get a prompt back here. -ssh.close() + ## End sesion with enc. ## + channel.send('exit' + '\n') # Using lower level cmd since we're not going to get a prompt back here. + ssh.close() -## Calculating blade usage by percentage. ## -# Keep in mind the querying of the data took a while, a minute or so. -serversAverageACAndServers = relativeUsage.relativeUsage(encPowerUsageAC, baysInUseCount, servers) -serversAverageAC = serversAverageACAndServers[0] -servers = serversAverageACAndServers[1] + ## Calculating blade usage by percentage. ## + # Keep in mind the querying of the data took a while, a minute or so. + serversAverageACAndServers = relativeUsage.relativeUsage(encPowerUsageAC, baysInUseCount, servers) + #serversAverageAC = serversAverageACAndServers[0] # I guess you don't want any stonks by me, but prom instead… + servers = serversAverageACAndServers[1] + # Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC] -# Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC] + for n in range(baysInUseCount): + servers[n].append( enc ) # Add what enc was used. + # Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC, "enc"] + + return(baysInUseCount, servers) # we could return more, but nobody wants our data :/ -print("Enclosure usage AC: " + str(encPowerUsageAC)) -print("Average blade relative usage AC: " + str(serversAverageAC)) -print('[BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC]') -print(servers) + #print("Enclosure usage AC: " + str(encPowerUsageAC)) + #print("Average blade relative usage AC: " + str(serversAverageAC)) + #print('[BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC, "enc"]') + #print(servers)