blades-ssh-scraper/scraperMain.py

137 lines
7.4 KiB
Python

# External deps
import paramiko # for ssh
import logging # for possible debugging
from time import sleep # for sleep()
from warnings import warn # you guessed it
import os # for CLI arguments
# Local deps
import listServers, relativeUsage
# Debugging stuffs
#logging.basicConfig()
#logging.getLogger("paramiko").setLevel(logging.DEBUG)
def ssh_runcmd(command): # Some abstraction to run ssh commands.
    """Run one command on the shared SSH shell channel and return its output lines.

    Uses the module-level ``channel`` object, which scraperMain() publishes
    through its ``global channel`` statement before calling this function.

    Args:
        command: The command line to send. A newline is appended here.

    Returns:
        The received text split on '\\n', with the first 2 and the last 3
        entries removed (presumably the command echo and the trailing prompt
        lines -- confirm against real enclosure output).

    Raises:
        TypeError: If ``command`` is not a string.
        EOFError: If no '> ' prompt was seen after 30 polls of 0.5 s each.
    """
    # Input validation
    if not isinstance(command, str):
        # str() is required here: concatenating the non-string itself would
        # raise an unrelated "can only concatenate str" TypeError instead.
        raise TypeError("ssh_runcmd: command must be a string, '" + str(command) + "' is not.")
    #TODO: validate channel, though not really
    channel.send(command + '\n') # Execute command.
    cmdout = "" # Init
    lockedoutCounter = 0 # Init
    # Keep polling while there is unread output, or until the output ends with
    # '> ' (prompt ready). The latter is useful when connecting to blades.
    #
    # SSH to a blade from the enc takes about 6-7s. If we are at 15s, something
    # is probably wrong. connect servers fail due to:
    #   a) There are too many users on SSH (ungraceful sessions?)
    #   b) The iLO is unavailable (has been just reset)
    #   c) The blade has been just plugged in (not initialized)
    #   d) SSH access has been disabled
    #   e) other
    # All but e) are known to drop you back in to the enc's shell, in which
    # case we'll be in a problematic spot, since we're going to exit out of the
    # enc ssh, not the blade's.
    #TODO: somehow detect if we've successfully connected to the blade. There's
    # no standard way to determine. The probable way would be to look at the
    # prompt. Blades seem to all be '</>hpiLO-> '. Fetch the prompt on
    # connection to the enc, then compare the current prompt to determine if
    # we're connected to a blade.
    while channel.recv_ready() or not cmdout.endswith('> '):
        lockedoutCounter += 1
        if lockedoutCounter > 30:
            raise EOFError("Waited for a prompt for 15s. Something is probably wrong. Aborting.")
        logging.info("No prompt, waiting for more data.")
        sleep(0.5)
        # Only read when data is actually buffered. An unconditional recv()
        # blocks until data arrives, which would defeat the 15s guard above
        # whenever the remote side goes silent.
        if channel.recv_ready():
            # Get output, capped to 64KiB per read. Must be drained, otherwise
            # the next command will get this command's output. Undecodable
            # bytes are replaced rather than aborting the whole scrape.
            cmdout += channel.recv(65536).decode("ascii", errors="replace")
    # Split to a list, per newlines. Remove first 2, and last 3 lines.
    # Callers rely on getting a list back, so the lines are NOT joined again.
    return cmdout.split('\n')[2:-3]
def scraperMain(hostname, encname, sshkeypath):
    """Scrape per-blade and enclosure power usage from one enclosure over SSH.

    Connects to the enclosure as 'Administrator', lists the servers, hops onto
    each blade through the enclosure ('connect server <bay>') to read its
    present power, reads the enclosure's own 'show power' figure, and finally
    lets relativeUsage.relativeUsage() extend every server row.

    Args:
        hostname: Address of the enclosure to SSH into.
        encname: Label appended to every server row to record which enclosure
            the row came from.
        sshkeypath: Path of the private key file handed to paramiko.

    Returns:
        A tuple ``(baysInUseCount, servers)``. ``servers`` is the list produced
        by listServers.listServers() with three values appended per row: the
        blade's present DC power, whatever relativeUsage adds, and ``encname``.
        Per the original author's notes the row layout is
        [BayNumber, "Server Name", "Serial Number", "Status", "Power",
        "UID Partner", presentPowerDirectDC, relativeUsageAC, "enc"].

    Raises:
        TypeError: If any argument is not a string.
        IndexError: If an expected power line is missing from command output.
        EOFError: Propagated from ssh_runcmd() when no prompt arrives.
    """
    global channel # needed for ssh_runcmd
    # Input validation
    if not isinstance(hostname, str):
        raise TypeError("scraperMain: hostname must be a string, '" + str(hostname) + "' is not.")
    if not isinstance(encname, str):
        raise TypeError("scraperMain: enc must be a string, '" + str(encname) + "' is not.")
    if not isinstance(sshkeypath, str):
        raise TypeError("scraperMain: sshkeypath must be a string, '" + str(sshkeypath) + "' is not.")

    sshuser = 'Administrator' # Admin is hardcoded in to the enc.
    # Allow the insecurities. Guarded so repeated calls do not keep growing the
    # class-level tuple that every Transport shares.
    if 'ssh-dss' not in paramiko.transport.Transport._preferred_keys:
        paramiko.transport.Transport._preferred_keys += ('ssh-dss',)
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # Ignore host key REMOVE ME (though it doesn't kind of matter anyways)
    try:
        ssh.connect(hostname, username=sshuser, key_filename=sshkeypath) # Attempt to connect
        # One long-lived shell: each command depends on the previous one
        # having been executed successfully in the same session.
        channel = ssh.invoke_shell()

        ## MAIN ##
        # Get rid of motd, init for next cmds.
        logging.debug(ssh_runcmd('show date'))

        ## Get list of blades with some added info ##
        serverName = ssh_runcmd('show server names')
        servers = listServers.listServers(serverName)
        baysInUseCount = len(servers) # How many bays in use.
        # Further optimizations could be made by not connecting to servers
        # that are turned off.
        logging.info("There are %d servers present.", baysInUseCount)

        ## Get blade data ##
        for server in servers:
            bay = server[0] # We want the bay, not the loop position.
            logging.info("Accessing server " + str(bay))
            logging.debug(ssh_runcmd('connect server ' + str(bay))) # Use the enc as a jump host.
            powerInfoTmp = ssh_runcmd('show system1/oemhp_power1') # Get the data
            presentPower = _parse_present_power(powerInfoTmp, bay)
            server.append(presentPower) # And push it to our miniDB of the servers list.
            logging.info("UsageRawDC: " + str(presentPower))
            logging.debug(ssh_runcmd('exit')) # Exit the blade's iLO, return to the enc's iLO SSH interface.
        # Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC]

        ## Get enc's _AC_ usage. ##
        encPowerLines = [i for i in ssh_runcmd('show power') if i.startswith('Present Power:')]
        if not encPowerLines:
            raise IndexError("scraperMain: 'show power' output has no 'Present Power:' line.")
        encPowerUsageAC = int(encPowerLines[0][14:-9].strip())
        logging.info("Server total usage AC: " + str(encPowerUsageAC))

        ## End session with enc. ##
        channel.send('exit' + '\n') # Using lower level cmd since we're not going to get a prompt back here.
    finally:
        # Always release the connection, also when a command above failed.
        ssh.close()

    ## Calculating blade usage by percentage. ##
    # Keep in mind the querying of the data took a while, a minute or so.
    serversAverageACAndServers = relativeUsage.relativeUsage(encPowerUsageAC, baysInUseCount, servers)
    servers = serversAverageACAndServers[1]
    # Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC]
    for n in range(baysInUseCount):
        servers[n].append(encname) # Add what enc was used.
    # Fields: [BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC, "enc"]
    return (baysInUseCount, servers) # we could return more, but nobody wants our data :/


def _parse_present_power(powerInfo, bay):
    """Return a blade's present power in watts from 'oemhp_power1' output lines.

    Looks for the 'oemhp_PresentPower=<value>' line and parses the value by
    key rather than by fixed character offsets, so leading indentation and a
    trailing carriage return do not matter. A 'Not Ava...' reading (reported
    for powered-off servers) counts as 0; any other non-numeric reading is
    reported through warn() and also counts as 0.

    Raises:
        IndexError: If no 'oemhp_PresentPower=' line is present at all.
    """
    for line in powerInfo:
        key, sep, value = line.strip().partition('=')
        if sep and key == 'oemhp_PresentPower':
            break
    else:
        raise IndexError("scraperMain: no 'oemhp_PresentPower=' line in power output for bay " + str(bay) + ".")

    reading = value.strip()
    if reading.endswith('Watts'):
        reading = reading[:-len('Watts')].strip()
    if reading.startswith('Not Ava'):
        # When the server is powered off the value is "Not Available", not zero.
        return 0
    if not reading.isdigit():
        # Just in case some other anomalies come up, like negative power draw.
        warn('presentPower for bay ' + str(bay) + ' was corrected to 0 from it\'s abnormal state: ' + reading)
        return 0
    return int(reading)
#print("Enclosure usage AC: " + str(encPowerUsageAC))
#print("Average blade relative usage AC: " + str(serversAverageAC))
#print('[BayNumber "Server Name", "Serial Number", "Status", "Power", "UID Partner", presentPowerDirectDC, relativeUsageAC, "enc"]')
#print(servers)