# suser

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Get the SLURM cluster usage per user

author: Stephen Kelly, NYU Langone Medical Center
https://github.com/stevekm
"""
import subprocess as sp
from collections import defaultdict, namedtuple
import re
import time
import datetime
import csv
import sys
import argparse

# default command used to query the queue; the code in this file expects it to
# print a whitespace-separated table with these column headers:
# USER JOBID PARTITION STATE MIN_MEMORY MIN_CPUS TIME TIME_LIMIT
SLURM_command = ('squeue', '-O', 'username,jobid,partition,state,minmemory,mincpus,timeused,timelimit')

class Squeue(object):
    """
    View information about jobs located in the Slurm scheduling queue.

    https://slurm.schedmd.com/squeue.html

    Parameters
    ----------
    command: tuple
        the command (program and arguments) to run
    debug: bool
        if True, the command is not run when the object is created; call `update()` to run it later

    Attributes
    ----------
    command: tuple
        the command that `get_squeue()` runs
    returncode: int or None
        exit status of the most recent run; `None` if the command has not been run yet
    entries: list
        one dict per parsed job line, keyed by column header; empty if the command
        has not been run yet or exited with a non-zero status

    Examples
    ---------

    sq = Squeue()
    sq.returncode
    sq.entries
    """
    def __init__(self, command = SLURM_command, debug = False):
        self.command = command
        # always define the attributes so they can be read safely in debug mode
        self.returncode = None
        self.entries = []
        if not debug:
            self.update()

    def update(self):
        """
        Runs the command again and refreshes the `returncode` and `entries` attributes
        """
        returncode, entries = self.get_squeue()
        self.returncode = returncode
        self.entries = entries

    def get_squeue(self):
        """
        Get the 'squeue' HPC cluster usage information

        Returns
        -------
        (int, list)
            integer exit code from the 'squeue' command
            a list of dicts representing the 'squeue' values; in the case of an error, returns an empty list
        """
        # system command to run; no shell is involved, stdout/stderr are decoded as text
        process = sp.Popen(self.command,
                            stdout = sp.PIPE,
                            stderr = sp.PIPE,
                            shell = False,
                            universal_newlines = True)
        # run the command, capture stdout and stderr
        proc_stdout, proc_stderr = process.communicate()
        # only parse the table if the command succeeded
        if process.returncode == 0:
            entries = list(self.parse_SLURM_table(stdout = proc_stdout))
        else:
            entries = []
        return(process.returncode, entries)

    def parse_SLURM_table(self, stdout):
        """
        Convert the table formatted output of SLURM 'sinfo -o '%all', 'squeue -o '%all', etc., commands into dicts

        The first line is treated as the header; every following line that has the same
        number of whitespace-separated fields as the header is converted to a dict.
        Lines with a different number of fields (including blank lines) are skipped.

        Parameters
        ----------
        stdout: str
            the stdout of a SLURM sinfo or squeue command

        Yields
        ------
        dict
            a dict mapping each header name to the value on that line
        """
        lines = stdout.split('\n')
        # str.split() with no argument already strips surrounding whitespace
        header_cols = lines[0].split()
        if not header_cols:
            # no header means there is no table; without this guard every blank
            # line would "match" the zero-column header and yield an empty dict
            return
        for line in lines[1:]:
            parts = line.split()
            # make sure that the stdout line has the same number of fields as the headers
            if len(parts) != len(header_cols):
                continue
            yield(dict(zip(header_cols, parts)))

def parse_time(time_str):
    """
    Parse a SLURM timestamp, return total seconds

    Accepts the SLURM time formats "days-hours:minutes:seconds", "days-hours:minutes",
    "days-hours", "hours:minutes:seconds", "minutes:seconds" and "minutes".

    Parameters
    ----------
    time_str: str
        the SLURM time value to parse

    Returns
    -------
    float
        the number of seconds represented by the timestamp

    Raises
    ------
    ValueError
        if the value is not in one of the formats above (e.g. "UNLIMITED")

    Examples
    --------
    >>> parse_time('2-14:01:37')
    223297.0

    >>> parse_time('18-02:02:46')
    1562566.0

    >>> parse_time('0:00:00')
    0.0

    >>> parse_time("0:02")
    2.0

    >>> parse_time("1-12")
    129600.0

    """
    # num seconds in a day
    seconds_per_day = 86400.0
    num_days = 0
    clock = time_str.strip()
    has_days = '-' in clock
    if has_days:
        days_part, _, clock = clock.partition('-')
        num_days = int(days_part)

    # int() raises ValueError for anything that is not a plain number
    fields = [ int(field) for field in clock.split(':') ]
    if len(fields) > 3 or num_days < 0 or any(field < 0 for field in fields):
        raise ValueError("unrecognized SLURM time value: {0!r}".format(time_str))

    if has_days:
        # after a day count the fields read left to right: hours[:minutes[:seconds]]
        fields = fields + [0] * (3 - len(fields))
    elif len(fields) == 1:
        # a bare number is a count of minutes
        fields = [0, fields[0], 0]
    else:
        # without a day count the fields read right to left: [hours:]minutes:seconds
        fields = [0] * (3 - len(fields)) + fields

    hours, minutes, seconds = fields
    total_seconds = num_days * seconds_per_day + hours * 3600.0 + minutes * 60.0 + seconds
    return(total_seconds)

# number of bytes per size suffix
mem_key = {
'T': 1024 * 1024 * 1024 * 1024,
'G': 1024 * 1024 * 1024,
'M': 1024 * 1024,
'K': 1024
}

# regex that strips everything except digits and '.' from the mem value; "16G" -> "16"
non_decimal = re.compile(r'[^\d.]+')

def parse_mem(mem_str, mem_key = mem_key, non_decimal = non_decimal):
    """
    Parse SLURM memory values, return total in bytes
    SLURM reports memory in values such as "16T", "16G", "16M", "16K"

    The unit is taken from the first letter in the value, case-insensitively.
    A value with no letter at all (e.g. "4000") is treated as megabytes, which
    is SLURM's default memory unit.

    Parameters
    ----------
    mem_str: str
        the SLURM memory value to parse
    mem_key: dict
        number of bytes per unit letter; must contain at least 'M'
    non_decimal: compiled regex
        pattern matching every character that is not part of the number

    Returns
    -------
    float
        a float value representing the amount of memory in bytes; 0.0 if the
        value contains no number or uses a unit that is not in `mem_key`

    Raises
    ------
    ValueError
        if the digits left after stripping do not form a valid number (e.g. "1.2.3G")
    """
    digits = non_decimal.sub('', mem_str)
    if not digits:
        # nothing numeric to convert (e.g. "N/A"); float('') would raise
        return(0.0)
    mem_num = float(digits)

    # first letter decides the unit; no letter at all means megabytes
    unit = next((char for char in mem_str.upper() if char.isalpha()), 'M')
    bytes_per_unit = mem_key.get(unit)
    if bytes_per_unit is None:
        # unknown unit: keep the long-standing behaviour of counting it as zero
        return(0.0)
    return(mem_num * bytes_per_unit)

def create_user_dict(entries):
    """
    Aggregate per-job squeue entries into per-user usage metrics

    Only jobs whose STATE is "RUNNING" or "PENDING" are counted. A running job is
    added both to a bucket named after its partition and to the user's "running"
    bucket; a pending job is added to the user's "pending" bucket only.

    Parameters
    ----------
    entries: list
        dicts with the keys 'USER', 'PARTITION', 'STATE', 'MIN_MEMORY', 'MIN_CPUS',
        'TIME' and 'TIME_LIMIT'

    Returns
    -------
    dict
        A dict of per-user buckets; each bucket holds "jobs", "cpus", "mem" (bytes)
        and "time" (seconds) totals.

    Example:
    {
        "username1": {
            "cpu_medium": {
                "mem": 34359738368.0,
                "jobs": 1,
                "cpus": 1,
                "time": 79753.0
            },
            "running": {
                "mem": 34359738368.0,
                "jobs": 1,
                "cpus": 1,
                "time": 79753.0
            }
        },
        "username2": {
            "pending": {
                "mem": 8589934592.0,
                "jobs": 2,
                "cpus": 2,
                "time": 0.0
            }
        }
    }
    """
    # fields read from every entry, in this order
    wanted = ('USER', 'PARTITION', 'STATE', 'MIN_MEMORY', 'MIN_CPUS', 'TIME', 'TIME_LIMIT')
    # starting value of each metric in a new bucket
    zeros = {'jobs': 0, 'cpus': 0, 'mem': 0.0, 'time': 0.0}
    # order in which missing metrics are created in each kind of bucket
    jobs_first = ('jobs', 'cpus', 'mem', 'time')
    jobs_last = ('cpus', 'mem', 'time', 'jobs')

    users = {}
    for entry in entries:
        # TIME_LIMIT is looked up but not aggregated
        username, partition, state, mem, cpus, time_used, time_limit = (
            entry[field] for field in wanted)

        # every entry is parsed, whatever its state
        seconds_used = parse_time(time_used)
        mem_bytes = parse_mem(mem)

        if state == "RUNNING":
            targets = ((partition, jobs_first), ('running', jobs_last))
        elif state == "PENDING":
            targets = (('pending', jobs_first),)
        else:
            continue

        per_user = users.setdefault(username, {})
        for bucket_name, metric_order in targets:
            bucket = per_user.setdefault(bucket_name, {})
            for metric_name in metric_order:
                bucket.setdefault(metric_name, zeros[metric_name])
            # add the new values
            bucket['cpus'] += int(cpus)
            bucket['mem'] += mem_bytes
            bucket['time'] += seconds_used
            bucket['jobs'] += 1
    return(users)

def format_metric(value, metric, mem_key = mem_key):
    """
    Render one metric value as a display string

    Parameters
    ----------
    value: str/int/float
        value to be formatted
    metric: str
        one of "cpus", "mem", "time", "jobs"
    mem_key: dict
        number of bytes per unit letter; only the 'G' entry is used

    Returns
    -------
    str
        "cpus" and "jobs" are printed as-is, "mem" is divided by the 'G' size and
        suffixed with "G", "time" is divided by 3600 and printed with one decimal
        and an "hr" suffix; any other metric gives an empty string
    """
    # plain counts need no conversion
    if metric in ("cpus", "jobs"):
        return("{0}".format(value))
    if metric == "mem":
        gigabytes = value / mem_key['G']
        return("{0}G".format(gigabytes))
    if metric == "time":
        hours = value / 3600
        return("{0:.1f}hr".format(hours))
    return('')
#
# def reformat_fieldname(fieldname):
#     return("{0:>7.6s}".format(fieldname))
#
# def reformat_username(username):
#     return("{0:>9.8}".format(username))
#
# def print_table(users, metric = "cpus"):
#     """
#     Prints the final table of user metrics
#     """
#     # get the fieldnames and partitions to print
#     partitions, fieldnames = get_partition_fieldnames(users)
#
#     # make a reformated list of the dicts for printing
#     dicts_to_print = []
#     for user, values in users.items():
#         d = {}
#         d[reformat_fieldname('user')] = reformat_username(user)
#
#         for partition in partitions:
#             if 'partitions' in values:
#                 if partition in values['partitions']:
#                     d[reformat_fieldname(partition)] = format_metric(
#                         value = values['partitions'][partition][metric],
#                         metric = metric)
#             else:
#                 d[reformat_fieldname(partition)] = ''
#
#         if 'pending' in values:
#             d[reformat_fieldname('pending')] = format_metric(
#                 value = values['pending'][metric],
#                 metric = metric)
#         else:
#             d[reformat_fieldname('pending')] = ''
#
#         if 'total' in values:
#             d[reformat_fieldname('total')] = format_metric(
#                 value = values['total'][metric],
#                 metric = metric)
#         else:
#             d[reformat_fieldname('total')] = ''
#         dicts_to_print.append(d)
#
#     dicts_to_print = sorted(dicts_to_print, key=lambda k: k[reformat_fieldname('total')], reverse=True)
#
#     # print to console
#     writer = csv.DictWriter(sys.stdout,
#                     delimiter = '\t',
#                     fieldnames = [reformat_fieldname(fieldname) for fieldname in fieldnames])
#     writer.writeheader()
#     for d in dicts_to_print:
#         writer.writerow(d)

def create_totals_tups(users, metric):
    """
    Flatten the per-user dict into a sortable list of tuples

    Parameters
    ----------
    users: dict
        per-user dict of buckets, each bucket a dict of metric totals
    metric: str
        the metric to pull out of every bucket

    Returns
    -------
    list
        (bucket name, username, metric value) tuples, largest value first
    """
    flattened = [
        (bucket_name, username, bucket[metric])
        for username, buckets in users.items()
        for bucket_name, bucket in buckets.items()
    ]
    # in-place sort on the metric value, descending
    flattened.sort(key = lambda tup: tup[2], reverse = True)
    return(flattened)

def print_table(users, totals, metric):
    """
    Prints a table of one metric to the console, one row per user and one column per bucket

    Columns are the partition names in alphabetical order, followed by the
    'running' and 'pending' summary columns when present. Rows follow the order
    in which usernames first appear in `totals`.

    https://docs.python.org/3.6/library/string.html#format-specification-mini-language

    Parameters
    ----------
    users: dict
        per-user dict of buckets, each bucket a dict of metric totals
    totals: list
        (bucket name, username, value) tuples
    metric: str
        one of "cpus", "mem", "time", "jobs"
    """
    # unique bucket names; `totals` holds tuples, so membership must be tested
    # against the names, not against `totals` itself
    headers = set(t[0] for t in totals)
    summary_headers = [ h for h in ('running', 'pending') if h in headers ]
    headers = sorted(headers.difference(summary_headers)) + summary_headers

    # get list of users to print; preserve order
    ordered_users = []
    seen_users = set()
    for t in totals:
        if t[1] not in seen_users:
            seen_users.add(t[1])
            ordered_users.append(t[1])

    # :(align right)<size of space to pad>.<length of original str>
    header_line = " ".join([ "{0:>9.7}".format(h) for h in headers ])
    # pad 12 spaces for later usernames
    header_line = "{0:>12}".format(" ") + header_line
    print(header_line)
    div_line = "-" * len(header_line)
    print(div_line)
    for user in ordered_users:
        if user not in users:
            continue
        # one cell per column, left blank where the user has no such bucket
        cells = []
        for header in headers:
            if header in users[user]:
                cell = format_metric(value = users[user][header][metric], metric = metric)
            else:
                cell = ''
            # values are padded but never truncated, so numbers are not cut off
            cells.append("{0:>9}".format(cell))
        # the username takes the same 12 characters that the header line pads
        print("{0:>11.11} ".format(user) + " ".join(cells))

def print_totals(users, totals, metric):
    """
    Prints the list of user totals per-partition to the console

    Partitions are printed in alphabetical order, followed by the 'running' and
    'pending' summaries when present. Within each section the users appear in
    the order they have in `totals`.

    Parameters
    ----------
    users: dict
        per-user dict of buckets; not used by this function
    totals: list
        (bucket name, username, value) tuples
    metric: str
        one of "cpus", "mem", "time", "jobs"
    """
    divider = '---------'
    # unique bucket names; `totals` holds tuples, so membership must be tested
    # against the names, not against `totals` itself
    headers = set(t[0] for t in totals)
    summary_headers = [ h for h in ('running', 'pending') if h in headers ]
    headers = sorted(headers.difference(summary_headers)) + summary_headers

    # rearrange the values per-partition
    d = defaultdict(list)
    for partition, username, total_val in totals:
        d[partition].append((username, total_val))

    # iterate over the headers list to ensure they are printed in the right order
    for header in headers:
        print("{0} ({1})".format(header, metric))
        print(divider)
        for username, total_val in d[header]:
            # {0:10.10} : <pad to length 10>.<cut off str to length 10>
            print("{0:10.10}: {1}".format(username, format_metric(value = total_val, metric = metric)))
        print('\n')


def main(**kwargs):
    """
    Main control function for the script

    Runs 'squeue', aggregates the jobs per user and prints the totals for one metric.

    Parameters
    ----------
    metric: str
        keyword argument; one of "cpus", "mem", "time", "jobs". Defaults to "cpus" if not given

    Raises
    ------
    SystemExit
        if the 'squeue' command exits with a non-zero status
    """
    metric = kwargs.pop('metric', "cpus")
    # get the SLURM squeue
    queue = Squeue()

    # a failed command leaves no entries; report it instead of printing nothing
    if queue.returncode != 0:
        sys.stderr.write("ERROR: 'squeue' command exited with status {0}\n".format(queue.returncode))
        sys.exit(queue.returncode)

    # get dict of parsed values per user
    users = create_user_dict(entries = queue.entries)

    # flatten to a list of (bucket, username, value) tuples sorted by value
    totals = create_totals_tups(users, metric)

    # print the parsed table
    # print_table(users, totals, metric) # TODO: finish this eventually
    print_totals(users, totals, metric)


def parse():
    """
    Command-line entry point: reads the optional 'metric' argument and hands it to main()
    """
    metric_choices = ["cpus", "mem", "time", "jobs"]
    parser = argparse.ArgumentParser(description='Calculates per-user SLURM usage metrics')
    # single optional positional argument; "jobs" is used when it is omitted
    parser.add_argument('metric',
                        nargs="?",
                        default = "jobs",
                        choices=metric_choices,
                        help='metric to report')
    parsed = vars(parser.parse_args())
    main(**parsed)

# parse the command-line arguments and run only when executed as a script
if __name__ == '__main__':
    parse()