#!/usr/bin/env python3
from argparse import ArgumentParser
from subprocess import getstatusoutput
from operator import itemgetter
import json
import re
import os

# Given an IB, this script queries elasticsearch and creates the
# corresponding json files with the results that it finds.
QUERY_FILE = "es-queries/get-ib-relvals-status.json"
JSON_OUT_FILE_TEMPLATE = "data/relvals/{arch}/{date}/{release_queue}_INCOMPLETE.json"
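
# Usage sketch (the IB name, hostname and index below are illustrative
# placeholders, not values mandated by the script):
#   ./process-relvals-status-es CMSSW_11_2_X_2020-09-01-2300 \
#     --es_hostname es-host.example.com:9200 --es_rv_index relvals-index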
#
# Queries elasticsearch to get the data about the workflows of the given IB
#
def get_ib_data( ib ):
    query = open( QUERY_FILE, 'r' ).read().replace( 'IB_NAME', ib )
    print( "Querying results for %s" % ib )
    cmd = "curl -s -XPOST http://{hostname}/{index}/_search -d '{query}'".format(
        hostname=ES_HOSTNAME,
        index=ES_INDEX,
        query=query )
    err, out = getstatusoutput(cmd)
    if err:
        print( "Error while querying elasticsearch" )
        print( out )
        exit(1)
    response = json.loads( out )
    data = [ x[ 'fields' ] for x in response[ 'hits' ][ 'hits' ] ]
    return data
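
# Each item returned above is the 'fields' dict of one elasticsearch hit.
# Judging from the accesses below, every value is a single-element list,
# e.g. (illustrative values):
#   { 'architecture': [ 'slc7_amd64_gcc900' ], 'workflow_id': [ 136.88 ],
#     'workflow': [ '...' ], 'step': [ 1 ], 'status': [ 'PASSED' ],
#     'hostname': [ '...' ] }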

#---------------------------------------------------------------
# Start
#---------------------------------------------------------------
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("input")
    parser.add_argument("--es_hostname", type=str, help="elasticsearch hostname", required=True)
    parser.add_argument("--es_rv_index", type=str, help="elasticsearch relvals index", required=True)
    args = parser.parse_args()
    ES_HOSTNAME = args.es_hostname
    ES_INDEX = args.es_rv_index
    if not args.input:
        print("Please specify an IB to query")
        exit(1)
    ib = args.input
    data = get_ib_data( ib )
    if len( data ) == 0:
        print('No data found. Something may be wrong')
        exit(1)
    groups = {}
    # group them to ease the ordering and generation of the json files
    for item in data:
        arch = item[ 'architecture' ][ 0 ]
        wf = item[ 'workflow_id' ][ 0 ]
        if not groups.get( arch ):
            groups[ arch ] = {}
        if not groups[ arch ].get( wf ):
            groups[ arch ][ wf ] = []
        groups[ arch ][ wf ].append( item )
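    # groups now maps architecture -> workflow id -> list of per-step hits,
    # e.g. groups[ 'slc7_amd64_gcc900' ][ 136.88 ] (illustrative keys)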
    for arch in groups.keys():
        results_list = []
        # reorganize the structure to be the same as in the complete files
        for wf in groups[ arch ].keys():
            wf_info = groups[ arch ][ wf ]
            wf_id = str( round( wf_info[ 0 ][ 'workflow_id' ][ 0 ], 2 ) )
            wf_name = wf_info[ 0 ][ 'workflow' ][ 0 ]
            # Right now all the workflows run on the same node. That could
            # change in the future, and keeping the hostname in each workflow
            # also disturbs the structure of the file less.
            hostname = wf_info[ 0 ][ 'hostname' ][ 0 ]
            steps = []
            steps_statuses = []
            for step_info in wf_info:
                step_status = step_info[ 'status' ][ 0 ]
                steps_statuses.append( step_status )
                step_num = step_info[ 'step' ][ 0 ]
                steps.insert( int( step_num ), { "status": step_status } )
            wf_result = {
                "id": wf_id,
                "name": wf_name,
                "steps": steps,
                "hostname": hostname,
                "order": sum( [ 10 - i for ( i, x ) in enumerate( steps_statuses )
                                if x in ( "FAILED", "DAS_ERROR", "TIMEOUT" ) ] )
            }
            results_list.append( wf_result )
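        # "order" weighs failing steps by position: step i contributes 10 - i,
        # so e.g. statuses [ "FAILED", "PASSED", "TIMEOUT" ] yield 10 + 8 = 18,
        # which pushes workflows with early failures to the top of the sort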
        # order the results for the IB in the same way it is in the complete files
        results_list.sort( key=itemgetter("order", "id"), reverse=True )
        for r in results_list:
            r.pop( "order" )
        # create the json file
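        # e.g. for the (illustrative) IB name CMSSW_11_2_X_2020-09-01-2300 the
        # matches below give release queue CMSSW_11_2_X and date 2020-09-01-2300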
        ib_rq = re.match("(.*)_2[0-9]*-[01][0-9]-[0-3][0-9]-[0-9]*$", ib).group(1)
        ib_date = re.match(".*(2[0-9]*-[01][0-9]-[0-3][0-9]-[0-9]*)$", ib).group(1)
        out_file_path = JSON_OUT_FILE_TEMPLATE.format( arch=arch, date=ib_date, release_queue=ib_rq )
        dir_file_path = out_file_path[:out_file_path.rfind("/")]
        if not os.path.exists( dir_file_path ):
            os.makedirs( dir_file_path )
        print(dir_file_path)
        print('Generating incomplete results file %s' % out_file_path)
        with open( out_file_path, 'w' ) as out_file:
            json.dump(results_list, out_file, sort_keys=True, indent=4, separators=(',', ': '))
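        # Each generated *_INCOMPLETE.json file holds a list of entries shaped
        # like (illustrative values):
        #   { "id": "136.88", "name": "...", "hostname": "...",
        #     "steps": [ { "status": "PASSED" }, { "status": "FAILED" } ] }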