Skip to content

Commit

Permalink
gfdiff tool revamp
Browse files Browse the repository at this point in the history
1) Fixed issue with mergely interface
2) Improved command line interface
3) Diff algorithm changes
  • Loading branch information
mmascher authored and mambelli committed Jan 24, 2025
1 parent 5f7eb83 commit cbe36e4
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 143 deletions.
48 changes: 35 additions & 13 deletions doc/factory/troubleshooting.html
Original file line number Diff line number Diff line change
Expand Up @@ -790,22 +790,44 @@ <h4>If the glideins are still running</h4>
<h2>
<a name="gfdiff">Checking differences in entries configuration</a>
</h2>
The gfdiff tool can be used to check differences among entries in the
xml configuration. For example:
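The gfdiff tool now takes positional arguments instead of options: the two configuration files, followed by the name of the entry to compare and, optionally, the name of a second entry when the entry is named differently in the second file. You can use it to check differences among entries in the XML configuration. For example: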
<blockquote>
bash-4.1$ gfdiff --debug --confA=data/automatically_generated.xml
--confB=/etc/osg-gfactory/10-cmst1-uscmst2-all.xml
--entryA=CMSHTPC_T2_US_Florida_slurm
--entryB=CMSHTPC_T2_US_Florida_slurm Checking entry attributes: Key
proxy_url(OSG) not found in CMSHTPC_T2_US_Florida_slurm Checking inner
xml: GLIDEIN_MaxMemMBs: Key glidein_publish is different: (True vs
False) GLIDEIN_MaxMemMBs: Key job_publish is different: (False vs
True) CONDOR_ARCH: not present in CMSHTPC_T2_US_Florida_slurm
GLIDEIN_Max_Walltime: not present in CMSHTPC_T2_US_Florida_slurm
GLIDEIN_SEs: not present in CMSHTPC_T2_US_Florida_slurm
<pre>
[0618] gfactory@gfactory-2 ~/mmascher/gfdiff$ ./gfdiff 10-hosted-ces.auto.xml.new 10-hosted-ces.auto.xml.bak241121 OSG_US_CHTC-Spark-CE1-gpu
--- text_a
+++ text_b
@@ -1,6 +1,6 @@
&lt;entry name="OSG_US_CHTC-Spark-CE1-gpu" auth_method="grid_proxy" comment="Entry automatically generated" enabled="True" gatekeeper="chtc-spark-ce1.svc.opensciencegrid.org chtc-spark-ce1.svc.opensciencegrid.org:9619" gridtype="condor" proxy_url="OSG" trust_domain="grid" verbosity="std" work_dir="OSG"&gt;
&lt;config&gt;
- &lt;max_jobs &gt;
+ &lt;max_jobs num_factories="1"&gt;
&lt;default_per_frontend glideins="4" held="4" idle="4"/&gt;
&lt;per_entry glideins="4" held="4" idle="4"/&gt;
&lt;per_frontends&gt;
@@ -13,9 +13,9 @@
&lt;submit_attrs&gt;
&lt;submit_attr name="+maxMemory" value="126500"/&gt;
&lt;submit_attr name="+maxWallTime" value="4320"/&gt;
+ &lt;submit_attr name="+queue" value="&amp;quot;gpu&amp;quot;"/&gt;
&lt;submit_attr name="+xcount" value="16"/&gt;
&lt;submit_attr name="Request_GPUs" value="1"/&gt;
- &lt;submit_attr name="batch_queue" value="gpu"/&gt;
&lt;/submit_attrs&gt;
&lt;/submit&gt;
&lt;/config&gt;
@@ -31,7 +31,7 @@
&lt;attr name="GLIDEIN_ResourceName" const="True" glidein_publish="True" job_publish="True" parameter="True" publish="True" type="string" value="CHTC-Spark-CE1"/&gt;
&lt;attr name="GLIDEIN_Resource_Slots" const="True" glidein_publish="True" job_publish="False" parameter="True" publish="True" type="string" value="GPUs,1,type=main"/&gt;
&lt;attr name="GLIDEIN_Site" const="True" glidein_publish="True" job_publish="True" parameter="True" publish="True" type="string" value="CHTC-Spark"/&gt;
- &lt;attr name="GLIDEIN_Supported_VOs" const="True" glidein_publish="False" job_publish="False" parameter="True" publish="True" type="string" value="OSGVO"/&gt;
+ &lt;attr name="GLIDEIN_Supported_VOs" const="True" glidein_publish="False" job_publish="False" parameter="True" publish="True" type="string" value="OSGVOGPU"/&gt;
&lt;/attrs&gt;
&lt;files&gt;
&lt;/files&gt;
</pre>
</blockquote>
</div>

<div class="footer">
Banner image by
<a href="http://www.flickr.com/people/leafwarbler/">Madhusudan Katti</a>
Expand Down
211 changes: 81 additions & 130 deletions factory/tools/gfdiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,173 +8,127 @@


import argparse
import base64
import hashlib
import difflib
import re
import sys
import uuid

import requests

from glideinwms.creation.lib.factoryXmlConfig import _parse, EntryElement, FactAttrElement # , parse
from glideinwms.creation.lib.xmlConfig import DictElement, ListElement

# Entries being compared. main() assigns them; the check_* helpers call getName() on them
# when reporting a key or attribute that is missing on one side.
g_entry_a = None
g_entry_b = None
# Stack of the keys visited by check_dict_diff; used to label "is different" messages.
last_key = []
# Indentation depth of the printed report; incremented/decremented by the count_tabs decorator.
tabs = 0


def count_tabs(function_to_decorate):
    """Decorator that keeps track of how many indentation levels are required.

    Every call of the decorated function (recursive calls included) increments the
    module-level ``tabs`` counter on entry and decrements it on exit, so while the
    function runs ``tabs`` equals its current nesting depth.

    Args:
        function_to_decorate: the callable to wrap

    Returns:
        The wrapping callable. It forwards all positional and keyword arguments and
        returns whatever the wrapped callable returns.
    """
    import functools  # local import so the block does not depend on the file's import section

    @functools.wraps(function_to_decorate)
    def wrapper(*args, **kw):
        global tabs
        tabs += 1
        try:
            return function_to_decorate(*args, **kw)
        finally:
            # Restore the depth even when the wrapped call raises, otherwise every
            # later line of the report would be indented one level too deep
            tabs -= 1

    return wrapper


def check_list_diff(list_a, list_b):
    """Scan the two list for differences

    Walks the children of ``list_a``, compares each one with its counterpart in ``list_b`` and
    prints what differs; then reports the attributes that exist only in ``list_b``.
    Output lines are indented using the module-level ``tabs`` counter.

    Args:
        list_a: object whose ``children`` sequence holds the elements of the first entry
            (check_dict_diff passes a ListElement here)
        list_b: the corresponding object of the second entry

    Returns:
        None. Everything is reported through print().
    """
    # Children carrying one of these tags are never compared
    SKIP_TAGS = ["infosys_ref"]
    for elem in list_a.children:
        if elem.tag in SKIP_TAGS:
            continue
        if isinstance(elem, DictElement):
            # print("\t"*tabs + "Checking %s" % elem.tag)
            # A list with more than two children is not compared at all: this return
            # leaves the whole function, including the list_b scan below
            if len(list_a.children) > 2:
                return
            # TODO what if B does not have it
            # Only the first child of each list is compared, whichever child elem is
            check_dict_diff(list_a.children[0], list_b.children[0], lambda e: list(e.children.items()))
        elif isinstance(elem, FactAttrElement):
            # print("\t"*tabs + "Checking %s" % elem['name'])
            # Attributes are paired through the value of their "name" key
            elem_b = [x for x in list_b.children if x["name"] == elem["name"]]
            if len(elem_b) == 1:
                check_dict_diff(elem, elem_b[0], FactAttrElement.items)
            elif len(elem_b) == 0:
                print("\t" * (tabs + 1) + "{}: not present in {}".format(elem["name"], g_entry_b.getName()))
            else:
                print("More than one FactAttrElement")
        else:
            print("Element type not DictElement or FactAttrElement")
    # Second pass: attributes that only the second list has
    for elem in list_b.children:
        if isinstance(elem, FactAttrElement):
            elem_a = [x for x in list_a.children if x["name"] == elem["name"]]
            if len(elem_a) == 0:
                print("\t" * (tabs + 1) + "{}: not present in {}".format(elem["name"], g_entry_a.getName()))


@count_tabs
def check_dict_diff(dict_a, dict_b, itemfunc=EntryElement.items, print_name=True):
    """Check differences between two dictionaries

    Both arguments are turned into plain dicts through ``itemfunc`` and compared key by key.
    ListElement values are handed to check_list_diff, DictElement values are compared
    recursively, any other value is compared with ``!=``. Findings are printed, indented by
    the module-level ``tabs`` counter that the count_tabs decorator maintains.

    Args:
        dict_a: first element to compare
        dict_b: second element to compare
        itemfunc: callable returning the (key, value) pairs of an element
        print_name (bool): when True and dict_a has a "name" key, that name prefixes the
            "is different" messages; otherwise the prefix is taken from ``last_key[-2]``
    """
    tmp_dict_a = dict(itemfunc(dict_a))
    tmp_dict_b = dict(itemfunc(dict_b))
    # Keys that are never reported
    SKIP_KEYS = ["name", "comment"]  # , 'gatekeeper']
    for key, val in list(tmp_dict_a.items()):
        # NOTE(review): the key is pushed before the skip test and the `continue` below
        # bypasses the pop at the end of the loop, so skipped keys stay on last_key.
        # The last_key[-2] lookup further down appears to rely on that - confirm before changing.
        last_key.append(key)
        # print("\t"*tabs + "Checking %s" % key)
        if key in SKIP_KEYS:
            continue
        if key not in tmp_dict_b:
            print("\t" * tabs + f"Key {key}({val}) not found in {g_entry_b.getName()}")
        elif isinstance(val, ListElement):
            check_list_diff(tmp_dict_a[key], tmp_dict_b[key])
        elif isinstance(val, DictElement):
            # Compare the children when the element has any, its own items otherwise
            check_dict_diff(
                tmp_dict_a[key],
                tmp_dict_b[key],
                lambda e: list(e.children.items()) if len(e.children) > 0 else list(e.items()),
            )
        elif tmp_dict_a[key] != tmp_dict_b[key]:
            keystr = tmp_dict_a["name"] + ": " if print_name and "name" in tmp_dict_a else last_key[-2] + ": "
            print("\t" * tabs + f"{keystr}Key {key} is different: ({tmp_dict_a[key]} vs {tmp_dict_b[key]})")
        last_key.pop()
    # Second pass: keys that only the second dictionary has
    for key, val in list(tmp_dict_b.items()):
        if key in SKIP_KEYS:
            continue
        if key not in tmp_dict_a:
            print("\t" * tabs + f"Key {key}({val}) not found in {g_entry_a.getName()}")
from glideinwms.creation.lib.factoryXmlConfig import _parse


def parse_opts(argv=None):
    """Parse the command line options for this command.

    Usage: ``gfdiff conf_a conf_b entry_a [entry_b] [--mergely]``

    Args:
        argv (list of str, optional): the arguments to parse. When None (the default)
            argparse reads them from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: with the attributes ``conf_a``, ``conf_b``, ``entry_a``,
        ``entry_b`` and ``mergely``. ``entry_b`` is set to ``entry_a`` when it is not
        given on the command line; ``mergely`` is None unless ``--mergely`` was passed.
    """
    description = "Do a diff of two entries\n\n"

    parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter)

    # Positional arguments. The first three are required: argparse never applies a
    # ``default`` to a positional declared without nargs, so none is declared here.
    parser.add_argument("conf_a", type=str, help="Configuration for the first entry")
    parser.add_argument("conf_b", type=str, help="Configuration for the second entry")
    parser.add_argument("entry_a", type=str, help="Name of the first entry")
    parser.add_argument(
        "entry_b",
        type=str,
        nargs="?",  # Makes this positional argument optional
        help="Name of the second entry (optional)",
    )

    # Named argument
    parser.add_argument("--mergely", action="count", help="Also print the mergely link")

    options = parser.parse_args(argv)

    # Set entry_b to entry_a if not provided
    if options.entry_b is None:
        options.entry_b = options.entry_a

    return options


def get_entry_text(entry, conf):
    """Get an entry text from the xml configuration file.

    Args:
        entry (str): value of the ``name`` attribute of the wanted ``<entry>`` element
        conf (str): path of the XML configuration file

    Returns:
        str: the text from the ``<entry name="...">`` opening tag up to and including the
        first ``</entry>`` after it, with one leading space. If the name occurs more than
        once, the last occurrence in the file is returned.

    Raises:
        ValueError: if the file contains no entry with that name
    """
    with open(conf) as fdesc:
        text = fdesc.read()
    # The name is literal text: escape it so that e.g. a "." in it cannot act as a
    # wildcard and select a different entry
    pattern = '.*( +<entry name="%s".*?</entry>)' % re.escape(entry)
    match = re.search(pattern, text, re.DOTALL)
    if match is None:
        raise ValueError(f"Cannot find entry {entry} in the configuration file {conf}")
    return match.group(1)


def handle_mergely(entry_a, conf_a, entry_b, conf_b, mergely_only):
def handle_diff(text_a, text_b):
    """Print the unified diff between two texts.

    The texts are split into lines and compared with difflib; the two sides are labelled
    "text_a" and "text_b" in the diff header. Nothing is printed when the texts are equal.

    Args:
        text_a (str): the first text
        text_b (str): the second text
    """
    rendered = list(
        difflib.unified_diff(
            text_a.splitlines(),
            text_b.splitlines(),
            fromfile="text_a",
            tofile="text_b",
            lineterm="",
        )
    )
    # No diff line holds a newline (splitlines input, empty lineterm), so one joined
    # print emits exactly one output line per diff line
    if rendered:
        print("\n".join(rendered))


def handle_mergely(text_a, text_b):
    """Function that prints the link to the mergely website.

    Sends both texts to mergely.com in a single JSON POST and prints the URL built from
    the ``location`` header of the response.

    Args:
        text_a (str): text shown on the left-hand side
        text_b (str): text shown on the right-hand side

    Raises:
        requests.RequestException: on connection problems, timeout or an HTTP error status
        RuntimeError: if the response carries no ``location`` header
    """
    url = "https://mergely.com/ajax/handle_save.php"

    payload = {"config": {}, "lhs_title": "", "lhs": text_a, "rhs_title": "", "rhs": text_b}

    # Content-Length and Accept-Encoding are left to requests: it computes the length of
    # the serialized JSON body itself (len(str(payload)) measures the Python repr, not
    # that body) and advertises only the encodings it can decode.
    headers = {
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://editor.mergely.com/",
        "content-type": "application/json; charset=utf-8",
        "Origin": "https://editor.mergely.com",
        "Connection": "keep-alive",
    }

    # Without a timeout an unresponsive server would block the tool forever
    res = requests.post(url, headers=headers, json=payload, timeout=30)
    res.raise_for_status()

    location = res.headers.get("location")
    if location is None:
        raise RuntimeError("mergely did not return the location of the saved diff")

    print("http://www.mergely.com/" + location)


def main():
"""The main"""
global g_entry_a
global g_entry_b
options = parse_opts()

entry_a = options.entry_a
entry_b = options.entry_b

# conf = parse("/etc/gwms-factory/glideinWMS.xml")
conf_a = _parse(options.conf_a)
conf_b = _parse(options.conf_b)

Expand All @@ -188,17 +142,14 @@ def main():
if len(entry_b) != 1:
print(f"Cannot find entry {options.entry_b} in the configuration file {options.conf_b}")
sys.exit(1)
g_entry_a = entry_a[0]
g_entry_b = entry_b[0]

if options.mergely:
handle_mergely(options.entry_a, options.conf_a, options.entry_b, options.conf_b, options.mergely)
return

print("Checking entry attributes:")
check_dict_diff(g_entry_a, g_entry_b, print_name=False)
print("Checking inner xml:")
check_dict_diff(g_entry_a.children, g_entry_b.children, dict.items)
text_a = get_entry_text(options.entry_a, options.conf_a)
text_b = get_entry_text(options.entry_b, options.conf_b)
handle_diff(text_a, text_b)
if options.mergely:
print()
handle_mergely(text_a, text_b)


if __name__ == "__main__":
Expand Down

0 comments on commit cbe36e4

Please sign in to comment.