Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Permite diferenciar o error_level da validação de correspondência entre elem e xref e melhora os textos da validação #744

24 changes: 21 additions & 3 deletions packtools/sps/models/v2/article_xref.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,19 @@
from packtools.sps.utils.xml_utils import put_parent_context, tostring


# https://jats.nlm.nih.gov/publishing/tag-library/1.4/attribute/ref-type.html
# Maps an xref/@ref-type value to the element name used for it when the two
# differ; values not listed here are used as the element name unchanged.
ELEMENT_NAME = {
    "table": "table-wrap",
    "bibr": "ref",
}


def get_element_name(ref_type):
    """Return the element name associated with an ``xref/@ref-type`` value.

    Parameters
    ----------
    ref_type : str or None
        The ``ref-type`` value read from an ``<xref>``.

    Returns
    -------
    str or None
        The mapped name when ``ref_type`` is a key of ``ELEMENT_NAME``;
        otherwise ``ref_type`` itself, unchanged (``None`` stays ``None``).
    """
    # dict.get with the key as its own default replaces the former
    # try/except KeyError fallback with identical results.
    return ELEMENT_NAME.get(ref_type, ref_type)


class Xref:
"""<xref ref-type="aff" rid="aff1">1</xref>"""

Expand All @@ -18,6 +31,7 @@ def data(self):
"ref-type": self.xref_type,
"rid": self.xref_rid,
"text": self.xref_text,
"element_name": get_element_name(self.xref_type)
}


Expand All @@ -31,7 +45,12 @@ def __init__(self, node):
self.str_main_tag = f'<{self.node_tag} id="{self.node_id}">'

def xml(self, doctype=None, pretty_print=True, xml_declaration=True):
    """Serialize ``self.node`` by delegating to ``tostring``.

    Parameters
    ----------
    doctype, pretty_print, xml_declaration
        Forwarded unchanged, by keyword, to ``tostring``.

    Returns
    -------
    The value returned by ``tostring`` for ``self.node``.
    """
    # A single return: the previous one-line form and its reformatted copy
    # were both present, leaving the second one unreachable.
    return tostring(
        node=self.node,
        doctype=doctype,
        pretty_print=pretty_print,
        xml_declaration=xml_declaration,
    )

def __str__(self):
    """Return ``tostring(self.node)``, i.e. the node serialized with ``tostring``'s defaults."""
    serialized = tostring(self.node)
    return serialized
Expand Down Expand Up @@ -80,7 +99,7 @@ def all_ids(self, element_name):
for item in itertools.chain(
self.article_ids(element_name),
self.sub_article_translation_ids(element_name),
self.sub_article_non_translation_ids(element_name)
self.sub_article_non_translation_ids(element_name),
):
id = item.get("id")
response.setdefault(id, [])
Expand All @@ -106,4 +125,3 @@ def sub_article_translation_ids(self, element_name):
def sub_article_non_translation_ids(self, element_name):
    """Yield the id items of ``element_name`` found in non-translation sub-articles.

    Every node matched by the XPath
    ``.//sub-article[@article-type!='translation']`` on ``self.xml_tree`` is
    wrapped in ``Ids`` and the items of its ``ids(element_name)`` are yielded
    one by one.
    """
    non_translations = self.xml_tree.xpath(
        ".//sub-article[@article-type!='translation']"
    )
    for sub_article in non_translations:
        for item in Ids(sub_article).ids(element_name):
            yield item

164 changes: 79 additions & 85 deletions packtools/sps/validation/article_xref.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,100 +7,112 @@ def __init__(self, xml_tree):
self.xml_tree = xml_tree
self.article_xref = ArticleXref(xml_tree)

def validate_xref_rid_has_corresponding_element_id(self, error_level="ERROR"):
    """
    Check that every ``rid`` used by an ``<xref>`` has an element with the same ``id``.

    The ids are collected with ``self.article_xref.all_ids("*")`` and each
    entry returned by ``self.article_xref.all_xref_rids()`` is tested for
    membership in that collection.

    Parameters
    ----------
    error_level : str, optional
        Value forwarded to ``format_response`` as ``error_level``
        (default is "ERROR").

    Yields
    ------
    object
        One ``format_response(...)`` result per ``<xref>`` entry, built with
        ``expected=rid``, ``obtained=rid`` when a matching id exists (else
        ``None``) and the xref data as ``data``.
        NOTE(review): earlier documentation of this method described the
        result as a dict with keys such as ``title``, ``response``,
        ``expected_value``, ``got_value``, ``advice`` and ``data`` -- confirm
        against ``format_response``.
    """
    ids = self.article_xref.all_ids("*")
    # The same article-level context is reported for every xref.
    article_type = self.xml_tree.get("article-type")
    article_lang = self.xml_tree.get(
        "{http://www.w3.org/XML/1998/namespace}lang"
    )
    for rid, rid_list in self.article_xref.all_xref_rids().items():
        # The membership test depends only on rid, not on the single xref.
        is_valid = rid in ids
        for xref in rid_list:
            # Element name derived from the xref's ref-type; used only to
            # make the advice text point at the element expected to exist.
            element_name = xref.get("element_name")
            yield format_response(
                title="xref[@rid] -> *[@id]",
                parent="article",
                parent_id=None,
                parent_article_type=article_type,
                parent_lang=article_lang,
                item="xref",
                sub_item="@rid",
                validation_type="match",
                is_valid=is_valid,
                expected=rid,
                obtained=rid if is_valid else None,
                advice=f'Check if xref[@rid="{rid}"] is correct or insert the missing {element_name}[@id="{rid}"]',
                data=xref,
                error_level=error_level,
            )

def validate_id(self, element_name=None, error_level="ERROR"):
def validate_element_id_has_corresponding_xref_rid(self, elements_requires_xref_rid=None, error_level="ERROR"):
"""
Checks if all `id` attributes (destination) in the XML document have corresponding `rid` attributes (source)
in `<xref>` elements.
Checks if all `id` attributes (destination) in the XML document have corresponding `rid` attributes (source)
in `<xref>` elements.

Parameters
----------
element_name : str
The name of the element to be validated.
error_level : str, optional
The level of error reporting (default is "ERROR").
Parameters
----------
element_name : str
The name of the element to be validated.
error_level : str, optional
The level of error reporting (default is "ERROR").

Yields
------
dict
A dictionary containing the following keys:
- title (str): The title of the validation.
- xpath (str): The XPath query used to locate the elements being validated.
- validation_type (str): The type of validation being performed (e.g., "match").
- response (str): The result of the validation ("OK" or "ERROR").
- expected_value (str): The expected `id` value.
- got_value (str or None): The actual value found or `None` if not found.
- message (str): A message explaining the result of the validation.
- advice (str): A recommendation or advice based on the validation result.
- error_level (str): The level of error reporting.
- data (dict): Additional data related to the validation context, which includes:
- parent (str): The parent element's tag.
- parent_id (str or None): The `id` of the parent element, if available.
- parent_article_type (str): The type of the article (e.g., "research-article").
- parent_lang (str): The language of the parent element.
- tag (str): The tag of the element being validated.
- attributes (dict): A dictionary of the element's attributes.
Yields
------
dict
A dictionary containing the following keys:
- title (str): The title of the validation.
- xpath (str): The XPath query used to locate the elements being validated.
- validation_type (str): The type of validation being performed (e.g., "match").
- response (str): The result of the validation ("OK" or "ERROR").
- expected_value (str): The expected `id` value.
- got_value (str or None): The actual value found or `None` if not found.
- message (str): A message explaining the result of the validation.
- advice (str): A recommendation or advice based on the validation result.
- error_level (str): The level of error reporting.
- data (dict): Additional data related to the validation context, which includes:
- parent (str): The parent element's tag.
- parent_id (str or None): The `id` of the parent element, if available.
- parent_article_type (str): The type of the article (e.g., "research-article").
- parent_lang (str): The language of the parent element.
- tag (str): The tag of the element being validated.
- attributes (dict): A dictionary of the element's attributes.
"""
elements_requires_xref_rid = elements_requires_xref_rid or []
default_error_level = error_level
rids = self.article_xref.all_xref_rids()
for id, id_list in self.article_xref.all_ids(element_name).items():
for id, id_list in self.article_xref.all_ids("*").items():
for id_data in id_list:
tag = id_data.get("tag")
if tag in elements_requires_xref_rid:
error_level = "CRITICAL"
expectation = "must"
else:
error_level = default_error_level
expectation = "can"
is_valid = id in rids
yield format_response(
title="element id attribute validation",
title="*[@id] -> xref[@rid]",
parent=id_data.get("parent"),
parent_id=id_data.get("parent_id"),
parent_article_type=id_data.get("parent_article_type"),
Expand All @@ -111,25 +123,7 @@ def validate_id(self, element_name=None, error_level="ERROR"):
is_valid=is_valid,
expected=id,
obtained=id if is_valid else None,
advice='For each @id="{}" must have at least one corresponding element which xref[@rid="{}"]'.format(id, id),
advice=f'Check if {tag}[@id="{id}"] is correct or insert the missing xref[@rid="{id}"]',
data=id_data,
error_level=error_level,
)

def validate(self, data):
    """
    Run the id/rid correspondence validations of this class.

    Parameters
    ----------
    data
        Not used by this method.

    Returns
    -------
    dict
        ``article_xref_id_validation`` holds the result of
        ``validate_element_id_has_corresponding_xref_rid()`` and
        ``article_xref_rid_validation`` the result of
        ``validate_xref_rid_has_corresponding_element_id()``. Both methods
        are generator functions, so the values are generators that are
        consumed lazily, not lists.
    """
    # Call the validations by their current names; ``validate_id`` and
    # ``validate_rid`` are the names being retired by the rename.
    return {
        "article_xref_id_validation": self.validate_element_id_has_corresponding_xref_rid(),
        "article_xref_rid_validation": self.validate_xref_rid_has_corresponding_element_id(),
    }
13 changes: 5 additions & 8 deletions packtools/sps/validation/xml_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,18 +274,15 @@ def validate_id_and_rid_match(xmltree, params):
id_and_rid_match_rules = params["id_and_rid_match_rules"]

validator = ArticleXrefValidation(xmltree)
yield from validator.validate_rid(
yield from validator.validate_xref_rid_has_corresponding_element_id(
error_level=id_and_rid_match_rules["required_id_error_level"]
)
yield from validator.validate_id(

yield from validator.validate_element_id_has_corresponding_xref_rid(
id_and_rid_match_rules["elements_required_rid"],
error_level=id_and_rid_match_rules["required_rid_error_level"]
)

# for element_name in id_and_rid_match_rules["required_rid_elements"]:
# yield from validator.validate_id(
# element_name=element_name,
# error_level=id_and_rid_match_rules["required_rid_error_level"]
# )



def validate_article_dates(xmltree, params):
Expand Down
4 changes: 2 additions & 2 deletions packtools/sps/validation_rules/id_and_rid_match_rules.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"id_and_rid_match_rules": {
"error_level": "CRITICAL",
"required_id_error_level": "CRITICAL",
"required_rid_error_level": "ERROR"
"required_rid_error_level": "ERROR",
"elements_required_rid": ["ref", "table-wrap", "fig", "disp-formula", "aff", "corresp"]
}
}
Loading