diff --git a/packtools/sps/formats/pmc.py b/packtools/sps/formats/pmc.py index 7a1374cf1..a7fb4701d 100644 --- a/packtools/sps/formats/pmc.py +++ b/packtools/sps/formats/pmc.py @@ -43,14 +43,29 @@ def xml_pmc_aff(xml_tree): """ affs = xml_tree.findall(".//aff") for aff in affs: - aff_institution = aff.find("./institution[@content-type='original']").text + original_institution = aff.find("./institution[@content-type='original']") + if original_institution is not None: + aff_institution = original_institution.text + else: + aff_with_address = [] + aff_with_address.append(aff.find("./institution[@content-type='orgname']").text) + + addr_line = aff.find("./addr-line") + if addr_line is not None: + named_contents = addr_line.xpath(".//named-content | .//state | .//city ") + aff_with_address.extend([named_content.text for named_content in named_contents]) + + country = aff.find("./country") + if country is not None: + aff_with_address.append(country.text) + aff_institution = ", ".join(aff_with_address) + for institution in aff.findall(".//institution"): aff.remove(institution) - aff.remove(aff.find("./addr-line")) - - aff.remove(aff.find("./country")) + for element in [aff.find("./addr-line"), aff.find("./country")]: + aff.remove(element) node_label = aff.find("./label") @@ -154,3 +169,4 @@ def xml_pmc_ref(xml_tree): refs = xml_tree.findall(".//ref") for ref in refs: ref.remove(ref.find("./mixed-citation")) + diff --git a/tests/sps/formats/test_pmc.py b/tests/sps/formats/test_pmc.py index 2db4c7167..8c86b182c 100644 --- a/tests/sps/formats/test_pmc.py +++ b/tests/sps/formats/test_pmc.py @@ -492,6 +492,148 @@ def test_xml_pmc_ref(self): self.assertEqual(obtained, expected) + def test_xml_pmc_without_original(self): + self.maxDiff = None + expected = ( + '
' + '' + '' + 'ZwzqmpTpbhTmtwR9GfDzP7c' + 'S0080-62342022000100445' + '10.1590/1980-220X-REEUSP-2021-0569en' + '00445' + '' + '' + '0000-0003-0843-6485' + '' + 'Boni' + 'Fernanda Guarilha' + '' + '' + '1' + '' + '' + '' + '' + 'Universidade Federal do Rio Grande do Sul, Porto Alegre, RS, Brazil' + '' + '' + '' + '
' + + ) + xml_tree = ET.fromstring( + '
' + '' + '' + 'ZwzqmpTpbhTmtwR9GfDzP7c' + 'S0080-62342022000100445' + '10.1590/1980-220X-REEUSP-2021-0569en' + '00445' + '' + '' + '0000-0003-0843-6485' + '' + 'Boni' + 'Fernanda Guarilha' + '' + '' + '1' + '' + '' + '' + '' + 'Universidade Federal do Rio Grande do Sul' + 'Escola de Enfermagem' + 'Programa de Pós-Graduação em Enfermagem' + '' + 'Porto Alegre' + 'RS' + '' + 'Brazil' + '' + '' + '' + '
' + ) + + xml_pmc_aff(xml_tree) + + obtained = ET.tostring(xml_tree, encoding="utf-8").decode("utf-8") + + self.assertEqual(obtained, expected) + + def test_xml_pmc_with_state_and_city(self): + self.maxDiff = None + expected = ( + '
' + '' + '' + 'ZwzqmpTpbhTmtwR9GfDzP7c' + 'S0080-62342022000100445' + '10.1590/1980-220X-REEUSP-2021-0569en' + '00445' + '' + '' + '0000-0003-0843-6485' + '' + 'Boni' + 'Fernanda Guarilha' + '' + '' + '1' + '' + '' + '' + '' + 'Fundação Oswaldo Cruz, Manguinhos, RJ, Brasil' + '' + '' + '' + '
' + + ) + xml_tree = ET.fromstring( + '
' + '' + '' + 'ZwzqmpTpbhTmtwR9GfDzP7c' + 'S0080-62342022000100445' + '10.1590/1980-220X-REEUSP-2021-0569en' + '00445' + '' + '' + '0000-0003-0843-6485' + '' + 'Boni' + 'Fernanda Guarilha' + '' + '' + '1' + '' + '' + '' + '' + 'Fundação Oswaldo Cruz' + 'Escola Nacional de Saúde Pública Sérgio Arouca' + 'Centro de Estudos da Saúde do Trabalhador e Ecologia Humana' + '' + 'Manguinhos' + 'RJ' + '' + 'Brasil' + '' + '' + '' + '
' + ) + + xml_pmc_aff(xml_tree) + + obtained = ET.tostring(xml_tree, encoding="utf-8").decode("utf-8") + + self.assertEqual(obtained, expected) + if __name__ == '__main__': unittest.main()