diff --git a/scholarly_articles/choices.py b/scholarly_articles/choices.py index 837de2dd..10d65aea 100644 --- a/scholarly_articles/choices.py +++ b/scholarly_articles/choices.py @@ -1,32 +1,41 @@ TYPE_OF_RESOURCE = [ ("", ""), - ("Book Section", "book-section"), - ("Monograph", "monograph"), - ("Report", "report"), - ("Peer Review", "peer-review"), - ("Book Track", "book-track"), - ("Journal Article", "journal-article"), - ("Part", "book-part"), - ("Other", "other"), - ("Book", "book"), - ("Journal Volume", "journal-volume"), - ("Book Set", "book-set"), - ("Reference Entry", "reference-entry"), - ("Proceedings Article", "proceedings-article"), - ("Journal", "journal"), - ("Component", "component"), - ("Book Chapter", "book-chapter"), - ("Proceedings Series", "proceedings-series"), - ("Report Series", "report-series"), - ("Proceedings", "proceedings"), - ("Standard", "standard"), - ("Reference Book", "reference-book"), - ("Posted Content", "posted-content"), - ("Journal Issue", "journal-issue"), - ("Dissertation", "dissertation"), - ("Grant", "grant"), - ("Dataset", "dataset"), - ("Book Series", "book-series"), - ("Edited Book", "edited-book"), - ("Standard Series", "standard-series") + ("book-section", "Book Section"), + ("monograph", "Monograph"), + ("report", "Report"), + ("peer-review", "Peer Review"), + ("book-track", "Book Track"), + ("journal-article", "Journal Article"), + ("book-part", "Part"), + ("other", "Other"), + ("book", "Book"), + ("journal-volume", "Journal Volume"), + ("book-set", "Book Set"), + ("reference-entry", "Reference Entry"), + ("proceedings-article", "Proceedings Article"), + ("journal", "Journal"), + ("component", "Component"), + ("book-chapter", "Book Chapter"), + ("proceedings-series", "Proceedings Series"), + ("report-series", "Report Series"), + ("proceedings", "Proceedings"), + ("standard", "Standard"), + ("reference-book", "Reference Book"), + ("posted-content", "Posted Content"), + ("journal-issue", "Journal Issue"), + 
("dissertation", "Dissertation"), + ("grant", "Grant"), + ("dataset", "Dataset"), + ("book-series", "Book Series"), + ("edited-book", "Edited Book"), + ("standard-series", "Standard Series") +] + +OA_STATUS = [ + ("", ""), + ("gold", "Gold"), + ("hybrid", "Hybrid"), + ("bronze", "Bronze"), + ("green", "Green"), + ("closed", "Closed") ] diff --git a/scholarly_articles/models.py b/scholarly_articles/models.py index a016a6d1..afa529f0 100755 --- a/scholarly_articles/models.py +++ b/scholarly_articles/models.py @@ -4,53 +4,110 @@ from wagtail.admin.edit_handlers import FieldPanel from . import choices +from core.models import CommonControlField class ScholarlyArticles(models.Model): - doi = models.CharField("DOI", max_length=255, null=False, blank=False) - doi_url = models.URLField("DOI URL", max_length=255, null=True, blank=True) - genre = models.CharField("Resource Type", max_length=255, choices=choices.TYPE_OF_RESOURCE, null=False, blank=False) - is_oa = models.BooleanField("Opens Access", max_length=255, null=True, blank=True) - journal_is_in_doaj = models.BooleanField("DOAJ", max_length=255, null=True, blank=True) - journal_issns = models.CharField("ISSN's", max_length=255, null=False, blank=False) - journal_issn_l = models.CharField("ISSN-L", max_length=255, null=False, blank=False) - journal_name = models.CharField("Journal Name", max_length=255, null=True, blank=True) - published_date = models.DateTimeField("Published Date", max_length=255, null=True, blank=True) - publisher = models.CharField("Publisher", max_length=255, null=True, blank=True) - title = models.CharField("Title", max_length=255, null=True, blank=True) - article_json = models.JSONField("JSON File", null=True, blank=True) + doi = models.CharField(_("DOI"), max_length=255, null=True, blank=True) + year = models.CharField(_("Year"), max_length=4, null=True, blank=True) + contributors = models.ManyToManyField('Contributors', verbose_name=_("Contributors"), blank=True) + journal = models.ForeignKey('Journals', 
on_delete=models.SET_NULL, max_length=255, null=True, blank=True) + + def __unicode__(self): + return self.doi + + def __str__(self): + return self.doi panels = [ FieldPanel('doi'), - FieldPanel('doi_url'), - FieldPanel('genre'), - FieldPanel('is_oa'), - FieldPanel('journal_is_in_doaj'), - FieldPanel('journal_issns'), + FieldPanel('year'), + FieldPanel('contributors'), + FieldPanel('journal'), + ] + + +class Journals(models.Model): + journal_issn_l = models.CharField(_("ISSN-L"), max_length=255, null=True, blank=True) + journal_issns = models.CharField(_("ISSN's"), max_length=255, null=True, blank=True) + journal_name = models.CharField(_("Journal Name"), max_length=255, null=True, blank=True) + publisher = models.CharField(_("Publisher"), max_length=255, null=True, blank=True) + journal_is_in_doaj = models.BooleanField(_("DOAJ"), max_length=255, default=False, null=True, blank=True) + + def __unicode__(self): + return self.journal_issn_l + + def __str__(self): + return self.journal_issn_l + + panels = [ FieldPanel('journal_issn_l'), + FieldPanel('journal_issns'), FieldPanel('journal_name'), - FieldPanel('published_date'), FieldPanel('publisher'), - FieldPanel('title'), - FieldPanel('article_json'), + FieldPanel('journal_is_in_doaj'), ] class Contributors(models.Model): - doi = models.CharField("DOI", max_length=255, null=False, blank=False) - doi_url = models.URLField("DOI URL", max_length=255, null=True, blank=True) - family = models.CharField("Family", max_length=255, null=False, blank=False) - given = models.CharField("Given", max_length=255, null=False, blank=False) - orcid = models.URLField("ORCID", max_length=255, null=False, blank=False) - authenticated_orcid = models.BooleanField("Authenticated", max_length=255, null=False, blank=False) - affiliation = models.CharField("Affiliation", max_length=255, null=False, blank=False) + family = models.CharField(_("Family Name"), max_length=255, null=True, blank=True) + given = models.CharField(_("Given Name"), 
max_length=255, null=True, blank=True) + orcid = models.CharField("ORCID", max_length=255, null=True, blank=True) + authenticated_orcid = models.BooleanField(_("Authenticated"), default=False, null=True, blank=True) + affiliation = models.ForeignKey('Affiliations', verbose_name=_("Affiliations"), on_delete=models.SET_NULL, max_length=255, null=True, blank=True) + + def __unicode__(self): + return f"{self.family}, {self.given} ({self.orcid})" + + def __str__(self): + return f"{self.family}, {self.given} ({self.orcid})" panels = [ - FieldPanel('doi'), - FieldPanel('doi_url'), FieldPanel('family'), FieldPanel('given'), FieldPanel('orcid'), FieldPanel('authenticated_orcid'), FieldPanel('affiliation'), ] + + +class Affiliations(models.Model): + name = models.CharField(_("Affiliation Name"), max_length=255, null=True, blank=True) + + def __unicode__(self): + return self.name + + def __str__(self): + return self.name + + panels = [ + FieldPanel('name'), + ] + + +class RawUnpaywall(models.Model): + doi = models.CharField(_("DOI"), max_length=255, null=False, blank=False) + harvesting_creation = models.CharField(_("Harvesting date"), max_length=255, null=False, blank=False) + is_paratext = models.BooleanField(_("Paratext"), default=False, null=True, blank=True) + year = models.CharField(_("Year"), max_length=255, null=True, blank=True) + # unpaywall genre + resource_type = models.CharField(_("Resource Type"), max_length=255, choices=choices.TYPE_OF_RESOURCE, null=False, + blank=True) + update = models.CharField(_("Update"), max_length=255, null=True, blank=True) + json = models.JSONField(_("JSON File"), null=True, blank=True) + + def __unicode__(self): + return self.doi + + def __str__(self): + return self.doi + + panels = [ + FieldPanel('doi'), + FieldPanel('harvesting_creation'), + FieldPanel('is_paratext'), + FieldPanel('year'), + FieldPanel('resource_type'), + FieldPanel('update'), + FieldPanel('json'), + ] diff --git a/scholarly_articles/scripts/__init__.py b/scholarly_articles/scripts/__init__.py 
new file mode 100644 index 00000000..e69de29b diff --git a/scholarly_articles/scripts/examples.json b/scholarly_articles/scripts/examples.json new file mode 100644 index 00000000..cd70629c --- /dev/null +++ b/scholarly_articles/scripts/examples.json @@ -0,0 +1,3 @@ +{"doi": "10.1002/psp.2296", "year": 2020, "genre": "journal-article", "is_oa": false, "title": "Trap or opportunity—What role does geography play in the use of cash for childcare?", "doi_url": "https://doi.org/10.1002/psp.2296", "updated": "2021-04-02T00:47:21.884997", "oa_status": "closed", "publisher": "Wiley", "z_authors": [{"ORCID": "http://orcid.org/0000-0002-4877-2961", "given": "Lena", "family": "Magnusson Turner", "sequence": "first", "affiliation": [{"name": "Norwegian Social Research Oslo Metropolitan University Oslo Norway"}], "authenticated-orcid": false}, {"ORCID": "http://orcid.org/0000-0002-4536-9229", "given": "John", "family": "Östh", "sequence": "additional", "affiliation": [{"name": "Department of Social and Economic Geography Uppsala University Uppsala Sweden"}], "authenticated-orcid": false}], "is_paratext": true, "journal_name": "Population, Space and Place", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1544-8444,1544-8452", "journal_issn_l": "1544-8444", "published_date": "2020-01-07", "best_oa_location": null, "first_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false, "oa_locations_embargoed": []} +{"doi": "10.1093/dote/doab014", "year": 2021, "genre": "journal-article", "is_oa": false, "title": "Population trends in achalasia diagnosis and management: a changing paradigm", "doi_url": "https://doi.org/10.1093/dote/doab014", "updated": "2021-03-17T21:05:16.450507", "oa_status": "closed", "publisher": "Oxford University Press (OUP)", "z_authors": [{"given": "Judy A", "family": "Trieu", "sequence": "first", "affiliation": [{"name": "Division of Gastroenterology and Nutrition, Loyola University Medical Center, Maywood, 
IL, USA"}]}, {"ORCID": "http://orcid.org/0000-0002-1870-9881", "given": "Arshish", "family": "Dua", "sequence": "additional", "affiliation": [{"name": "Division of Gastroenterology and Nutrition, Loyola University Medical Center, Maywood, IL, USA"}], "authenticated-orcid": false}, {"given": "Ikponmwosa", "family": "Enofe", "sequence": "additional", "affiliation": [{"name": "Division of Gastroenterology and Nutrition, Loyola University Medical Center, Maywood, IL, USA"}]}, {"given": "Nikhil", "family": "Shastri", "sequence": "additional", "affiliation": [{"name": "Division of Gastroenterology and Nutrition, Loyola University Medical Center, Maywood, IL, USA"}]}, {"given": "Mukund", "family": "Venu", "sequence": "additional", "affiliation": [{"name": "Division of Gastroenterology and Nutrition, Loyola University Medical Center, Maywood, IL, USA"}]}], "is_paratext": false, "journal_name": "Diseases of the Esophagus", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "1120-8694,1442-2050", "journal_issn_l": "1120-8694", "published_date": "2021-03-17", "best_oa_location": null, "first_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false, "oa_locations_embargoed": []} +{"doi": "10.1080/01430750.2019.1708791", "year": 2020, "genre": "journal-article", "is_oa": false, "title": "Techno-economic study of hybrid renewable energy system of Metropolitan Cities in India", "doi_url": "https://doi.org/10.1080/01430750.2019.1708791", "updated": "2021-03-30T01:23:35.505209", "oa_status": "closed", "publisher": "Informa UK Limited", "z_authors": [{"ORCID": "http://orcid.org/0000-0001-6059-6326", "given": "Balachander", "family": "Kalappan", "sequence": "first", "affiliation": [{"name": "Department of Electrical and Electronics Engineering, Faculty of Engineering, Karpagam Academy of Higher Education, Coimbatore, India"}], "authenticated-orcid": false}, {"ORCID": "http://orcid.org/0000-0001-5138-716X", "given": "A.", "family": 
"Amudha", "sequence": "additional", "affiliation": [{"name": "Department of Electrical and Electronics Engineering, Faculty of Engineering, Karpagam Academy of Higher Education, Coimbatore, India"}], "authenticated-orcid": false}, {"ORCID": "http://orcid.org/0000-0003-1370-1140", "given": "K.", "family": "Keerthivasan", "sequence": "additional", "affiliation": [{"name": "Higher College of Technology, Muscat, Oman"}], "authenticated-orcid": false}], "is_paratext": false, "journal_name": "International Journal of Ambient Energy", "oa_locations": [], "data_standard": 2, "journal_is_oa": false, "journal_issns": "0143-0750,2162-8246", "journal_issn_l": "0143-0750", "published_date": "2020-01-06", "best_oa_location": null, "first_oa_location": null, "journal_is_in_doaj": false, "has_repository_copy": false, "oa_locations_embargoed": []} \ No newline at end of file diff --git a/scholarly_articles/scripts/load_data.py b/scholarly_articles/scripts/load_data.py new file mode 100644 index 00000000..5c717795 --- /dev/null +++ b/scholarly_articles/scripts/load_data.py @@ -0,0 +1,100 @@ +from scholarly_articles import models + + +def get_params(row, attribs): + params = {} + for att in attribs: + if row.get(att): + params[att] = row.get(att) + return params + + +def load_article(row): + articles = models.ScholarlyArticles.objects.filter(doi=row.get('doi')) + try: + article = articles[0] + except IndexError: + article = models.ScholarlyArticles() + article.doi = row.get('doi') + article.year = row.get('year') + article.journal = load_journal(row) + article.save() + for author in row['z_authors']: + contributor = get_one_contributor(author) + article.contributors.add(contributor) + article.save() + return article + + +def load_journal(row): + attribs = ['journal_issns', 'journal_issn_l', 'journal_name'] + params = get_params(row, attribs) + + journals = models.Journals.objects.filter(**params) + try: + journal = journals[0] + except IndexError: + journal = models.Journals() + 
journal.journal_is_in_doaj = row.get('journal_is_in_doaj') + journal.journal_issns = row.get('journal_issns') + journal.journal_issn_l = row.get('journal_issn_l') + journal.journal_name = row.get('journal_name') + journal.publisher = row.get('publisher') + journal.save() + return journal + + +def get_one_contributor(author): + attribs = ['family', 'given'] + params = get_params(author, attribs) + if author.get('ORCID'): + params['orcid'] = author.get('ORCID') + elif author.get('affiliation'): + try: + aff = models.Affiliations.objects.filter(name=author.get('affiliation')[0].get('name')) + params['affiliation'] = aff[0] + except IndexError: + pass + + contributors = models.Contributors.objects.filter(**params) + try: + contributor = contributors[0] + except IndexError: + contributor = models.Contributors() + contributor.family = author.get('family') + contributor.given = author.get('given') + contributor.orcid = author.get('ORCID') + contributor.authenticated_orcid = author.get('authenticated-orcid') + if author.get('affiliation'): + try: + aff = load_affiliation(author['affiliation'][0]['name']) + contributor.affiliation = aff + except KeyError: + pass + contributor.save() + return contributor + + +def load_affiliation(affiliation_name): + if affiliation_name: + affiliations = models.Affiliations.objects.filter(name=affiliation_name) + try: + affiliation = affiliations[0] + except IndexError: + affiliation = models.Affiliations() + if affiliation_name: + affiliation.name = affiliation_name + affiliation.save() + return affiliation + + +def run(from_year=1900, resource_type='journal-article'): + #pagination + rawunpaywall = models.RawUnpaywall.objects.filter(year__gte=from_year, resource_type=resource_type) + for item in rawunpaywall: + if not item.is_paratext: + load_article(item.json) + + +if __name__ == '__main__': + run() diff --git a/scholarly_articles/scripts/load_raw_unpaywall.py b/scholarly_articles/scripts/load_raw_unpaywall.py new file mode 100644 index 
00000000..a67304e5 --- /dev/null +++ b/scholarly_articles/scripts/load_raw_unpaywall.py @@ -0,0 +1,34 @@ +from scholarly_articles import models + +import json +from datetime import date + + +def load(row): + try: + if row.get('doi'): + rawunpaywall = models.RawUnpaywall.objects.filter(doi=row['doi']) + if len(rawunpaywall) == 0: + rawunpaywall = models.RawUnpaywall() + rawunpaywall.doi = row['doi'] + rawunpaywall.harvesting_creation = date.today() + else: + return + + rawunpaywall.is_paratext = row.get('is_paratext') + rawunpaywall.year = row.get('year') + rawunpaywall.resource_type = row.get('genre') + try: + rawunpaywall.update = row.get('updated')[:10] + except TypeError: + pass + rawunpaywall.json = row + rawunpaywall.save() + except KeyError: + pass + + +def run(): + data = (list(json.loads(x) for x in open('scholarly_articles/scripts/examples.json'))) + for row in data: + load(row) diff --git a/scholarly_articles/wagtail_hooks.py b/scholarly_articles/wagtail_hooks.py index 0ccadaeb..39db5a70 100644 --- a/scholarly_articles/wagtail_hooks.py +++ b/scholarly_articles/wagtail_hooks.py @@ -1,54 +1,111 @@ from django.utils.translation import gettext as _ -from wagtail.contrib.modeladmin.options import (ModelAdmin, modeladmin_register) +from wagtail.contrib.modeladmin.options import (ModelAdmin, modeladmin_register, ModelAdminGroup) -from .models import (ScholarlyArticles, Contributors) +from .models import (ScholarlyArticles, Contributors, Affiliations, Journals, RawUnpaywall) class ScholarlyArticlesAdmin(ModelAdmin): model = ScholarlyArticles - menu_label = 'Scholarly Articles' # ditch this to use verbose_name_plural from model - menu_icon = 'folder' # change as required - menu_order = 200 # will put in 3rd place (000 being 1st, 100 2nd) + menu_label = _('Scholarly Articles') # ditch this to use verbose_name_plural from model + menu_icon = 'folder-open-inverse' # change as required + #menu_order = 100 # will put in 3rd place (000 being 1st, 100 2nd) 
add_to_settings_menu = False # or True to add your model to the Settings sub-menu exclude_from_explorer = False # or True to exclude pages of this type from Wagtail's explorer view + + def all_contributors(self, obj): + return " | ".join([str(c) for c in obj.contributors.all()]) + list_display = ( 'doi', - 'doi_url', - 'genre', - 'is_oa', - 'journal_is_in_doaj', - 'journal_issns', + 'year', + 'all_contributors', + 'journal', + ) + + list_filter = ('year',) + search_fields = ('doi',) + + +class RawUnpaywallAdmin(ModelAdmin): + model = RawUnpaywall + menu_label = _('RawUnpaywall') # ditch this to use verbose_name_plural from model + menu_icon = 'folder-open-inverse' # change as required + #menu_order = 100 # will put in 3rd place (000 being 1st, 100 2nd) + add_to_settings_menu = False # or True to add your model to the Settings sub-menu + exclude_from_explorer = False # or True to exclude pages of this type from Wagtail's explorer view + + list_display = ( + 'doi', + 'harvesting_creation', + 'is_paratext', + 'year', + 'resource_type', + 'update', + 'json', + ) + + list_filter = ('year',) + search_fields = ('doi',) + + +class JournalsAdmin(ModelAdmin): + model = Journals + menu_label = _('Journals') # ditch this to use verbose_name_plural from model + menu_icon = 'folder-open-inverse' # change as required + #menu_order = 000 # will put in 3rd place (000 being 1st, 100 2nd) + add_to_settings_menu = False # or True to add your model to the Settings sub-menu + exclude_from_explorer = False # or True to exclude pages of this type from Wagtail's explorer view + + list_display = ( 'journal_issn_l', + 'journal_issns', 'journal_name', - 'published_date', 'publisher', - 'title', - 'article_json', + 'journal_is_in_doaj', ) - list_filter = ('journal_issn_l',) - search_fields = ('doi', 'journal_issn_l') + + list_filter = ('journal_issn_l',) + search_fields = ('journal_issn_l',) class ContributorsAdmin(ModelAdmin): model = Contributors - menu_label = 'Contributors' - 
menu_icon = 'folder' - menu_order = 300 + menu_label = _('Contributors') + menu_icon = 'folder-open-inverse' + #menu_order = 200 add_to_settings_menu = False # or True to add your model to the Settings sub-menu exclude_from_explorer = False # or True to exclude pages of this type from Wagtail's explorer view + list_display = ( - 'doi', - 'doi_url', 'family', 'given', 'orcid', 'authenticated_orcid', 'affiliation', ) - list_filter = ('orcid',) - search_fields = ('doi', 'orcid') + list_filter = ('affiliation',) + search_fields = ('orcid',) + + +class AffiliationsAdmin(ModelAdmin): + model = Affiliations + menu_label = _('Affiliations') + menu_icon = 'folder-open-inverse' + add_to_settings_menu = False # or True to add your model to the Settings sub-menu + exclude_from_explorer = False # or True to exclude pages of this type from Wagtail's explorer view + list_display = ( + 'name', + ) + list_filter = ('name',) + search_fields = ('name',) + + +class ScholarlyArticlesAdminGroup(ModelAdminGroup): + menu_label = _('Articles Directory') + menu_icon = 'folder-open-inverse' # change as required + menu_order = 200 # will put in 3rd place (000 being 1st, 100 2nd) + items = (JournalsAdmin, ScholarlyArticlesAdmin, ContributorsAdmin, AffiliationsAdmin, RawUnpaywallAdmin,) -modeladmin_register(ScholarlyArticlesAdmin) -modeladmin_register(ContributorsAdmin) +modeladmin_register(ScholarlyArticlesAdminGroup)