From 70d226764051e2b5b6fa76ae5ad153d59031cbc4 Mon Sep 17 00:00:00 2001 From: jesusbagpuss Date: Tue, 30 Jan 2024 12:56:18 +0000 Subject: [PATCH 1/3] Update links to v4.5 schema/docs --- cfg/cfg.d/z_datacitedoi.pl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cfg/cfg.d/z_datacitedoi.pl b/cfg/cfg.d/z_datacitedoi.pl index 99409bc..8ccd899 100644 --- a/cfg/cfg.d/z_datacitedoi.pl +++ b/cfg/cfg.d/z_datacitedoi.pl @@ -61,11 +61,13 @@ $c->{datacitedoi}{xmlns} = "http://datacite.org/schema/kernel-4"; # Try this instead: # $c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." ".$c->{datacitedoi}{xmlns}."/metadata.xsd"; -$c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." https://schema.datacite.org/meta/kernel-4.4/metadata.xsd"; +$c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." https://schema.datacite.org/meta/kernel-4.5/metadata.xsd"; -# Need to map eprint type (article, dataset etc) to DOI ResourceType -# Controlled list https://schema.datacite.org/meta/kernel-4.4/doc/DataCite-MetadataKernel_v4.4.pdf -# where v is the ResourceType and a is the resourceTypeGeneral +# Need to map eprint type (article, dataset etc) to DOI ResourceType. +# where 'v' is the (free-text) ResourceType and 'a' is the (controlled) resourceTypeGeneral detailed below as 'resourceType'. 
+# Controlled list: +# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/resourcetype/ +# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-resourceType-v4.xsd $c->{datacitedoi}{typemap}{article} = {v=>'Article',a=>'JournalArticle'}; $c->{datacitedoi}{typemap}{book_section} = {v=>'Book Section',a=>'BookChapter'}; $c->{datacitedoi}{typemap}{monograph} = {v=>'Monograph',a=>'Text'}; @@ -89,7 +91,9 @@ $c->{datacitedoi}{typemap}{collection} = {v=>'Collection',a=>'Collection'}; # Need to map contributor type to DOI contributorType -# Controlled list https://schema.datacite.org/meta/kernel-4.3/doc/DataCite-MetadataKernel_v4.3.pdf +# Controlled list: +# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/appendices/appendix-1/contributorType/ +# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-contributorType-v4.xsd $c->{datacitedoi}{contributormap}{'http://www.loc.gov/loc.terms/relators/MDC'} = 'ContactPerson'; $c->{datacitedoi}{contributormap}{'http://www.loc.gov/loc.terms/relators/PRC'} = 'ContactPerson'; $c->{datacitedoi}{contributormap}{'http://www.loc.gov/loc.terms/relators/COL'} = 'DataCollector'; @@ -107,7 +111,9 @@ $c->{datacitedoi}{contributormap}{'http://www.loc.gov/loc.terms/relators/SPN'} = 'Sponsor'; # Need to map dates date type to DOI dateType -# Controlled list https://schema.datacite.org/meta/kernel-4.3/doc/DataCite-MetadataKernel_v4.3.pdf +# Controlled list: +# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/appendices/appendix-1/dateType/ +# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-dateType-v4.xsd $c->{datacitedoi}{datemap}{accepted} = 'Accepted'; $c->{datacitedoi}{datemap}{submitted} = 'Submitted'; From 29093f24c4ff7c0fb45d300a7c9b1dd8929aa6a4 Mon Sep 17 00:00:00 2001 From: jesusbagpuss Date: Tue, 30 Jan 2024 21:14:38 +0000 Subject: [PATCH 2/3] Update default mapping for 'other'. 
Fixes #59 --- cfg/cfg.d/z_datacitedoi.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cfg/cfg.d/z_datacitedoi.pl b/cfg/cfg.d/z_datacitedoi.pl index 8ccd899..b460f81 100644 --- a/cfg/cfg.d/z_datacitedoi.pl +++ b/cfg/cfg.d/z_datacitedoi.pl @@ -85,7 +85,7 @@ $c->{datacitedoi}{typemap}{dataset} = {v=>'Dataset',a=>'Dataset'}; $c->{datacitedoi}{typemap}{experiment} = {v=>'Experiment',a=>'Text'}; $c->{datacitedoi}{typemap}{teaching_resource} = {v=>'Teaching Resource',a=>'InteractiveResource'}; -$c->{datacitedoi}{typemap}{other} = {v=>'Misc',a=>'Collection'}; +$c->{datacitedoi}{typemap}{other} = {v=>'Misc',a=>'Other'}; #For use with recollect $c->{datacitedoi}{typemap}{data_collection} = {v=>'Dataset',a=>'Dataset'}; $c->{datacitedoi}{typemap}{collection} = {v=>'Collection',a=>'Collection'}; From 2a96e03da357b414aa9122b701be148c3bf78974 Mon Sep 17 00:00:00 2001 From: jesusbagpuss Date: Mon, 20 May 2024 23:21:43 +0100 Subject: [PATCH 3/3] Updates for v4.5 --- README.md | 58 ++++++++++++++++++++++------ cfg/cfg.d/z_datacitedoi.pl | 24 ++++++++++++ lib/cfg.d/z_datacite_mapping.pl | 67 ++++++++++++++++++++++++++++++--- 3 files changed, 131 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index f10ea61..fbb5a43 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,13 @@ DateCiteDoi - A plugin to mint DataCite DOIs to eprints ======================================================== +Updates +------- +- 2024-05-20: Support for DataCite v4.5 Schema + - New 'Instrument' item type + - New identifier attributes for Publisher + - Update default mapping for 'Other' eprint types + Requirements ------------- @@ -86,33 +93,60 @@ $c->{datacitedoi}{minters} = "eprint/edit:editor"; # eg World Data Center for Climate (WDCC); $c->{datacitedoi}{publisher} = "EPrints Repo"; +# DataCite 4.5 also defines publisherIdentifier, publisherIdentifierScheme and schemeURI. +# The hash below can be used to define URLs for publishers that exist in the EPrint record. 
+# +# If a publisher doesn't match any of the keys, the extra attributes won't be used. +# The scheme and URI attributes will be calculated using the 'identifiermap' below. +# By default the map understands ROR, DOI, Wikidata, ISNI, VIAF and re3data URLs. +# +# At a minimum, if you have a default publisher set, configure their identifier. +# +$c->{datacitedoi}{publishers}{ids} = { + # "Publisher" => "Identifier-URL", #Example format + # "EPrints Repo" => "https://ror.org/04z8jg394", +}; +# This array is used to work out what scheme the identifier belongs to. +# The 'id' is used as the 'publisherIdentifierScheme', the 'uri' as the 'schemeURI', and the regexes are matched against the identifiers defined above +$c->{datacitedoi}{publishers}{identifiermap} = [ + { id => "ROR", uri => "https://ror.org/", regex => qr!^https://ror\.org/0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$! }, #https://ror.readme.io/docs/ror-identifier-pattern + { id => "DOI", uri => "https://doi.org/", regex => qr!^https?://doi\.org/10\.\d{4,}/\S+$! }, + { id => "DOI", uri => "https://doi.org/", regex => qr!^https?://dx\.doi\.org/10\.\d{4,}/\S+$! }, + { id => "Wikidata", uri => "https://www.wikidata.org/wiki/", regex => qr!^https://www\.wikidata\.org/(wiki|entity)/\S+$! }, + { id => "ISNI", uri => "http://isni.org/", regex => qr!^https?://isni\.org/isni/\S+$! }, + { id => "VIAF", uri => "http://viaf.org/", regex => qr!^https?://viaf\.org/viaf/\S+$! }, + { id => "re3data", uri=> "https://re3data.org/", regex => qr!^https?://(?:www\.)?re3data\.org/\S+$! }, +]; # Namespace and location for DataCite XML schema # feel free to update, though no guarantees it'll be accepted if you do $c->{datacitedoi}{xmlns} = "http://datacite.org/schema/kernel-4"; # Try this instead: # $c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." ".$c->{datacitedoi}{xmlns}."/metadata.xsd"; -$c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." 
http://schema.datacite.org/meta/kernel-4/metadata.xsd"; - -# Need to map eprint type (article, dataset etc) to DOI ResourceType -# Controlled list http://schema.datacite.org/meta/kernel-4.1/doc/DataCite-MetadataKernel_v4.1.pdf -# where v is the ResourceType and a is the resourceTypeGeneral -#$c->{datacitedoi}{typemap}{book_section} = {v=>'BookSection',a=>'Text'}; -$c->{datacitedoi}{typemap}{article} = {v=>'Article',a=>'Text'}; +$c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." https://schema.datacite.org/meta/kernel-4.5/metadata.xsd"; + +# Need to map eprint type (article, dataset etc) to DOI ResourceType. +# where 'v' is the (free-text) ResourceType and 'a' is the (controlled) resourceTypeGeneral detailed below as 'resourceType'. +# Controlled list: +# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/resourcetype/ +# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-resourceType-v4.xsd +$c->{datacitedoi}{typemap}{article} = {v=>'Article',a=>'JournalArticle'}; +$c->{datacitedoi}{typemap}{book_section} = {v=>'Book Section',a=>'BookChapter'}; $c->{datacitedoi}{typemap}{monograph} = {v=>'Monograph',a=>'Text'}; -$c->{datacitedoi}{typemap}{thesis} = {v=>'Thesis',a=>'Text'}; -$c->{datacitedoi}{typemap}{book} = {v=>'Book',a=>'Text'}; +$c->{datacitedoi}{typemap}{conference_item} = {v=>'Conference Paper',a=>'ConferencePaper'}; +$c->{datacitedoi}{typemap}{book} = {v=>'Book',a=>'Book'}; +$c->{datacitedoi}{typemap}{thesis} = {v=>'Thesis',a=>'Dissertation'}; $c->{datacitedoi}{typemap}{patent} = {v=>'Patent',a=>'Text'}; $c->{datacitedoi}{typemap}{artefact} = {v=>'Artefact',a=>'PhysicalObject'}; $c->{datacitedoi}{typemap}{exhibition} = {v=>'Exhibition',a=>'InteractiveResource'}; $c->{datacitedoi}{typemap}{composition} = {v=>'Composition',a=>'Sound'}; $c->{datacitedoi}{typemap}{performance} = {v=>'Performance',a=>'Event'}; $c->{datacitedoi}{typemap}{image} = {v=>'Image',a=>'Image'}; -$c->{datacitedoi}{typemap}{video} = 
{v=>'Video',a=>'AudioVisual'}; +$c->{datacitedoi}{typemap}{video} = {v=>'Video',a=>'Audiovisual'}; $c->{datacitedoi}{typemap}{audio} = {v=>'Audio',a=>'Sound'}; $c->{datacitedoi}{typemap}{dataset} = {v=>'Dataset',a=>'Dataset'}; $c->{datacitedoi}{typemap}{experiment} = {v=>'Experiment',a=>'Text'}; -$c->{datacitedoi}{typemap}{teaching_resource} = {v=>'TeachingResourse',a=>'InteractiveResource'}; -$c->{datacitedoi}{typemap}{other} = {v=>'Misc',a=>'Collection'}; +$c->{datacitedoi}{typemap}{teaching_resource} = {v=>'Teaching Resource',a=>'InteractiveResource'}; +$c->{datacitedoi}{typemap}{other} = {v=>'Misc',a=>'Other'}; #For use with recollect $c->{datacitedoi}{typemap}{data_collection} = {v=>'Dataset',a=>'Dataset'}; $c->{datacitedoi}{typemap}{collection} = {v=>'Collection',a=>'Collection'}; diff --git a/cfg/cfg.d/z_datacitedoi.pl b/cfg/cfg.d/z_datacitedoi.pl index b460f81..939ea99 100644 --- a/cfg/cfg.d/z_datacitedoi.pl +++ b/cfg/cfg.d/z_datacitedoi.pl @@ -56,6 +56,30 @@ # eg World Data Center for Climate (WDCC); $c->{datacitedoi}{publisher} = "EPrints Repo"; +# DataCite 4.5 also defines publisherIdentifier, publisherIdentifierScheme and schemeURI. +# The hash below can be used to define URLs for publishers that exist in the EPrint record. +# +# If a publisher doesn't match any of the keys, the extra attributes won't be used. +# The scheme and URI attributes will be calculated using the 'identifiermap' below. +# By default the map understands ROR, DOI, Wikidata, ISNI, VIAF and re3data URLs. +# +# At a minimum, if you have a default publisher set, configure their identifier. +# +$c->{datacitedoi}{publishers}{ids} = { + # "Publisher" => "Identifier-URL", #Example format + # "EPrints Repo" => "https://ror.org/04z8jg394", +}; +# This array is used to work out what scheme the identifier belongs to. 
+# The 'id' is used as the 'publisherIdentifierScheme', the 'uri' as the 'schemeURI', and the regexes are matched against the identifiers defined above +$c->{datacitedoi}{publishers}{identifiermap} = [ + { id => "ROR", uri => "https://ror.org/", regex => qr!^https://ror\.org/0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$! }, #https://ror.readme.io/docs/ror-identifier-pattern + { id => "DOI", uri => "https://doi.org/", regex => qr!^https?://doi\.org/10\.\d{4,}/\S+$! }, + { id => "DOI", uri => "https://doi.org/", regex => qr!^https?://dx\.doi\.org/10\.\d{4,}/\S+$! }, + { id => "Wikidata", uri => "https://www.wikidata.org/wiki/", regex => qr!^https://www\.wikidata\.org/(wiki|entity)/\S+$! }, + { id => "ISNI", uri => "http://isni.org/", regex => qr!^https?://isni\.org/isni/\S+$! }, + { id => "VIAF", uri => "http://viaf.org/", regex => qr!^https?://viaf\.org/viaf/\S+$! }, + { id => "re3data", uri=> "https://re3data.org/", regex => qr!^https?://(?:www\.)?re3data\.org/\S+$! }, +]; # Namespace and location for DataCite XML schema # feel free to update, though no guarantees it'll be accepted if you do $c->{datacitedoi}{xmlns} = "http://datacite.org/schema/kernel-4"; diff --git a/lib/cfg.d/z_datacite_mapping.pl b/lib/cfg.d/z_datacite_mapping.pl index 441aee3..9725874 100644 --- a/lib/cfg.d/z_datacite_mapping.pl +++ b/lib/cfg.d/z_datacite_mapping.pl @@ -17,7 +17,9 @@ ################################################## # resourceType this is derived from the eprint.type and the datacitedoi->{typemap} in cfg/cfg.d/z_datacite.pl -# https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#resourceType +# Controlled list: +# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/resourcetype/#a-resourcetypegeneral +# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-resourceType-v4.xsd $c->{datacite_mapping_type} = sub { @@ -36,6 +38,7 @@ Dissertation Event Image + Instrument InteractiveResource Journal JournalArticle @@ -49,6 +52,7 @@ Software Sound Standard + StudyRegistration Text 
Workflow Other @@ -76,7 +80,8 @@ ############################################################### # creators this is derived from creators and/or corp_creators -# https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#creators +# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/creator/ +# - Xsd: https://schema.datacite.org/meta/kernel-4.5/metadata.xsd#creators $c->{datacite_mapping_creators} = sub { @@ -144,7 +149,8 @@ ############################################################### # contributors this is derived from contributors -# https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#creators +# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/appendices/appendix-1/contributorType/ +# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-contributorType-v4.xsd $c->{datacite_mapping_contributors} = sub { @@ -235,7 +241,8 @@ ################################################## # titles this is derived from the eprint.title -# https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#titles +# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/title/ +# - Xsd: https://schema.datacite.org/meta/kernel-4.5/metadata.xsd#titles $c->{datacite_eprint_mapping_title} = sub { my($xml, $dataobj, $repo) = @_; @@ -250,8 +257,10 @@ }; ##################################################### +# 2024-01-22 Updated in Schema 4.5 to include publisherIdentifier, publisherIdentifierScheme and schemeURI # publisher this is derived from the eprint.publisher -# https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#publisher +# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/publisher/ +# - Xsd: https://schema.datacite.org/meta/kernel-4.5/metadata.xsd#publisher $c->{datacite_mapping_publisher} = sub { @@ -261,8 +270,11 @@ if($dataobj->exists_and_set("publisher")){ $publisher = $dataobj->render_value("publisher"); } - return $xml->create_data_element("publisher", $publisher); + # returns 
hash of attributes to add to publisher element or undef if an ID isn't found. + my $pub_id = $repo->call( ["datacitedoi", "get_publisher_identifier"], $repo, $publisher ); + + return $xml->create_data_element("publisher", $publisher, %$pub_id ); }; ################################################## @@ -1020,3 +1032,46 @@ return 0; }; + +# This has been defined as a function to allow repositories to do more than just add values to a hash, e.g. +# values could be retrieved from other sources/config files. +# +# By default a hash is defined keyed on the publisher. +$c->{datacitedoi}->{get_publisher_identifier} = sub { + my( $repo, $publisher ) = @_; + + my $publisher_id = $repo->get_conf( "datacitedoi", "publishers", "ids", $publisher ); + return if !defined $publisher_id; + + if( defined $repo->get_conf( "datacitedoi", "publishers", "identifiermap" ) ) + { + foreach my $id_details (@{ $repo->get_conf( "datacitedoi", "publishers", "identifiermap" ) }) + { + if( $publisher_id =~ /$id_details->{regex}/ ) + { + return { + publisherIdentifier => $publisher_id, + publisherIdentifierScheme => $id_details->{id}, + schemeURI => $id_details->{uri} + }; + } + } + } + return; # don't return empty string +}; + +$c->{datacitedoi}->{validate_publisher_identifier} = sub { + my( $repo, $publisher ) = @_; + + my $publisher_info = $repo->call( [ "datacitedoi", "get_publisher_identifier" ], $repo, $publisher ); + + # having no publisher identifier info is OK + return 1 if !defined $publisher_info; + + return 1 if ( EPrints::Utils::is_set( $publisher_info->{'publisherIdentifier'} ) && + EPrints::Utils::is_set( $publisher_info->{'publisherIdentifierScheme'} ) && + EPrints::Utils::is_set( $publisher_info->{'schemeURI'} ) + ); #TODO schemeURI is not mandatory, but should(?) match publisherIdentifier? + + return 0; +};