Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datacite schema 4.5 #60

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 46 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
DataCiteDoi - A plugin to mint DataCite DOIs for eprints
========================================================

Updates
-------
- 2024-05-20: Support for DataCite v4.5 Schema
- New 'Instrument' item type
- New identifier attributes for Publisher
- Update default mapping for 'Other' eprint types

Requirements
-------------

Expand Down Expand Up @@ -86,33 +93,60 @@ $c->{datacitedoi}{minters} = "eprint/edit:editor";
# eg World Data Center for Climate (WDCC);
$c->{datacitedoi}{publisher} = "EPrints Repo";

# DataCite 4.5 also defines publisherIdentifier, publisherIdentifierScheme and schemeURI.
# The hash below can be used to define URLs for publishers that exist in the EPrint record.
#
# If a publisher doesn't match any of the keys, the extra attributes won't be used.
# The scheme and URI attributes will be calculated using the 'identifiermap' below.
# By default the map understands ROR, DOI, Wikidata, ISNI, VIAF and re3data URLs.
#
# At a minimum, if you have a default publisher set, configure their identifier.
#
$c->{datacitedoi}{publishers}{ids} = {
# "Publisher" => "Identifier-URL", #Example format
# "EPrints Repo" => "https://ror.org/04z8jg394",
};
# This array is used to work out which scheme a publisher identifier belongs to.
# Entries are tried in order; the first one whose 'regex' matches an identifier defined above is used:
#  - 'id' is used as the 'publisherIdentifierScheme' attribute
#  - 'uri' is used as the 'schemeURI' attribute
$c->{datacitedoi}{publishers}{identifiermap} = [
    # ROR: a leading 0, six characters from 0-9/a-z excluding i, l, o and u, then two digits.
    # (No '|' in the character class: inside [...] it is a literal character, not alternation.)
    { id => "ROR", uri => "https://ror.org/", regex => qr!^https://ror\.org/0[a-hj-km-np-tv-z0-9]{6}[0-9]{2}$! }, #https://ror.readme.io/docs/ror-identifier-pattern
    { id => "DOI", uri => "https://doi.org/", regex => qr!^https?://doi\.org/10\.\d{4,}/\S+$! },
    { id => "DOI", uri => "https://doi.org/", regex => qr!^https?://dx\.doi\.org/10\.\d{4,}/\S+$! },
    { id => "Wikidata", uri => "https://www.wikidata.org/wiki/", regex => qr!^https://www\.wikidata\.org/(wiki|entity)/\S+$! },
    { id => "ISNI", uri => "http://isni.org/", regex => qr!^https?://isni\.org/isni/\S+$! },
    { id => "VIAF", uri => "http://viaf.org/", regex => qr!^https?://viaf\.org/viaf/\S+$! },
    # The 'www.' prefix is optional so that URLs in the same form as the scheme URI also match.
    { id => "re3data", uri => "https://re3data.org/", regex => qr!^https?://(?:www\.)?re3data\.org/\S+$! },
];
# Namespace and location for DataCite XML schema
# feel free to update, though no guarantees it'll be accepted if you do
$c->{datacitedoi}{xmlns} = "http://datacite.org/schema/kernel-4";
# Try this instead:
# $c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." ".$c->{datacitedoi}{xmlns}."/metadata.xsd";
$c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." http://schema.datacite.org/meta/kernel-4/metadata.xsd";

# Need to map eprint type (article, dataset etc) to DOI ResourceType
# Controlled list http://schema.datacite.org/meta/kernel-4.1/doc/DataCite-MetadataKernel_v4.1.pdf
# where v is the ResourceType and a is the resourceTypeGeneral
#$c->{datacitedoi}{typemap}{book_section} = {v=>'BookSection',a=>'Text'};
$c->{datacitedoi}{typemap}{article} = {v=>'Article',a=>'Text'};
$c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." https://schema.datacite.org/meta/kernel-4.5/metadata.xsd";

# Need to map eprint type (article, dataset etc) to DOI ResourceType.
# where 'v' is the (free-text) ResourceType and 'a' is the (controlled) resourceTypeGeneral detailed below as 'resourceType'.
# Controlled list:
# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/resourcetype/
# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-resourceType-v4.xsd
$c->{datacitedoi}{typemap}{article} = {v=>'Article',a=>'JournalArticle'};
$c->{datacitedoi}{typemap}{book_section} = {v=>'Book Section',a=>'BookChapter'};
$c->{datacitedoi}{typemap}{monograph} = {v=>'Monograph',a=>'Text'};
$c->{datacitedoi}{typemap}{thesis} = {v=>'Thesis',a=>'Text'};
$c->{datacitedoi}{typemap}{book} = {v=>'Book',a=>'Text'};
$c->{datacitedoi}{typemap}{conference_item} = {v=>'Conference Paper',a=>'ConferencePaper'};
$c->{datacitedoi}{typemap}{book} = {v=>'Book',a=>'Book'};
$c->{datacitedoi}{typemap}{thesis} = {v=>'Thesis',a=>'Dissertation'};
$c->{datacitedoi}{typemap}{patent} = {v=>'Patent',a=>'Text'};
$c->{datacitedoi}{typemap}{artefact} = {v=>'Artefact',a=>'PhysicalObject'};
$c->{datacitedoi}{typemap}{exhibition} = {v=>'Exhibition',a=>'InteractiveResource'};
$c->{datacitedoi}{typemap}{composition} = {v=>'Composition',a=>'Sound'};
$c->{datacitedoi}{typemap}{performance} = {v=>'Performance',a=>'Event'};
$c->{datacitedoi}{typemap}{image} = {v=>'Image',a=>'Image'};
$c->{datacitedoi}{typemap}{video} = {v=>'Video',a=>'AudioVisual'};
$c->{datacitedoi}{typemap}{video} = {v=>'Video',a=>'Audiovisual'};
$c->{datacitedoi}{typemap}{audio} = {v=>'Audio',a=>'Sound'};
$c->{datacitedoi}{typemap}{dataset} = {v=>'Dataset',a=>'Dataset'};
$c->{datacitedoi}{typemap}{experiment} = {v=>'Experiment',a=>'Text'};
$c->{datacitedoi}{typemap}{teaching_resource} = {v=>'TeachingResourse',a=>'InteractiveResource'};
$c->{datacitedoi}{typemap}{other} = {v=>'Misc',a=>'Collection'};
$c->{datacitedoi}{typemap}{teaching_resource} = {v=>'Teaching Resource',a=>'InteractiveResource'};
$c->{datacitedoi}{typemap}{other} = {v=>'Misc',a=>'Other'};
#For use with recollect
$c->{datacitedoi}{typemap}{data_collection} = {v=>'Dataset',a=>'Dataset'};
$c->{datacitedoi}{typemap}{collection} = {v=>'Collection',a=>'Collection'};
Expand Down
44 changes: 37 additions & 7 deletions cfg/cfg.d/z_datacitedoi.pl
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,42 @@
# eg World Data Center for Climate (WDCC);
$c->{datacitedoi}{publisher} = "EPrints Repo";

# DataCite 4.5 also defines publisherIdentifier, publisherIdentifierScheme and schemeURI.
# The hash below can be used to define URLs for publishers that exist in the EPrint record.
#
# If a publisher doesn't match any of the keys, the extra attributes won't be used.
# The scheme and URI attributes will be calculated using the 'identifiermap' below.
# By default the map understands ROR, DOI, Wikidata, ISNI, VIAF and re3data URLs.
#
# At a minimum, if you have a default publisher set, configure their identifier.
#
$c->{datacitedoi}{publishers}{ids} = {
# "Publisher" => "Identifier-URL", #Example format
# "EPrints Repo" => "https://ror.org/04z8jg394",
};
# This array is used to work out which scheme a publisher identifier belongs to.
# Entries are tried in order; the first one whose 'regex' matches an identifier defined above is used:
#  - 'id' is used as the 'publisherIdentifierScheme' attribute
#  - 'uri' is used as the 'schemeURI' attribute
$c->{datacitedoi}{publishers}{identifiermap} = [
    # ROR: a leading 0, six characters from 0-9/a-z excluding i, l, o and u, then two digits.
    # (No '|' in the character class: inside [...] it is a literal character, not alternation.)
    { id => "ROR", uri => "https://ror.org/", regex => qr!^https://ror\.org/0[a-hj-km-np-tv-z0-9]{6}[0-9]{2}$! }, #https://ror.readme.io/docs/ror-identifier-pattern
    { id => "DOI", uri => "https://doi.org/", regex => qr!^https?://doi\.org/10\.\d{4,}/\S+$! },
    { id => "DOI", uri => "https://doi.org/", regex => qr!^https?://dx\.doi\.org/10\.\d{4,}/\S+$! },
    { id => "Wikidata", uri => "https://www.wikidata.org/wiki/", regex => qr!^https://www\.wikidata\.org/(wiki|entity)/\S+$! },
    { id => "ISNI", uri => "http://isni.org/", regex => qr!^https?://isni\.org/isni/\S+$! },
    { id => "VIAF", uri => "http://viaf.org/", regex => qr!^https?://viaf\.org/viaf/\S+$! },
    # The 'www.' prefix is optional so that URLs in the same form as the scheme URI also match.
    { id => "re3data", uri => "https://re3data.org/", regex => qr!^https?://(?:www\.)?re3data\.org/\S+$! },
];
# Namespace and location for DataCite XML schema
# feel free to update, though no guarantees it'll be accepted if you do
$c->{datacitedoi}{xmlns} = "http://datacite.org/schema/kernel-4";
# Try this instead:
# $c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." ".$c->{datacitedoi}{xmlns}."/metadata.xsd";
$c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." https://schema.datacite.org/meta/kernel-4.4/metadata.xsd";
$c->{datacitedoi}{schemaLocation} = $c->{datacitedoi}{xmlns}." https://schema.datacite.org/meta/kernel-4.5/metadata.xsd";

# Need to map eprint type (article, dataset etc) to DOI ResourceType
# Controlled list https://schema.datacite.org/meta/kernel-4.4/doc/DataCite-MetadataKernel_v4.4.pdf
# where v is the ResourceType and a is the resourceTypeGeneral
# Need to map eprint type (article, dataset etc) to DOI ResourceType.
# where 'v' is the (free-text) ResourceType and 'a' is the (controlled) resourceTypeGeneral detailed below as 'resourceType'.
# Controlled list:
# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/resourcetype/
# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-resourceType-v4.xsd
$c->{datacitedoi}{typemap}{article} = {v=>'Article',a=>'JournalArticle'};
$c->{datacitedoi}{typemap}{book_section} = {v=>'Book Section',a=>'BookChapter'};
$c->{datacitedoi}{typemap}{monograph} = {v=>'Monograph',a=>'Text'};
Expand All @@ -83,13 +109,15 @@
$c->{datacitedoi}{typemap}{dataset} = {v=>'Dataset',a=>'Dataset'};
$c->{datacitedoi}{typemap}{experiment} = {v=>'Experiment',a=>'Text'};
$c->{datacitedoi}{typemap}{teaching_resource} = {v=>'Teaching Resource',a=>'InteractiveResource'};
$c->{datacitedoi}{typemap}{other} = {v=>'Misc',a=>'Collection'};
$c->{datacitedoi}{typemap}{other} = {v=>'Misc',a=>'Other'};
#For use with recollect
$c->{datacitedoi}{typemap}{data_collection} = {v=>'Dataset',a=>'Dataset'};
$c->{datacitedoi}{typemap}{collection} = {v=>'Collection',a=>'Collection'};

# Need to map contributor type to DOI contributorType
# Controlled list https://schema.datacite.org/meta/kernel-4.3/doc/DataCite-MetadataKernel_v4.3.pdf
# Controlled list:
# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/appendices/appendix-1/contributorType/
# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-contributorType-v4.xsd
$c->{datacitedoi}{contributormap}{'http://www.loc.gov/loc.terms/relators/MDC'} = 'ContactPerson';
$c->{datacitedoi}{contributormap}{'http://www.loc.gov/loc.terms/relators/PRC'} = 'ContactPerson';
$c->{datacitedoi}{contributormap}{'http://www.loc.gov/loc.terms/relators/COL'} = 'DataCollector';
Expand All @@ -107,7 +135,9 @@
$c->{datacitedoi}{contributormap}{'http://www.loc.gov/loc.terms/relators/SPN'} = 'Sponsor';

# Need to map dates date type to DOI dateType
# Controlled list https://schema.datacite.org/meta/kernel-4.3/doc/DataCite-MetadataKernel_v4.3.pdf
# Controlled list:
# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/appendices/appendix-1/dateType/
# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-dateType-v4.xsd
$c->{datacitedoi}{datemap}{accepted} = 'Accepted';
$c->{datacitedoi}{datemap}{submitted} = 'Submitted';

Expand Down
67 changes: 61 additions & 6 deletions lib/cfg.d/z_datacite_mapping.pl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

##################################################
# resourceType: this is derived from the eprint.type and the datacitedoi->{typemap} in cfg/cfg.d/z_datacitedoi.pl
# https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#resourceType
# Controlled list:
# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/resourcetype/#a-resourcetypegeneral
# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-resourceType-v4.xsd

$c->{datacite_mapping_type} = sub {

Expand All @@ -36,6 +38,7 @@
Dissertation
Event
Image
Instrument
InteractiveResource
Journal
JournalArticle
Expand All @@ -49,6 +52,7 @@
Software
Sound
Standard
StudyRegistration
Text
Workflow
Other
Expand Down Expand Up @@ -76,7 +80,8 @@

###############################################################
# creators this is derived from creators and/or corp_creators
# https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#creators
# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/creator/
# - Xsd: https://schema.datacite.org/meta/kernel-4.5/metadata.xsd#creators

$c->{datacite_mapping_creators} = sub {

Expand Down Expand Up @@ -144,7 +149,8 @@

###############################################################
# contributors this is derived from contributors
# https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#creators
# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/appendices/appendix-1/contributorType/
# - Xsd: https://schema.datacite.org/meta/kernel-4.5/include/datacite-contributorType-v4.xsd

$c->{datacite_mapping_contributors} = sub {

Expand Down Expand Up @@ -235,7 +241,8 @@

##################################################
# titles this is derived from the eprint.title
# https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#titles
# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/title/
# - Xsd: https://schema.datacite.org/meta/kernel-4.5/metadata.xsd#titles

$c->{datacite_eprint_mapping_title} = sub {
my($xml, $dataobj, $repo) = @_;
Expand All @@ -250,8 +257,10 @@
};

#####################################################
# 2024-01-22 Updated in Schema 4.5 to include publisherIdentifier, publisherIdentifierScheme and schemeURI
# publisher this is derived from the eprint.publisher
# https://schema.datacite.org/meta/kernel-4.0/metadata.xsd#publisher
# - Docs: https://datacite-metadata-schema.readthedocs.io/en/4.5/properties/publisher/
# - Xsd: https://schema.datacite.org/meta/kernel-4.5/metadata.xsd#publisher

$c->{datacite_mapping_publisher} = sub {

Expand All @@ -261,8 +270,11 @@
if($dataobj->exists_and_set("publisher")){
$publisher = $dataobj->render_value("publisher");
}
return $xml->create_data_element("publisher", $publisher);

# returns hash of attributes to add to publisher element or undef if an ID isn't found.
my $pub_id = $repo->call( ["datacitedoi", "get_publisher_identifier"], $repo, $publisher );

return $xml->create_data_element("publisher", $publisher, %$pub_id );
};

##################################################
Expand Down Expand Up @@ -1020,3 +1032,46 @@

return 0;
};

# Look up the identifier attributes for a publisher.
#
# This is a configurable function rather than a plain hash lookup so that a repository can
# override it and do more than add values to a hash, e.g. retrieve the values from other
# sources/config files. By default the identifier comes from a hash keyed on the publisher.
#
# Arguments:
#   $repo      - object used to read the configuration via get_conf()
#   $publisher - publisher name; used as the key into {datacitedoi}{publishers}{ids}
#
# Returns a hashref with the keys publisherIdentifier, publisherIdentifierScheme and schemeURI,
# built from the first {datacitedoi}{publishers}{identifiermap} entry whose regex matches the
# configured identifier. Returns nothing (undef in scalar context) when the publisher has no
# configured identifier, when no identifiermap is configured, or when no entry matches.
$c->{datacitedoi}->{get_publisher_identifier} = sub {
    my( $repo, $publisher ) = @_;

    my $identifier = $repo->get_conf( "datacitedoi", "publishers", "ids", $publisher );
    return unless defined $identifier;

    # Without a map there is nothing to derive the scheme from.
    return unless defined $repo->get_conf( "datacitedoi", "publishers", "identifiermap" );

    for my $scheme (@{ $repo->get_conf( "datacitedoi", "publishers", "identifiermap" ) })
    {
        next unless $identifier =~ /$scheme->{regex}/;

        # First matching entry wins.
        my %attributes = (
            publisherIdentifier => $identifier,
            publisherIdentifierScheme => $scheme->{id},
            schemeURI => $scheme->{uri},
        );
        return \%attributes;
    }

    return; # bare return on purpose: never hand back an empty string
};

# Check that the identifier details found for a publisher are complete.
#
# Arguments:
#   $repo      - object used to invoke the configured get_publisher_identifier function via call()
#   $publisher - publisher name, passed through to get_publisher_identifier
#
# Returns 1 when the publisher has no identifier information at all (that is allowed), or when
# publisherIdentifier, publisherIdentifierScheme and schemeURI are all set. Returns 0 otherwise.
$c->{datacitedoi}->{validate_publisher_identifier} = sub {
    my( $repo, $publisher ) = @_;

    my $publisher_info = $repo->call( [ "datacitedoi", "get_publisher_identifier" ], $repo, $publisher );

    # having no publisher identifier info is OK
    return 1 if !defined $publisher_info;

    # These must be the keys that get_publisher_identifier returns; checking any other
    # names would make every publisher with a configured identifier fail validation.
    #TODO schemeURI is not mandatory, but should(?) match the identifier?
    foreach my $attribute ( qw( publisherIdentifier publisherIdentifierScheme schemeURI ) )
    {
        return 0 if !EPrints::Utils::is_set( $publisher_info->{$attribute} );
    }

    return 1;
};