Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tool to import and link muziekweb and imslp musiccompositions #14

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 0 additions & 25 deletions Pipfile

This file was deleted.

759 changes: 0 additions & 759 deletions Pipfile.lock

This file was deleted.

4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ It also imports basic identifiers for the following sources

## Installation

pip install requirements.txt
pip install -r requirements.txt

## Running the application

Expand Down Expand Up @@ -54,4 +54,4 @@ and -mwp.
Copyright 2020 Music Technology Group, Universitat Pompeu Fabra
Copyright 2020 Muziekweb

Licensed under the Apache License, Version 2.0. See LICENSE for more information
Licensed under the Apache License, Version 2.0. See LICENSE for more information
69 changes: 69 additions & 0 deletions ceimport/imslp_muziekweb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import os

import click
import csv

import muziekweb_api
from trompace import config

import importers
from ceimport.loader import load_musiccomposition_from_imslp_name, link_musiccomposition_exactmatch
from importers.work import import_work


def auth():
    """Configure Muziekweb API credentials and load the trompace config.

    Reads the account name and password from the ``MUZIEKWEB_USER`` and
    ``MUZIEKWEB_PASS`` environment variables, passes them to
    ``muziekweb_api.set_api_account`` and then calls ``config.config.load()``.

    Raises:
        ValueError: if either environment variable is not set.
    """
    required = ('MUZIEKWEB_USER', 'MUZIEKWEB_PASS')
    if any(name not in os.environ for name in required):
        raise ValueError("Require MUZIEKWEB_USER and MUZIEKWEB_PASS environment variables")
    username, password = (os.environ[name] for name in required)
    muziekweb_api.set_api_account(username, password)
    config.config.load()


def import_link(item):
    """Import one Muziekweb work and one IMSLP composition and link them.

    Args:
        item: mapping with two keys: ``'mw'`` (the Muziekweb work identifier
            passed to ``import_work``) and ``'imslp'`` (an IMSLP wiki URL).

    The IMSLP URL is reduced to a page name by stripping the
    ``https://imslp.org/wiki/`` prefix and turning underscores into spaces.
    The composition is loaded without its files (``load_files=False``).

    If either import yields no identifier, the pair is reported and skipped
    instead of passing ``None`` on to ``link_musiccomposition_exactmatch``.
    The IMSLP loader appears to return nothing when it finds no composer;
    whether ``import_work`` can also return ``None`` is not visible here,
    so it is guarded the same way.
    """
    # Go from url to page name
    imslp_name = item['imslp'].replace("https://imslp.org/wiki/", "").replace("_", " ")
    mw_key = item['mw']

    mw_id = import_work(mw_key)
    imslp_id = load_musiccomposition_from_imslp_name(imslp_name, False)

    # Without both identifiers there is nothing meaningful to link.
    if mw_id is None or imslp_id is None:
        print("Skipping link for {} / {}: import returned no identifier".format(mw_key, imslp_name))
        return

    link_musiccomposition_exactmatch([mw_id, imslp_id])


# Root click command group. The "import-artist", "import-work" and "import"
# subcommands in this module are registered on it via @cli.command(...).
# The group itself performs no work.
@click.group()
def cli():
    pass


# "import-artist" subcommand: authenticate, import the artist identified by
# the ARTISTID argument through importers.import_artist, and print whatever
# that call returns.
@cli.command("import-artist")
@click.argument('artistid')
def import_artist_command(artistid):
    auth()
    print(importers.import_artist(artistid))


# "import-work" subcommand: authenticate, import the work identified by the
# WORKID argument through import_work, and print whatever that call returns.
@cli.command("import-work")
@click.argument('workid')
def import_work_command(workid):
    auth()
    print(import_work(workid))


# "import" subcommand: read a CSV file of Muziekweb/IMSLP matches and import
# and link every row whose match_score column is exactly the string "100".
#
# The CSV is expected to have a header row containing at least the columns
# match_score, uniform_title_id and imslp_link; a missing column raises
# KeyError from the row lookup.
@cli.command("import")
@click.argument('datafile')
def import_data(datafile):
    auth()
    # The csv module requires files to be opened with newline='' so that
    # newlines embedded in quoted fields are parsed correctly.
    with open(datafile, newline='') as fp:
        # Collect all matching rows first so the file is closed before the
        # (potentially slow) imports start.
        data = [
            {'mw': row['uniform_title_id'], 'imslp': row['imslp_link']}
            for row in csv.DictReader(fp)
            if row['match_score'] == "100"
        ]

    for item in data:
        import_link(item)


# Run the click command group when this module is executed as a script.
if __name__ == '__main__':
    cli()
10 changes: 7 additions & 3 deletions ceimport/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,10 @@ def load_artist_from_imslp(url):
if wp_person:
people.append(wp_person)
if 'musicbrainz' in rels:
mb_person = musicbrainz.load_person_from_musicbrainz(rels['musicbrainz'])
people.append(mb_person)
# TODO: This is broken
pass
# mb_person = musicbrainz.load_person_from_musicbrainz(rels['musicbrainz'])
# people.append(mb_person)
if 'isni' in rels:
isni_person = isni.load_person_from_isni(rels['isni'])
people.append(isni_person)
Expand Down Expand Up @@ -403,7 +405,7 @@ def load_musiccomposition_from_imslp_name(imslp_name, load_files=True):
link_musiccomposition_exactmatch([composition_id, mb_work_ceid])

if not load_files:
return
return composition_id

wikitext = imslp.get_wiki_content_for_pages([imslp_name])
files = imslp.files_for_work(wikitext[0])
Expand Down Expand Up @@ -442,6 +444,8 @@ def load_musiccomposition_from_imslp_name(imslp_name, load_files=True):
# TODO: We should check if this is the case all the time.
link_mediaobject_was_derived_from(source_id=xmlmediaobject_ceid,
derived_id=pdfmediaobject_ceid)

return composition_id
else:
logger.info(" - No composer??, skipping")

Expand Down
104 changes: 39 additions & 65 deletions importers/artist.py
Original file line number Diff line number Diff line change
@@ -1,81 +1,55 @@
"""
Muziekweb artist importer
"""
from typing import Optional
import itertools

import trompace as ce
from SPARQLWrapper import SPARQLWrapper, JSON
from trompace.connection import submit_query
from trompace.mutations.person import mutation_update_person, mutation_create_person
import trompace.connection
from trompace.mutations.person import mutation_create_person, \
mutation_person_add_exact_match_person

from models import CE_Person
from trompace_local import GLOBAL_CONTRIBUTOR, GLOBAL_IMPORTER_REPO, GLOBAL_PUBLISHER, lookupIdentifier
import muziekweb_api
from importers.audio_object import get_person_information


async def import_artist(keys: list):
def import_artist(artist_id: str):
"""Import an artist from Muziekweb and import it to the CE. If the com
Returns a dictionary:
{"musiccomposition_id": musiccomp_ceid,
"person_ids": composer_ids}
TODO: This code is duplicated in audio_object.import_tracks, and should be abstracted out
"""
Imports artists from Muziekweb for all given keys into the Trompa CE.
"""
for key in keys:
print(f"Retrieving artist with key {key} from Muziekweb")
# Get data from Muziekweb
artist = await get_mw_artist(key)

if artist is None:
print(f"No data received for {key}")
continue

artist.identifier = await lookupIdentifier("Person", artist.source)
persons = get_artist(artist_id)

if artist.identifier is not None:
print(f"Updating record {artist.identifier} in Trompa CE", end="")
response = await ce.connection.submit_query(mutation_update_person(**artist.as_dict()))
artist.identifier = response["data"]["UpdatePerson"]["identifier"]
else:
print("Inserting new record in Trompa CE", end="")
response = await ce.connection.submit_query(mutation_create_person(**artist.as_dict()))
artist.identifier = response["data"]["CreatePerson"]["identifier"]
list_person_ids = []
for person in persons:
print("Inserting new person {} in Trompa CE\n".format(person.name))

if artist.identifier is None:
print(" - failed.")
else:
print(" - success.")
response = trompace.connection.submit_query(mutation_create_person(**person.as_dict()), auth_required=True)

print("Importing artists done.")
person.identifier = response["data"]["CreatePerson"]["identifier"]
list_person_ids.append(person.identifier)

for from_id, to_id in itertools.permutations(list_person_ids, 2):
query = mutation_person_add_exact_match_person(from_id, to_id)
response = trompace.connection.submit_query(query, auth_required=True)
print(f" - Linking Person {from_id} to Person {to_id} done.")

async def get_mw_artist(key: str) -> Optional[CE_Person]:
sparql = SPARQLWrapper(
"https://api.data.muziekweb.nl/datasets/muziekweborganization/Muziekweb/services/Muziekweb/sparql")
sparql.setReturnFormat(JSON)
qry = f"""PREFIX schema: <http://schema.org/>
PREFIX vocab: <https://data.muziekweb.nl/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
select ?url ?name ?birthYear ?deathYear where {{
BIND(<https://data.muziekweb.nl/Link/{key}> as ?url)
?url vocab:beginYear ?birthYear;
vocab:endYear ?deathYear;
rdfs:label ?name.
}}"""
sparql.setQuery(qry)
# Return CE ID of the muziekweb Person
mw_person = [p for p in persons if p.contributor == "https://www.muziekweb.nl"]
if mw_person:
return mw_person[0]
else:
return None

result = sparql.query().convert()["results"]["bindings"]

if len(result) > 0:
# Now get Muziekweb data
person = CE_Person(
identifier=None,
name=result[0]["name"]["value"],
url=result[0]["url"]["value"],
contributor=GLOBAL_CONTRIBUTOR,
creator=GLOBAL_IMPORTER_REPO,
)

person.publisher = GLOBAL_PUBLISHER
person.description = None
person.birthDate = result[0]["birthYear"]["value"]
person.deathDate = result[0]["deathYear"]["value"]

return person
def get_artist(artist_id: str):
    """Query the Muziekweb API for an artist and parse the result.

    Reads the text of the first ``PresentationName`` element and the first
    space-separated word of the first ``Catalogue`` element from the API
    response, then delegates to ``get_person_information`` and returns its
    result unchanged. The artist id is passed in the ``perf_link`` position.

    TODO: This code currently skips the check for the artist being a group
    """
    artist_doc = muziekweb_api.get_artist_information(artist_id)

    name_node = artist_doc.getElementsByTagName('PresentationName')[0]
    presentation_name = name_node.firstChild.data
    # Same name with spaces replaced by dashes.
    dashed_name = presentation_name.replace(' ', '-')

    catalogue_node = artist_doc.getElementsByTagName('Catalogue')[0]
    # Only the text before the first space of the catalogue entry is used.
    catalogue_style = catalogue_node.firstChild.data.partition(' ')[0]

    return get_person_information(
        artist_doc, presentation_name, artist_id, dashed_name, catalogue_style
    )

return None
31 changes: 17 additions & 14 deletions importers/audio_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@

import trompace as ce
from trompace.connection import submit_query
from trompace.mutations.audioobject import mutation_update_audioobject, mutation_create_audioobject, \
mutation_merge_audioobject_exampleofwork
from trompace.mutations.audioobject import mutation_update_audioobject, mutation_create_audioobject
from trompace.mutations.musiccomposition import mutation_update_music_composition, mutation_create_music_composition, \
mutation_merge_music_composition_composer, mutation_merge_music_composition_recorded_as
from trompace.mutations.person import mutation_update_person, mutation_create_person, \
Expand All @@ -18,7 +17,6 @@
mutation_update_musicrecording, mutation_merge_music_recording_audio

from ceimport.sites.isni import load_person_from_isni
# from ceimport.sites.musicbrainz import load_person_data_from_musicbrainz
from ceimport.sites import musicbrainz
from ceimport.sites.viaf import load_person_from_viaf
from ceimport.sites.wikidata import load_person_from_wikidata_url, load_person_from_wikipedia_url
Expand Down Expand Up @@ -337,18 +335,19 @@ def get_mw_audio_1track(key: str) -> [CE_AudioObject]:
artist_type = artist.get('type', None)

if artist_type == 'Group':
music_groups, persons = get_music_group_information(doc_artist, music_groups, persons, num_ext_links, perf_name, perf_link, perf_text, unif_style)
music_groups, persons = get_music_group_information(doc_artist, music_groups, num_ext_links, perf_name, perf_link, perf_text, unif_style)
else:
persons = get_person_information(doc_artist, persons, num_ext_links, perf_name, perf_link, perf_text, unif_style)
persons = get_person_information(doc_artist, perf_name, perf_link, perf_text, unif_style)

return audio_objects, music_recordings, music_works, persons, music_groups

return None, None, None, None, None


def get_person_information(doc_artist, persons, num_ext_links, perf_name, perf_link, perf_text, unif_style):
def get_person_information(doc_artist, perf_name, perf_link, perf_text, unif_style):
"""
"""
persons = []
# MW person
person = CE_Person(
identifier=None,
Expand All @@ -362,12 +361,11 @@ def get_person_information(doc_artist, persons, num_ext_links, perf_name, perf_l
persons.append(person)

# external links
for pers in range(num_ext_links):

prov_name = doc_artist.getElementsByTagName('ExternalLink')[pers].attributes['Provider'].value
external_links = doc_artist.getElementsByTagName('ExternalLink')
for external in external_links:
prov_name = external.attributes['Provider'].value
print('Searching for person: {} - {}'.format(perf_name, prov_name))
ext_link = doc_artist.getElementsByTagName('ExternalLinks')[0].getElementsByTagName('Link')[
pers].firstChild.data
ext_link = external.getElementsByTagName("Link")[0].firstChild.nodeValue
if prov_name == 'ISNI':
ext_link = MW_MUSIC_URL.format(perf_link, unif_style, ext_link)
ppl = load_person_from_isni(ext_link)
Expand All @@ -380,6 +378,7 @@ def get_person_information(doc_artist, persons, num_ext_links, perf_name, perf_l
title=ppl['title'],
source=ppl['source'],
)
persons.append(person)
elif prov_name == 'VIAF':
ppl = load_person_from_viaf(ext_link)
person = CE_Person(
Expand All @@ -391,6 +390,7 @@ def get_person_information(doc_artist, persons, num_ext_links, perf_name, perf_l
title=ppl['title'],
source=ppl['source'],
)
persons.append(person)
elif prov_name == 'MUSICBRAINZ':
mbid = ext_link.split('/')[-1]
ppls = musicbrainz.load_artist_from_musicbrainz(mbid)
Expand Down Expand Up @@ -422,6 +422,7 @@ def get_person_information(doc_artist, persons, num_ext_links, perf_name, perf_l
source=ppl['source'],
)
person.description = ppl['description']
persons.append(person)
elif prov_name == 'WIKIPEDIA_EN':
en_wiki_link = 'https://en.wikipedia.org/wiki/{}'.format(ext_link)
ppl = load_person_from_wikipedia_url(en_wiki_link, 'en')
Expand All @@ -436,7 +437,7 @@ def get_person_information(doc_artist, persons, num_ext_links, perf_name, perf_l
source=ppl['source'],
)
person.description = ppl['description']

persons.append(person)
elif prov_name == 'WIKIPEDIA_NL':
nl_wiki_link = 'https://nl.wikipedia.org/wiki/{}'.format(ext_link)
ppl = load_person_from_wikipedia_url(nl_wiki_link, 'nl')
Expand All @@ -451,6 +452,7 @@ def get_person_information(doc_artist, persons, num_ext_links, perf_name, perf_l
source=ppl['source'],
)
person.description = ppl['description']
persons.append(person)
else:
if prov_name == 'ALLMUSIC':
contributor = 'https://www.allmusic.com/'
Expand All @@ -470,14 +472,15 @@ def get_person_information(doc_artist, persons, num_ext_links, perf_name, perf_l
title='{} - {}'.format(perf_name, prov_name),
source=ext_link,
)
persons.append(person)
persons.append(person)
print('External link: {}'.format(ext_link))
return persons


def get_music_group_information(doc_artist, music_groups, persons, num_ext_links, perf_name, perf_link, perf_text, unif_style):
def get_music_group_information(doc_artist, music_groups, num_ext_links, perf_name, perf_link, perf_text, unif_style):
"""
"""
persons = []
# MW Music Group
music_group = CE_MusicGroup(
identifier=None,
Expand Down
Loading