-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathretrieve_scrobbles.py
153 lines (122 loc) · 4.95 KB
/
retrieve_scrobbles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import requests
from datetime import date, datetime, timedelta
from dotenv import load_dotenv
import os
import pandas as pd
import time
# Load settings from a .env file (if one is found) into the environment at
# import time. NOTE(review): presumably this is where API_KEY, read via
# os.getenv in main(), comes from -- confirm against the deployment setup.
load_dotenv()
def get_lastfm_scrobbles(api_key, username, from_timestamp, to_timestamp, page=1):
    """Fetch one page of a user's recent tracks from the Last.fm API.

    Issues a ``user.getrecenttracks`` request limited to 200 tracks per page
    for the window bounded by ``from_timestamp`` and ``to_timestamp``.

    Args:
        api_key: Last.fm API key sent as the ``api_key`` parameter.
        username: Last.fm user name sent as the ``user`` parameter.
        from_timestamp: Lower bound of the window, sent as ``from``.
        to_timestamp: Upper bound of the window, sent as ``to``.
        page: Page number to request (defaults to 1).

    Returns:
        A list of track dicts taken from ``recenttracks.track``. An empty list
        is returned when the response carries an ``error`` key (the message is
        printed) or when the payload contains no tracks.

    Raises:
        Exceptions from ``requests`` (connection failure, timeout) and from
        JSON decoding are not caught and propagate to the caller.
    """
    # HTTPS so the API key in the query string is not sent in clear text.
    base_url = "https://ws.audioscrobbler.com/2.0/"
    params = {
        'method': 'user.getrecenttracks',
        'user': username,
        'api_key': api_key,
        'limit': 200,
        'from': from_timestamp,
        'to': to_timestamp,
        'format': 'json',
        'page': page,
    }

    # Without a timeout a stalled connection would block the run indefinitely.
    response = requests.get(base_url, params=params, timeout=30)
    data = response.json()

    if 'error' in data:
        print(f"Error: {data.get('message', data['error'])}")
        return []

    scrobbles = data.get('recenttracks', {}).get('track', [])
    # A lone track may arrive as a bare object instead of a one-element list;
    # normalise so callers can always iterate over track dicts.
    if isinstance(scrobbles, dict):
        scrobbles = [scrobbles]
    return scrobbles
def get_top_tag(api_key, artist):
    """Return the names of an artist's top tags from the Last.fm API.

    Despite the singular name, every tag listed under ``toptags.tag`` in the
    ``artist.getTopTags`` response is returned, in the order the response
    lists them.

    Args:
        api_key: Last.fm API key sent as the ``api_key`` parameter.
        artist: Artist name sent as the ``artist`` parameter.

    Returns:
        A list of tag-name strings; empty when the response has no
        ``toptags``/``tag`` entry.

    Raises:
        Exceptions from ``requests`` (connection failure, timeout) and from
        JSON decoding are not caught and propagate to the caller.
    """
    # HTTPS so the API key in the query string is not sent in clear text.
    base_url = "https://ws.audioscrobbler.com/2.0/"
    params = {
        'method': 'artist.getTopTags',
        'artist': artist,
        'api_key': api_key,
        'format': 'json',
    }

    # Without a timeout a stalled connection would block the run indefinitely.
    response = requests.get(base_url, params=params, timeout=30)
    data = response.json()

    tags = data.get('toptags', {}).get('tag', [])
    # A lone tag may arrive as a bare object instead of a one-element list;
    # iterating a dict would yield its keys and break the name lookup below.
    if isinstance(tags, dict):
        tags = [tags]
    return [tag['name'] for tag in tags]
def organize_scrobbles(scrobbles, api_key):
    """Split raw scrobble dicts into eight parallel column lists.

    For each scrobble the function reads ``date.uts``, ``artist.#text``,
    ``artist.mbid``, ``mbid``, ``name``, ``album.#text`` and ``album.mbid``.
    Optional fields that are missing become the string ``'N/A'``. Tags are
    looked up per artist through ``get_top_tag`` and cached so each artist is
    requested only once per call.

    Args:
        scrobbles: Iterable of track dicts.
        api_key: Last.fm API key forwarded to ``get_top_tag``.

    Returns:
        A tuple of eight lists, index-aligned, in this order: timestamps,
        artists, artist MBIDs, tracks, track MBIDs, albums, album MBIDs, tags.
        Timestamps are ``'%Y-%m-%d %H:%M:%S'`` strings in UTC, or ``'N/A'``
        when the scrobble has no ``date.uts``.

    Raises:
        KeyError: If a scrobble lacks ``artist``, ``artist['#text']`` or
            ``name``.
    """
    # Local import: only this function needs timezone.
    from datetime import timezone

    timestamp_list = []
    artist_list = []
    track_list = []
    album_list = []
    album_mbid_list = []
    top_tag_list = []
    mbid_list = []
    artist_mbid_list = []

    # Tags are artist-level and slow to retrieve, so remember the answer for
    # every artist already seen.
    top_tag_cache = {}

    for scrobble in scrobbles:
        timestamp = scrobble.get('date', {}).get('uts', 'N/A')
        artist = scrobble['artist']['#text']
        artist_mbid = scrobble['artist'].get('mbid', 'N/A')
        # Same 'N/A' fallback as the other optional fields instead of a
        # hard index that raises on a missing key.
        track_mbid = scrobble.get('mbid', 'N/A')
        track = scrobble['name']
        album = scrobble.get('album', {}).get('#text', 'N/A')
        album_mbid = scrobble.get('album', {}).get('mbid', 'N/A')

        # Entries without a timestamp (per the original author, tracks that
        # are currently playing) keep 'N/A'; everything else is rendered as a
        # readable UTC string. An aware datetime replaces the deprecated
        # datetime.utcfromtimestamp and formats to the same text.
        if timestamp != 'N/A':
            moment = datetime.fromtimestamp(int(timestamp), tz=timezone.utc)
            timestamp = moment.strftime('%Y-%m-%d %H:%M:%S')

        if artist not in top_tag_cache:
            top_tag_cache[artist] = get_top_tag(api_key, artist)
        tag = top_tag_cache[artist]

        timestamp_list.append(timestamp)
        artist_list.append(artist)
        track_list.append(track)
        album_list.append(album)
        album_mbid_list.append(album_mbid)
        top_tag_list.append(tag)
        mbid_list.append(track_mbid)
        artist_mbid_list.append(artist_mbid)

    return (
        timestamp_list,
        artist_list,
        artist_mbid_list,
        track_list,
        mbid_list,
        album_list,
        album_mbid_list,
        top_tag_list,
    )
def create_dataframe(timestamps, artists, artist_MBID, tracks, mbids, album, album_MBID, tags):
    """Combine the eight parallel scrobble lists into a single DataFrame.

    Each argument becomes one column; the columns appear in the order
    Timestamp, Artist, artist_MBID, Track, track_MBID, Album, album_MBID, Tag.

    Returns:
        A pandas DataFrame with one row per list position.
    """
    column_names = (
        'Timestamp',
        'Artist',
        'artist_MBID',
        'Track',
        'track_MBID',
        'Album',
        'album_MBID',
        'Tag',
    )
    column_values = (
        timestamps,
        artists,
        artist_MBID,
        tracks,
        mbids,
        album,
        album_MBID,
        tags,
    )
    # Pair every heading with its list; dict order fixes the column order.
    return pd.DataFrame(dict(zip(column_names, column_values)))
def main():
    """Download one day of scrobbles and return them as a DataFrame.

    Reads the API key from the ``API_KEY`` environment variable, pages through
    ``get_lastfm_scrobbles`` for the target day (local midnight to
    23:59:59), enriches the rows via ``organize_scrobbles`` and builds the
    frame with ``create_dataframe``.

    Returns:
        A DataFrame of the day's scrobbles with rows whose ``Timestamp`` is
        ``'N/A'`` removed. When nothing was retrieved, an empty DataFrame
        with the same columns is returned so callers can still write it out.
    """
    start_total_time = time.time()
    api_key = os.getenv('API_KEY')
    username = 'sorfildor'

    # The target day is two days back even though the variable is named
    # `yesterday`. NOTE(review): confirm the extra day of lag is intentional;
    # the script entry point derives the CSV file name with the same offset.
    yesterday = datetime.now() - timedelta(days=2)
    from_timestamp = int(yesterday.replace(hour=0, minute=0, second=0, microsecond=0).timestamp())
    to_timestamp = int(yesterday.replace(hour=23, minute=59, second=59, microsecond=999999).timestamp())

    all_scrobbles = []
    page = 1
    while True:
        scrobbles = get_lastfm_scrobbles(api_key, username, from_timestamp, to_timestamp, page)
        if not scrobbles:
            # An empty page after earlier full pages just means the end of
            # the data, so only report "no scrobbles" when nothing was found.
            if not all_scrobbles:
                print(f"no scrobbles for {yesterday.date()}")
            break
        all_scrobbles.extend(scrobbles)
        # A short page is the final page.
        if len(scrobbles) < 200:
            print(f"page {page}: {len(scrobbles)} records")
            break
        print(f"Page {page}: {len(scrobbles)} records")
        page += 1

    if not all_scrobbles:
        # Hand back an empty frame with the usual columns rather than leaving
        # the result undefined.
        return create_dataframe([], [], [], [], [], [], [], [])

    timestamps, artists, artist_MBID, tracks, mbids, album, album_MBID, tags = organize_scrobbles(all_scrobbles, api_key)
    df = create_dataframe(timestamps, artists, artist_MBID, tracks, mbids, album, album_MBID, tags)

    print(f"Total scrobbles for {yesterday.date()} is {len(df)}")
    print(f"Total runtime: {time.time() - start_total_time:.2f} seconds")

    # Drop entries whose Timestamp is 'N/A'; per the original author these are
    # tracks that were being listened to at request time.
    return df[df['Timestamp'] != 'N/A']
if __name__ == "__main__":
    # Script entry point: fetch the scrobbles and save them as
    # "<YYYY-MM-DD>-scrobbles.csv" in the current working directory.
    result_df = main()
    # Same two-day offset that main() uses, so the file name matches the day
    # that was queried.
    yesterday = datetime.now() - timedelta(days=2)
    file_name = yesterday.strftime("%Y-%m-%d-scrobbles.csv")
    if result_df is None:
        # Nothing came back from main(); avoid calling to_csv on None.
        print(f"No data returned for {yesterday.date()}; skipping {file_name}")
    else:
        result_df.to_csv(file_name, index=False)