-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathv2_checker.py
58 lines (48 loc) · 2.06 KB
/
v2_checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import os
import json
from api import TwitterAPI
from base import Base
import media_downloader as downloader
# Parse the saved v2 JSON and check for missing media files.
#
# data_dir is the directory that is listed at startup to find the users to
# process; download_media() also hands it to Base._change_parent_dir().
data_dir = 'data.v2'
def download_media(username: str, api: TwitterAPI) -> bool:
    """Download the photos and videos attached to a user's own saved tweets.

    The user JSON and tweets JSON for ``username`` are loaded through
    ``Base`` (re-rooted at ``data_dir``).  Each photo or video listed in a
    tweet's ``extended_entities`` is then handed to the ``media_downloader``
    module, with ``Base.media_dir`` as the target directory.

    Args:
        username: Name of the user whose saved data is processed.
        api: API client passed through to ``Base``.

    Returns:
        ``False`` if the user JSON or the tweets JSON could not be read,
        ``True`` once every tweet has been processed.
    """
    b = Base(username, api)
    b._change_parent_dir(data_dir)
    if b._read_user_json() is False:
        print('[!] failed to read user json for %s' % b.username)
        return False
    if b._read_tweets_json() is False:
        print('[!] failed to read tweets json for %s' % b.username)
        return False

    for tweet_id, tweet in b.tweets_json.items():
        # The saved tweets also contain retweets, which are kept on disk but
        # not parsed for media downloads.  A retweet carries a `user_id_str`
        # different from our own `rest_id`.
        if tweet['user_id_str'] != b.user_json['rest_id']:
            continue

        # Tweets without media have no `extended_entities`; a missing or
        # empty `media` list is treated the same way instead of raising.
        entities = tweet.get('extended_entities') or {}
        medias = entities.get('media') or []

        # https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/overview/extended-entities-object#intro
        # media has 3 types: 'photo', 'video' or 'animated_gif'
        # NOTE(review): 'animated_gif' entries are skipped here, as before;
        # confirm whether downloader.video() can handle them before routing
        # them through it.
        for media in medias:
            media_type = media['type']
            if media_type == 'video':
                downloader.video(tweet_id, media['video_info'], b.media_dir)
            elif media_type == 'photo':
                downloader.tweet_photo(media['media_url_https'], b.media_dir)

    # Explicit success value so the result can be told apart from the
    # `False` returned on the failure paths above.
    return True
# Load the GraphQL endpoints and the bearer token from the local config file.
# JSON is read as UTF-8 explicitly rather than with the platform's locale
# default.
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

api = TwitterAPI(
    config["graphql_userbyscreenname_endpoint"],
    config["graphql_usertweets_endpoint"],
    config["graphql_tweetdetail_endpoint"],
    config["bearer_token"],
)

if not api.get_guest_token():
    print('[!] failed to retrive guest token')
else:
    # Each sub-directory of data_dir is processed as one user.  Plain files
    # that happen to live next to the user directories are skipped, and the
    # loop variable no longer shadows the builtin `dir`.
    for entry in os.listdir(data_dir):
        if not os.path.isdir(os.path.join(data_dir, entry)):
            continue
        download_media(entry, api)