-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex3.py
73 lines (57 loc) · 2.25 KB
/
index3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import requests
import json
import langid
# Load the keyword -> tweet-ID mapping that drives the download loop below.
# The encoding is given explicitly because JSON text is UTF-8; relying on the
# platform default would mis-decode non-ASCII keywords on some systems.
with open('keyword_numbers.json', 'r', encoding='utf-8') as keyword_file:
    keyword_numbers = json.load(keyword_file)
def detect_language(text):
    """Classify the language of *text* with langid.

    Returns:
        A ``(lang, confidence)`` pair exactly as produced by
        ``langid.classify``: the detected language code first, then the
        score langid reports for that guess.
    """
    language_code, score = langid.classify(text)
    return language_code, score
# Endpoint queried for every tweet ID, and the per-request timeout in seconds.
# Without a timeout a single stalled connection would block the script forever.
TWEET_RESULT_URL = "https://cdn.syndication.twimg.com/tweet-result"
REQUEST_TIMEOUT_SECONDS = 10


def _build_tweet_record(data):
    """Convert one decoded tweet payload into the record written to disk.

    Args:
        data: The dict decoded from the endpoint's JSON response.

    Returns:
        A dict with the keys ``context_annotations`` (hashtag texts and
        mentioned user names), ``created_at``, ``entities``, ``lang``,
        ``public_metrics`` and ``text``.

    Raises:
        KeyError: If ``created_at`` or ``text`` is missing from *data*, or a
            hashtag / user-mention entry lacks its ``text`` / ``name`` key.
    """
    # "entities" and its sub-lists are treated as optional so a payload
    # without them yields empty lists instead of aborting the whole run.
    entities = data.get("entities") or {}
    text = data["text"]
    lang, _confidence = detect_language(text)
    return {
        "context_annotations": {
            "hashtags": [
                hashtag["text"] for hashtag in entities.get("hashtags") or []
            ],
            "user_mentions": [
                user["name"] for user in entities.get("user_mentions") or []
            ],
        },
        "created_at": data["created_at"],
        # Media URL extraction (entities.media[*].media_url_https) is not
        # implemented; the key is kept so the output schema stays the same.
        "entities": [],
        "lang": lang,
        # "" is the placeholder used when the payload carries no like count.
        "public_metrics": {"likes": data.get("favorite_count", "")},
        "text": text,
    }


# One entry per keyword: {"keyword": <keyword>, "data": [<tweet record>, ...]}.
result = []

# A single session reuses the underlying connection across all requests.
with requests.Session() as session:
    for keyword, tweet_ids in keyword_numbers.items():
        keyword_data = {"keyword": keyword, "data": []}
        for tweet_id in tweet_ids:
            try:
                response = session.get(
                    TWEET_RESULT_URL,
                    params={"id": tweet_id, "lang": "en"},
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
            except requests.RequestException as exc:
                # A network failure for one tweet must not discard the data
                # already collected for the others.
                print(f"Request for ID {tweet_id} failed: {exc}")
                continue

            if response.status_code != 200:
                print(f"Request for ID {tweet_id} failed with status code:", response.status_code)
                continue

            try:
                data = response.json()
            except ValueError as exc:
                print(f"Response for ID {tweet_id} is not valid JSON: {exc}")
                continue

            try:
                tweet_record = _build_tweet_record(data)
            except KeyError as exc:
                print(f"Response for ID {tweet_id} is missing field {exc}; skipped")
                continue

            keyword_data["data"].append(tweet_record)
        result.append(keyword_data)

# Write the result to a JSON file
with open('tweet_data.json', 'w', encoding='utf-8') as output_file:
    json.dump(result, output_file, indent=4)

print("Data written to tweet_data.json")