-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_citations.py
145 lines (114 loc) · 4.12 KB
/
get_citations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import ads
import matplotlib.dates as mdates
import pandas as pd
import matplotlib.pyplot as plt
import json
from ads_fields import FIELDS
def query_ads() -> pd.DataFrame:
    """Query NASA ADS for papers referencing lightkurve.

    Runs a full-text ADS search for "lightkurve" restricted to the years
    2017-2050, collects one row per returned paper, drops Zenodo entries and
    AAS abstracts, and sorts the remainder newest first.

    Returns:
        pd.DataFrame: One row per paper with the columns ``year``, ``date``,
        ``title``, ``author``, ``bibcode``, ``pub`` and ``cite_count``,
        sorted by ``date`` in descending order with a fresh 0..n-1 index.
    """
    qry = ads.SearchQuery(
        q='full:"lightkurve" AND year:2017-2050', rows=999999, fl=FIELDS
    )
    # Materialise the results and reverse them once, rather than re-slicing
    # the list for every column.
    papers = list(qry)[::-1]

    df = pd.DataFrame(
        {
            "year": [p.year for p in papers],
            "date": pd.to_datetime([p.date for p in papers]),
            # Guard against a record with no title so that one bad entry
            # does not abort the whole run.
            "title": [p.title[0] if p.title else None for p in papers],
            "author": [p.first_author_norm for p in papers],
            "bibcode": [p.bibcode for p in papers],
            "pub": [p.pub for p in papers],
            "cite_count": [p.citation_count for p in papers],
        }
    )

    # Filter out Zenodo entries and AAS Abstracts.
    # The pattern has no capture groups (pandas warns about match groups in
    # ``str.contains``), and ``na=False`` keeps the mask strictly boolean so
    # records without a ``pub`` value are retained instead of breaking the
    # negation / boolean indexing below.
    excluded = df.pub.str.contains(r"Zenodo|Abstracts", na=False)

    # Sort by date and reset index
    df = df[~excluded].sort_values("date", ascending=False).reset_index(drop=True)
    return df
def make_recent_table(df) -> pd.DataFrame:
    """Build a table of the 5 most recent papers which cite lightkurve.

    Each title is turned into a markdown link pointing at the paper's ADS
    abstract page, and the date is reduced to its calendar-date part.

    Args:
        df (pd.DataFrame): Input statistics; the ``date``, ``title``,
            ``author`` and ``bibcode`` columns are used.

    Returns:
        pd.DataFrame: Table with the columns ``Date``, ``Title`` and
        ``Author``, newest paper first.
    """
    column_labels = {"date": "Date", "title": "Title", "author": "Author"}
    newest = df.sort_values("date", ascending=False).head(5)
    newest = newest.rename(columns=column_labels)

    # Wrap every title in a markdown link to its ADS abstract page.
    newest["Title"] = [
        f"[{title}](https://ui.adsabs.harvard.edu/abs/{bibcode}/abstract)"
        for title, bibcode in zip(newest["Title"], newest["bibcode"])
    ]
    # Keep only the calendar date, dropping the time-of-day component.
    newest["Date"] = newest["Date"].dt.date

    return newest[["Date", "Title", "Author"]]
def make_readme(md, path="README.md"):
    """Write a readme file containing the publications plot and recent papers.

    The file consists of a heading, an image reference to
    ``out/lightkurve-publications.png`` and then ``md`` verbatim.

    Args:
        md (str): Markdown to include below the plot.
        path (str, optional): Output path. Defaults to 'README.md'.
    """
    readme_str = (
        "\n"
        "<h1>Lightkurve statistics</h1>\n"
        "![publications](out/lightkurve-publications.png)\n"
        f"{md}\n"
    )
    # The context manager closes the file even if the write fails. UTF-8 is
    # set explicitly so non-ASCII text in ``md`` does not depend on the
    # platform's default encoding.
    with open(path, "w", encoding="utf-8") as text_file:
        text_file.write(readme_str)
def make_plot(df: pd.DataFrame, path="out/lightkurve-publications.png"):
    """Plot the running count of lightkurve publications and save it.

    Args:
        df (pd.DataFrame): Statistics table; only its ``date`` column is read.
        path (str, optional): Output path. Defaults to
            'out/lightkurve-publications.png'.
    """
    grey = "#777"

    # Sample points from the start of 2018 up to the newest paper in the table.
    sample_dates = pd.date_range("2018-01-01T00:00:00Z", df.date.max(), freq="1M")
    # For each sample point, the number of papers dated strictly before it.
    paper_dates = df.date
    running_count = [len(df[paper_dates < stamp]) for stamp in sample_dates]

    fig, ax = plt.subplots(figsize=[9, 5])
    ax.plot(sample_dates, running_count, marker="o", c=grey)
    ax.set_xlabel("Year", fontsize=15, c=grey)
    ax.set_ylabel("Publications", fontsize=15, c=grey)

    # Between 3 and 7 date ticks, labelled with the concise date format.
    tick_locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
    tick_formatter = mdates.ConciseDateFormatter(tick_locator)
    ax.xaxis.set_major_locator(tick_locator)
    ax.xaxis.set_major_formatter(tick_formatter)

    # Ticks, tick labels and the axes frame all use the same grey as the line.
    ax.tick_params(color=grey, labelcolor=grey)
    for edge in ax.spines.values():
        edge.set_edgecolor(grey)

    plt.savefig(path, transparent=True)
    plt.close()
def make_badge_endpoint(df, path="out/badge.json"):
    """Write a JSON badge description whose status is the number of rows in ``df``.

    The JSON object has the keys ``color`` ("blue"), ``status`` (``len(df)``
    as a string) and ``subject`` ("Citations").

    Args:
        df (pd.DataFrame): Statistics table; only its length is used.
        path (str, optional): Output path. Defaults to 'out/badge.json'.
    """
    badge = {"color": "blue", "status": f"{len(df)}", "subject": "Citations"}
    json_object = json.dumps(badge, indent=4)

    # Write the badge description to the requested path.
    with open(path, "w", encoding="utf-8") as outfile:
        outfile.write(json_object)
if __name__ == "__main__":
    import os

    # The statistics table, the plot and the badge are all written under
    # out/; create the directory up front so the first write cannot fail on
    # a missing folder.
    os.makedirs("out", exist_ok=True)

    # Query ADS
    df = query_ads()
    # Save entire statistics table
    df.to_csv("out/statistics.csv")
    # Get recent publications
    recent = make_recent_table(df)
    # Make the plot
    make_plot(df)
    # Now save the readme, embedding the recent-papers table as markdown
    make_readme(recent.to_markdown())
    # Finally write the badge description carrying the paper count
    make_badge_endpoint(df)