-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPubAg Script.py
63 lines (37 loc) · 1.49 KB
/
PubAg Script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# coding: utf-8
# In[1]:
import http.client
import math
import re
import urllib.request
from collections import Counter

# In[2]:
import pandas as pd
# In[3]:
# Base of the PubAg search query; the AGID and API key are appended to it.
API_URL = 'https://api.nal.usda.gov/pubag/rest/search/?query=agid:'
# Phrase looked for in each search response.
FULL_TEXT_MARKER = 'Full Text'


def build_query_url(agid, api_key):
    """Return the PubAg search URL for a single AGID.

    The AGID is floored to an integer before being placed in the query
    (presumably because the spreadsheet column is loaded as floats, e.g.
    ``12345.0`` -- confirm against the input data).
    """
    return API_URL + str(math.floor(agid)) + '&api_key=' + str(api_key)


def has_full_text(page_text):
    """Return True when 'Full Text' occurs exactly once in *page_text*.

    Responses with zero or several occurrences are not counted, which keeps
    the original comparison against ``Counter({'Full Text': 1})``.
    """
    return page_text.count(FULL_TEXT_MARKER) == 1


def fetch_page(url):
    """Download *url* and return the response body decoded as UTF-8."""
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')


def count_full_text(agids, api_key, fetch=fetch_page):
    """Count the AGIDs whose search response is flagged as full text.

    Args:
        agids: iterable of numeric AGID values.
        api_key: API key appended to every query.
        fetch: callable taking a URL and returning the page text; it can be
            swapped out to run the count without network access.

    Returns:
        The number of AGIDs for which ``has_full_text`` is true.
    """
    total = 0
    for agid in agids:
        try:
            page_text = fetch(build_query_url(agid, api_key))
        except http.client.HTTPException:
            # Best effort: skip records whose response is malformed
            # (e.g. BadStatusLine) instead of aborting the whole run.
            continue
        # Each row is judged only on its own response, so no state carries
        # over from a previous row.
        if has_full_text(page_text):
            total += 1
    return total


def main():
    """Prompt for the spreadsheet and API key, then print the count."""
    file_type = input("Enter file type: ")
    if file_type != 'xlsx':
        print("Not valid")
        # Stop here: there is no data frame to process for other file types.
        raise SystemExit(1)
    file_path = input("Enter complete file path: ")
    df = pd.read_excel(file_path)
    # Drop the rows that do not have an AGID.
    df = df.dropna(subset=['AGID'])
    api_key = input("Enter API key: ")
    total = count_full_text(df['AGID'], api_key)
    print("Full Text publications in PubAg: " + str(total))


if __name__ == "__main__":
    main()