Skip to content

Commit

Permalink
Getting Flask to work
Browse files Browse the repository at this point in the history
  • Loading branch information
Plikt committed Apr 10, 2024
1 parent ca0cb9b commit 61a5ca8
Show file tree
Hide file tree
Showing 8 changed files with 606 additions and 54 deletions.
Binary file modified .DS_Store
Binary file not shown.
6 changes: 5 additions & 1 deletion DOCKERFILE
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,9 @@ COPY app/ /usr/src/app/
# Install dependencies
RUN pip install -r requirements.txt

# Run the service: expose the port it listens on (open question: what is the request/response model?)
EXPOSE 5001

# Define the command to run when the container starts
CMD ["python", "langchain_api.py"]
CMD ["flask", "run", "--host=0.0.0.0"]
#CMD ["python", "langchain_orcid.py"]
Binary file modified app/__pycache__/langchain_api.cpython-310.pyc
Binary file not shown.
87 changes: 48 additions & 39 deletions app/langchain_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,6 @@
import tiktoken
#from demo import read_single

#TODO: IF doi -> then search open alex -> determine relevant metadata to return. -> Together once everything is up to date.
#TODO: get api + langchain + structured output in a pretty package -> Ellie

#from ..Server.PDFDataExtractor.pdfdataextractor.demo import read_single
sys.path.append(os.path.abspath("/Users/desot1/Dev/automating-metadata/Server/PDFDataExtractor/pdfdataextractor"))
pyalex.config.email = "ellie@desci.com"
Expand Down Expand Up @@ -255,6 +252,7 @@ def paper_data_json_single(doi):
openaccess_pdf = "None, Semantic Scholar lookup error"

# OpenAlex accessing as backup info for the previous tools
openalex=True
try:
openalex_results = Works()[doi] # Crossref search using DOI, "r" for request
except requests.exceptions.HTTPError as e:
Expand Down Expand Up @@ -410,51 +408,29 @@ def get_orcid(authors):
print(f"OpenAlex ORCID lookup returned error: {e}\n")
continue # Skip to the next author

if response["meta"]["count"] >= 1:
orcid = response["results"][0]["external_id"]
affiliation = response["results"][0]["affiliations"][0]
name = response["results"][0]["display_name"]
#print(response)
if response["meta"]["count"] >= 1:
orcid = response["results"][0]["orcid"]
print(orcid)
affiliation = response["results"][0]["affiliations"][0]["institution"]["display_name"]
display_name = response["results"][0]["display_name"] # Updated to use display_name

author_info = {
"orcid": orcid,
"affiliation": affiliation
"@id": f"https://orcid.org/{orcid}",
"role": "Person",
"affiliation": affiliation,
"name": display_name
}

orcid_info[author] = author_info
orcid_info[name] = orcid_info.pop(author)

orcid_info[author] = author_info

else:
print("None, There are no OrcID suggestions for this author")
author_info = "none"
orcid_info[author] = author_info
orcid_info[author] = "none"
continue # Skip to the next author



return orcid_info

#def get_orcid(authors):
orcid = []
author_info = {}

for author in authors:
try:
url = "https://api.openalex.org/autocomplete/authors?q=" + author
response = json.loads(requests.get(url).text)
except:
print(f"OpenAlex ORCID lookup returned error: {e}\n")

if response["meta"]["count"] == 1:
orcid = response["results"][0]["external_id"]
author_info[author] = {"orcid": orcid, "affiliation":response["results"][0]["hint"]}
elif response["meta"]["count"] == 0: #FAKE - Create a test so we can check if the return is valid.
print("None, There are no OrcID suggestions for this author")
else:
orcid = response["results"][0]["external_id"]
author_info[author] = {"orcid": orcid, "affiliation": response["results"][0]["hint"]}
#create an async function which ranks the authors based on the similarity to the paper.

return author_info

def check_item_filled(json_ld, name):
for item in json_ld["@graph"]:
Expand Down Expand Up @@ -499,7 +475,7 @@ def update_json_ld(json_ld, new_data):


#%% Main, general case for testing
if __name__ == "__main__":
"""if __name__ == "__main__":
print("Starting code run...")
node = "46" #os.getenv('NODE_ENV')
Expand Down Expand Up @@ -527,3 +503,36 @@ def update_json_ld(json_ld, new_data):
print(updated_json_ld)
print("Script completed")
"""
def run(node, doi=None):
    """Run the metadata pipeline for one node and return its JSON-LD.

    Args:
        node: Node identifier passed to ``get_jsonld`` and
            ``langchain_paper_search``. ``None`` is only reported, not
            rejected.
        doi: Optional DOI. When truthy it is passed to
            ``paper_data_json_single``.

    Returns:
        The JSON-LD document produced by ``get_jsonld(node)``. The DOI lookup
        and LLM results are fetched but not merged into it yet, because the
        ``update_json_ld`` calls are still disabled (see TODOs below).
    """
    print("Starting code run...")

    if node is not None:
        print(f"NODE_ENVIRONMENT is set to: {node}")
    else:
        print("NODE_ENVIRONMENT is not set.")

    json_ld = get_jsonld(node)
    print(json_ld)

    if doi:
        # The result is not merged yet; the call is kept so the lookup still
        # runs exactly as before.
        lookup_results = paper_data_json_single(doi)
        # TODO: updated_json_ld = update_json_ld(json_ld, lookup_results)

    # Output of unstructured text in a dictionary.
    llm_output = asyncio.run(langchain_paper_search(node))
    # TODO: updated_json_ld = update_json_ld(json_ld, llm_output)

    # Until the merge steps above are enabled, the result is the unmodified
    # JSON-LD regardless of whether a DOI was supplied.
    updated_json_ld = json_ld

    print("Script completed")

    # Hand the document back so callers (e.g. a web route) can use it instead
    # of always receiving None.
    return updated_json_ld

# Manual entry point: when this file is executed as a script, run the
# pipeline once for node "46" with a sample DOI.
if __name__ == "__main__":
    run("46", "https://doi.org/10.1002/adma.202208113")
56 changes: 42 additions & 14 deletions app/langchain_orcid.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,39 @@ async def langchain_paper_search(node):
llm_output['authors'] = get_orcid(llm_output["authors"])

return llm_output
def get_orcid(authors):
    """Look up ORCID and affiliation details for each author via OpenAlex.

    Sends one request per name to the OpenAlex ``/authors`` search endpoint
    and keeps the first result returned.

    Args:
        authors: Iterable of author-name strings to search for.

    Returns:
        dict: One entry per author whose lookup completed. An author with at
        least one match is keyed by the matched record's ``display_name`` and
        maps to ``{"orcid": ..., "affiliation": ...}`` (either value may be
        ``None`` when the record lacks it). An author with no match is keyed
        by the queried name and maps to the string ``"none"``. Authors whose
        request or response parsing failed are omitted.
    """
    orcid_info = {}  # Dictionary to store author information

    for author in authors:
        try:
            # Pass the name through ``params`` so spaces, ampersands and
            # non-ASCII characters are URL-encoded; the timeout keeps one
            # stalled request from hanging the whole run.
            reply = requests.get(
                "https://api.openalex.org/authors",
                params={"search": author},
                timeout=30,
            )
            response = json.loads(reply.text)
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"OpenAlex ORCID lookup returned error: {e}\n")
            continue  # Skip to the next author

        # Error payloads may lack "meta"/"results"; treat them as "no match"
        # rather than raising KeyError.
        results = response.get("results") or []
        match_count = (response.get("meta") or {}).get("count", 0)

        if match_count >= 1 and results:
            top = results[0]
            # Records from /authors carry the identifier under "orcid";
            # "external_id" is kept as a fallback for autocomplete-style
            # payloads.
            orcid = top.get("orcid", top.get("external_id"))
            # An author record can have an empty affiliations list.
            affiliations = top.get("affiliations") or []
            affiliation = affiliations[0] if affiliations else None
            name = top.get("display_name") or author

            orcid_info[name] = {
                "orcid": orcid,
                "affiliation": affiliation,
            }
        else:
            print("None, There are no OrcID suggestions for this author")
            orcid_info[author] = "none"

    return orcid_info

"""def get_orcid(authors):
orcid_info = [] # Dictionary to store author information
for author in authors:
Expand Down Expand Up @@ -403,7 +434,7 @@ def get_orcid(authors):
return orcid_info

"""
def update_json_ld(json_ld, new_data):
# Process author information
loop = 0
Expand Down Expand Up @@ -441,22 +472,18 @@ def update_json_ld(json_ld, new_data):


#%% Main, general case for testing
if __name__ == "__main__":
print("Starting code run...")
def run(node, doi=None):

node = "46" #os.getenv('NODE_ENV')
DOI_env = "10.3847/0004-637X/828/1/46"#os.getenv('DOI_ENV') #

print("Starting code run...")
if node is not None:
print(f"NODE_ENVIRONMENT is set to: {node}")
print(f"NODE is set to: {node}")
else:
print("NODE_ENVIRONMENT is not set.")
print("NODE is not set.")

json_ld = get_jsonld(node)
print(json_ld)

if DOI_env:
lookup_results = paper_data_json_single(DOI_env)
if doi:
lookup_results = paper_data_json_single(doi)
#updated_json_ld = update_json_ld(json_ld, lookup_results)

else:
Expand All @@ -468,6 +495,7 @@ def update_json_ld(json_ld, new_data):

#doi = "https://doi.org/10.1002/adma.202208113"

#print(updated_json_ld)

print("Script completed")

if __name__ == "__main__":
run("46")
Loading

0 comments on commit 61a5ca8

Please sign in to comment.