-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8f989e1
commit 03a9fc9
Showing
14 changed files
with
295 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
node_modules | ||
experiment/testResults/venv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
const {promptLLMOpenAI} = require("./gpt"); | ||
const fs = require('fs'); | ||
|
||
const testCases = require('./test-cases.json'); | ||
const metaPrompt = fs.readFileSync('index-gen.txt', 'utf-8'); | ||
|
||
/**
 * Generates one Python script per entry in `testCases`.
 *
 * For every test case a prompt is assembled from the case's own prompt, the
 * fixed output instructions and the meta prompt, sent to the `gpt-4o` model
 * via `promptLLMOpenAI`, and the answer is written to
 * `testResults/<index>-<name>.py`.
 *
 * All test cases are processed concurrently. The returned promise settles
 * only after every request and file write has finished, and rejects if any
 * of them fails (an `async` callback passed to `forEach` would not be
 * awaited, so completion and errors would be invisible to the caller).
 *
 * @returns {Promise<void>}
 */
async function main() {
  await Promise.all(
    testCases.map(async (testCase, index) => {
      const prompt = `\
${testCase.prompt}
Generate the python code without any other wrapping elements or text.
Also no code fencing like \`\`\`python is allowed
I have my jina token as env var: JINA_API_KEY
${metaPrompt}`;
      const response = await promptLLMOpenAI(prompt, 'gpt-4o');
      fs.writeFileSync(`testResults/${index}-${testCase.name}.py`, response);
    }),
  );
}
|
||
// Entry point: report any rejection from main() and exit with a failure code
// instead of leaving the promise floating.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,34 @@ | ||
[ | ||
"Generate a js script that creates embeddings out of the numbers 1 to 100 (in text form).", | ||
"Generate a js script that re-ranks the words Jina, Weaviate, OpenAI, Hugging Face, Qdrant for the query 'Future of AI'.", | ||
"write the full code that embed every sentence from https://news.ycombinator.com/newest and store the embedding into a numpy array.", | ||
"Write the js code to check the validity of the following statement on bbc.com 'The UK government has announced a new law that will require social media companies to verify the age of their users.'" | ||
{ | ||
"name": "hackernews", | ||
"prompt": "grab every sentence from hackernews frontpage and visualize them in a 2d umap using matplotlib" | ||
}, | ||
{ | ||
"name": "image-rerank", | ||
"prompt": "I want to classify a series of images based on their domain, can I do that with Jina?" | ||
}, | ||
{ | ||
"name": "batch-embedding", | ||
"prompt": "creates embeddings out of the numbers 1 to 100 (in text form)." | ||
}, | ||
{ | ||
"name": "embedding for classification", | ||
"prompt": "generate an embedding that is good for a classification task for the word 'Jina'" | ||
}, | ||
{ | ||
"name": "embedding late chunking", | ||
"prompt": "generate an embedding with late chunking for the word 'Jina'" | ||
}, | ||
{ | ||
"name": "embedding binary return type", | ||
"prompt": "generate an embedding with binary return type for the word 'Jina'" | ||
}, | ||
{ | ||
"name": "re-rank", | ||
"prompt": "re-ranks the words Jina, Weaviate, OpenAI, Hugging Face, Qdrant for the query 'Future of AI'." | ||
}, | ||
{ | ||
"name": "reader-grounding", | ||
"prompt": "Write the js code to check the validity of the following statement on bbc.com 'The UK government has announced a new law that will require social media companies to verify the age of their users.'" | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
"""Embed the lines of the Hacker News front page and plot them in 2D with UMAP."""
import os
import requests
import matplotlib.pyplot as plt
import umap
import numpy as np

# Jina API key from environment variables
JINA_API_KEY = os.getenv('JINA_API_KEY')

# Endpoint for embeddings
embedding_endpoint = "https://api.jina.ai/v1/embeddings"
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {JINA_API_KEY}"
}

# Grab the HackerNews frontpage through the Jina reader endpoint.
# The response is parsed with .json(), so JSON output is requested explicitly
# via the Accept header; the page text is then read from data.content.
reader_endpoint = "https://r.jina.ai/https://news.ycombinator.com"
reader_headers = {
    "Accept": "application/json",
    "Authorization": f"Bearer {JINA_API_KEY}"
}
reader_response = requests.get(reader_endpoint, headers=reader_headers)
reader_response.raise_for_status()  # fail early on HTTP errors instead of on a confusing KeyError
sentences = reader_response.json()['data']['content'].split('\n')

# Filter out only sentences (simple approach, refine as needed):
# keep lines that contain more than three whitespace-separated words.
sentences = [s for s in sentences if len(s.split()) > 3]

# Prepare data for embedding request
data = {
    "model": "jina-embeddings-v3",
    "input": sentences
}

# Request embeddings and stack them into one array, one row per sentence
response = requests.post(embedding_endpoint, json=data, headers=headers)
response.raise_for_status()
embeddings = np.array([item['embedding'] for item in response.json()['data']])

# Reduce embeddings to 2D using UMAP
reducer = umap.UMAP()
embedding_2d = reducer.fit_transform(embeddings)

# Visualize with matplotlib
plt.figure(figsize=(12,8))
plt.scatter(embedding_2d[:, 0], embedding_2d[:, 1])
plt.title('HackerNews Sentences Visualized in 2D with UMAP')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
"""Send a mixed batch of texts and images to the Jina classify endpoint and print the reply."""
import requests
import os


def _text(value):
    # Wrap a string as a text input item.
    return {"text": value}


def _image(url):
    # Wrap a URL as an image input item.
    return {"image": url}


CLASSIFY_URL = "https://api.jina.ai/v1/classify"

api_key = os.getenv('JINA_API_KEY')
request_headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

# Items to classify, in the order they are sent.
inputs = [
    _text("A sleek smartphone with a high-resolution display and multiple camera lenses"),
    _text("Fresh sushi rolls served on a wooden board with wasabi and ginger"),
    _image("https://picsum.photos/id/11/367/267"),
    _image("https://picsum.photos/id/22/367/267"),
    _text("Vibrant autumn leaves in a dense forest with sunlight filtering through"),
    _image("https://picsum.photos/id/8/367/267"),
]

# Candidate labels passed in the request payload.
labels = [
    "Technology and Gadgets",
    "Food and Dining",
    "Nature and Outdoors",
    "Urban and Architecture",
]

payload = {"model": "jina-clip-v1", "input": inputs, "labels": labels}

reply = requests.post(CLASSIFY_URL, headers=request_headers, json=payload)
print(reply.json())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
"""Request embeddings for the numbers 1 to 100 (as text) and print the API reply."""
import os
import requests

EMBEDDINGS_URL = "https://api.jina.ai/v1/embeddings"

# The API key is read from the JINA_API_KEY environment variable.
api_key = os.getenv("JINA_API_KEY")

request_headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

# "1", "2", ..., "100"
numbers_as_text = list(map(str, range(1, 101)))

payload = {
    "model": "jina-embeddings-v3",
    "task": "text-matching",
    "dimensions": 1024,
    "input": numbers_as_text,
}

# Send the batch in a single POST and print the decoded JSON body.
reply = requests.post(EMBEDDINGS_URL, json=payload, headers=request_headers)
print(reply.json())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
"""Request a float embedding for the word 'Jina' and print the API reply."""
import os  # required by os.getenv below; without it the script fails with NameError

import requests

endpoint = "https://api.jina.ai/v1/embeddings"
headers = {
    "Content-Type": "application/json",
    # API key is read from the JINA_API_KEY environment variable.
    "Authorization": f"Bearer {os.getenv('JINA_API_KEY')}"
}
data = {
    "model": "jina-embeddings-v3",
    "task": "text-matching",
    "dimensions": 1024,
    "late_chunking": False,
    "embedding_type": "float",
    "input": ["Jina"]
}
response = requests.post(endpoint, json=data, headers=headers)
print(response.json())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
"""Request an embedding for the word 'Jina' with late chunking enabled and print the API reply."""
import os  # required by os.getenv below; without it the script fails with NameError

import requests

endpoint = "https://api.jina.ai/v1/embeddings"
headers = {
    "Content-Type": "application/json",
    # API key is read from the JINA_API_KEY environment variable.
    "Authorization": f"Bearer {os.getenv('JINA_API_KEY')}"
}
data = {
    "model": "jina-embeddings-v3",
    "task": "text-matching",
    "dimensions": 1024,
    "late_chunking": True,
    "embedding_type": "float",
    "input": [
        "Jina"
    ]
}
response = requests.post(endpoint, json=data, headers=headers)
print(response.json())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
"""Request a binary-typed embedding for the word 'Jina' and print the API reply."""
import os  # required by os.environ below; without it the script fails with NameError

import requests

endpoint = "https://api.jina.ai/v1/embeddings"
headers = {
    "Content-Type": "application/json",
    # os.environ.get returns None when JINA_API_KEY is unset, in which case
    # this concatenation raises TypeError before any request is sent.
    "Authorization": "Bearer " + os.environ.get('JINA_API_KEY')
}
data = {
    "model": "jina-embeddings-v3",
    "task": "text-matching",
    "dimensions": 1024,
    "late_chunking": False,
    "embedding_type": "binary",
    "input": ["Jina"]
}
response = requests.post(endpoint, json=data, headers=headers)
print(response.json())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
"""Re-rank a fixed list of names against the query 'Future of AI' and print each text with its score."""
import requests
import os

RERANK_URL = "https://api.jina.ai/v1/rerank"

api_key = os.getenv('JINA_API_KEY')
request_headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

candidates = ["Jina", "Weaviate", "OpenAI", "Hugging Face", "Qdrant"]

payload = {
    "model": "jina-reranker-v2-base-multilingual",
    "query": "Future of AI",
    "top_n": 5,
    "documents": candidates,
}

reply = requests.post(RERANK_URL, headers=request_headers, json=payload)
body = reply.json()

# A reply without a 'results' key prints nothing.
ranked = body.get('results', [])
for entry in ranked:
    text = entry['document']['text']
    score = entry['relevance_score']
    print(text, score)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
"""Query s.jina.ai for a statement and list the returned entries whose URL contains 'bbc.com'."""
import os
import requests

SEARCH_URL = 'https://s.jina.ai'
STATEMENT = 'The UK government has announced a new law that will require social media companies to verify the age of their users.'

# API key is read from the JINA_API_KEY environment variable.
token = os.getenv('JINA_API_KEY')

reply = requests.get(
    SEARCH_URL,
    headers={'Authorization': f'Bearer {token}'},
    params={'query': STATEMENT, 'lang': 'en-US'},
)

if response_failed := (reply.status_code != 200):
    print("Failed to fetch data from API.")
else:
    body = reply.json()
    entries = body.get('data', {})

    # Keep only entries whose 'url' field mentions bbc.com.
    bbc_hits = list(filter(lambda entry: 'bbc.com' in entry.get('url', ''), entries))

    if not bbc_hits:
        print("No relevant articles found on BBC.com regarding the statement.")
    else:
        print("Found relevant articles on BBC.com:")
        for hit in bbc_hits:
            print(hit.get('title'), '-', hit.get('url'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
requests | ||
scikit-learn | ||
umap-learn | ||
matplotlib |