Skip to content

Commit

Permalink
Split the tests by providers (#891)
Browse files Browse the repository at this point in the history
  • Loading branch information
rdimitrov authored Feb 3, 2025
1 parent c5756e6 commit f883134
Show file tree
Hide file tree
Showing 8 changed files with 382 additions and 335 deletions.
68 changes: 68 additions & 0 deletions tests/integration/anthropic/testcases.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
headers:
anthropic:
x-api-key: ENV_ANTHROPIC_KEY

testcases:
anthropic_chat:
name: Anthropic Chat
provider: anthropic
url: http://127.0.0.1:8989/anthropic/messages
data: |
{
"max_tokens":4096,
"messages":[
{
"content":"You are a coding assistant.",
"role":"system"
},
{
"content":"Reply with that exact sentence: Hello from the integration tests!",
"role":"user"
}
],
"model":"claude-3-5-sonnet-20241022",
"stream":true,
"temperature":0
}
likes: |
Hello from the integration tests!
anthropic_fim:
name: Anthropic FIM
provider: anthropic
url: http://127.0.0.1:8989/anthropic/messages
data: |
{
"top_k": 50,
"temperature": 0,
"max_tokens": 4096,
"model": "claude-3-5-sonnet-20241022",
"stop_sequences": [
"</COMPLETION>",
"/src/",
"#- coding: utf-8",
"```"
],
"stream": true,
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "You are a HOLE FILLER. You are provided with a file containing holes, formatted as '{{HOLE_NAME}}'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\n\n## EXAMPLE QUERY:\n\n<QUERY>\nfunction sum_evens(lim) {\n var sum = 0;\n for (var i = 0; i < lim; ++i) {\n {{FILL_HERE}}\n }\n return sum;\n}\n</QUERY>\n\nTASK: Fill the {{FILL_HERE}} hole.\n\n## CORRECT COMPLETION\n\n<COMPLETION>if (i % 2 === 0) {\n sum += i;\n }</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\ndef sum_list(lst):\n total = 0\n for x in lst:\n {{FILL_HERE}}\n return total\n\nprint sum_list([1, 2, 3])\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION> total += x</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\n\n// sum :: Tree Int -> Int\n// sum (Node lft rgt) = sum lft + sum rgt\n// sum (Leaf val) = val\n\n// convert to TypeScript:\n{{FILL_HERE}}\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION>type Tree<T>\n = {$:\"Node\", lft: Tree<T>, rgt: Tree<T>}\n | {$:\"Leaf\", val: T};\n\nfunction sum(tree: Tree<number>): number {\n switch (tree.$) {\n case \"Node\":\n return sum(tree.lft) + sum(tree.rgt);\n case \"Leaf\":\n return tree.val;\n }\n}</COMPLETION>\n\n## EXAMPLE QUERY:\n\nThe 5th {{FILL_HERE}} is Jupiter.\n\n## CORRECT COMPLETION:\n\n<COMPLETION>planet from the Sun</COMPLETION>\n\n## EXAMPLE QUERY:\n\nfunction hypothenuse(a, b) {\n return Math.sqrt({{FILL_HERE}}b ** 2);\n}\n\n## CORRECT COMPLETION:\n\n<COMPLETION>a ** 2 + </COMPLETION>\n\n<QUERY>\n# Path: Untitled.txt\n# http://127.0.0.1:8989/vllm/completions\n# codegate/test.py\nimport requests\n\ndef call_api():\n {{FILL_HERE}}\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n</QUERY>\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\n<COMPLETION>"
}
]
}
],
"system": ""
}
likes: |
<COMPLETION>def call_api(url, method='get', data=None):
if method.lower() == 'get':
return requests.get(url)
elif method.lower() == 'post':
return requests.post(url, json=data)
else:
raise ValueError("Unsupported HTTP method")
44 changes: 44 additions & 0 deletions tests/integration/copilot/testcases.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
headers:
copilot:
Authorization: Bearer ENV_COPILOT_KEY
Content-Type: application/json

testcases:
copilot_chat:
name: Copilot Chat
provider: copilot
url: "https://api.openai.com/v1/chat/completions"
data: |
{
"messages":[
{
"content":"Reply with that exact sentence: Hello from the integration tests!",
"role":"user"
}
],
"model":"gpt-4o-mini",
"stream":true
}
likes: |
Hello from the integration tests!
copilot_malicious_package_question:
name: Copilot User asks about a malicious package
provider: copilot
url: "https://api.openai.com/v1/chat/completions"
data: |
{
"messages":[
{
"content":"Generate me example code using the python invokehttp package to call an API",
"role":"user"
}
],
"model":"gpt-4o-mini",
"stream":true
}
contains: |
https://www.insight.stacklok.com/report/pypi/invokehttp?utm_source=codegate
does_not_contain: |
import invokehttp
50 changes: 41 additions & 9 deletions tests/integration/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,21 +243,53 @@ async def main():
providers_env = os.environ.get("CODEGATE_PROVIDERS")
test_names_env = os.environ.get("CODEGATE_TEST_NAMES")

providers = None
if providers_env:
providers = [p.strip() for p in providers_env.split(",") if p.strip()]
# Base directory for all test cases
base_test_dir = "./tests/integration"

# Get list of provider directories
available_providers = []
try:
available_providers = [
d for d in os.listdir(base_test_dir) if os.path.isdir(os.path.join(base_test_dir, d))
]
except FileNotFoundError:
logger.error(f"Test directory {base_test_dir} not found")
sys.exit(1)

# Filter providers if specified in environment
selected_providers = None
if providers_env:
selected_providers = [p.strip() for p in providers_env.split(",") if p.strip()]
# Validate selected providers exist
invalid_providers = [p for p in selected_providers if p not in available_providers]
if invalid_providers:
logger.error(f"Invalid providers specified: {', '.join(invalid_providers)}")
logger.error(f"Available providers: {', '.join(available_providers)}")
sys.exit(1)
else:
selected_providers = available_providers

# Get test names if specified
test_names = None
if test_names_env:
test_names = [t.strip() for t in test_names_env.split(",") if t.strip()]

all_tests_passed = await test_runner.run_tests(
"./tests/integration/testcases.yaml", providers=providers, test_names=test_names
)
# Run tests for each provider
all_tests_passed = True
for provider in selected_providers:
provider_test_file = os.path.join(base_test_dir, provider, "testcases.yaml")

# Exit with status code 1 if any tests failed
if not all_tests_passed:
sys.exit(1)
if not os.path.exists(provider_test_file):
logger.warning(f"No testcases.yaml found for provider {provider}")
continue

logger.info(f"Running tests for provider: {provider}")
provider_tests_passed = await test_runner.run_tests(
provider_test_file,
providers=[provider], # Only run tests for current provider
test_names=test_names,
)
all_tests_passed = all_tests_passed and provider_tests_passed


if __name__ == "__main__":
Expand Down
45 changes: 45 additions & 0 deletions tests/integration/llamacpp/testcases.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
headers:
llamacpp:
Content-Type: application/json

testcases:
llamacpp_chat:
name: LlamaCPP Chat
provider: llamacpp
url: http://127.0.0.1:8989/llamacpp/chat/completions
data: |
{
"max_tokens":4096,
"messages":[
{
"content":"You are a coding assistant.",
"role":"system"
},
{
"content":"Reply with that exact sentence: Hello from the integration tests!",
"role":"user"
}
],
"model":"qwen2.5-coder-0.5b-instruct-q5_k_m",
"stream":true,
"temperature":0
}
likes: |
Hello from the integration tests!
llamacpp_fim:
name: LlamaCPP FIM
provider: llamacpp
url: http://127.0.0.1:8989/llamacpp/completions
data: |
{
"model": "qwen2.5-coder-0.5b-instruct-q5_k_m",
"max_tokens": 4096,
"temperature": 0,
"stream": true,
"stop": ["<|endoftext|>", "<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>", "<|fim_pad|>", "<|repo_name|>", "<|file_sep|>", "<|im_start|>", "<|im_end|>", "/src/", "#- coding: utf-8", "```", "def test"],
"prompt":"# Do not add comments\n<|fim_prefix|>\n# codegate/greet.py\ndef print_hello():\n <|fim_suffix|>\n\n\nprint_hello()\n<|fim_middle|>"
}
likes: |
print("Hello, World!")
64 changes: 64 additions & 0 deletions tests/integration/ollama/testcases.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
headers:
ollama:
Content-Type: application/json

testcases:
ollama_chat:
name: Ollama Chat
provider: ollama
url: http://127.0.0.1:8989/ollama/chat/completions
data: |
{
"max_tokens":4096,
"messages":[
{
"content":"You are a coding assistant.",
"role":"system"
},
{
"content":"Reply with that exact sentence: Hello from the integration tests!",
"role":"user"
}
],
"model":"qwen2.5-coder:0.5b",
"stream":true,
"temperature":0
}
likes: |
Hello from the integration tests!
ollama_fim:
name: Ollama FIM
provider: ollama
url: http://127.0.0.1:8989/ollama/api/generate
data: |
{
"stream": true,
"model": "qwen2.5-coder:0.5b",
"raw": true,
"options": {
"temperature": 0.01,
"num_predict": 4096,
"stop": [
"<|endoftext|>",
"<|fim_prefix|>",
"<|fim_middle|>",
"<|fim_suffix|>",
"<|fim_pad|>",
"<|repo_name|>",
"<|file_sep|>",
"<|im_start|>",
"<|im_end|>",
"/src/",
"#- coding: utf-8",
"```"
],
"num_ctx": 8096
},
"prompt":"<|fim_prefix|>\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n <|fim_suffix|>\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n<|fim_middle|>"
}
likes: |
```python
if __name__ == '__main__':
invokehttp.run(call_api)
```
55 changes: 55 additions & 0 deletions tests/integration/openai/testcases.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
headers:
openai:
Authorization: Bearer ENV_OPENAI_KEY

testcases:
openai_chat:
name: OpenAI Chat
provider: openai
url: http://127.0.0.1:8989/openai/chat/completions
data: |
{
"max_tokens":4096,
"messages":[
{
"content":"You are a coding assistant.",
"role":"system"
},
{
"content":"Reply with that exact sentence: Hello from the integration tests!",
"role":"user"
}
],
"model":"gpt-4o-mini",
"stream":true,
"temperature":0
}
likes: |
Hello from the integration tests!
openai_fim:
name: OpenAI FIM
provider: openai
url: http://127.0.0.1:8989/openai/chat/completions
data: |
{
"messages": [
{
"role": "user",
"content": "You are a HOLE FILLER. You are provided with a file containing holes, formatted as '{{HOLE_NAME}}'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\n\n## EXAMPLE QUERY:\n\n<QUERY>\nfunction sum_evens(lim) {\n var sum = 0;\n for (var i = 0; i < lim; ++i) {\n {{FILL_HERE}}\n }\n return sum;\n}\n</QUERY>\n\nTASK: Fill the {{FILL_HERE}} hole.\n\n## CORRECT COMPLETION\n\n<COMPLETION>if (i % 2 === 0) {\n sum += i;\n }</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\ndef sum_list(lst):\n total = 0\n for x in lst:\n {{FILL_HERE}}\n return total\n\nprint sum_list([1, 2, 3])\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION> total += x</COMPLETION>\n\n## EXAMPLE QUERY:\n\n<QUERY>\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\n\n// sum :: Tree Int -> Int\n// sum (Node lft rgt) = sum lft + sum rgt\n// sum (Leaf val) = val\n\n// convert to TypeScript:\n{{FILL_HERE}}\n</QUERY>\n\n## CORRECT COMPLETION:\n\n<COMPLETION>type Tree<T>\n = {$:\"Node\", lft: Tree<T>, rgt: Tree<T>}\n | {$:\"Leaf\", val: T};\n\nfunction sum(tree: Tree<number>): number {\n switch (tree.$) {\n case \"Node\":\n return sum(tree.lft) + sum(tree.rgt);\n case \"Leaf\":\n return tree.val;\n }\n}</COMPLETION>\n\n## EXAMPLE QUERY:\n\nThe 5th {{FILL_HERE}} is Jupiter.\n\n## CORRECT COMPLETION:\n\n<COMPLETION>planet from the Sun</COMPLETION>\n\n## EXAMPLE QUERY:\n\nfunction hypothenuse(a, b) {\n return Math.sqrt({{FILL_HERE}}b ** 2);\n}\n\n## CORRECT COMPLETION:\n\n<COMPLETION>a ** 2 + </COMPLETION>\n\n<QUERY>\n# Path: Untitled.txt\n# {\"messages\":[{\"role\":\"user\",\"content\":\"You are a HOLE FILLER. You are provided with a file containing holes, formatted as \\'{{HOLE_NAME}}\\'. Your TASK is to complete with a string to replace this hole with, inside a <COMPLETION/> XML tag, including context-aware indentation, if needed. All completions MUST be truthful, accurate, well-written and correct.\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\nfunction sum_evens(lim) {\\\\n var sum = 0;\\\\n for (var i = 0; i < lim; ++i) {\\\\n {{FILL_HERE}}\\\\n }\\\\n return sum;\\\\n}\\\\n</QUERY>\\\\n\\\\nTASK: Fill the {{FILL_HERE}} hole.\\\\n\\\\n## CORRECT COMPLETION\\\\n\\\\n<COMPLETION>if (i % 2 === 0) {\\\\n sum += i;\\\\n }</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\ndef sum_list(lst):\\\\n total = 0\\\\n for x in lst:\\\\n {{FILL_HERE}}\\\\n return total\\\\n\\\\nprint sum_list([1, 2, 3])\\\\n</QUERY>\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION> total += x</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\n<QUERY>\\\\n// data Tree a = Node (Tree a) (Tree a) | Leaf a\\\\n\\\\n// sum :: Tree Int -> Int\\\\n// sum (Node lft rgt) = sum lft + sum rgt\\\\n// sum (Leaf val) = val\\\\n\\\\n// convert to TypeScript:\\\\n{{FILL_HERE}}\\\\n</QUERY>\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>type Tree<T>\\\\n = {$:\\\\\"Node\\\\\", lft: Tree<T>, rgt: Tree<T>}\\\\n | {$:\\\\\"Leaf\\\\\", val: T};\\\\n\\\\nfunction sum(tree: Tree<number>): number {\\\\n switch (tree.$) {\\\\n case \\\\\"Node\\\\\":\\\\n return sum(tree.lft) + sum(tree.rgt);\\\\n case \\\\\"Leaf\\\\\":\\\\n return tree.val;\\\\n }\\\\n}</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\nThe 5th {{FILL_HERE}} is Jupiter.\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>planet from the Sun</COMPLETION>\\\\n\\\\n## EXAMPLE QUERY:\\\\n\\\\nfunction hypothenuse(a, b) {\\\\n return Math.sqrt({{FILL_HERE}}b ** 2);\\\\n}\\\\n\\\\n## CORRECT COMPLETION:\\\\n\\\\n<COMPLETION>a ** 2 + </COMPLETION>\\\\n\\\\n<QUERY>\\\\n\\\\n# codegate/test.py\\\\nimport invokehttp\\\\nimport requests\\\\n\\\\nkey = \\\\\"mysecret-key\\\\\"\\\\n\\\\ndef call_api():\\\\n {{FILL_HERE}}\\\\n\\\\n\\\\n\\\\n\\\\ndata = {\\'key1\\': \\'test1\\', \\'key2\\': \\'test2\\'}\\\\nresponse = call_api(\\'http://localhost:8080\\', method=\\'post\\', data=\\'data\\')\\\\n</QUERY>\\\\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\\\\n<COMPLETION>\"}],\"model\":\"gpt-4o-mini\",\"max_tokens\":4096,\"temperature\":0,\"stream\":true,\"stop\":[\"</COMPLETION>\",\"/src/\",\"#- coding: utf-8\",\"```\"]}\n# codegate/test.py\nimport invokehttp\nimport requests\n\nkey = \"mysecret-key\"\n\ndef call_api():\n {{FILL_HERE}}\n\n\n\n\ndata = {'key1': 'test1', 'key2': 'test2'}\nresponse = call_api('http://localhost:8080', method='post', data='data')\n</QUERY>\nTASK: Fill the {{FILL_HERE}} hole. Answer only with the CORRECT completion, and NOTHING ELSE. Do it now.\n<COMPLETION>"
}
],
"model": "gpt-4o-mini",
"max_tokens": 4096,
"temperature": 0,
"stream": true,
"stop": [
"</COMPLETION>",
"/src/",
"#- coding: utf-8",
"```"
]
}
likes: |
<COMPLETION> response = requests.post('http://localhost:8080', json=data, headers={'Authorization': f'Bearer {key}'})
Loading

0 comments on commit f883134

Please sign in to comment.