Merge pull request #20 from iamarunbrahma/fix/deepseek-models
feat: Add test method for DeepSeek models and update dependencies
iamarunbrahma authored Jan 24, 2025
2 parents 1f1ea65 + 9dd5d2b commit f8bc1bc
Showing 6 changed files with 138 additions and 72 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -4,7 +4,7 @@ lint:
	ruff check . --fix

format-nb:
-	black --ipynb examples/*.ipynb
+	black --ipynb docs/examples/*.ipynb

format: format-nb
	black .
68 changes: 68 additions & 0 deletions docs/examples/deepseek_demo.ipynb
@@ -0,0 +1,68 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Using Vision Parse with DeepSeek"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook demonstrates how to use Vision Parse with DeepSeek."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install 'vision-parse[openai]' -Uqq # install the vision-parse package with openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from vision_parse import VisionParser\n",
    "import os\n",
    "\n",
    "# Initialize parser\n",
    "parser = VisionParser(\n",
    "    model_name=\"deepseek-chat\",\n",
    "    api_key=os.getenv(\"DEEPSEEK_API_KEY\"),\n",
    "    temperature=0.9,\n",
    "    top_p=0.4,\n",
    "    image_mode=None,\n",
    "    detailed_extraction=True,\n",
    "    enable_concurrency=True,\n",
    ")\n",
    "\n",
    "pdf_path = \"../tests/Texas-Holdem-Rules.pdf\"\n",
    "markdown_pages = parser.convert_pdf(pdf_path)\n",
    "\n",
    "# Print the markdown pages\n",
    "for i, page_content in enumerate(markdown_pages):\n",
    "    print(f\"\\n--- Page {i+1} ---\\n{page_content}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "personal",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
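
A possible follow-up to the notebook above: since convert_pdf returns one markdown string per page, persisting each page to disk is a short extra step. A minimal sketch, assuming the markdown_pages list produced by the notebook (the output/ directory and file naming are illustrative, and a stub list stands in for the real conversion result so the snippet runs on its own):

from pathlib import Path

# markdown_pages would come from parser.convert_pdf(...) in the notebook above;
# a stub list stands in here so the sketch is runnable by itself.
markdown_pages = ["# Page 1\n\nSample text", "# Page 2\n\nMore text"]

output_dir = Path("output")
output_dir.mkdir(exist_ok=True)
for i, page_content in enumerate(markdown_pages):
    # One .md file per page, numbered from 1
    (output_dir / f"page_{i + 1}.md").write_text(page_content, encoding="utf-8")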
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -61,11 +61,11 @@ gemini = [
"google-generativeai==0.8.3",
]
openai = [
"openai>=1.59.8",
"openai==1.59.8",
]
all = [
"google-generativeai==0.8.3",
"openai>=1.59.8",
"openai==1.59.8",
]

[tool.hatch.build.targets.wheel]
2 changes: 1 addition & 1 deletion src/vision_parse/llm.py
@@ -306,7 +306,7 @@ async def _get_response(
    ):
        if self.provider == "ollama":
            return await self._ollama(base64_encoded, prompt, structured)
-        elif self.provider == "openai":
+        elif self.provider == "openai" or self.provider == "deepseek":
            return await self._openai(base64_encoded, prompt, structured)
        elif self.provider == "gemini":
            return await self._gemini(base64_encoded, prompt, structured)
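For context on the one-line change above: DeepSeek exposes an OpenAI-compatible chat-completions API, which is why the deepseek provider can reuse the existing _openai path. A minimal standalone sketch of that pattern, assuming the stock openai SDK and DeepSeek's documented base URL (this snippet is illustrative, not code from llm.py):

import asyncio
from openai import AsyncOpenAI

# Assumption: DeepSeek's API is OpenAI-compatible, so the regular OpenAI
# SDK client works once base_url points at DeepSeek's endpoint.
client = AsyncOpenAI(
    api_key="YOUR_DEEPSEEK_API_KEY",  # placeholder, not a real key
    base_url="https://api.deepseek.com",
)

async def main() -> None:
    response = await client.chat.completions.create(
        model="deepseek-chat",
        messages=[{"role": "user", "content": "Say hello."}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())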
122 changes: 60 additions & 62 deletions tests/test_llm.py
@@ -293,6 +293,66 @@ async def test_gemini_generate_markdown(
    assert mock_client.generate_content_async.call_count == 2


+@pytest.mark.asyncio
+@patch("openai.AsyncOpenAI")
+async def test_deepseek_generate_markdown(
+    MockAsyncOpenAI, sample_base64_image, mock_pixmap
+):
+    """Test markdown generation using DeepSeek."""
+    mock_client = AsyncMock()
+    MockAsyncOpenAI.return_value = mock_client
+
+    # Mock structured analysis response
+    mock_parse = AsyncMock()
+    mock_parse.choices = [
+        AsyncMock(
+            message=AsyncMock(
+                content=json.dumps(
+                    {
+                        "text_detected": "Yes",
+                        "tables_detected": "No",
+                        "images_detected": "No",
+                        "latex_equations_detected": "No",
+                        "extracted_text": "Test content",
+                        "confidence_score_text": 0.9,
+                    }
+                )
+            )
+        )
+    ]
+
+    # Mock markdown conversion response
+    mock_create = AsyncMock()
+    mock_create.choices = [
+        AsyncMock(message=AsyncMock(content="# Test Header\n\nTest content"))
+    ]
+    # Set up side effects to return mock_parse first, then mock_create
+    mock_client.chat.completions.create = AsyncMock(
+        side_effect=[mock_parse, mock_create]
+    )
+
+    llm = LLM(
+        model_name="deepseek-chat",
+        api_key="test-key",
+        temperature=0.7,
+        top_p=0.7,
+        ollama_config=None,
+        openai_config=None,
+        gemini_config=None,
+        image_mode=None,
+        custom_prompt=None,
+        detailed_extraction=True,
+        enable_concurrency=True,
+        device=None,
+        num_workers=1,
+    )
+    result = await llm.generate_markdown(sample_base64_image, mock_pixmap, 0)
+
+    assert isinstance(result, str)
+    assert "Test content" in result
+    assert mock_client.chat.completions.create.call_count == 2
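
An aside on the mocking pattern in the test above: giving AsyncMock a side_effect list makes successive awaited calls return successive items, which is how the two-call flow (structured analysis first, then markdown conversion) is simulated. A tiny self-contained illustration of that unittest.mock behavior (names here are illustrative):

import asyncio
from unittest.mock import AsyncMock

# Each awaited call consumes the next item from side_effect.
mock = AsyncMock(side_effect=["structured analysis", "markdown conversion"])

async def demo() -> None:
    print(await mock())  # "structured analysis"
    print(await mock())  # "markdown conversion"
    assert mock.call_count == 2

asyncio.run(demo())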


@pytest.mark.asyncio
@patch("ollama.AsyncClient")
async def test_ollama_base64_image_mode(
@@ -389,65 +449,3 @@ async def test_ollama_llm_error(mock_async_client, sample_base64_image, mock_pix
        await llm.generate_markdown(sample_base64_image, mock_pixmap, 0)
    assert "Ollama Model processing failed" in str(exc_info.value)
    assert mock_client.chat.call_count == 1


-@pytest.mark.asyncio
-@patch("openai.AsyncOpenAI")
-async def test_openai_llm_error(MockAsyncOpenAI, sample_base64_image, mock_pixmap):
-    """Test LLMError handling for OpenAI."""
-    mock_client = AsyncMock()
-    MockAsyncOpenAI.return_value = mock_client
-
-    # Mock API error for markdown generation
-    mock_client.chat.completions.create.side_effect = Exception("OpenAI API error")
-
-    llm = LLM(
-        model_name="gpt-4o",
-        api_key="test-key",
-        temperature=0.7,
-        top_p=0.7,
-        ollama_config=None,
-        openai_config=None,
-        gemini_config=None,
-        image_mode=None,
-        custom_prompt=None,
-        detailed_extraction=True,
-        enable_concurrency=True,
-        device=None,
-        num_workers=1,
-    )
-
-    with pytest.raises(LLMError) as exc_info:
-        await llm.generate_markdown(sample_base64_image, mock_pixmap, 0)
-    assert "OpenAI Model processing failed" in str(exc_info.value)
-
-
-@pytest.mark.asyncio
-@patch("google.generativeai.GenerativeModel")
-async def test_gemini_llm_error(MockGenerativeModel, sample_base64_image, mock_pixmap):
-    """Test LLMError handling for Gemini."""
-    mock_client = AsyncMock()
-    MockGenerativeModel.return_value = mock_client
-
-    # Mock API error
-    mock_client.generate_content.side_effect = Exception("Gemini API error")
-
-    llm = LLM(
-        model_name="gemini-1.5-pro",
-        api_key="test-key",
-        temperature=0.7,
-        top_p=0.7,
-        ollama_config=None,
-        openai_config=None,
-        gemini_config=None,
-        image_mode=None,
-        custom_prompt=None,
-        detailed_extraction=True,
-        enable_concurrency=True,
-        device=None,
-        num_workers=1,
-    )
-
-    with pytest.raises(LLMError) as exc_info:
-        await llm.generate_markdown(sample_base64_image, mock_pixmap, 0)
-    assert "Gemini Model processing failed" in str(exc_info.value)
12 changes: 6 additions & 6 deletions uv.lock

Some generated files are not rendered by default.
