-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d91dcb8
commit 9ad7dc3
Showing
11 changed files
with
227 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
from marker.processors.llm import BaseLLMProcessor | ||
|
||
from google.ai.generativelanguage_v1beta.types import content | ||
|
||
from marker.schema import BlockTypes | ||
from marker.schema.blocks import Equation | ||
from marker.schema.document import Document | ||
from marker.schema.groups.page import PageGroup | ||
|
||
from typing import Annotated | ||
|
||
|
||
class LLMEquationProcessor(BaseLLMProcessor):
    """Rewrite the LaTeX of equation blocks using an LLM and the block image.

    For each equation block, the current LaTeX (or the block's raw text when
    no LaTeX is set) is sent together with the block image to the model, and
    the returned markdown/LaTeX is stored in ``block.latex``.
    """

    block_types = (BlockTypes.Equation,)
    # NOTE(review): not read anywhere in this class; presumably consumed by
    # the base class or the configuration machinery -- confirm.
    min_equation_height: Annotated[
        float,
        "The minimum ratio between equation height and page height to consider for processing.",
    ] = 0.1
    equation_latex_prompt: Annotated[
        str,
        "The prompt to use for generating LaTeX from equations.",
        "Default is a string containing the Gemini prompt."
    ] = """You're an expert mathematician who is good at writing LaTeX code for equations'.
You will receive an image of a math block that may contain one or more equations. Your job is to write the LaTeX code for the equation, along with markdown for any other text.
Some guidelines:
- Keep the LaTeX code simple and concise.
- Make it KaTeX compatible.
- Use $$ as a block equation delimiter and $ for inline equations. Block equations should also be on their own line. Do not use any other delimiters.
- You can include text in between equation blocks as needed. Try to put long text segments into plain text and not inside the equations.
**Instructions:**
1. Carefully examine the provided image.
2. Analyze the existing markdown, which may include LaTeX code.
3. If the markdown and LaTeX are correct, write "No corrections needed."
4. If the markdown and LaTeX are incorrect, generate the corrected markdown and LaTeX.
5. Output only the corrected text or "No corrections needed."
**Example:**
Input:
```markdown
Equation 1:
$$x^2 + y^2 = z2$$
```
Output:
```markdown
Equation 1:
$$x^2 + y^2 = z^2$$
```
**Input:**
```markdown
{equation}
```
"""

    def process_rewriting(self, document: Document, page: PageGroup, block: Equation):
        """Ask the model to correct one equation block and store the result.

        Args:
            document: The document containing ``block``.
            page: The page group the block belongs to (unused here).
            block: The equation block to rewrite; ``block.latex`` is updated
                in place when the model returns a usable correction.

        Returns:
            None. On a missing or implausibly short response the block is left
            unchanged and ``llm_error_count=1`` is recorded via
            ``block.update_metadata``.
        """
        text = block.latex if block.latex else block.raw_text(document)
        # Literal substitution (not str.format) so LaTeX braces in the text
        # and in the prompt are left untouched.
        prompt = self.equation_latex_prompt.replace("{equation}", text)

        image = self.extract_image(document, block)
        response_schema = content.Schema(
            type=content.Type.OBJECT,
            enum=[],
            required=["markdown_equation"],
            properties={
                "markdown_equation": content.Schema(
                    type=content.Type.STRING
                )
            },
        )

        response = self.model.generate_response(prompt, image, block, response_schema)

        if not response or "markdown_equation" not in response:
            block.update_metadata(llm_error_count=1)
            return

        markdown_equation = response["markdown_equation"]

        # The prompt instructs the model to answer "No corrections needed."
        # when the existing text is already right. That sentinel must neither
        # overwrite the block's LaTeX nor be counted as an error.
        if "no corrections needed" in markdown_equation.lower():
            return

        # A response less than half as long as the input is treated as a
        # truncated or failed generation.
        if len(markdown_equation) < len(text) * .5:
            block.update_metadata(llm_error_count=1)
            return

        block.latex = markdown_equation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import markdown2 | ||
|
||
from marker.processors.llm import BaseLLMProcessor | ||
|
||
from google.ai.generativelanguage_v1beta.types import content | ||
|
||
from marker.schema import BlockTypes | ||
from marker.schema.blocks import Equation | ||
from marker.schema.document import Document | ||
from marker.schema.groups.page import PageGroup | ||
|
||
from typing import Annotated | ||
|
||
|
||
class LLMHandwritingProcessor(BaseLLMProcessor):
    """Re-OCR a block with an LLM and store the result as HTML.

    The block's extracted text and image are sent to the model; the returned
    markdown is converted with ``markdown2`` and written to ``block.html``.
    """

    # NOTE(review): this targets Equation blocks although the processor is
    # about handwriting -- looks like a copy-paste leftover; confirm which
    # block type is intended.
    block_types = (BlockTypes.Equation,)
    # NOTE(review): not read anywhere in this class; presumably consumed by
    # the base class or the configuration machinery -- confirm.
    min_handwriting_height: Annotated[
        float,
        "The minimum ratio between handwriting height and page height to consider for processing.",
    ] = 0.1
    handwriting_generation_prompt: Annotated[
        str,
        "The prompt to use for OCRing handwriting.",
        "Default is a string containing the Gemini prompt."
    ] = """You are an expert editor specializing in accurately reproducing text from images.
You will receive an image of a text block, along with the text that can be extracted. Your task is to generate markdown to properly represent the content of the image. Do not omit any text present in the image - make sure everything is included in the markdown representation. The markdown representation should be as faithful to the original image as possible.
Formatting should be in markdown, with the following rules:
- * for italics, ** for bold, and ` for inline code.
- Headers should be formatted with #, with one # for the largest header, and up to 6 for the smallest.
- Lists should be formatted with either - or 1. for unordered and ordered lists, respectively.
- Links should be formatted with [text](url).
- Use ``` for code blocks.
- Inline math should be formatted with <math>math expression</math>.
- Display math should be formatted with <math display="block">math expression</math>.
- Values and labels should be extracted from forms, and put into markdown tables, with the labels on the left side, and values on the right. The headers should be "Labels" and "Values". Other text in the form can appear between the tables.
- Tables should be formatted with markdown tables, with the headers bolded.
**Instructions:**
1. Carefully examine the provided block image.
2. Analyze the existing text representation.
3. Output the markdown representing the content of the image.
**Example:**
Input:
```text
This i sm handwritting.
```
Output:
```markdown
This is some *handwriting*.
```
**Input:**
```text
{extracted_text}
```
"""

    def process_rewriting(self, document: Document, page: PageGroup, block: Equation):
        """Ask the model to transcribe one block and store the result as HTML.

        Args:
            document: The document containing ``block``.
            page: The page group the block belongs to (unused here).
            block: The block to transcribe; ``block.html`` is updated in place
                when the model returns a usable transcription.

        Returns:
            None. On a missing or implausibly short response the block is left
            unchanged and ``llm_error_count=1`` is recorded via
            ``block.update_metadata``.
        """
        text = block.raw_text(document)
        # The default prompt's placeholder is "{extracted_text}"; the old
        # "{handwriting_text}" name is still substituted so custom prompts
        # written against it keep working. Literal replacement (not
        # str.format) leaves any other braces in the prompt untouched.
        prompt = (
            self.handwriting_generation_prompt
            .replace("{extracted_text}", text)
            .replace("{handwriting_text}", text)
        )

        image = self.extract_image(document, block)
        response_schema = content.Schema(
            type=content.Type.OBJECT,
            enum=[],
            required=["markdown"],
            properties={
                "markdown": content.Schema(
                    type=content.Type.STRING
                )
            },
        )

        response = self.model.generate_response(prompt, image, block, response_schema)

        if not response or "markdown" not in response:
            block.update_metadata(llm_error_count=1)
            return

        markdown = response["markdown"]
        # A response less than half as long as the input is treated as a
        # truncated or failed generation.
        if len(markdown) < len(text) * .5:
            block.update_metadata(llm_error_count=1)
            return

        # Remove a surrounding code fence, if any. str.lstrip/rstrip take a
        # *set of characters*, so lstrip("```markdown") would also eat leading
        # letters of real content (e.g. "dark mode" -> " mode"); match the
        # fence as an exact prefix/suffix instead.
        markdown = markdown.strip()
        for fence in ("```markdown", "```"):
            if markdown.startswith(fence):
                markdown = markdown[len(fence):]
                break
        if markdown.endswith("```"):
            markdown = markdown[:-len("```")]
        markdown = markdown.strip()

        block.html = markdown2.markdown(markdown)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.