Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PPF-786: implement use full widget name #787

Merged
merged 6 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions PyPDFForm/middleware/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def __init__(

super().__init__()
self._name = name
self.full_name = None
self._value = value
self.desc = None

Expand Down
21 changes: 19 additions & 2 deletions PyPDFForm/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from reportlab.pdfbase.pdfmetrics import stringWidth

from .constants import (COMB, DEFAULT_FONT_SIZE, MULTILINE, NEW_LINE_SYMBOL,
WIDGET_TYPES, Annots, MaxLen, Rect)
WIDGET_TYPES, Annots, MaxLen, Rect, Parent, T)
from .font import (adjust_paragraph_font_size, adjust_text_field_font_size,
auto_detect_font, get_text_field_font_color,
get_text_field_font_size, text_field_font_size)
Expand Down Expand Up @@ -43,7 +43,7 @@ def set_character_x_paddings(
return widgets


def build_widgets(pdf_stream: bytes) -> Dict[str, WIDGET_TYPES]:
def build_widgets(pdf_stream: bytes, use_full_widget_name: bool) -> Dict[str, WIDGET_TYPES]:
"""Builds a widget dict given a PDF form stream."""

results = {}
Expand All @@ -53,6 +53,7 @@ def build_widgets(pdf_stream: bytes) -> Dict[str, WIDGET_TYPES]:
key = get_widget_key(widget)
_widget = construct_widget(widget, key)
if _widget is not None:
_widget.full_name = get_widget_full_key(widget)
_widget.desc = get_widget_description(widget)
if isinstance(_widget, Text):
_widget.max_length = get_text_field_max_length(widget)
Expand All @@ -73,6 +74,8 @@ def build_widgets(pdf_stream: bytes) -> Dict[str, WIDGET_TYPES]:
continue

results[key] = _widget
if _widget.full_name is not None and use_full_widget_name:
results[_widget.full_name] = results[key]
return results


Expand Down Expand Up @@ -190,6 +193,20 @@ def get_widget_key(widget: dict) -> Union[str, list, None]:
return result


def get_widget_full_key(widget: dict) -> Union[str, None]:
    """
    Builds a PDF widget's full key in the form
    ``<parent key>.<widget key>``.

    Only the widget's immediate parent is consulted. ``None`` is
    returned when the widget has no parent entry, when the parent has
    no key of its own, or when the parent's key is equal to the key
    returned by ``get_widget_key`` for the widget itself.
    """

    partial_key = get_widget_key(widget)

    # No parent entry means there is nothing to prepend.
    if Parent not in widget:
        return None

    # The parent must carry its own key to contribute a prefix.
    if T not in widget[Parent].get_object():
        return None

    parent_key = widget[Parent][T]
    if parent_key != partial_key:
        return f"{parent_key}.{partial_key}"

    # Parent and widget share the same key, so no distinct full key exists.
    return None


def get_widget_alignment(widget: dict) -> Union[str, list, None]:
"""Finds a PDF widget's alignment by pattern matching."""

Expand Down
12 changes: 10 additions & 2 deletions PyPDFForm/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def fill(
) -> FormWrapper:
"""Fills a PDF form."""

widgets = build_widgets(self.stream) if self.stream else {}
widgets = build_widgets(self.stream, False) if self.stream else {}

for key, value in data.items():
if key in widgets:
Expand Down Expand Up @@ -86,13 +86,15 @@ def __init__(
self.global_font_size = kwargs.get("global_font_size")
self.global_font_color = kwargs.get("global_font_color")

self.use_full_widget_name = kwargs.get("use_full_widget_name", False)

self._init_helper()

def _init_helper(self, key_to_refresh: str = None) -> None:
"""Updates all attributes when the state of the PDF stream changes."""

refresh_not_needed = {}
new_widgets = build_widgets(self.read()) if self.read() else {}
new_widgets = build_widgets(self.read(), self.use_full_widget_name) if self.read() else {}
for k, v in self.widgets.items():
if k in new_widgets:
new_widgets[k] = v
Expand Down Expand Up @@ -254,6 +256,9 @@ def update_widget_key(
) -> PdfWrapper:
"""Updates the key of an existed widget on a PDF form."""

if self.use_full_widget_name:
raise NotImplementedError

if defer:
self._keys_to_update.append((old_key, new_key, index))
return self
Expand All @@ -268,6 +273,9 @@ def update_widget_key(
def commit_widget_key_updates(self) -> PdfWrapper:
"""Commits all deferred widget key updates on a PDF form."""

if self.use_full_widget_name:
raise NotImplementedError

old_keys = [each[0] for each in self._keys_to_update]
new_keys = [each[1] for each in self._keys_to_update]
indices = [each[2] for each in self._keys_to_update]
Expand Down
19 changes: 19 additions & 0 deletions docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,25 @@ with open("sample_template.pdf", "rb+") as template:
This adaptation is universal across all APIs of PyPDFForm. So in later sections of the documentation whenever you see
a function parameter that's a file path you can safely switch them for a file object or file stream.

## Use full widget name in PDF wrapper (beta)

**NOTE:** This is a beta feature, meaning it still needs to be tested against more PDF forms and may not work for
some of them.

According to section 12.7.3.2 found on page 434 of [the PDF standard](https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf), each PDF form widget can have a fully qualified name that is not explicitly defined but can be constructed following the pattern `<parent_widget_name>.<widget_name>`.

PyPDFForm supports accessing widgets through their full names by simply setting the optional parameter `use_full_widget_name` to `True` when a `PdfWrapper` object is instantiated. Consider [this PDF](https://github.com/chinapandaman/PyPDFForm/raw/master/pdf_samples/sample_template_with_full_key.pdf):

```python
from PyPDFForm import PdfWrapper

pdf = PdfWrapper("sample_template_with_full_key.pdf", use_full_widget_name=True)
```

The checkbox widget on the second page with the text `Gain de 2 classes` has a partial name of `0` and a full name of `Gain de 2 classes.0`. When the object is constructed as shown above, the same checkbox can be accessed through either its partial name or its full name.

**NOTE:** Because each full widget name involves both the widget itself and its parent widget, the methods `update_widget_key` and `commit_widget_key_updates` are disabled and will raise a `NotImplementedError` when invoked through an object that uses full widget names.

## Write to a file

Lastly, `PdfWrapper` also implements itself similar to an open file object. So you can write the PDF it holds to another
Expand Down
Binary file added pdf_samples/sample_template_with_full_key.pdf
Binary file not shown.
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ def sample_template_with_image_field(pdf_samples):
return f.read()


@pytest.fixture
def sample_template_with_full_key(pdf_samples):
    """Returns the raw bytes of the sample PDF used by the full widget name tests."""

    sample_path = os.path.join(pdf_samples, "sample_template_with_full_key.pdf")
    with open(sample_path, "rb+") as sample:
        return sample.read()


@pytest.fixture
def dropdown_alignment(pdf_samples):
with open(
Expand Down
52 changes: 52 additions & 0 deletions tests/test_use_full_widget_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-

import pytest

from PyPDFForm import PdfWrapper


def test_init(sample_template_with_full_key):
    """The full widget name is registered and maps to the same widget object."""

    wrapper = PdfWrapper(sample_template_with_full_key, use_full_widget_name=True)
    full_name, partial_name = "Gain de 2 classes.0", "0"

    assert full_name in wrapper.widgets
    assert wrapper.widgets[full_name] is wrapper.widgets[partial_name]


def test_sample_data(sample_template_with_full_key):
    """Sample data exposes the full widget name with the partial name's value."""

    wrapper = PdfWrapper(sample_template_with_full_key, use_full_widget_name=True)
    full_name, partial_name = "Gain de 2 classes.0", "0"

    assert full_name in wrapper.sample_data
    assert wrapper.sample_data[full_name] == wrapper.sample_data[partial_name]


def test_fill(sample_template_with_full_key):
    """Filling by full name produces the same PDF as filling by partial name."""

    full_name_pdf = PdfWrapper(
        sample_template_with_full_key, use_full_widget_name=True
    )
    partial_name_pdf = PdfWrapper(
        sample_template_with_full_key, use_full_widget_name=True
    )

    filled_via_full_name = full_name_pdf.fill({"Gain de 2 classes.0": True}).read()
    filled_via_partial_name = partial_name_pdf.fill({"0": True}).read()

    assert filled_via_full_name == filled_via_partial_name


def test_update_widget_key(sample_template_with_full_key):
    """Updating a widget key is rejected when full widget names are in use."""

    wrapper = PdfWrapper(sample_template_with_full_key, use_full_widget_name=True)
    old_key, new_key = "0", "foo"

    with pytest.raises(NotImplementedError):
        wrapper.update_widget_key(old_key, new_key)


def test_commit_widget_key_updates(sample_template_with_full_key):
    """Committing key updates is rejected when full widget names are in use."""

    wrapper = PdfWrapper(sample_template_with_full_key, use_full_widget_name=True)

    with pytest.raises(NotImplementedError):
        wrapper.commit_widget_key_updates()


def test_schema(sample_template_with_full_key):
    """The schema lists the full widget name with the partial name's definition."""

    wrapper = PdfWrapper(sample_template_with_full_key, use_full_widget_name=True)
    full_name, partial_name = "Gain de 2 classes.0", "0"

    assert full_name in wrapper.schema["properties"]
    assert (
        wrapper.schema["properties"][full_name]
        == wrapper.schema["properties"][partial_name]
    )
Loading