Skip to content

Commit

Permalink
Pick a pickle protocol
Browse files Browse the repository at this point in the history
The compiler will now emit each fallback pickle using either MAX_PROTOCOL or pickle protocol 0, whichever has the shorter repr.
  • Loading branch information
gilch committed Aug 30, 2024
1 parent dddfbfe commit 297ea5d
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 130 deletions.
38 changes: 8 additions & 30 deletions docs/lissp_whirlwind_tour.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1926,19 +1926,8 @@ Lissp Whirlwind Tour
;; only meant for use at read time, but they're allowed to survive to
;; run time for debugging purposes.
#> spam=eggs
>>> __import__('pickle').loads( # Kwarg('spam', 'eggs')
... b'ccopyreg\n'
... b'_reconstructor\n'
... b'(chissp.reader\n'
... b'Kwarg\n'
... b'cbuiltins\n'
... b'object\n'
... b'NtR(dVk\n'
... b'Vspam\n'
... b'sVv\n'
... b'Veggs\n'
... b'sb.'
... )
>>> # Kwarg('spam', 'eggs')
... __import__('pickle').loads(b'ccopy_reg\n_reconstructor\n(chissp.reader\nKwarg\nc__builtin__\nobject\nNtR(dVk\nVspam\nsVv\nVeggs\nsb.')
Kwarg('spam', 'eggs')


Expand Down Expand Up @@ -1968,10 +1957,8 @@ Lissp Whirlwind Tour
;;; the compiler is in a pickle!

#> builtins..float#inf
>>> __import__('pickle').loads( # inf
... b'Finf\n'
... b'.'
... )
>>> # inf
... __import__('pickle').loads(b'Finf\n.')
inf


Expand Down Expand Up @@ -2003,12 +1990,8 @@ Lissp Whirlwind Tour
Fraction(1, 2)

#> .#(fractions..Fraction 1 2) ;Read time eval. Compiles to equivalent object.
>>> __import__('pickle').loads( # Fraction(1, 2)
... b'cfractions\n'
... b'Fraction\n'
... b'(V1/2\n'
... b'tR.'
... )
>>> # Fraction(1, 2)
... __import__('pickle').loads(b'cfractions\nFraction\n(V1/2\ntR.')
Fraction(1, 2)


Expand Down Expand Up @@ -2057,11 +2040,6 @@ Lissp Whirlwind Tour
"(re.compile('[1-9][0-9]*'), inf)"

#> re..compile#.#"[1-9][0-9]*"
>>> __import__('pickle').loads( # re.compile('[1-9][0-9]*')
... b'cre\n'
... b'_compile\n'
... b'(V[1-9][0-9]*\n'
... b'I32\n'
... b'tR.'
... )
>>> # re.compile('[1-9][0-9]*')
... __import__('pickle').loads(b'cre\n_compile\n(V[1-9][0-9]*\nI32\ntR.')
re.compile('[1-9][0-9]*')
16 changes: 4 additions & 12 deletions docs/macro_tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3219,12 +3219,8 @@ you can already use `decimal.Decimal` as a reader macro:
#> (mul decimal..Decimal#|.2| 3)
>>> mul(
... __import__('pickle').loads( # Decimal('0.2')
... b'cdecimal\n'
... b'Decimal\n'
... b'(V0.2\n'
... b'tR.'
... ),
... # Decimal('0.2')
... __import__('pickle').loads(b'cdecimal\nDecimal\n(V0.2\ntR.'),
... (3))
Decimal('0.6')
Expand Down Expand Up @@ -3261,12 +3257,8 @@ but this isn't always a good idea.
.. code-block:: REPL
#> decimal..Decimal#.2
>>> __import__('pickle').loads( # Decimal('0.200000000000000011102230246251565404236316680908203125')
... b'cdecimal\n'
... b'Decimal\n'
... b'(V0.200000000000000011102230246251565404236316680908203125\n'
... b'tR.'
... )
>>> # Decimal('0.200000000000000011102230246251565404236316680908203125')
... __import__('pickle').loads(b'cdecimal\nDecimal\n(V0.200000000000000011102230246251565404236316680908203125\ntR.')
Decimal('0.200000000000000011102230246251565404236316680908203125')
There's no bug in Decimal.
Expand Down
87 changes: 23 additions & 64 deletions docs/primer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1214,12 +1214,8 @@ How about these?
[[], [], []]
#> .#.#"[[]]*3" ; Injects a list object.
>>> __import__('pickle').loads( # [[], [], []]
... b'(l(lp0\n'
... b'ag0\n'
... b'ag0\n'
... b'a.'
... )
>>> # [[], [], []]
... __import__('pickle').loads(b'(l(lp0\nag0\nag0\na.')
[[], [], []]
Surprised?
Expand All @@ -1233,7 +1229,7 @@ Let's check.
>>> eval(_)
[[], [], []]
>>> readerless([[]]*3)
"__import__('pickle').loads( # [[], [], []]\n b'(l(lp0\\n'\n b'ag0\\n'\n b'ag0\\n'\n b'a.'\n)"
"# [[], [], []]\n__import__('pickle').loads(b'(l(lp0\\nag0\\nag0\\na.')"
>>> eval(_)
[[], [], []]

Expand Down Expand Up @@ -1261,12 +1257,8 @@ Well, what *should* it compile to?
[[7], [], []]
#> .#.#"[[]]*3"
>>> __import__('pickle').loads( # [[], [], []]
... b'(l(lp0\n'
... b'ag0\n'
... b'ag0\n'
... b'a.'
... )
>>> # [[], [], []]
... __import__('pickle').loads(b'(l(lp0\nag0\nag0\na.')
[[], [], []]
#> (.append (operator..getitem _ 0) 7)
Expand Down Expand Up @@ -1331,13 +1323,10 @@ How can the Hissp compiler generate Python code from this tuple?
Let's see what it's doing.

>>> readerless((print,1,2,3,':','sep',':'))
"__import__('pickle').loads( # <built-in function print>\n b'cbuiltins\\n'\n b'print\\n'\n b'.'\n)(\n (1),\n (2),\n (3),\n sep=':')"
"# <built-in function print>\n__import__('pickle').loads(b'c__builtin__\\nprint\\n.')(\n (1),\n (2),\n (3),\n sep=':')"
>>> print(_)
__import__('pickle').loads( # <built-in function print>
b'cbuiltins\n'
b'print\n'
b'.'
)(
# <built-in function print>
__import__('pickle').loads(b'c__builtin__\nprint\n.')(
(1),
(2),
(3),
Expand All @@ -1358,11 +1347,8 @@ but if we had injected it instead,
.. code-block:: REPL
#> (.#print 1 2 3 : sep :)
>>> __import__('pickle').loads( # <built-in function print>
... b'cbuiltins\n'
... b'print\n'
... b'.'
... )(
>>> # <built-in function print>
... __import__('pickle').loads(b'c__builtin__\nprint\n.')(
... (1),
... (2),
... (3),
Expand All @@ -1376,12 +1362,8 @@ Many other object types work.
.. code-block:: REPL
#> .#(fractions..Fraction 1 2)
>>> __import__('pickle').loads( # Fraction(1, 2)
... b'cfractions\n'
... b'Fraction\n'
... b'(V1/2\n'
... b'tR.'
... )
>>> # Fraction(1, 2)
... __import__('pickle').loads(b'cfractions\nFraction\n(V1/2\ntR.')
Fraction(1, 2)
Unfortunately, there are some objects even pickle can't handle.
Expand Down Expand Up @@ -1416,10 +1398,8 @@ and the reader embeds the resulting object into the output Hissp:
.. code-block:: REPL
#> builtins..float#inf
>>> __import__('pickle').loads( # inf
... b'Finf\n'
... b'.'
... )
>>> # inf
... __import__('pickle').loads(b'Finf\n.')
inf
This inserts an actual `float` object at `read time` into the Hissp code.
Expand All @@ -1429,16 +1409,14 @@ It's the same as using inject like this
.. code-block:: REPL
#> .#(float 'inf)
>>> __import__('pickle').loads( # inf
... b'Finf\n'
... b'.'
... )
>>> # inf
... __import__('pickle').loads(b'Finf\n.')
inf
Or readerless mode like this

>>> readerless(float('inf'))
"__import__('pickle').loads( # inf\n b'Finf\\n'\n b'.'\n)"
"# inf\n__import__('pickle').loads(b'Finf\\n.')"

A float is neither a `str` nor a `tuple`,
so Hissp tries its best to compile this as data representing itself,
Expand Down Expand Up @@ -1487,21 +1465,13 @@ You indicate how many with the number of trailing ``#``\ s.
.. code-block:: REPL
#> fractions..Fraction#|2/3| ; Two thirds.
>>> __import__('pickle').loads( # Fraction(2, 3)
... b'cfractions\n'
... b'Fraction\n'
... b'(V2/3\n'
... b'tR.'
... )
>>> # Fraction(2, 3)
... __import__('pickle').loads(b'cfractions\nFraction\n(V2/3\ntR.')
Fraction(2, 3)
#> fractions..Fraction## 2 3 ; Notice the extra #.
>>> __import__('pickle').loads( # Fraction(2, 3)
... b'cfractions\n'
... b'Fraction\n'
... b'(V2/3\n'
... b'tR.'
... )
>>> # Fraction(2, 3)
... __import__('pickle').loads(b'cfractions\nFraction\n(V2/3\ntR.')
Fraction(2, 3)
Reader tags may also take keyword arguments,
Expand Down Expand Up @@ -1533,19 +1503,8 @@ If you see one of these, make sure you used enough ``#``\ s on your tag.
.. code-block:: REPL
#> base=6
>>> __import__('pickle').loads( # Kwarg('base', 6)
... b'ccopyreg\n'
... b'_reconstructor\n'
... b'(chissp.reader\n'
... b'Kwarg\n'
... b'cbuiltins\n'
... b'object\n'
... b'NtR(dVk\n'
... b'Vbase\n'
... b'sVv\n'
... b'I6\n'
... b'sb.'
... )
>>> # Kwarg('base', 6)
... __import__('pickle').loads(b'ccopy_reg\n_reconstructor\n(chissp.reader\nKwarg\nc__builtin__\nobject\nNtR(dVk\nVbase\nsVv\nI6\nsb.')
Kwarg('base', 6)
The special kwarg tokens ``*=`` and ``**=`` unpack the argument at that position,
Expand Down
38 changes: 18 additions & 20 deletions src/hissp/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@
`readerless` uses this automatically.
"""

MAX_PROTOCOL = pickle.HIGHEST_PROTOCOL
"""
When there is no known literal syntax for an `atom`,
the compiler emits a `pickle.loads` expression as a fallback.
This is the highest pickle protocol the compiler is allowed to use.
It may use Protocol 0 instead when that pickle has a shorter repr,
because the non-printing bytes of the binary protocols require inefficient escapes.
"""

@contextmanager
def macro_context(ns):
Expand Down Expand Up @@ -518,28 +526,19 @@ def atomic(self, form) -> str:
>>> readerless(-4.2j)
'((-0-4.2j))'
>>> print(readerless(float('nan')))
__import__('pickle').loads( # nan
b'Fnan\n'
b'.'
)
# nan
__import__('pickle').loads(b'Fnan\n.')
>>> readerless([{'foo':2},(),1j,2.0,{3}])
"[{'foo': 2}, (), 1j, 2.0, {3}]"
>>> spam = []
>>> spam.append(spam) # ref cycle can't be a literal
>>> print(readerless(spam))
__import__('pickle').loads( # [[...]]
b'(lp0\n'
b'g0\n'
b'a.'
)
# [[...]]
__import__('pickle').loads(b'(lp0\ng0\na.')
>>> spam = [[]] * 3 # duplicated refs
>>> print(readerless(spam))
__import__('pickle').loads( # [[], [], []]
b'(l(lp0\n'
b'ag0\n'
b'ag0\n'
b'a.'
)
# [[], [], []]
__import__('pickle').loads(b'(l(lp0\nag0\nag0\na.')
"""
if form is Ellipsis:
Expand Down Expand Up @@ -580,12 +579,11 @@ def _try_eval(literal):
@_trace
def pickle(self, form) -> str:
"""Compile to `pickle.loads`. The final fallback for :meth:`atom`."""
# 0 is the "human-readable" backwards-compatible text protocol.
dumps = pickletools.optimize(pickle.dumps(form, 0, fix_imports=False))
dumps = "\n ".join(f"{b!r}" for b in dumps.splitlines(keepends=True))
protocols = 0, MAX_PROTOCOL
pickles = (repr(pickletools.optimize(pickle.dumps(form, p))) for p in protocols)
code = min(pickles, key=len)
r = repr(form).replace("\n", "\n # ")
nl = "\n" if "\n" in r else ""
return f"__import__({pickle.__name__!r}).loads({nl} # {r}\n {dumps}\n)"
return f"# {r}\n__import__({pickle.__name__!r}).loads({code})"

@staticmethod
def linenos(form):
Expand Down
6 changes: 2 additions & 4 deletions tests/test_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,8 @@ def test_repl_empty_reader_macro_error():
call_response(
"> #> ", "< builtins..float#\n",
"> #..", "< inf\n",
"! >>> __import__('pickle').loads( # inf\n",
"! ... b'Finf\\n'\n",
"! ... b'.'\n",
"! ... )\n",
"! >>> # inf\n",
"! ... __import__('pickle').loads(b'Finf\\n.')\n",
"> inf\n",
"> #> ", "< (builtins..float#)\n",
'! File "<console>", line 1\n',
Expand Down

0 comments on commit 297ea5d

Please sign in to comment.