-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path__init__.py
158 lines (122 loc) · 4.51 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from typing import Type, Iterator, Union, Callable, Any, List, Dict
import base64
import hashlib
from . import _cbrrr # type: ignore
# Re-export `_cbrrr`'s CbrrrDecodeError so it can be imported directly from
# this package (it is listed in `__all__`).
CbrrrDecodeError = _cbrrr.CbrrrDecodeError
class CID:
    """
    Thin wrapper around the raw bytes of a CID (content identifier).

    This class is very minimal, intended to support atproto use cases and not
    much else. Instances are hashable and compare equal when their raw bytes
    are equal.
    """

    # 4-byte prefixes: CID version 1, content codec (0x71 dag-cbor / 0x55 raw),
    # multihash code 0x12 (sha2-256), digest length 0x20 (32 bytes).
    # fmt: off
    CIDV1_DAG_CBOR_SHA256_32_PFX = b"\x01\x71\x12\x20"
    CIDV1_RAW_SHA256_32_PFX = b"\x01\x55\x12\x20"
    # fmt: on

    __slots__ = ("cid_bytes",)

    def __init__(self, cid_bytes: bytes) -> None:
        """
        Wrap already-decoded CID bytes.

        Expects raw bytes, without a multibase prefix.
        If you don't have raw bytes, you probably want CID.decode()

        NOTE: No validation is performed here! You're responsible for ensuring
        the CID has a format you recognise. The is_cidv1_dag_cbor_sha256_32()
        and is_cidv1_raw_sha256_32() methods may be useful for this.
        """
        self.cid_bytes = cid_bytes

    @classmethod
    def cidv1_dag_cbor_sha256_32_from(cls, data: bytes) -> "CID":
        """Build a CIDv1 (dag-cbor, sha256) from the SHA-256 digest of `data`."""
        return cls(cls.CIDV1_DAG_CBOR_SHA256_32_PFX + hashlib.sha256(data).digest())

    @classmethod
    def cidv1_raw_sha256_32_from(cls, data: bytes) -> "CID":
        """Build a CIDv1 (raw, sha256) from the SHA-256 digest of `data`."""
        return cls(cls.CIDV1_RAW_SHA256_32_PFX + hashlib.sha256(data).digest())

    @classmethod
    def decode(cls, data: Union[bytes, str]) -> "CID":
        """
        Parse a multibase-prefixed CID, given as bytes or str.

        Currently supported codecs: identity/raw, base32

        Raises ValueError if the multibase prefix is not recognised, if the
        base32 payload carries explicit "=" padding, or if the payload is not
        valid base32 (binascii.Error, raised by base64.b32decode, is a
        ValueError subclass).
        """
        if isinstance(data, str):
            data = data.encode()

        if data.startswith(b"\x00"):  # identity multibase codec
            return cls(data[1:])

        if data.startswith(b"b"):  # base32 multibase codec
            payload = data[1:]  # strip prefix
            if payload.endswith(b"="):
                raise ValueError("unexpected base32 padding")
            # add back correct amount of padding (python is fussy)
            payload += b"=" * (-len(payload) % 8)
            return cls(base64.b32decode(payload, casefold=True))

        raise ValueError("I don't know how to decode this CID")

    def encode(self, base: str = "base32") -> str:
        """
        Return the CID as a multibase string.

        Only base="base32" is supported: a "b" prefix followed by lowercase,
        unpadded base32. Any other value raises ValueError.
        """
        if base == "base32":
            b32 = base64.b32encode(self.cid_bytes).decode()
            return "b" + b32.lower().rstrip("=")
        # this function might support other encodings in the future
        raise ValueError("unsupported base encoding")

    def is_cidv1_dag_cbor_sha256_32(self) -> bool:
        """True if the bytes are the dag-cbor/sha256 prefix plus 32 more bytes."""
        return (
            self.cid_bytes.startswith(self.CIDV1_DAG_CBOR_SHA256_32_PFX)
            and len(self.cid_bytes) == 36
        )

    def is_cidv1_raw_sha256_32(self) -> bool:
        """True if the bytes are the raw/sha256 prefix plus 32 more bytes."""
        return (
            self.cid_bytes.startswith(self.CIDV1_RAW_SHA256_32_PFX)
            and len(self.cid_bytes) == 36
        )

    def __bytes__(self) -> bytes:
        return self.cid_bytes

    def __repr__(self) -> str:
        return f"CID({self.encode()})"

    def __hash__(self) -> int:
        # Hash on the raw bytes so hashing stays consistent with __eq__.
        return hash(self.cid_bytes)

    def __eq__(self, __value: object) -> bool:
        if not isinstance(__value, CID):
            # Defer to the other operand; `cid == other` still ends up False
            # unless the other type explicitly knows how to compare with CID.
            return NotImplemented
        return self.cid_bytes == __value.cid_bytes
# Recursive alias for the Python values handled by the DAG-CBOR codec below.
# nb: spelled with typing.Union because the `|` syntax is not supported in <=py3.9
DagCborTypes = Union[
    str,
    bytes,
    int,
    bool,
    float,
    CID,
    List["DagCborTypes"],
    Dict[str, "DagCborTypes"],
    None,
]
def decode_dag_cbor(
    data: bytes, atjson_mode: bool = False, cid_ctor: Callable[[bytes], Any] = CID
) -> DagCborTypes:
    """
    Decode a buffer holding exactly one DAG-CBOR object into python objects.

    With atjson_mode=True, bytes come back as {"$bytes": "b64..."} and CIDs
    as {"$link": "b32..."}. With atjson_mode=False they come back as bytes
    objects and as whatever cid_ctor returns (CID instances by default).

    Raises ValueError if the length reported by the decoder differs from
    len(data), i.e. the object did not span the whole buffer.
    """
    result, consumed = _cbrrr.decode_dag_cbor(data, cid_ctor, atjson_mode)
    if consumed == len(data):
        return result
    raise ValueError("did not parse to end of buffer")
def decode_multi_dag_cbor_in_violation_of_the_spec(
    data: bytes, atjson_mode: bool = False, cid_ctor: Callable[[bytes], Any] = CID
) -> Iterator[DagCborTypes]:
    """
    Lazily yield each object from a buffer of back-to-back DAG-CBOR objects.

    atjson_mode and cid_ctor are passed through to the decoder for every
    object. An empty buffer yields nothing.

    The name is a warning; the spec forbids this usage:
    https://ipld.io/specs/codecs/dag-cbor/spec/#strictness

    "Encode and decode must operate on a single top-level CBOR object.
    Back-to-back concatenated objects are not allowed or supported, as suggested
    by section 5.1 of RFC 8949 for streaming applications."
    """
    total = len(data)
    buf = memoryview(data)  # slice without copying the remaining bytes
    pos = 0
    while pos < total:
        item, consumed = _cbrrr.decode_dag_cbor(buf[pos:], cid_ctor, atjson_mode)
        yield item
        pos += consumed
    assert pos == total  # should never fail!
def encode_dag_cbor(
    obj: DagCborTypes, atjson_mode: bool = False, cid_type: Type = CID
) -> bytes:
    """
    Serialise python objects as DAG-CBOR bytes.

    With atjson_mode=True, dicts shaped like {"$bytes": "b64..."} are written
    as CBOR bytes and dicts shaped like {"$link": "b32..."} are written as
    CIDs (CBOR tag value 42).

    cid_type is handed to the encoder unchanged; it defaults to CID.
    """
    encoded = _cbrrr.encode_dag_cbor(obj, cid_type, atjson_mode)
    return encoded
# Names exported by `from <package> import *`; everything else defined in this
# module (e.g. the `_cbrrr` import) is not part of the star-import surface.
__all__ = [
    "CbrrrDecodeError",
    "CID",
    "DagCborTypes",
    "decode_dag_cbor",
    "decode_multi_dag_cbor_in_violation_of_the_spec",
    "encode_dag_cbor",
]