-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_encrypted_parquet.py
executable file
·49 lines (39 loc) · 1.44 KB
/
read_encrypted_parquet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python
# vim: set ft=python :
import datetime
from pathlib import Path
import pyarrow.dataset as ds
import pyarrow.parquet.encryption as pe
from kms_client import KmsClient, WrappingKeyId
# Location of the Parquet dataset to read; a relative path, so it is resolved
# against the current working directory.
PATH: Path = Path("dataset")

# URL of the KMS instance, passed to the KMS connection config in main().
KMS_INSTANCE_URL: str = "http://localhost:8001"
# Access token handed to the KMS connection config (as key_access_token) in
# main(). Exactly one assignment should be active; the commented-out lines are
# alternative values kept for experimentation.
# NOTE(review): presumably the project's KmsClient treats this value as the
# caller's privilege level (see the per-column notes on COLUMNS) -- confirm
# against kms_client.
# KMS_ACCESS_TOKEN = None
# KMS_ACCESS_TOKEN = WrappingKeyId.INTERNAL
# KMS_ACCESS_TOKEN = WrappingKeyId.CONFIDENTIAL
KMS_ACCESS_TOKEN = WrappingKeyId.RESTRICTED
# Columns projected from the dataset, in output order. Each group is annotated
# with the minimum privilege required to read it.
COLUMNS = [
    # No privilege required (stored as plaintext).
    "id",
    # Minimum required privilege: INTERNAL.
    "date_of_birth",
    # Minimum required privilege: CONFIDENTIAL.
    "first_name",
    "last_name",
    # Minimum required privilege: RESTRICTED.
    "social_security_number",
]
def main():
    """Read the encrypted Parquet dataset at PATH and print each row.

    Builds a Parquet file format whose fragment scan options carry a
    decryption config (crypto factory backed by ``KmsClient``, a KMS
    connection using ``KMS_INSTANCE_URL`` / ``KMS_ACCESS_TOKEN``, and a
    one-minute cache lifetime), scans only the columns listed in
    ``COLUMNS``, and prints every row of the resulting table.
    """
    # Assemble the three pieces of the decryption config one at a time, in
    # the same order the nested expression would evaluate them.
    crypto_factory = pe.CryptoFactory(KmsClient)
    kms_connection = pe.KmsConnectionConfig(
        kms_instance_url=KMS_INSTANCE_URL,
        key_access_token=KMS_ACCESS_TOKEN,
    )
    decryption_settings = pe.DecryptionConfiguration(
        cache_lifetime=datetime.timedelta(minutes=1),
    )
    decryption_config = ds.ParquetDecryptionConfig(
        crypto_factory,
        kms_connection,
        decryption_settings,
    )

    # Attach the decryption config to the default scan options of the format.
    scan_options = ds.ParquetFragmentScanOptions(
        decryption_config=decryption_config,
    )
    parquet_format = ds.ParquetFileFormat(
        default_fragment_scan_options=scan_options,
    )

    # Project only the requested columns and load them into a single table.
    encrypted_dataset = ds.dataset(PATH, format=parquet_format)
    scanner = encrypted_dataset.scanner(columns=COLUMNS)
    rows = scanner.to_table().to_pylist()

    for record in rows:
        print(record)
# Run the reader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()