Skip to content

Commit

Permalink
fix(bigquery): fixup query parsing for nested records
Browse files Browse the repository at this point in the history
An error in the test data led us to implement the wrong parsing
logic. This fix should allow RECORDs containing other RECORDs to
be parsed properly.
  • Loading branch information
harshsaini authored and TheKevJames committed Jul 20, 2022
1 parent 08178ac commit 689888c
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 72 deletions.
10 changes: 0 additions & 10 deletions bigquery/gcloud/aio/bigquery/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,6 @@ def parse(field: Dict[str, Any], value: Any) -> Any:

if field['mode'] == 'REPEATED':
if field['type'] == 'RECORD':
# TODO: The [0] and all this special-casing is suspicious. Is there
# a case I'm missing with overly nested RECORDS, perhaps?
# I suspect this entire block can get reduced down to a single case
# and then inserted into the dict of Callables above.
if (len(field['fields']) == 1
and field['fields'][0]['type'] == 'RECORD'):
return [{f['name']: parse(f, xs)
for f in field['fields']}
for xs in flatten(value)]

return [{f['name']: parse(f, x)
for f, x in zip(field['fields'], xs)}
for xs in flatten(value)]
Expand Down
170 changes: 108 additions & 62 deletions bigquery/tests/unit/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,9 @@ def test_parse_nullable(kind):
assert parse(field, None) is None


def test_query_response_to_dict():
fields = [
@pytest.mark.parametrize('fields,rows,expected', [
# collection of misc data
([
{'name': 'id', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'unixtime', 'type': 'INTEGER', 'mode': 'NULLABLE'},
{'name': 'isfakedata', 'type': 'BOOLEAN', 'mode': 'NULLABLE'},
Expand All @@ -92,19 +93,18 @@ def test_query_response_to_dict():
{'name': 'record', 'type': 'RECORD', 'mode': 'REQUIRED', 'fields': [
{'name': 'item', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'value', 'type': 'INTEGER', 'mode': 'NULLABLE'}]},
{'name': 'PARTITIONTIME', 'type': 'TIMESTAMP', 'mode': 'NULLABLE'},
]
rows = [
{'name': 'PARTITIONTIME', 'type': 'TIMESTAMP', 'mode': 'NULLABLE'}],
[
{'f': [
{'v': 'ident1'},
{'v': '1654122422181'},
{'v': 'true'},
{'v': [{'v': {'f': [{'v': {'f': [{'v': 'apples'},
{'v': '1.23'}]}},
{'v': {'f': [{'v': 'oranges'},
{'v': '2.34'}]}}]}},
{'v': {'f': [{'v': {'f': [{'v': 'aardvarks'},
{'v': '9000.1'}]}}]}}]},
{'v': [
{'v': {'f': [{'v': [
{'v': {'f': [{'v': 'apples'}, {'v': '1.23'}]}},
{'v': {'f': [{'v': 'oranges'}, {'v': '2.34'}]}}]}]}},
{'v': {'f': [{'v': [
{'v': {'f': [{'v': 'aardvarks'}, {'v': '9000.1'}]}}]}]}}]},
{'v': [{'v': 'foo'}, {'v': 'bar'}]},
{'v': {'f': [{'v': 'slothtoes'}, {'v': 3}]}},
{'v': '1.6540416E9'}]},
Expand All @@ -115,64 +115,110 @@ def test_query_response_to_dict():
{'v': []},
{'v': [{'v': 'foo'}, {'v': 'bar'}]},
{'v': {'f': [{'v': 'slothtoes'}, {'v': 3}]}},
{'v': '1.6540416E9'}]},
]
expected = [
{
'PARTITIONTIME': datetime.datetime(2022, 6, 1, 0, 0, tzinfo=utc),
'id': 'ident1',
'isfakedata': True,
'nested': [
{
'nestedagain': [
{
'item': 'apples',
'value': 1.23,
},
{
'item': 'oranges',
'value': 2.34,
},
],
},
{
'nestedagain': [
{
'item': 'aardvarks',
'value': 9000.1,
},
],
}
],
'record': {
'item': 'slothtoes',
'value': 3,
},
'repeated': ['foo', 'bar'],
'unixtime': 1654122422181,
},
{
'PARTITIONTIME': datetime.datetime(2022, 6, 1, 0, 0, tzinfo=utc),
'id': 'ident2',
'isfakedata': False,
'nested': [],
'record': {
'item': 'slothtoes',
'value': 3,
},
'repeated': ['foo', 'bar'],
'unixtime': 1654122422181,
},
]

{'v': '1.6540416E9'}]}],
[{
'PARTITIONTIME': datetime.datetime(2022, 6, 1, 0, 0, tzinfo=utc),
'id': 'ident1',
'isfakedata': True,
'nested': [
{
'nestedagain': [
{'item': 'apples', 'value': 1.23},
{'item': 'oranges', 'value': 2.34},
],
},
{
'nestedagain': [
{'item': 'aardvarks', 'value': 9000.1},
],
}
],
'record': {'item': 'slothtoes', 'value': 3},
'repeated': ['foo', 'bar'],
'unixtime': 1654122422181,
}, {
'PARTITIONTIME': datetime.datetime(2022, 6, 1, 0, 0, tzinfo=utc),
'id': 'ident2',
'isfakedata': False,
'nested': [],
'record': {'item': 'slothtoes', 'value': 3},
'repeated': ['foo', 'bar'],
'unixtime': 1654122422181}],
),
# double-nested RECORDs
([{
'name': 'paragraph',
'type': 'RECORD',
'mode': 'REPEATED',
'fields': [
{
'name': 'sentence',
'type': 'RECORD',
'mode': 'REPEATED',
'fields': [
{
'name': 'word',
'type': 'STRING',
'mode': 'NULLABLE'
},
{
'name': 'timestamp',
'type': 'FLOAT',
'mode': 'NULLABLE'
}]}]}],
[{'f': [{'v': [
{'v': {'f': [{'v': [{'v': {'f': [{'v': 'hello'},
{'v': '2.34'}]}}]}]}},
{'v': {'f': [{'v': [{'v': {'f': [{'v': 'hey'},
{'v': '5.22'}]}}]}]}},
{'v': {'f': [{'v': [{'v': {'f': [{'v': "I'm"},
{'v': '7.86'}]}},
{'v': {'f': [{'v': 'good'},
{'v': '8.31'}]}},
{'v': {'f': [{'v': "I'm"},
{'v': '8.46'}]}},
{'v': {'f': [{'v': 'very'},
{'v': '8.76'}]}},
{'v': {'f': [{'v': 'caffeinated'},
{'v': '9.45'}]}},
{'v': {'f': [{'v': 'this'},
{'v': '9.66'}]}},
{'v': {'f': [{'v': 'morning'},
{'v': '10.05'}]}},
{'v': {'f': [{'v': 'how'},
{'v': '10.92'}]}},
{'v': {'f': [{'v': 'are'},
{'v': '11.04'}]}},
{'v': {'f': [{'v': 'you'},
{'v': '11.13'}]}},
{'v': {'f': [{'v': 'doing'},
{'v': '11.4'}]}}]}]}}]}]}],
[{'paragraph': [{
'sentence': [{'word': 'hello', 'timestamp': 2.34}]}, {
'sentence': [{'word': 'hey', 'timestamp': 5.22}]}, {
'sentence': [{'word': "I'm", 'timestamp': 7.86},
{'word': 'good', 'timestamp': 8.31},
{'word': "I'm", 'timestamp': 8.46},
{'word': 'very', 'timestamp': 8.76},
{'word': 'caffeinated', 'timestamp': 9.45},
{'word': 'this', 'timestamp': 9.66},
{'word': 'morning', 'timestamp': 10.05},
{'word': 'how', 'timestamp': 10.92},
{'word': 'are', 'timestamp': 11.04},
{'word': 'you', 'timestamp': 11.13},
{'word': 'doing', 'timestamp': 11.4}]}]}],
),
])
def test_query_response_to_dict(fields, rows, expected):
resp = {
'kind': 'bigquery#queryResponse',
'schema': {'fields': fields},
'jobReference': {'projectId': 'sample-project',
'jobId': 'job_Tlpl-66ca7a8e365a28084c39ffc52d402671',
'location': 'US'},
'rows': rows,
'totalRows': '2',
'totalRows': str(len(rows)),
'totalBytesProcessed': '0',
'jobComplete': True,
'cacheHit': True,
Expand Down

0 comments on commit 689888c

Please sign in to comment.