Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MIN-2432] Enum schema evolution Python fork #1

Draft
wants to merge 18 commits into
base: json_unknown_enum_fork_baseline
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions python/google/protobuf/internal/json_format_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,41 @@ def testParseEnumValue(self):
'for enum type protobuf_unittest.TestAllTypes.NestedEnum.',
json_format.Parse, '{"optionalNestedEnum": 12345}', message)

def testParseUnknownEnumStringValueProto2(self):
message = json_format_pb2.TestNumbers()
# Note that in src/google/protobuf/util/json_format.proto::TestNumbers
# has a field 'optional MyType a = 1', which is different
# from the test below that are using
# src/google/protobuf/util/json_format_proto3.proto::TestMessage.
text = '{"a": "UNKNOWN_STRING_VALUE"}'
json_format.Parse(text, message, ignore_unknown_fields=True)
# In proto2 we can see that the field was not set at all.
self.assertFalse(message.HasField("a"))

def testParseErrorCorrectCatchForUnknownEnumValue(self):
message = json_format_pb2.TestNumbers()
self.assertRaisesRegexp(
json_format.ParseError,
'Invalid enum value',
json_format.Parse, '{"a": "UNKNOWN_STRING_VALUE"}', message)

def testParseUnknownEnumStringValueProto3(self):
message = json_format_proto3_pb2.TestMessage()
text = '{"enumValue": "UNKNOWN_STRING_VALUE"}'
json_format.Parse(text, message, ignore_unknown_fields=True)
# In proto3, there is no difference between the default value and 0.
self.assertEqual(message.enum_value, 0)

# Note that in noom-contracts we banned the repeated enum fields, so this test
# is not adding value for Noom's use of this library.
# Still, we are testing that the behavior here is consistent with what
# binary serialization would do.
def testParseUnknownEnumStringValueRepeatedProto3(self):
message = json_format_proto3_pb2.TestMessage()
text = '{"repeatedEnumValue": ["UNKNOWN_STRING_VALUE", "FOO", "BAR"]}'
json_format.Parse(text, message, ignore_unknown_fields=True)
self.assertEquals(len(message.repeated_enum_value), 2)

def testBytes(self):
message = json_format_proto3_pb2.TestMessage()
# Test url base64
Expand Down
93 changes: 78 additions & 15 deletions python/google/protobuf/json_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,16 @@
__author__ = 'jieluo@google.com (Jie Luo)'


import ctypes
import base64
from collections import OrderedDict
import json
import math
from operator import methodcaller
import re
import sys
from contextlib import contextmanager

from google.protobuf.internal import type_checkers
from google.protobuf import descriptor
from google.protobuf import symbol_database

Expand All @@ -75,6 +76,35 @@
_VALID_EXTENSION_NAME = re.compile(r'\[[a-zA-Z0-9\._]*\]$')


# NOTE(anton): copied from internal/type_checkers.py to avoid having json_format.py depend on internal interface of the
# protobuf library.
# Depending on an internal protobuf interface can hurt us since we are forking this file in noom-contracts, and the
# version of the protobuf library used on the client is under client's control.
# The other two imports in this file (descriptor and symbol_database) are part of the protobuf public interface, and
# there is less risk that this will change under us.
def TruncateToFourByteFloat(original):
return ctypes.c_float(original).value


# NOTE(anton): same comment from above TruncateToFourByteFloat applies here.
def ToShortestFloat(original):
"""Returns the shortest float that has same value in wire."""
# All 4 byte floats have between 6 and 9 significant digits, so we
# start with 6 as the lower bound.
# It has to be iterative because use '.9g' directly can not get rid
# of the noises for most values. For example if set a float_field=0.9
# use '.9g' will print 0.899999976.
precision = 6
rounded = float('{0:.{1}g}'.format(original, precision))
while TruncateToFourByteFloat(rounded) != original:
precision += 1
rounded = float('{0:.{1}g}'.format(original, precision))
return rounded

# NOTE(anton): same comment from above TruncateToFourByteFloat applies here.
_FLOAT_MAX = float.fromhex('0x1.fffffep+127')
_FLOAT_MIN = -_FLOAT_MAX

class Error(Exception):
"""Top-level module error for json_format."""

Expand All @@ -87,6 +117,31 @@ class ParseError(Error):
"""Thrown in case of parsing error."""


class _UnknownEnumStringValueParseError(ParseError):
"""
Thrown if an unknown enum string value is encountered.
We inherit ParseError to avoid breaking the code that might expect ParseError.
"""


@contextmanager
def _MaybeSuppressUnknownEnumStringValueParseError(should_suppress: bool):
"""
This context manager will suppress _UnknownEnumStringValueParseError if the should_suppress is set to true. Usage:

with _MaybeSuppressUnknownEnumStringValueParseError(True):
run_some_code_that_might_throw()
"""
try:
yield
except _UnknownEnumStringValueParseError as ex:
if should_suppress:
# Here we suppress _UnknownEnumStringValueParseError.
pass
else:
raise


def MessageToJson(
message,
including_default_value_fields=False,
Expand Down Expand Up @@ -308,7 +363,7 @@ def _FieldToJsonObject(self, field, value):
if self.float_format:
return float(format(value, self.float_format))
else:
return type_checkers.ToShortestFloat(value)
return ToShortestFloat(value)

return value

Expand Down Expand Up @@ -569,8 +624,9 @@ def _ConvertFieldValuePair(self, js, message):
if item is None:
raise ParseError('null is not allowed to be used as an element'
' in a repeated field.')
getattr(message, field.name).append(
_ConvertScalarFieldValue(item, field))
with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields):
getattr(message, field.name).append(
_ConvertScalarFieldValue(item, field))
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
if field.is_extension:
sub_message = message.Extensions[field]
Expand All @@ -579,10 +635,11 @@ def _ConvertFieldValuePair(self, js, message):
sub_message.SetInParent()
self.ConvertMessage(value, sub_message)
else:
if field.is_extension:
message.Extensions[field] = _ConvertScalarFieldValue(value, field)
else:
setattr(message, field.name, _ConvertScalarFieldValue(value, field))
with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields):
if field.is_extension:
message.Extensions[field] = _ConvertScalarFieldValue(value, field)
else:
setattr(message, field.name, _ConvertScalarFieldValue(value, field))
except ParseError as e:
if field and field.containing_oneof is None:
raise ParseError('Failed to parse {0} field: {1}.'.format(name, e))
Expand Down Expand Up @@ -669,7 +726,8 @@ def _ConvertStructMessage(self, value, message):
def _ConvertWrapperMessage(self, value, message):
"""Convert a JSON representation into Wrapper message."""
field = message.DESCRIPTOR.fields_by_name['value']
setattr(message, 'value', _ConvertScalarFieldValue(value, field))
with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields):
setattr(message, 'value', _ConvertScalarFieldValue(value, field))

def _ConvertMapFieldValue(self, value, message, field):
"""Convert map field value for a message map field.
Expand All @@ -689,13 +747,15 @@ def _ConvertMapFieldValue(self, value, message, field):
key_field = field.message_type.fields_by_name['key']
value_field = field.message_type.fields_by_name['value']
for key in value:
key_value = _ConvertScalarFieldValue(key, key_field, True)
with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields):
key_value = _ConvertScalarFieldValue(key, key_field, True)
if value_field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
self.ConvertMessage(value[key], getattr(
message, field.name)[key_value])
else:
getattr(message, field.name)[key_value] = _ConvertScalarFieldValue(
value[key], value_field)
with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields):
getattr(message, field.name)[key_value] = _ConvertScalarFieldValue(
value[key], value_field)


def _ConvertScalarFieldValue(value, field, require_str=False):
Expand All @@ -711,6 +771,7 @@ def _ConvertScalarFieldValue(value, field, require_str=False):

Raises:
ParseError: In case of convert problems.
_UnknownEnumStringValueParseError: If unknown enum string value is encountered during parsing.
"""
if field.cpp_type in _INT_TYPES:
return _ConvertInteger(value)
Expand Down Expand Up @@ -741,7 +802,9 @@ def _ConvertScalarFieldValue(value, field, require_str=False):
number = int(value)
enum_value = field.enum_type.values_by_number.get(number, None)
except ValueError:
raise ParseError('Invalid enum value {0} for enum type {1}.'.format(
# The ValueError will be raised by the conversion to int.
# That means that here we know that we have an unknown enum string value.
raise _UnknownEnumStringValueParseError('Invalid enum value {0} for enum type {1}.'.format(
value, field.enum_type.full_name))
if enum_value is None:
if field.file.syntax == 'proto3':
Expand Down Expand Up @@ -791,10 +854,10 @@ def _ConvertFloat(value, field):
'use quoted "-Infinity" instead.')
if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
# pylint: disable=protected-access
if value > type_checkers._FLOAT_MAX:
if value > _FLOAT_MAX:
raise ParseError('Float value too large')
# pylint: disable=protected-access
if value < type_checkers._FLOAT_MIN:
if value < _FLOAT_MIN:
raise ParseError('Float value too small')
if value == 'nan':
raise ParseError('Couldn\'t parse float "nan", use "NaN" instead.')
Expand Down
4 changes: 2 additions & 2 deletions tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ build_python() {
if [ $(uname -s) == "Linux" ]; then
envlist=py\{35,36\}-python
else
envlist=py\{36\}-python
envlist=py\{39\}-python
fi
python -m tox -e $envlist
cd ..
Expand Down Expand Up @@ -376,7 +376,7 @@ build_python_cpp() {
if [ $(uname -s) == "Linux" ]; then
envlist=py\{35,36\}-cpp
else
envlist=py\{36\}-cpp
envlist=py\{39\}-cpp
fi
tox -e $envlist
cd ..
Expand Down