From 2870d2fe1defee6908da96fdb331044cc9427ce6 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Tue, 19 Apr 2022 13:22:22 +0200 Subject: [PATCH 01/13] empty commit From d7611bd01d0149bfef555d29e3a98b2fa5dcde70 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Tue, 19 Apr 2022 13:25:25 +0200 Subject: [PATCH 02/13] Revert "Revert "Use python3.9 which is available on brew to run tests"" This reverts commit bd28ee1c5ce04e25584e072e5e3e36990c2e2cb8. --- tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests.sh b/tests.sh index 71635f9f7f4d8..bce56b4e39c66 100755 --- a/tests.sh +++ b/tests.sh @@ -326,7 +326,7 @@ build_python() { if [ $(uname -s) == "Linux" ]; then envlist=py\{35,36\}-python else - envlist=py\{36\}-python + envlist=py\{39\}-python fi python -m tox -e $envlist cd .. @@ -376,7 +376,7 @@ build_python_cpp() { if [ $(uname -s) == "Linux" ]; then envlist=py\{35,36\}-cpp else - envlist=py\{36\}-cpp + envlist=py\{39\}-cpp fi tox -e $envlist cd .. From e36ea59917ad0bfb499f3066ef477c1899259bd8 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Tue, 19 Apr 2022 14:47:01 +0200 Subject: [PATCH 03/13] make it standalone --- python/google/protobuf/json_format.py | 39 ++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index b3e85933e9609..a391f204473b6 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -43,6 +43,7 @@ __author__ = 'jieluo@google.com (Jie Luo)' +import ctypes import base64 from collections import OrderedDict import json @@ -51,7 +52,6 @@ import re import sys -from google.protobuf.internal import type_checkers from google.protobuf import descriptor from google.protobuf import symbol_database @@ -75,6 +75,37 @@ _VALID_EXTENSION_NAME = re.compile(r'\[[a-zA-Z0-9\._]*\]$') +# NOTE(anton): copied from internal/type_checkers.py to avoid having json_format.py depend on internal interface of the +# protobuf +# library. +# Depending on an internal protobuf interface can hurt us since we are forking this file in noom-contracts, and the +# version of the protobuf libary used on the client is under client's control. +# The other two imports in this file (descriptor and symbol_database) are part of the protobuf public interface, and +# there is less risk that this will change under us. +def TruncateToFourByteFloat(original): + return ctypes.c_float(original).value + + +# NOTE(anton): same comment from above TruncateToFourByteFloat applies here. +def ToShortestFloat(original): + """Returns the shortest float that has same value in wire.""" + # All 4 byte floats have between 6 and 9 significant digits, so we + # start with 6 as the lower bound. + # It has to be iterative because use '.9g' directly can not get rid + # of the noises for most values. For example if set a float_field=0.9 + # use '.9g' will print 0.899999976. + precision = 6 + rounded = float('{0:.{1}g}'.format(original, precision)) + while TruncateToFourByteFloat(rounded) != original: + precision += 1 + rounded = float('{0:.{1}g}'.format(original, precision)) + return rounded + +# NOTE(anton): same comment from above TruncateToFourByteFloat applies here. +_FLOAT_MAX = float.fromhex('0x1.fffffep+127') +_FLOAT_MIN = -_FLOAT_MAX + + class Error(Exception): """Top-level module error for json_format.""" @@ -308,7 +339,7 @@ def _FieldToJsonObject(self, field, value): if self.float_format: return float(format(value, self.float_format)) else: - return type_checkers.ToShortestFloat(value) + return ToShortestFloat(value) return value @@ -791,10 +822,10 @@ def _ConvertFloat(value, field): 'use quoted "-Infinity" instead.') if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT: # pylint: disable=protected-access - if value > type_checkers._FLOAT_MAX: + if value > _FLOAT_MAX: raise ParseError('Float value too large') # pylint: disable=protected-access - if value < type_checkers._FLOAT_MIN: + if value < _FLOAT_MIN: raise ParseError('Float value too small') if value == 'nan': raise ParseError('Couldn\'t parse float "nan", use "NaN" instead.') From 3878da28f18a950c90b55b72d480308452fe16d9 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Tue, 19 Apr 2022 14:49:41 +0200 Subject: [PATCH 04/13] fix code spell --- python/google/protobuf/json_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index a391f204473b6..9f569f50b7964 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -79,7 +79,7 @@ # protobuf # library. # Depending on an internal protobuf interface can hurt us since we are forking this file in noom-contracts, and the -# version of the protobuf libary used on the client is under client's control. +# version of the protobuf library used on the client is under client's control. # The other two imports in this file (descriptor and symbol_database) are part of the protobuf public interface, and # there is less risk that this will change under us. def TruncateToFourByteFloat(original): From cc973ed4f30b97b7019e1937ac1ad8b1c0cd81e3 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Fri, 22 Apr 2022 16:53:55 +0200 Subject: [PATCH 05/13] implement ignoring unknown enum values --- .../protobuf/internal/json_format_test.py | 17 ++++++ python/google/protobuf/json_format.py | 57 +++++++++++++++---- 2 files changed, 63 insertions(+), 11 deletions(-) diff --git a/python/google/protobuf/internal/json_format_test.py b/python/google/protobuf/internal/json_format_test.py index 6a44d4c97db0a..4125a5530a046 100755 --- a/python/google/protobuf/internal/json_format_test.py +++ b/python/google/protobuf/internal/json_format_test.py @@ -909,6 +909,23 @@ def testParseEnumValue(self): 'for enum type protobuf_unittest.TestAllTypes.NestedEnum.', json_format.Parse, '{"optionalNestedEnum": 12345}', message) + def testParseUnknownEnumStringValueProto3(self): + message = json_format_proto3_pb2.TestMessage() + text = '{"enumValue": "UNKNOWN_STRING_VALUE"}' + json_format.Parse(text, message, ignore_unknown_fields=True) + + def testParseUnknownEnumStringValueRepeatedProto3(self): + message = json_format_proto3_pb2.TestMessage() + text = '{"repeatedEnumValue": ["UNKNOWN_STRING_VALUE", "FOO", "BAR"]}' + json_format.Parse(text, message, ignore_unknown_fields=True) + self.assertEquals(len(message.repeated_enum_value), 2) + + def testParseUnknownEnumStringValueProto2(self): + message = json_format_pb2.TestNumbers() + text = '{"a": "UNKNOWN_STRING_VALUE"}' + json_format.Parse(text, message, ignore_unknown_fields=True) + self.assertFalse(message.HasField("a")) + def testBytes(self): message = json_format_proto3_pb2.TestMessage() # Test url base64 diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index b3e85933e9609..2533b6800fcca 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -87,6 +87,33 @@ class ParseError(Error): """Thrown in case of parsing error.""" +class _UnknownEnumStringValueParseError(ParseError): + """Thrown if an unknown enum string value is encountered. This exception never leaks outside of the module.""" + + +class _MaybeSuppressUnknownEnumStringValueParseError(): + """ + Example usage: + + with _MaybeSuppressUnknownEnumStringValueParseError(True): + ... + + If should_suppress is True, the _UnknownEnumStringValueParseError will be ignored in the context body. + """ + def __init__(self, should_suppress): + self.should_suppress = should_suppress + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_value, traceback): + # The return value from __exit__ indicates if any exception that occurred in the context body should be suppressed. + # We suppress _UnknownEnumStringValueParseError if should_suppress is set. + # See context manager docs: + # https://docs.python.org/3/reference/datamodel.html#with-statement-context-managers + return self.should_suppress and exc_type == _UnknownEnumStringValueParseError + + def MessageToJson( message, including_default_value_fields=False, @@ -569,8 +596,9 @@ def _ConvertFieldValuePair(self, js, message): if item is None: raise ParseError('null is not allowed to be used as an element' ' in a repeated field.') - getattr(message, field.name).append( - _ConvertScalarFieldValue(item, field)) + with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields): + getattr(message, field.name).append( + _ConvertScalarFieldValue(item, field)) elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: if field.is_extension: sub_message = message.Extensions[field] @@ -579,10 +607,11 @@ def _ConvertFieldValuePair(self, js, message): sub_message.SetInParent() self.ConvertMessage(value, sub_message) else: - if field.is_extension: - message.Extensions[field] = _ConvertScalarFieldValue(value, field) - else: - setattr(message, field.name, _ConvertScalarFieldValue(value, field)) + with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields): + if field.is_extension: + message.Extensions[field] = _ConvertScalarFieldValue(value, field) + else: + setattr(message, field.name, _ConvertScalarFieldValue(value, field)) except ParseError as e: if field and field.containing_oneof is None: raise ParseError('Failed to parse {0} field: {1}.'.format(name, e)) @@ -669,7 +698,8 @@ def _ConvertStructMessage(self, value, message): def _ConvertWrapperMessage(self, value, message): """Convert a JSON representation into Wrapper message.""" field = message.DESCRIPTOR.fields_by_name['value'] - setattr(message, 'value', _ConvertScalarFieldValue(value, field)) + with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields): + setattr(message, 'value', _ConvertScalarFieldValue(value, field)) def _ConvertMapFieldValue(self, value, message, field): """Convert map field value for a message map field. @@ -689,13 +719,15 @@ def _ConvertMapFieldValue(self, value, message, field): key_field = field.message_type.fields_by_name['key'] value_field = field.message_type.fields_by_name['value'] for key in value: - key_value = _ConvertScalarFieldValue(key, key_field, True) + with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields): + key_value = _ConvertScalarFieldValue(key, key_field, True) if value_field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: self.ConvertMessage(value[key], getattr( message, field.name)[key_value]) else: - getattr(message, field.name)[key_value] = _ConvertScalarFieldValue( - value[key], value_field) + with _MaybeSuppressUnknownEnumStringValueParseError(self.ignore_unknown_fields): + getattr(message, field.name)[key_value] = _ConvertScalarFieldValue( + value[key], value_field) def _ConvertScalarFieldValue(value, field, require_str=False): @@ -711,6 +743,7 @@ def _ConvertScalarFieldValue(value, field, require_str=False): Raises: ParseError: In case of convert problems. + _UnknownEnumStringValueParseError: If unknown enum string value is encountered during parsing. """ if field.cpp_type in _INT_TYPES: return _ConvertInteger(value) @@ -741,7 +774,9 @@ def _ConvertScalarFieldValue(value, field, require_str=False): number = int(value) enum_value = field.enum_type.values_by_number.get(number, None) except ValueError: - raise ParseError('Invalid enum value {0} for enum type {1}.'.format( + # The ValueError will be raised by the conversion to int. + # That means that here we know that we have an unknown enum string value. + raise _UnknownEnumStringValueParseError('Invalid enum value {0} for enum type {1}.'.format( value, field.enum_type.full_name)) if enum_value is None: if field.file.syntax == 'proto3': From e72f8a99c1aa1221921cdff99636ad304f1de583 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Fri, 22 Apr 2022 16:58:21 +0200 Subject: [PATCH 06/13] self review --- .../google/protobuf/internal/json_format_test.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/google/protobuf/internal/json_format_test.py b/python/google/protobuf/internal/json_format_test.py index 4125a5530a046..aa56692e8e165 100755 --- a/python/google/protobuf/internal/json_format_test.py +++ b/python/google/protobuf/internal/json_format_test.py @@ -913,19 +913,22 @@ def testParseUnknownEnumStringValueProto3(self): message = json_format_proto3_pb2.TestMessage() text = '{"enumValue": "UNKNOWN_STRING_VALUE"}' json_format.Parse(text, message, ignore_unknown_fields=True) - - def testParseUnknownEnumStringValueRepeatedProto3(self): - message = json_format_proto3_pb2.TestMessage() - text = '{"repeatedEnumValue": ["UNKNOWN_STRING_VALUE", "FOO", "BAR"]}' - json_format.Parse(text, message, ignore_unknown_fields=True) - self.assertEquals(len(message.repeated_enum_value), 2) + # In proto3, there is no difference between the default value and 0. + self.assertEqual(message.enum_value, 0) def testParseUnknownEnumStringValueProto2(self): message = json_format_pb2.TestNumbers() text = '{"a": "UNKNOWN_STRING_VALUE"}' json_format.Parse(text, message, ignore_unknown_fields=True) + # In proto2 we can see that the field was not set at all. self.assertFalse(message.HasField("a")) + def testParseUnknownEnumStringValueRepeatedProto3(self): + message = json_format_proto3_pb2.TestMessage() + text = '{"repeatedEnumValue": ["UNKNOWN_STRING_VALUE", "FOO", "BAR"]}' + json_format.Parse(text, message, ignore_unknown_fields=True) + self.assertEquals(len(message.repeated_enum_value), 2) + def testBytes(self): message = json_format_proto3_pb2.TestMessage() # Test url base64 From b0f3ef2fa5a463d03f9592963cb2b0f8753d54b4 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Fri, 22 Apr 2022 17:13:33 +0200 Subject: [PATCH 07/13] self review --- python/google/protobuf/json_format.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index 35ea63e6de6c6..6781e0b88f202 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -130,6 +130,12 @@ class _MaybeSuppressUnknownEnumStringValueParseError(): ... If should_suppress is True, the _UnknownEnumStringValueParseError will be ignored in the context body. + + The motivation for the context manager is to avoid a bigger refactor that would enable _ConvertScalarFieldValue to + signal to the caller that the field should be ignored. + + We want to avoid a bigger refactor because we are maintaining a fork and we want changes to be minimal to simplify + merging with upstream. """ def __init__(self, should_suppress): self.should_suppress = should_suppress From 66eb2d983c268e2dd152a7c48557ca5115baeea7 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Mon, 20 Jun 2022 17:17:32 +0200 Subject: [PATCH 08/13] address comments --- python/google/protobuf/json_format.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index 9f569f50b7964..573cc0d3dec52 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -76,8 +76,7 @@ # NOTE(anton): copied from internal/type_checkers.py to avoid having json_format.py depend on internal interface of the -# protobuf -# library. +# protobuf library. # Depending on an internal protobuf interface can hurt us since we are forking this file in noom-contracts, and the # version of the protobuf library used on the client is under client's control. # The other two imports in this file (descriptor and symbol_database) are part of the protobuf public interface, and @@ -105,7 +104,6 @@ def ToShortestFloat(original): _FLOAT_MAX = float.fromhex('0x1.fffffep+127') _FLOAT_MIN = -_FLOAT_MAX - class Error(Exception): """Top-level module error for json_format.""" From 0d8dabb911cac153f15082c50841037b4889ec17 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Mon, 20 Jun 2022 17:53:22 +0200 Subject: [PATCH 09/13] fix comment --- python/google/protobuf/json_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index fd78185c32567..3b164485bac0d 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -117,7 +117,7 @@ class ParseError(Error): class _UnknownEnumStringValueParseError(ParseError): - """Thrown if an unknown enum string value is encountered. This exception never leaks outside of the module.""" + """Thrown if an unknown enum string value is encountered.""" class _MaybeSuppressUnknownEnumStringValueParseError(): From f54c8b4d2055481f45e58c536baa8d7e60b4ce17 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Mon, 20 Jun 2022 18:07:27 +0200 Subject: [PATCH 10/13] apply advice from Clements --- python/google/protobuf/json_format.py | 38 ++++++++------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index 3b164485bac0d..62068fa72ca46 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -51,6 +51,7 @@ from operator import methodcaller import re import sys +from contextlib import contextmanager from google.protobuf import descriptor from google.protobuf import symbol_database @@ -120,33 +121,16 @@ class _UnknownEnumStringValueParseError(ParseError): """Thrown if an unknown enum string value is encountered.""" -class _MaybeSuppressUnknownEnumStringValueParseError(): - """ - Example usage: - - with _MaybeSuppressUnknownEnumStringValueParseError(True): - ... - - If should_suppress is True, the _UnknownEnumStringValueParseError will be ignored in the context body. - - The motivation for the context manager is to avoid a bigger refactor that would enable _ConvertScalarFieldValue to - signal to the caller that the field should be ignored. - - We want to avoid a bigger refactor because we are maintaining a fork and we want changes to be minimal to simplify - merging with upstream. - """ - def __init__(self, should_suppress): - self.should_suppress = should_suppress - - def __enter__(self): - pass - - def __exit__(self, exc_type, exc_value, traceback): - # The return value from __exit__ indicates if any exception that occurred in the context body should be suppressed. - # We suppress _UnknownEnumStringValueParseError if should_suppress is set. - # See context manager docs: - # https://docs.python.org/3/reference/datamodel.html#with-statement-context-managers - return self.should_suppress and exc_type == _UnknownEnumStringValueParseError +@contextmanager +def _MaybeSuppressUnknownEnumStringValueParseError(should_suppress: bool): + try: + yield + except _UnknownEnumStringValueParseError as ex: + if should_suppress: + # Here we suppress _UnknownEnumStringValueParseError. + pass + else: + raise def MessageToJson( From b34fb41dd1f44fba509597e45670b052a0260356 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Mon, 20 Jun 2022 18:08:22 +0200 Subject: [PATCH 11/13] just noting the typo above, should be Clemens From 793ad1285790cd4e8211c60d8e69c03e943b7191 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Mon, 20 Jun 2022 18:11:19 +0200 Subject: [PATCH 12/13] add comment --- python/google/protobuf/json_format.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index 62068fa72ca46..c1f622ae3b90a 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -123,6 +123,12 @@ class _UnknownEnumStringValueParseError(ParseError): @contextmanager def _MaybeSuppressUnknownEnumStringValueParseError(should_suppress: bool): + """ + This context manager will suppress _UnknownEnumStringValueParseError if the should_suppress is set to true. Usage: + + with _MaybeSuppressUnknownEnumStringValueParseError(True): + run_some_code_that_might_throw() + """ try: yield except _UnknownEnumStringValueParseError as ex: From e2bfadb685a32a24f19e9aca52f62851310fae66 Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Tue, 21 Jun 2022 15:57:58 +0200 Subject: [PATCH 13/13] address review comments --- .../protobuf/internal/json_format_test.py | 29 ++++++++++++++----- python/google/protobuf/json_format.py | 5 +++- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/python/google/protobuf/internal/json_format_test.py b/python/google/protobuf/internal/json_format_test.py index aa56692e8e165..c8275e6457a8b 100755 --- a/python/google/protobuf/internal/json_format_test.py +++ b/python/google/protobuf/internal/json_format_test.py @@ -909,20 +909,35 @@ def testParseEnumValue(self): 'for enum type protobuf_unittest.TestAllTypes.NestedEnum.', json_format.Parse, '{"optionalNestedEnum": 12345}', message) - def testParseUnknownEnumStringValueProto3(self): - message = json_format_proto3_pb2.TestMessage() - text = '{"enumValue": "UNKNOWN_STRING_VALUE"}' - json_format.Parse(text, message, ignore_unknown_fields=True) - # In proto3, there is no difference between the default value and 0. - self.assertEqual(message.enum_value, 0) - def testParseUnknownEnumStringValueProto2(self): message = json_format_pb2.TestNumbers() + # Note that in src/google/protobuf/util/json_format.proto::TestNumbers + # has a field 'optional MyType a = 1', which is different + # from the test below that are using + # src/google/protobuf/util/json_format_proto3.proto::TestMessage. text = '{"a": "UNKNOWN_STRING_VALUE"}' json_format.Parse(text, message, ignore_unknown_fields=True) # In proto2 we can see that the field was not set at all. self.assertFalse(message.HasField("a")) + def testParseErrorCorrectCatchForUnknownEnumValue(self): + message = json_format_pb2.TestNumbers() + self.assertRaisesRegexp( + json_format.ParseError, + 'Invalid enum value', + json_format.Parse, '{"a": "UNKNOWN_STRING_VALUE"}', message) + + def testParseUnknownEnumStringValueProto3(self): + message = json_format_proto3_pb2.TestMessage() + text = '{"enumValue": "UNKNOWN_STRING_VALUE"}' + json_format.Parse(text, message, ignore_unknown_fields=True) + # In proto3, there is no difference between the default value and 0. + self.assertEqual(message.enum_value, 0) + + # Note that in noom-contracts we banned the repeated enum fields, so this test + # is not adding value for Noom's use of this library. + # Still, we are testing that the behavior here is consistent with what + # binary serialization would do. def testParseUnknownEnumStringValueRepeatedProto3(self): message = json_format_proto3_pb2.TestMessage() text = '{"repeatedEnumValue": ["UNKNOWN_STRING_VALUE", "FOO", "BAR"]}' diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index c1f622ae3b90a..9c55d4e5488c7 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -118,7 +118,10 @@ class ParseError(Error): class _UnknownEnumStringValueParseError(ParseError): - """Thrown if an unknown enum string value is encountered.""" + """ + Thrown if an unknown enum string value is encountered. + We inherit ParseError to avoid breaking the code that might expect ParseError. + """ @contextmanager