diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 87a63d40..156ca5d4 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, "3.10", "3.11"] + python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] architecture: ["x64"] include: - python-version: "3.10" @@ -34,11 +34,12 @@ jobs: restore-keys: | ${{ runner.os }}-pip- ${{ runner.os }}- - - name: Install dependencies py3.7 - if: matrix.python-version == 3.7 - run: pip install -r requirements-dev-3.7.txt + - name: Upgrade setuptools + if: matrix.python-version == 3.12 + run: | + # workaround for 3.12, SEE: https://github.com/pypa/setuptools/issues/3661#issuecomment-1813845177 + pip install --upgrade setuptools - name: Install dependencies - if: matrix.python-version != 3.7 run: pip install -r requirements-dev.txt - name: Install Numpy Dev if: ${{ matrix.numpy-version }} @@ -63,5 +64,6 @@ jobs: if: matrix.python-version == 3.11 with: file: ./coverage.xml + token: ${{ secrets.CODECOV_TOKEN }} env_vars: OS,PYTHON fail_ci_if_error: true diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..20de7532 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,10 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." 
+authors: +- family-names: "Dehpour" + given-names: "Sep" + orcid: "https://orcid.org/0009-0009-5828-4345" +title: "DeepDiff" +version: 7.0.0 +date-released: 2024 +url: "https://github.com/seperman/deepdiff" diff --git a/README.md b/README.md index 23f43845..e4a1f7c4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DeepDiff v 6.7.1 +# DeepDiff v 7.0.0 ![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat) ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) @@ -15,9 +15,9 @@ - [Extract](https://zepworks.com/deepdiff/current/extract.html): Extract an item from a nested Python object using its path. - [commandline](https://zepworks.com/deepdiff/current/commandline.html): Use DeepDiff from commandline. -Tested on Python 3.7+ and PyPy3. +Tested on Python 3.8+ and PyPy3. -- **[Documentation](https://zepworks.com/deepdiff/6.7.1/)** +- **[Documentation](https://zepworks.com/deepdiff/7.0.0/)** ## What is new? @@ -101,16 +101,6 @@ Or to see a more user friendly version, please run: `pytest --cov=deepdiff --cov Thank you! -# Citing - -How to cite this library (APA style): - - Dehpour, S. (2023). DeepDiff (Version 6.7.1) [Software]. Available from https://github.com/seperman/deepdiff. - -How to cite this library (Chicago style): - - Dehpour, Sep. 2023. DeepDiff (version 6.7.1). - # Authors Please take a look at the [AUTHORS](AUTHORS.md) file. 
diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index e15f3476..2f321a7f 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -1,6 +1,6 @@ """This module offers the DeepDiff, DeepSearch, grep, Delta and DeepHash classes.""" # flake8: noqa -__version__ = '6.7.1' +__version__ = '7.0.0' import logging if __name__ == '__main__': diff --git a/deepdiff/base.py b/deepdiff/base.py index 3c812e5c..3de7e9f3 100644 --- a/deepdiff/base.py +++ b/deepdiff/base.py @@ -44,7 +44,8 @@ def get_ignore_types_in_groups(self, ignore_type_in_groups, if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups: ignore_type_in_groups.append(OrderedSet(self.numbers)) - if ignore_type_subclasses: + if not ignore_type_subclasses: + # is_instance method needs tuples. When we look for subclasses, we need them to be tuples ignore_type_in_groups = list(map(tuple, ignore_type_in_groups)) return ignore_type_in_groups diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index eb9b9f11..8665b6a4 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import inspect import logging +import datetime from collections.abc import Iterable, MutableMapping from collections import defaultdict from hashlib import sha1, sha256 @@ -186,7 +187,8 @@ def __init__(self, # the only time it should be set to False is when # testing the individual hash functions for different types of objects. 
self.apply_hash = apply_hash - self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group + self.type_check_func = type_in_type_group if ignore_type_subclasses else type_is_subclass_of_type_group + # self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group self.number_to_string = number_to_string_func or number_to_string self.ignore_private_variables = ignore_private_variables self.encodings = encodings @@ -455,6 +457,10 @@ def _prep_datetime(self, obj): obj = datetime_normalize(self.truncate_datetime, obj) return KEY_TO_VAL_STR.format(type_, obj) + def _prep_date(self, obj): + type_ = 'datetime' # yes still datetime but it doesn't need normalization + return KEY_TO_VAL_STR.format(type_, obj) + def _prep_tuple(self, obj, parent, parents_ids): # Checking to see if it has _fields. Which probably means it is a named # tuple. @@ -505,6 +511,9 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): elif isinstance(obj, times): result = self._prep_datetime(obj) + elif isinstance(obj, datetime.date): + result = self._prep_date(obj) + elif isinstance(obj, numbers): result = self._prep_number(obj) diff --git a/deepdiff/delta.py b/deepdiff/delta.py index d167bb5c..62068dd6 100644 --- a/deepdiff/delta.py +++ b/deepdiff/delta.py @@ -1,5 +1,7 @@ +import copy import logging -from functools import partial +from typing import List, Dict, IO, Callable, Set, Union, Optional +from functools import partial, cmp_to_key from collections.abc import Mapping from copy import deepcopy from ordered_set import OrderedSet @@ -9,10 +11,11 @@ strings, short_repr, numbers, np_ndarray, np_array_factory, numpy_dtypes, get_doc, not_found, numpy_dtype_string_to_type, dict_, + Opcode, FlatDeltaRow, UnkownValueCode, ) from deepdiff.path import ( _path_to_elements, _get_nested_obj, _get_nested_obj_and_force, - GET, GETATTR, parse_path, stringify_path, DEFAULT_FIRST_ELEMENT + GET, GETATTR, parse_path, 
stringify_path, ) from deepdiff.anyset import AnySet @@ -20,7 +23,7 @@ logger = logging.getLogger(__name__) -VERIFICATION_MSG = 'Expected the old value for {} to be {} but it is {}. Error found on: {}' +VERIFICATION_MSG = 'Expected the old value for {} to be {} but it is {}. Error found on: {}. You may want to set force=True, especially if this delta is created by passing flat_rows_list or flat_dict_list' ELEM_NOT_FOUND_TO_ADD_MSG = 'Key or index of {} is not found for {} for setting operation.' TYPE_CHANGE_FAIL_MSG = 'Unable to do the type change for {} from to type {} due to {}' VERIFY_BIDIRECTIONAL_MSG = ('You have applied the delta to an object that has ' @@ -58,38 +61,49 @@ class DeltaNumpyOperatorOverrideError(ValueError): pass -class _ObjDoesNotExist: - pass - - class Delta: __doc__ = doc def __init__( self, - diff=None, - delta_path=None, - delta_file=None, - delta_diff=None, - flat_dict_list=None, - deserializer=pickle_load, - log_errors=True, - mutate=False, - raise_errors=False, - safe_to_import=None, - serializer=pickle_dump, - verify_symmetry=None, - bidirectional=False, - always_include_values=False, - iterable_compare_func_was_used=None, - force=False, + diff: Union[DeepDiff, Mapping, str, bytes, None]=None, + delta_path: Optional[str]=None, + delta_file: Optional[IO]=None, + delta_diff: Optional[dict]=None, + flat_dict_list: Optional[List[Dict]]=None, + flat_rows_list: Optional[List[FlatDeltaRow]]=None, + deserializer: Callable=pickle_load, + log_errors: bool=True, + mutate: bool=False, + raise_errors: bool=False, + safe_to_import: Optional[Set[str]]=None, + serializer: Callable=pickle_dump, + verify_symmetry: Optional[bool]=None, + bidirectional: bool=False, + always_include_values: bool=False, + iterable_compare_func_was_used: Optional[bool]=None, + force: bool=False, ): + # for pickle deserializer: if hasattr(deserializer, '__code__') and 'safe_to_import' in set(deserializer.__code__.co_varnames): _deserializer = deserializer else: def 
_deserializer(obj, safe_to_import=None): - return deserializer(obj) + result = deserializer(obj) + if result.get('_iterable_opcodes'): + _iterable_opcodes = {} + for path, op_codes in result['_iterable_opcodes'].items(): + _iterable_opcodes[path] = [] + for op_code in op_codes: + _iterable_opcodes[path].append( + Opcode( + **op_code + ) + ) + result['_iterable_opcodes'] = _iterable_opcodes + return result + self._reversed_diff = None @@ -125,7 +139,10 @@ def _deserializer(obj, safe_to_import=None): raise ValueError(BINIARY_MODE_NEEDED_MSG.format(e)) from None self.diff = _deserializer(content, safe_to_import=safe_to_import) elif flat_dict_list: - self.diff = self._from_flat_dicts(flat_dict_list) + # Use copy to preserve original value of flat_dict_list in calling module + self.diff = self._from_flat_dicts(copy.deepcopy(flat_dict_list)) + elif flat_rows_list: + self.diff = self._from_flat_rows(copy.deepcopy(flat_rows_list)) else: raise ValueError(DELTA_AT_LEAST_ONE_ARG_NEEDED) @@ -165,6 +182,7 @@ def __add__(self, other): self._do_type_changes() # NOTE: the remove iterable action needs to happen BEFORE # all the other iterables to match the reverse of order of operations in DeepDiff + self._do_iterable_opcodes() self._do_iterable_item_removed() self._do_iterable_item_added() self._do_ignore_order() @@ -381,12 +399,51 @@ def _sort_key_for_item_added(path_and_value): # We only care about the values in the elements not how to get the values. return [i[0] for i in elements] + @staticmethod + def _sort_comparison(left, right): + """ + We use sort comparison instead of _sort_key_for_item_added when we run into comparing element types that can not + be compared with each other, such as None to None. Or integer to string. + """ + # Example elements: [(4.3, 'GET'), ('b', 'GETATTR'), ('a3', 'GET')] + # We only care about the values in the elements not how to get the values. 
+ left_path = [i[0] for i in _path_to_elements(left[0], root_element=None)] + right_path = [i[0] for i in _path_to_elements(right[0], root_element=None)] + try: + if left_path < right_path: + return -1 + elif left_path > right_path: + return 1 + else: + return 0 + except TypeError: + if len(left_path) > len(right_path): + left_path = left_path[:len(right_path)] + elif len(right_path) > len(left_path): + right_path = right_path[:len(left_path)] + for l_elem, r_elem in zip(left_path, right_path): + if type(l_elem) != type(r_elem) or l_elem is None: + l_elem = str(l_elem) + r_elem = str(r_elem) + try: + if l_elem < r_elem: + return -1 + elif l_elem > r_elem: + return 1 + except TypeError: + continue + return 0 + + + def _do_item_added(self, items, sort=True, insert=False): if sort: # sorting items by their path so that the items with smaller index # are applied first (unless `sort` is `False` so that order of # added items is retained, e.g. for dicts). - items = sorted(items.items(), key=self._sort_key_for_item_added) + try: + items = sorted(items.items(), key=self._sort_key_for_item_added) + except TypeError: + items = sorted(items.items(), key=cmp_to_key(self._sort_comparison)) else: items = items.items() @@ -450,6 +507,10 @@ def _get_elements_and_details(self, path): obj=parent, path_for_err_reporting=path, expected_old_value=None, elem=parent_to_obj_elem, action=parent_to_obj_action, next_element=next2_element) else: + # parent = self + # obj = self.root + # parent_to_obj_elem = 'root' + # parent_to_obj_action = GETATTR parent = parent_to_obj_elem = parent_to_obj_action = None obj = self # obj = self.get_nested_obj(obj=self, elements=elements[:-1]) @@ -504,7 +565,11 @@ def _do_item_removed(self, items): """ # Sorting the iterable_item_removed in reverse order based on the paths.
# So that we delete a bigger index before a smaller index - for path, expected_old_value in sorted(items.items(), key=self._sort_key_for_item_added, reverse=True): + try: + sorted_item = sorted(items.items(), key=self._sort_key_for_item_added, reverse=True) + except TypeError: + sorted_item = sorted(items.items(), key=cmp_to_key(self._sort_comparison), reverse=True) + for path, expected_old_value in sorted_item: elem_and_details = self._get_elements_and_details(path) if elem_and_details: elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action = elem_and_details @@ -516,10 +581,9 @@ def _do_item_removed(self, items): try: if action == GET: current_old_value = obj[elem] - look_for_expected_old_value = current_old_value != expected_old_value elif action == GETATTR: current_old_value = getattr(obj, elem) - look_for_expected_old_value = current_old_value != expected_old_value + look_for_expected_old_value = current_old_value != expected_old_value except (KeyError, IndexError, AttributeError, TypeError): look_for_expected_old_value = True @@ -547,25 +611,52 @@ def _find_closest_iterable_element_for_index(self, obj, elem, expected_old_value closest_distance = dist return closest_elem - def _do_item_removedOLD(self, items): - """ - Handle removing items. - """ - # Sorting the iterable_item_removed in reverse order based on the paths. - # So that we delete a bigger index before a smaller index - for path, expected_old_value in sorted(items.items(), key=self._sort_key_for_item_added, reverse=True): - elem_and_details = self._get_elements_and_details(path) - if elem_and_details: - elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action = elem_and_details - else: - continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. 
https://github.com/nedbat/coveragepy/issues/198 - current_old_value = self._get_elem_and_compare_to_old_value( - obj=obj, elem=elem, path_for_err_reporting=path, expected_old_value=expected_old_value, action=action) - if current_old_value is not_found: - continue - self._del_elem(parent, parent_to_obj_elem, parent_to_obj_action, - obj, elements, path, elem, action) - self._do_verify_changes(path, expected_old_value, current_old_value) + def _do_iterable_opcodes(self): + _iterable_opcodes = self.diff.get('_iterable_opcodes', {}) + if _iterable_opcodes: + for path, opcodes in _iterable_opcodes.items(): + transformed = [] + # elements = _path_to_elements(path) + elem_and_details = self._get_elements_and_details(path) + if elem_and_details: + elements, parent, parent_to_obj_elem, parent_to_obj_action, obj, elem, action = elem_and_details + if parent is None: + parent = self + obj = self.root + parent_to_obj_elem = 'root' + parent_to_obj_action = GETATTR + else: + continue # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198 + # import pytest; pytest.set_trace() + obj = self.get_nested_obj(obj=self, elements=elements) + is_obj_tuple = isinstance(obj, tuple) + for opcode in opcodes: + if opcode.tag == 'replace': + # Replace items in list a[i1:i2] with b[j1:j2] + transformed.extend(opcode.new_values) + elif opcode.tag == 'delete': + # Delete items from list a[i1:i2], so we do nothing here + continue + elif opcode.tag == 'insert': + # Insert items from list b[j1:j2] into the new list + transformed.extend(opcode.new_values) + elif opcode.tag == 'equal': + # Items are the same in both lists, so we add them to the result + transformed.extend(obj[opcode.t1_from_index:opcode.t1_to_index]) + if is_obj_tuple: + obj = tuple(obj) + # Making sure that the object is re-instated inside the parent especially if it was immutable + # and we had to turn it into a mutable one. 
In such cases the object has a new id. + self._simple_set_elem_value(obj=parent, path_for_err_reporting=path, elem=parent_to_obj_elem, + value=obj, action=parent_to_obj_action) + else: + obj[:] = transformed + + + + # obj = self.get_nested_obj(obj=self, elements=elements) + # for + def _do_iterable_item_removed(self): iterable_item_removed = self.diff.get('iterable_item_removed', {}) @@ -721,19 +812,21 @@ def _get_reverse_diff(self): elif action == 'values_changed': r_diff[action] = {} for path, path_info in info.items(): - r_diff[action][path] = { + reverse_path = path_info['new_path'] if path_info.get('new_path') else path + r_diff[action][reverse_path] = { 'new_value': path_info['old_value'], 'old_value': path_info['new_value'] } elif action == 'type_changes': r_diff[action] = {} for path, path_info in info.items(): - r_diff[action][path] = { + reverse_path = path_info['new_path'] if path_info.get('new_path') else path + r_diff[action][reverse_path] = { 'old_type': path_info['new_type'], 'new_type': path_info['old_type'], } if 'new_value' in path_info: - r_diff[action][path]['old_value'] = path_info['new_value'] + r_diff[action][reverse_path]['old_value'] = path_info['new_value'] if 'old_value' in path_info: - r_diff[action][path]['new_value'] = path_info['old_value'] + r_diff[action][reverse_path]['new_value'] = path_info['old_value'] elif action == 'iterable_item_moved': r_diff[action] = {} for path, path_info in info.items(): @@ -741,6 +834,23 @@ def _get_reverse_diff(self): r_diff[action][old_path] = { 'new_path': path, 'value': path_info['value'], } + elif action == '_iterable_opcodes': + r_diff[action] = {} + for path, op_codes in info.items(): + r_diff[action][path] = [] + for op_code in op_codes: + tag = op_code.tag + tag = {'delete': 'insert', 'insert': 'delete'}.get(tag, tag) + new_op_code = Opcode( + tag=tag, + t1_from_index=op_code.t2_from_index, + t1_to_index=op_code.t2_to_index, + t2_from_index=op_code.t1_from_index, + 
t2_to_index=op_code.t1_to_index, + new_values=op_code.old_values, + old_values=op_code.new_values, + ) + r_diff[action][path].append(new_op_code) return r_diff def dump(self, file): @@ -777,7 +887,12 @@ def _get_flat_row(action, info, _parse_path, keys_and_funcs): row[new_key] = func(details[key]) else: row[new_key] = details[key] - yield row + yield FlatDeltaRow(**row) + + @staticmethod + def _from_flat_rows(flat_rows_list: List[FlatDeltaRow]): + flat_dict_list = (i._asdict() for i in flat_rows_list) + return Delta._from_flat_dicts(flat_dict_list) @staticmethod def _from_flat_dicts(flat_dict_list): @@ -794,7 +909,8 @@ def _from_flat_dicts(flat_dict_list): action = flat_dict.get("action") path = flat_dict.get("path") value = flat_dict.get('value') - old_value = flat_dict.get('old_value', _ObjDoesNotExist) + new_path = flat_dict.get('new_path') + old_value = flat_dict.get('old_value', UnkownValueCode) if not action: raise ValueError("Flat dict need to include the 'action'.") if path is None: @@ -807,6 +923,10 @@ def _from_flat_dicts(flat_dict_list): else: root_element = ('root', GET) path_str = stringify_path(path, root_element=root_element) # We need the string path + if new_path and new_path != path: + new_path = stringify_path(new_path, root_element=root_element) + else: + new_path = None if action not in result: result[action] = {} if action in {'iterable_items_added_at_indexes', 'iterable_items_removed_at_indexes'}: @@ -823,13 +943,13 @@ def _from_flat_dicts(flat_dict_list): }: result[action][path_str] = value elif action == 'values_changed': - if old_value is _ObjDoesNotExist: + if old_value == UnkownValueCode: result[action][path_str] = {'new_value': value} else: result[action][path_str] = {'new_value': value, 'old_value': old_value} elif action == 'type_changes': - type_ = flat_dict.get('type', _ObjDoesNotExist) - old_type = flat_dict.get('old_type', _ObjDoesNotExist) + type_ = flat_dict.get('type', UnkownValueCode) + old_type = flat_dict.get('old_type', 
UnkownValueCode) result[action][path_str] = {'new_value': value} for elem, elem_value in [ @@ -837,20 +957,16 @@ def _from_flat_dicts(flat_dict_list): ('old_type', old_type), ('old_value', old_value), ]: - if elem_value is not _ObjDoesNotExist: + if elem_value != UnkownValueCode: result[action][path_str][elem] = elem_value elif action == 'iterable_item_moved': - result[action][path_str] = { - 'new_path': stringify_path( - flat_dict.get('new_path', ''), - root_element=('root', GET) - ), - 'value': value, - } + result[action][path_str] = {'value': value} + if new_path: + result[action][path_str]['new_path'] = new_path return result - def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True): + def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True) -> List[FlatDeltaRow]: """ Returns a flat list of actions that is easily machine readable. @@ -904,6 +1020,14 @@ def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True): attribute_added attribute_removed """ + return [ + i._asdict() for i in self.to_flat_rows(include_action_in_path=False, report_type_changes=True) + ] + + def to_flat_rows(self, include_action_in_path=False, report_type_changes=True) -> List[FlatDeltaRow]: + """ + Just like to_flat_dicts but returns FlatDeltaRow Named Tuples + """ result = [] if include_action_in_path: _parse_path = partial(parse_path, include_actions=True) @@ -948,16 +1072,12 @@ def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True): path2.append((index, 'GET')) else: path2.append(index) - result.append( - {'path': path2, 'value': value, 'action': new_action} - ) + result.append(FlatDeltaRow(path=path2, value=value, action=new_action)) elif action in {'set_item_added', 'set_item_removed'}: for path, values in info.items(): path = _parse_path(path) for value in values: - result.append( - {'path': path, 'value': value, 'action': action} - ) + result.append(FlatDeltaRow(path=path, value=value, 
action=action)) elif action == 'dictionary_item_added': for path, value in info.items(): path = _parse_path(path) @@ -972,18 +1092,14 @@ def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True): elif isinstance(value, set) and len(value) == 1: value = value.pop() action = 'set_item_added' - result.append( - {'path': path, 'value': value, 'action': action} - ) + result.append(FlatDeltaRow(path=path, value=value, action=action)) elif action in { 'dictionary_item_removed', 'iterable_item_added', 'iterable_item_removed', 'attribute_removed', 'attribute_added' }: for path, value in info.items(): path = _parse_path(path) - result.append( - {'path': path, 'value': value, 'action': action} - ) + result.append(FlatDeltaRow(path=path, value=value, action=action)) elif action == 'type_changes': if not report_type_changes: action = 'values_changed' @@ -995,6 +1111,8 @@ def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True): keys_and_funcs=keys_and_funcs, ): result.append(row) + elif action == '_iterable_opcodes': + result.extend(self._flatten_iterable_opcodes()) else: for row in self._get_flat_row( action=action, diff --git a/deepdiff/diff.py b/deepdiff/diff.py index d95b747f..9b05e00f 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -11,6 +11,7 @@ from enum import Enum from copy import deepcopy from math import isclose as is_close +from typing import List, Dict, IO, Callable, Set, Union, Any, Pattern, Tuple, Optional from collections.abc import Mapping, Iterable, Sequence from collections import defaultdict from inspect import getmembers @@ -25,14 +26,16 @@ np_ndarray, np_floating, get_numpy_ndarray_rows, OrderedSetPlus, RepeatedTimer, TEXT_VIEW, TREE_VIEW, DELTA_VIEW, detailed__dict__, add_root_to_paths, np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS, - PydanticBaseModel, ) + PydanticBaseModel, Opcode,) from deepdiff.serialization import SerializationMixin from deepdiff.distance import DistanceMixin 
from deepdiff.model import ( RemapDict, ResultDict, TextResult, TreeResult, DiffLevel, DictRelationship, AttributeRelationship, REPORT_KEYS, SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, - SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD, PrettyOrderedSet, ) + SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD, PrettyOrderedSet, + FORCE_DEFAULT, +) from deepdiff.deephash import DeepHash, combine_hashes_lists from deepdiff.base import Base from deepdiff.lfucache import LFUCache, DummyLFU @@ -111,52 +114,52 @@ class DeepDiff(ResultDict, SerializationMixin, DistanceMixin, Base): CACHE_AUTO_ADJUST_THRESHOLD = 0.25 def __init__(self, - t1, - t2, - cache_purge_level=1, - cache_size=0, - cache_tuning_sample_size=0, - custom_operators=None, - cutoff_distance_for_pairs=CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT, - cutoff_intersection_for_pairs=CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT, - encodings=None, - exclude_obj_callback=None, - exclude_obj_callback_strict=None, - exclude_paths=None, - include_obj_callback=None, - include_obj_callback_strict=None, - include_paths=None, - exclude_regex_paths=None, - exclude_types=None, - get_deep_distance=False, - group_by=None, - group_by_sort_key=None, - hasher=None, - hashes=None, - ignore_encoding_errors=False, - ignore_nan_inequality=False, - ignore_numeric_type_changes=False, - ignore_order=False, - ignore_order_func=None, - ignore_private_variables=True, - ignore_string_case=False, - ignore_string_type_changes=False, - ignore_type_in_groups=None, - ignore_type_subclasses=False, - iterable_compare_func=None, - zip_ordered_iterables=False, - log_frequency_in_sec=0, - math_epsilon=None, - max_diffs=None, - max_passes=10000000, - number_format_notation="f", - number_to_string_func=None, - progress_logger=logger.info, - report_repetition=False, - significant_digits=None, - truncate_datetime=None, - verbose_level=1, - view=TEXT_VIEW, + t1: Any, + t2: Any, + cache_purge_level: int=1, + cache_size: int=0, + 
cache_tuning_sample_size: int=0, + custom_operators: Optional[List[Any]] =None, + cutoff_distance_for_pairs: float=CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT, + cutoff_intersection_for_pairs: float=CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT, + encodings: Optional[List[str]]=None, + exclude_obj_callback: Optional[Callable]=None, + exclude_obj_callback_strict: Optional[Callable]=None, + exclude_paths: Union[str, List[str]]=None, + include_obj_callback: Optional[Callable]=None, + include_obj_callback_strict: Optional[Callable]=None, + include_paths: Union[str, List[str]]=None, + exclude_regex_paths: Union[str, List[str], Pattern[str], List[Pattern[str]], None]=None, + exclude_types: Optional[List[Any]]=None, + get_deep_distance: bool=False, + group_by: Union[str, Tuple[str, str], None]=None, + group_by_sort_key: Union[str, Callable, None]=None, + hasher: Optional[Callable]=None, + hashes: Optional[Dict]=None, + ignore_encoding_errors: bool=False, + ignore_nan_inequality: bool=False, + ignore_numeric_type_changes: bool=False, + ignore_order: bool=False, + ignore_order_func: Optional[Callable]=None, + ignore_private_variables: bool=True, + ignore_string_case: bool=False, + ignore_string_type_changes: bool=False, + ignore_type_in_groups: Optional[List[Tuple]]=None, + ignore_type_subclasses: bool=False, + iterable_compare_func: Optional[Callable]=None, + zip_ordered_iterables: bool=False, + log_frequency_in_sec: int=0, + math_epsilon: Optional[float]=None, + max_diffs: Optional[int]=None, + max_passes: int=10000000, + number_format_notation: str="f", + number_to_string_func: Optional[Callable]=None, + progress_logger: Callable=logger.info, + report_repetition: bool=False, + significant_digits: Optional[int]=None, + truncate_datetime: Optional[str]=None, + verbose_level: int=1, + view: str=TEXT_VIEW, _original_type=None, _parameters=None, _shared_parameters=None, @@ -203,7 +206,7 @@ def __init__(self, self.exclude_types = set(exclude_types) if exclude_types else None 
self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance self.ignore_type_subclasses = ignore_type_subclasses - self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group + self.type_check_func = type_in_type_group if ignore_type_subclasses else type_is_subclass_of_type_group self.ignore_string_case = ignore_string_case self.exclude_obj_callback = exclude_obj_callback self.exclude_obj_callback_strict = exclude_obj_callback_strict @@ -297,6 +300,7 @@ def _group_by_sort_key(x): self._parameters = _parameters self.deephash_parameters = self._get_deephash_params() self.tree = TreeResult() + self._iterable_opcodes = {} if group_by and self.is_root: try: original_t1 = t1 @@ -348,23 +352,23 @@ def _get_deephash_params(self): result['number_to_string_func'] = self.number_to_string return result - def _report_result(self, report_type, level, local_tree=None): + def _report_result(self, report_type, change_level, local_tree=None): """ Add a detected change to the reference-style result dictionary. report_type will be added to level. (We'll create the text-style report from there later.) :param report_type: A well defined string key describing the type of change. Examples: "set_item_added", "values_changed" - :param parent: A DiffLevel object describing the objects in question in their + :param change_level: A DiffLevel object describing the objects in question in their before-change and after-change object structure. 
- :rtype: None + :local_tree: None """ - if not self._skip_this(level): - level.report_type = report_type + if not self._skip_this(change_level): + change_level.report_type = report_type tree = self.tree if local_tree is None else local_tree - tree[report_type].add(level) + tree[report_type].add(change_level) def custom_report_result(self, report_type, level, extra_info=None): """ @@ -516,6 +520,8 @@ def _get_clean_to_keys_mapping(self, keys, level): clean_key = KEY_TO_VAL_STR.format(type_, clean_key) else: clean_key = key + if self.ignore_string_case: + clean_key = clean_key.lower() if clean_key in result: logger.warning(('{} and {} in {} become the same key when ignore_numeric_type_changes' 'or ignore_numeric_type_changes are set to be true.').format( @@ -559,7 +565,7 @@ def _diff_dict( else: t1_keys = OrderedSet(t1.keys()) t2_keys = OrderedSet(t2.keys()) - if self.ignore_string_type_changes or self.ignore_numeric_type_changes: + if self.ignore_string_type_changes or self.ignore_numeric_type_changes or self.ignore_string_case: t1_clean_to_keys = self._get_clean_to_keys_mapping(keys=t1_keys, level=level) t2_clean_to_keys = self._get_clean_to_keys_mapping(keys=t2_keys, level=level) t1_keys = OrderedSet(t1_clean_to_keys.keys()) @@ -581,7 +587,9 @@ def _diff_dict( notpresent, t2[key], child_relationship_class=rel_class, - child_relationship_param=key) + child_relationship_param=key, + child_relationship_param2=key, + ) self._report_result(item_added_key, change_level, local_tree=local_tree) for key in t_keys_removed: @@ -593,7 +601,9 @@ def _diff_dict( t1[key], notpresent, child_relationship_class=rel_class, - child_relationship_param=key) + child_relationship_param=key, + child_relationship_param2=key, + ) self._report_result(item_removed_key, change_level, local_tree=local_tree) for key in t_keys_intersect: # key present in both dicts - need to compare values @@ -612,7 +622,9 @@ def _diff_dict( t1[key1], t2[key2], child_relationship_class=rel_class, - 
child_relationship_param=key) + child_relationship_param=key, + child_relationship_param2=key, + ) self._diff(next_level, parents_ids_added, local_tree=local_tree) def _diff_set(self, level, local_tree=None): @@ -766,7 +778,7 @@ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type and self.iterable_compare_func is None ): local_tree_pass = TreeResult() - self._diff_ordered_iterable_by_difflib( + opcodes_with_values = self._diff_ordered_iterable_by_difflib( level, parents_ids=parents_ids, _original_type=_original_type, @@ -785,6 +797,8 @@ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type ) if len(local_tree_pass) >= len(local_tree_pass2): local_tree_pass = local_tree_pass2 + else: + self._iterable_opcodes[level.path(force=FORCE_DEFAULT)] = opcodes_with_values for report_type, levels in local_tree_pass.items(): if levels: self.tree[report_type] |= levels @@ -831,7 +845,9 @@ def _diff_by_forming_pairs_and_comparing_one_by_one( x, notpresent, child_relationship_class=child_relationship_class, - child_relationship_param=i) + child_relationship_param=i, + child_relationship_param2=j, + ) self._report_result('iterable_item_removed', change_level, local_tree=local_tree) elif x is ListItemRemovedOrAdded: # new item added @@ -839,7 +855,9 @@ def _diff_by_forming_pairs_and_comparing_one_by_one( notpresent, y, child_relationship_class=child_relationship_class, - child_relationship_param=j) + child_relationship_param=i, + child_relationship_param2=j, + ) self._report_result('iterable_item_added', change_level, local_tree=local_tree) else: # check if item value has changed @@ -890,7 +908,8 @@ def _diff_by_forming_pairs_and_comparing_one_by_one( x, y, child_relationship_class=child_relationship_class, - child_relationship_param=j + child_relationship_param=i, + child_relationship_param2=j, ) self._diff(next_level, parents_ids_added, local_tree=local_tree) @@ -900,12 +919,23 @@ def _diff_ordered_iterable_by_difflib( seq = 
difflib.SequenceMatcher(isjunk=None, a=level.t1, b=level.t2, autojunk=False) - opcode = seq.get_opcodes() - for tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index in opcode: + opcodes = seq.get_opcodes() + opcodes_with_values = [] + for tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index in opcodes: if tag == 'equal': + opcodes_with_values.append(Opcode( + tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, + )) continue # print('{:7} t1[{}:{}] --> t2[{}:{}] {!r:>8} --> {!r}'.format( # tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, level.t1[t1_from_index:t1_to_index], level.t2[t2_from_index:t2_to_index])) + + opcodes_with_values.append(Opcode( + tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, + old_values = level.t1[t1_from_index: t1_to_index], + new_values = level.t2[t2_from_index: t2_to_index], + )) + if tag == 'replace': self._diff_by_forming_pairs_and_comparing_one_by_one( level, local_tree=local_tree, parents_ids=parents_ids, @@ -919,7 +949,9 @@ def _diff_ordered_iterable_by_difflib( x, notpresent, child_relationship_class=child_relationship_class, - child_relationship_param=index + t1_from_index) + child_relationship_param=index + t1_from_index, + child_relationship_param2=index + t1_from_index, + ) self._report_result('iterable_item_removed', change_level, local_tree=local_tree) elif tag == 'insert': for index, y in enumerate(level.t2[t2_from_index:t2_to_index]): @@ -927,8 +959,12 @@ def _diff_ordered_iterable_by_difflib( notpresent, y, child_relationship_class=child_relationship_class, - child_relationship_param=index + t2_from_index) + child_relationship_param=index + t2_from_index, + child_relationship_param2=index + t2_from_index, + ) self._report_result('iterable_item_added', change_level, local_tree=local_tree) + return opcodes_with_values + def _diff_str(self, level, local_tree=None): """Compare strings""" @@ -956,6 +992,12 @@ def _diff_str(self, level, local_tree=None): except UnicodeDecodeError: 
do_diff = False + if isinstance(level.t1, Enum): + t1_str = level.t1.value + + if isinstance(level.t2, Enum): + t2_str = level.t2.value + if t1_str == t2_str: return @@ -1125,7 +1167,9 @@ def defaultdict_orderedset(): pre_calced_distances = self._precalculate_numpy_arrays_distance( hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type) - if hashes_added and hashes_removed and self.iterable_compare_func and len(hashes_added) > 1 and len(hashes_removed) > 1: + if hashes_added and hashes_removed \ + and self.iterable_compare_func \ + and len(hashes_added) > 0 and len(hashes_removed) > 0: pre_calced_distances = self._precalculate_distance_by_custom_compare_func( hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type) @@ -1246,12 +1290,20 @@ def get_other_pair(hash_value, in_t1=True): other = get_other_pair(hash_value) item_id = id(other.item) indexes = t2_hashtable[hash_value].indexes if other.item is notpresent else other.indexes + # When we report repetitions, we want the child_relationship_param2 only if there is no repetition. + # Because when there is a repetition, we report it in a different way (iterable_items_added_at_indexes for example). + # When there is no repetition, we want child_relationship_param2 so that we report the "new_path" correctly. + if len(t2_hashtable[hash_value].indexes) == 1: + index2 = t2_hashtable[hash_value].indexes[0] + else: + index2 = None for i in indexes: change_level = level.branch_deeper( other.item, t2_hashtable[hash_value].item, child_relationship_class=SubscriptableIterableRelationship, - child_relationship_param=i + child_relationship_param=i, + child_relationship_param2=index2, ) if other.item is notpresent: self._report_result('iterable_item_added', change_level, local_tree=local_tree) @@ -1263,12 +1315,21 @@ def get_other_pair(hash_value, in_t1=True): return # pragma: no cover. This is already covered for addition. 
other = get_other_pair(hash_value, in_t1=False) item_id = id(other.item) + # When we report repetitions, we want the child_relationship_param2 only if there is no repetition. + # Because when there is a repetition, we report it in a different way (iterable_items_added_at_indexes for example). + # When there is no repetition, we want child_relationship_param2 so that we report the "new_path" correctly. + if other.item is notpresent or len(other.indexes) > 1: + index2 = None + else: + index2 = other.indexes[0] for i in t1_hashtable[hash_value].indexes: change_level = level.branch_deeper( t1_hashtable[hash_value].item, other.item, child_relationship_class=SubscriptableIterableRelationship, - child_relationship_param=i) + child_relationship_param=i, + child_relationship_param2=index2, + ) if other.item is notpresent: self._report_result('iterable_item_removed', change_level, local_tree=local_tree) else: @@ -1308,11 +1369,14 @@ def get_other_pair(hash_value, in_t1=True): other = get_other_pair(hash_value) item_id = id(other.item) index = t2_hashtable[hash_value].indexes[0] if other.item is notpresent else other.indexes[0] + index2 = t2_hashtable[hash_value].indexes[0] change_level = level.branch_deeper( other.item, t2_hashtable[hash_value].item, child_relationship_class=SubscriptableIterableRelationship, - child_relationship_param=index) + child_relationship_param=index, + child_relationship_param2=index2, + ) if other.item is notpresent: self._report_result('iterable_item_added', change_level, local_tree=local_tree) else: @@ -1324,12 +1388,15 @@ def get_other_pair(hash_value, in_t1=True): return # pragma: no cover. This is already covered for addition. 
other = get_other_pair(hash_value, in_t1=False) item_id = id(other.item) + index = t1_hashtable[hash_value].indexes[0] + index2 = t1_hashtable[hash_value].indexes[0] if other.item is notpresent else other.indexes[0] change_level = level.branch_deeper( t1_hashtable[hash_value].item, other.item, child_relationship_class=SubscriptableIterableRelationship, - child_relationship_param=t1_hashtable[hash_value].indexes[ - 0]) + child_relationship_param=index, + child_relationship_param2=index2, + ) if other.item is notpresent: self._report_result('iterable_item_removed', change_level, local_tree=local_tree) else: @@ -1358,7 +1425,7 @@ def _diff_numbers(self, level, local_tree=None, report_type_change=True): self._report_result('values_changed', level, local_tree=local_tree) else: # Bernhard10: I use string formatting for comparison, to be consistent with usecases where - # data is read from files that were previousely written from python and + # data is read from files that were previously written from python and # to be consistent with on-screen representation of numbers. 
# Other options would be abs(t1-t2)<10**-self.significant_digits # or math.is_close (python3.5+) @@ -1408,9 +1475,12 @@ def _diff_numpy_array(self, level, parents_ids=frozenset(), local_tree=None): else: try: np.testing.assert_almost_equal(level.t1, level.t2, decimal=self.significant_digits) - return # all good + except TypeError: + np.array_equal(level.t1, level.t2, equal_nan=self.ignore_nan_inequality) except AssertionError: pass # do detailed checking below + else: + return # all good # compare array meta-data _original_type = level.t1.dtype @@ -1441,7 +1511,9 @@ def _diff_numpy_array(self, level, parents_ids=frozenset(), local_tree=None): t1_row, t2_row, child_relationship_class=NumpyArrayRelationship, - child_relationship_param=t1_path) + child_relationship_param=t1_path, + child_relationship_param2=t2_path, + ) self._diff_iterable_in_order(new_level, parents_ids, _original_type=_original_type, local_tree=local_tree) diff --git a/deepdiff/distance.py b/deepdiff/distance.py index fb572d6b..731fa814 100644 --- a/deepdiff/distance.py +++ b/deepdiff/distance.py @@ -156,7 +156,7 @@ def _get_item_length(item, parents_ids=frozenset([])): subitem = new_subitem # internal keys such as _numpy_paths should not count towards the distance - if isinstance(key, strings) and (key.startswith('_') or key == 'deep_distance'): + if isinstance(key, strings) and (key.startswith('_') or key == 'deep_distance' or key == 'new_path'): continue item_id = id(subitem) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 3abcc1c9..cdf34cab 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -7,9 +7,10 @@ import warnings import string import time +import enum +from typing import NamedTuple, Any, List, Optional from ast import literal_eval from decimal import Decimal, localcontext, InvalidOperation as InvalidDecimalOperation -from collections import namedtuple from itertools import repeat from ordered_set import OrderedSet from threading import Timer @@ -171,7 +172,9 @@ def 
get_semvar_as_integer(version): basic_types = strings + numbers + uuids + booleans + (type(None), ) -IndexedHash = namedtuple('IndexedHash', 'indexes item') +class IndexedHash(NamedTuple): + indexes: List + item: Any current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -418,6 +421,7 @@ def number_to_string(number, significant_digits, number_format_notation="f"): ) ) else: + # import pytest; pytest.set_trace() number = round(number=number, ndigits=significant_digits) if significant_digits == 0: @@ -720,3 +724,67 @@ def detailed__dict__(obj, ignore_private_variables=True, ignore_keys=frozenset() if not callable(value): result[key] = value return result + + +def named_tuple_repr(self): + fields = [] + for field, value in self._asdict().items(): + # Only include fields that do not have their default value + if field in self._field_defaults: + if value != self._field_defaults[field]: + fields.append(f"{field}={value!r}") + else: + fields.append(f"{field}={value!r}") + + return f"{self.__class__.__name__}({', '.join(fields)})" + + +class Opcode(NamedTuple): + tag: str + t1_from_index: int + t1_to_index: int + t2_from_index: int + t2_to_index: int + old_values: Optional[List[Any]] = None + new_values: Optional[List[Any]] = None + + __repr__ = __str__ = named_tuple_repr + + +class FlatDataAction(str, enum.Enum): + values_changed = 'values_changed' + type_changes = 'type_changes' + set_item_added = 'set_item_added' + set_item_removed = 'set_item_removed' + dictionary_item_added = 'dictionary_item_added' + dictionary_item_removed = 'dictionary_item_removed' + iterable_item_added = 'iterable_item_added' + iterable_item_removed = 'iterable_item_removed' + iterable_item_moved = 'iterable_item_moved' + iterable_items_inserted = 'iterable_items_inserted' # opcode + iterable_items_deleted = 'iterable_items_deleted' # opcode + iterable_items_replaced = 'iterable_items_replaced' # opcode + iterable_items_equal = 'iterable_items_equal' # opcode + attribute_removed = 
'attribute_removed' + attribute_added = 'attribute_added' + unordered_iterable_item_added = 'unordered_iterable_item_added' + unordered_iterable_item_removed = 'unordered_iterable_item_removed' + + +UnkownValueCode = '*-UNKNOWN-*' + + +class FlatDeltaRow(NamedTuple): + path: List + action: FlatDataAction + value: Optional[Any] = UnkownValueCode + old_value: Optional[Any] = UnkownValueCode + type: Optional[Any] = UnkownValueCode + old_type: Optional[Any] = UnkownValueCode + new_path: Optional[List] = None + t1_from_index: Optional[int] = None + t1_to_index: Optional[int] = None + t2_from_index: Optional[int] = None + t2_to_index: Optional[int] = None + + __repr__ = __str__ = named_tuple_repr diff --git a/deepdiff/model.py b/deepdiff/model.py index 8fe9b444..f375fcde 100644 --- a/deepdiff/model.py +++ b/deepdiff/model.py @@ -152,9 +152,17 @@ def _from_tree_results(self, tree): self._from_tree_deep_distance(tree) self._from_tree_custom_results(tree) - def _from_tree_default(self, tree, report_type): + def _from_tree_default(self, tree, report_type, ignore_if_in_iterable_opcodes=False): if report_type in tree: + for change in tree[report_type]: # report each change + # When we convert from diff to delta result, we care more about opcodes than iterable_item_added or removed + if ( + ignore_if_in_iterable_opcodes + and report_type in {"iterable_item_added", "iterable_item_removed"} + and change.up.path(force=FORCE_DEFAULT) in self["_iterable_opcodes"] + ): + continue # determine change direction (added or removed) # Report t2 (the new one) whenever possible. # In cases where t2 doesn't exist (i.e. stuff removed), report t1. 
@@ -180,6 +188,7 @@ def _from_tree_default(self, tree, report_type): def _from_tree_type_changes(self, tree): if 'type_changes' in tree: for change in tree['type_changes']: + path = change.path(force=FORCE_DEFAULT) if type(change.t1) is type: include_values = False old_type = change.t1 @@ -190,19 +199,26 @@ def _from_tree_type_changes(self, tree): new_type = get_type(change.t2) remap_dict = RemapDict({ 'old_type': old_type, - 'new_type': new_type + 'new_type': new_type, }) - self['type_changes'][change.path( - force=FORCE_DEFAULT)] = remap_dict + if self.verbose_level > 1: + new_path = change.path(use_t2=True, force=FORCE_DEFAULT) + if path != new_path: + remap_dict['new_path'] = new_path + self['type_changes'][path] = remap_dict if self.verbose_level and include_values: remap_dict.update(old_value=change.t1, new_value=change.t2) def _from_tree_value_changed(self, tree): if 'values_changed' in tree and self.verbose_level > 0: for change in tree['values_changed']: + path = change.path(force=FORCE_DEFAULT) the_changed = {'new_value': change.t2, 'old_value': change.t1} - self['values_changed'][change.path( - force=FORCE_DEFAULT)] = the_changed + if self.verbose_level > 1: + new_path = change.path(use_t2=True, force=FORCE_DEFAULT) + if path != new_path: + the_changed['new_path'] = new_path + self['values_changed'][path] = the_changed if 'diff' in change.additional: the_changed.update({'diff': change.additional['diff']}) @@ -279,7 +295,7 @@ def _from_tree_custom_results(self, tree): class DeltaResult(TextResult): ADD_QUOTES_TO_STRINGS = False - def __init__(self, tree_results=None, ignore_order=None, always_include_values=False): + def __init__(self, tree_results=None, ignore_order=None, always_include_values=False, _iterable_opcodes=None): self.ignore_order = ignore_order self.always_include_values = always_include_values @@ -297,6 +313,7 @@ def __init__(self, tree_results=None, ignore_order=None, always_include_values=F "set_item_added": dict_(), 
"iterable_items_added_at_indexes": dict_(), "iterable_items_removed_at_indexes": dict_(), + "_iterable_opcodes": _iterable_opcodes or {}, }) if tree_results: @@ -318,8 +335,8 @@ def _from_tree_results(self, tree): self._from_tree_iterable_item_added_or_removed( tree, 'iterable_item_removed', delta_report_key='iterable_items_removed_at_indexes') else: - self._from_tree_default(tree, 'iterable_item_added') - self._from_tree_default(tree, 'iterable_item_removed') + self._from_tree_default(tree, 'iterable_item_added', ignore_if_in_iterable_opcodes=True) + self._from_tree_default(tree, 'iterable_item_removed', ignore_if_in_iterable_opcodes=True) self._from_tree_iterable_item_moved(tree) self._from_tree_default(tree, 'attribute_added') self._from_tree_default(tree, 'attribute_removed') @@ -370,21 +387,27 @@ def _from_tree_type_changes(self, tree): except Exception: pass + path = change.path(force=FORCE_DEFAULT) + new_path = change.path(use_t2=True, force=FORCE_DEFAULT) remap_dict = RemapDict({ 'old_type': old_type, - 'new_type': new_type + 'new_type': new_type, }) - self['type_changes'][change.path( - force=FORCE_DEFAULT)] = remap_dict + if path != new_path: + remap_dict['new_path'] = new_path + self['type_changes'][path] = remap_dict if include_values or self.always_include_values: remap_dict.update(old_value=change.t1, new_value=change.t2) def _from_tree_value_changed(self, tree): if 'values_changed' in tree: for change in tree['values_changed']: + path = change.path(force=FORCE_DEFAULT) + new_path = change.path(use_t2=True, force=FORCE_DEFAULT) the_changed = {'new_value': change.t2, 'old_value': change.t1} - self['values_changed'][change.path( - force=FORCE_DEFAULT)] = the_changed + if path != new_path: + the_changed['new_path'] = new_path + self['values_changed'][path] = the_changed # If we ever want to store the difflib results instead of the new_value # these lines need to be uncommented and the Delta object needs to be able # to use them. 
@@ -407,9 +430,12 @@ def _from_tree_repetition_change(self, tree): def _from_tree_iterable_item_moved(self, tree): if 'iterable_item_moved' in tree: for change in tree['iterable_item_moved']: - the_changed = {'new_path': change.path(use_t2=True), 'value': change.t2} - self['iterable_item_moved'][change.path( - force=FORCE_DEFAULT)] = the_changed + if ( + change.up.path(force=FORCE_DEFAULT) not in self["_iterable_opcodes"] + ): + the_changed = {'new_path': change.path(use_t2=True), 'value': change.t2} + self['iterable_item_moved'][change.path( + force=FORCE_DEFAULT)] = the_changed class DiffLevel: @@ -693,8 +719,8 @@ def path(self, root="root", force=None, get_parent_too=False, use_t2=False, outp # traverse all levels of this relationship while level and level is not self: # get this level's relationship object - if(use_t2): - next_rel = level.t2_child_rel + if use_t2: + next_rel = level.t2_child_rel or level.t1_child_rel else: next_rel = level.t1_child_rel or level.t2_child_rel # next relationship object to get a formatted param from diff --git a/deepdiff/serialization.py b/deepdiff/serialization.py index d2e85370..f13a33e7 100644 --- a/deepdiff/serialization.py +++ b/deepdiff/serialization.py @@ -45,7 +45,7 @@ from functools import partial from collections.abc import Mapping from deepdiff.helper import ( - strings, get_type, TEXT_VIEW, np_float32, np_float64, np_int32, np_int64 + strings, get_type, TEXT_VIEW, np_float32, np_float64, np_int32, np_int64, np_ndarray, Opcode, py_current_version ) from deepdiff.model import DeltaResult @@ -96,6 +96,7 @@ class UnsupportedFormatErr(TypeError): 'collections.namedtuple', 'collections.OrderedDict', 're.Pattern', + 'deepdiff.helper.Opcode', } @@ -241,7 +242,29 @@ def _to_delta_dict(self, directed=True, report_repetition_required=True, always_ if self.group_by is not None: raise ValueError(DELTA_ERROR_WHEN_GROUP_BY) - result = DeltaResult(tree_results=self.tree, ignore_order=self.ignore_order, 
always_include_values=always_include_values) + if directed and not always_include_values: + _iterable_opcodes = {} + for path, op_codes in self._iterable_opcodes.items(): + _iterable_opcodes[path] = [] + for op_code in op_codes: + new_op_code = Opcode( + tag=op_code.tag, + t1_from_index=op_code.t1_from_index, + t1_to_index=op_code.t1_to_index, + t2_from_index=op_code.t2_from_index, + t2_to_index=op_code.t2_to_index, + new_values=op_code.new_values, + ) + _iterable_opcodes[path].append(new_op_code) + else: + _iterable_opcodes = self._iterable_opcodes + + result = DeltaResult( + tree_results=self.tree, + ignore_order=self.ignore_order, + always_include_values=always_include_values, + _iterable_opcodes=_iterable_opcodes, + ) result.remove_empty_keys() if report_repetition_required and self.ignore_order and not self.report_repetition: raise ValueError(DELTA_IGNORE_ORDER_NEEDS_REPETITION_REPORT) @@ -537,6 +560,12 @@ def _serialize_decimal(value): return float(value) +def _serialize_tuple(value): + if hasattr(value, '_asdict'): # namedtuple + return value._asdict() + return value + + JSON_CONVERTOR = { decimal.Decimal: _serialize_decimal, ordered_set.OrderedSet: list, @@ -548,7 +577,10 @@ def _serialize_decimal(value): np_float32: float, np_float64: float, np_int32: int, - np_int64: int + np_int64: int, + np_ndarray: lambda x: x.tolist(), + tuple: _serialize_tuple, + Mapping: dict, } if PydanticBaseModel: diff --git a/docs/conf.py b/docs/conf.py index 03fcdf5d..d971afe5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,9 +61,9 @@ # built documents. # # The short X.Y version. -version = '6.7.1' +version = '7.0.0' # The full version, including alpha/beta/rc tags. 
-release = '6.7.1' +release = '7.0.0' load_dotenv(override=True) DOC_VERSION = os.environ.get('DOC_VERSION', version) diff --git a/docs/delta.rst b/docs/delta.rst index 751dfba3..fed718c5 100644 --- a/docs/delta.rst +++ b/docs/delta.rst @@ -181,6 +181,13 @@ Flat Dict List You can create a delta object from the list of flat dictionaries that are produced via :ref:`to_flat_dicts_label`. Read more on :ref:`delta_from_flat_dicts_label`. +.. _flat_rows_list_label: + +Flat Rows List +-------------- + +You can create a delta object from the list of flat dictionaries that are produced via :ref:`to_flat_rows_label`. Read more on :ref:`delta_from_flat_rows_label`. + .. _delta_deserializer_label: diff --git a/docs/diff_doc.rst b/docs/diff_doc.rst index 106dd023..9c33d822 100644 --- a/docs/diff_doc.rst +++ b/docs/diff_doc.rst @@ -114,6 +114,10 @@ ignore_type_subclasses: Boolean, default = False :ref:`ignore_type_subclasses_label` ignore type (class) changes when dealing with the subclasses of classes that were marked to be ignored. +.. Note:: + ignore_type_subclasses was incorrectly doing the reverse of its job up until DeepDiff 6.7.1 + Please make sure to flip it in your use cases, when upgrading from older versions to 7.0.0 or above. + ignore_string_case: Boolean, default = False :ref:`ignore_string_case_label` Whether to be case-sensitive or not when comparing strings. By settings ignore_string_case=False, strings will be compared case-insensitively. diff --git a/docs/ignore_types_or_values.rst b/docs/ignore_types_or_values.rst index 7d55b9cb..105ec1ac 100644 --- a/docs/ignore_types_or_values.rst +++ b/docs/ignore_types_or_values.rst @@ -108,6 +108,19 @@ ignore_type_in_groups: Tuple or List of Tuples, default = None 2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] +Example: Ignore Enum to string comparison + >>> from deepdiff import DeepDiff + >>> from enum import Enum + >>> class MyEnum1(Enum): + ... book = "book" + ... cake = "cake" + ... 
+ >>> DeepDiff("book", MyEnum1.book) + {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': 'book', 'new_value': }}} + >>> DeepDiff("book", MyEnum1.book, ignore_type_in_groups=[(Enum, str)]) + {} + + Example: Ignore Type Number - Dictionary that contains float and integer. Note that this is exactly the same as passing ignore_numeric_type_changes=True. >>> from deepdiff import DeepDiff >>> from pprint import pprint @@ -201,6 +214,10 @@ Ignore Type Subclasses ignore_type_subclasses: Boolean, default = False Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. +.. Note:: + ignore_type_subclasses was incorrectly doing the reverse of its job up until DeepDiff 6.7.1 + Please make sure to flip it in your use cases, when upgrading from older versions to 7.0.0 or above. + >>> from deepdiff import DeepDiff >>> class ClassA: ... def __init__(self, x, y): @@ -217,10 +234,10 @@ ignore_type_subclasses: Boolean, default = False >>> obj_a = ClassA(1, 2) >>> obj_c = ClassC(3) >>> - >>> DeepDiff(obj_a, obj_c, ignore_type_in_groups=[(ClassA, ClassB)], ignore_type_subclasses=False) + >>> DeepDiff(obj_a, obj_c, ignore_type_in_groups=[(ClassA, ClassB)], ignore_type_subclasses=True) {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': <__main__.ClassA object at 0x10076a2e8>, 'new_value': <__main__.ClassC object at 0x10082f630>}}} >>> - >>> DeepDiff(obj_a, obj_c, ignore_type_in_groups=[(ClassA, ClassB)], ignore_type_subclasses=True) + >>> DeepDiff(obj_a, obj_c, ignore_type_in_groups=[(ClassA, ClassB)], ignore_type_subclasses=False) {'values_changed': {'root.x': {'new_value': 3, 'old_value': 1}}, 'attribute_removed': [root.y]} diff --git a/docs/index.rst b/docs/index.rst index b337d0c6..e520c144 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,7 @@ contain the root `toctree` directive. -DeepDiff 6.7.1 documentation! +DeepDiff 7.0.0 documentation! 
============================= ******* diff --git a/docs/serialization.rst b/docs/serialization.rst index 2ed67a4a..0f63428a 100644 --- a/docs/serialization.rst +++ b/docs/serialization.rst @@ -106,6 +106,98 @@ Load the diff object from the json pickle dump. Take a look at the above :ref:`to_json_pickle_label` for an example. +.. _delta_to_flat_rows_label: + +Delta Serialize To Flat Rows +---------------------------- + +Sometimes, it is desired to serialize a :ref:`delta_label` object to a list of flat rows. For example, to store them in relation databases. In that case, you can use the Delta.to_flat_rows to achieve the desired outcome. The rows are named tuples and can be converted to dictionaries using `._asdict()` + + >>> from pprint import pprint + >>> from deepdiff import DeepDiff, Delta + >>> t1 = {"key1": "value1"} + >>> t2 = {"field2": {"key2": "value2"}} + >>> diff = DeepDiff(t1, t2, verbose_level=2) + >>> pprint(diff, indent=2) + { 'dictionary_item_added': {"root['field2']": {'key2': 'value2'}}, + 'dictionary_item_removed': {"root['key1']": 'value1'}} + >>> delta = Delta(diff, bidirectional=True) + >>> flat_rows = delta.to_flat_rows() + >>> pprint(flat_rows, indent=2) + [ FlatDeltaRow(path=['field2', 'key2'], action='dictionary_item_added', value='value2'), + FlatDeltaRow(path=['key1'], action='dictionary_item_removed', value='value1')] + +.. note:: + When converting a delta to flat rows, nested dictionaries that have single keys in them are flattened too. 
+ Notice that the diff object says + + { 'dictionary_item_added': {"root['field2']": {'key2': 'value2'}} + + but the flat row is: + + FlatDeltaRow(path=['field2', 'key2'], action='dictionary_item_added', value='value2') + + That means, when you recreate the delta from the flat rows, you need to set force=True to apply the delta: + + >>> t1 + delta == t2 + True + >>> t2 - delta == t1 + True + >>> delta2 = Delta(flat_rows_list=flat_rows, bidirectional=True) + >>> t1 + delta2 == t2 + Expected the old value for root['field2']['key2'] to be None but it is not found. Error found on: 'field2' + False. You may want to set force=True, especially if this delta is created by passing flat_rows_list or flat_dict_list + >>> t1 + delta + {'field2': {'key2': 'value2'}} + >>> t1 + delta2 + {} + >>> delta2 = Delta(flat_rows_list=flat_rows, bidirectional=True, force=True) # We need to set force=True + >>> t1 + delta2 + {'field2': {'key2': 'value2'}} + >>> + + + +Flat Row Specs: + + + class FlatDataAction(str, enum.Enum): + values_changed = 'values_changed' + type_changes = 'type_changes' + set_item_added = 'set_item_added' + set_item_removed = 'set_item_removed' + dictionary_item_added = 'dictionary_item_added' + dictionary_item_removed = 'dictionary_item_removed' + iterable_item_added = 'iterable_item_added' + iterable_item_removed = 'iterable_item_removed' + iterable_item_moved = 'iterable_item_moved' + iterable_items_inserted = 'iterable_items_inserted' # opcode + iterable_items_deleted = 'iterable_items_deleted' # opcode + iterable_items_replaced = 'iterable_items_replaced' # opcode + iterable_items_equal = 'iterable_items_equal' # opcode + attribute_removed = 'attribute_removed' + attribute_added = 'attribute_added' + unordered_iterable_item_added = 'unordered_iterable_item_added' + unordered_iterable_item_removed = 'unordered_iterable_item_removed' + + + UnkownValueCode = '*-UNKNOWN-*' + + + class FlatDeltaRow(NamedTuple): + path: List + action: FlatDataAction + value: 
Optional[Any] = UnkownValueCode + old_value: Optional[Any] = UnkownValueCode + type: Optional[Any] = UnkownValueCode + old_type: Optional[Any] = UnkownValueCode + new_path: Optional[List] = None + t1_from_index: Optional[int] = None + t1_to_index: Optional[int] = None + t2_from_index: Optional[int] = None + t2_to_index: Optional[int] = None + + .. _delta_to_flat_dicts_label: Delta Serialize To Flat Dictionaries @@ -113,6 +205,12 @@ Delta Serialize To Flat Dictionaries Sometimes, it is desired to serialize a :ref:`delta_label` object to a list of flat dictionaries. For example, to store them in relation databases. In that case, you can use the Delta.to_flat_dicts to achieve the desired outcome. +Since None is a valid value, we use a special hard-coded string to signify "unkown": '*-UNKNOWN-*' + +.. note:: + Many new keys are added to the flat dicts in DeepDiff 7.0.0 + You may want to use :ref:`delta_to_flat_rows_label` instead of flat dicts. + For example: >>> from pprint import pprint @@ -123,14 +221,31 @@ For example: >>> pprint(diff, indent=2) { 'dictionary_item_added': {"root['field2']": {'key2': 'value2'}}, 'dictionary_item_removed': {"root['key1']": 'value1'}} - >>> - >>> delta = Delta(diff, verify_symmetry=True) + >>> delta = Delta(diff, bidirectional=True) >>> flat_dicts = delta.to_flat_dicts() >>> pprint(flat_dicts, indent=2) [ { 'action': 'dictionary_item_added', + 'new_path': None, + 'old_type': '*-UNKNOWN-*', + 'old_value': '*-UNKNOWN-*', 'path': ['field2', 'key2'], + 't1_from_index': None, + 't1_to_index': None, + 't2_from_index': None, + 't2_to_index': None, + 'type': '*-UNKNOWN-*', 'value': 'value2'}, - {'action': 'dictionary_item_removed', 'path': ['key1'], 'value': 'value1'}] + { 'action': 'dictionary_item_removed', + 'new_path': None, + 'old_type': '*-UNKNOWN-*', + 'old_value': '*-UNKNOWN-*', + 'path': ['key1'], + 't1_from_index': None, + 't1_to_index': None, + 't2_from_index': None, + 't2_to_index': None, + 'type': '*-UNKNOWN-*', + 'value': 
'value1'}] Example 2: @@ -141,11 +256,31 @@ Example 2: >>> pprint(diff, indent=2) {'iterable_item_added': {'root[2]': 'C', 'root[3]': 'D'}} >>> - >>> delta = Delta(diff, verify_symmetry=True) + >>> delta = Delta(diff, bidirectional=True) >>> flat_dicts = delta.to_flat_dicts() >>> pprint(flat_dicts, indent=2) - [ {'action': 'iterable_item_added', 'path': [2], 'value': 'C'}, - {'action': 'iterable_item_added', 'path': [3], 'value': 'D'}] + [ { 'action': 'iterable_item_added', + 'new_path': None, + 'old_type': '*-UNKNOWN-*', + 'old_value': '*-UNKNOWN-*', + 'path': [2], + 't1_from_index': None, + 't1_to_index': None, + 't2_from_index': None, + 't2_to_index': None, + 'type': '*-UNKNOWN-*', + 'value': 'C'}, + { 'action': 'iterable_item_added', + 'new_path': None, + 'old_type': '*-UNKNOWN-*', + 'old_value': '*-UNKNOWN-*', + 'path': [3], + 't1_from_index': None, + 't1_to_index': None, + 't2_from_index': None, + 't2_to_index': None, + 'type': '*-UNKNOWN-*', + 'value': 'D'}] .. _delta_from_flat_dicts_label: @@ -157,8 +292,7 @@ Delta Load From Flat Dictionaries >>> t3 = ["A", "B"] >>> t4 = ["A", "B", "C", "D"] >>> diff = DeepDiff(t3, t4, verbose_level=2) - >>> delta = Delta(diff, verify_symmetry=True) - DeepDiff Deprecation: use bidirectional instead of verify_symmetry parameter. 
+ >>> delta = Delta(diff, bidirectional=True) >>> flat_dicts = delta.to_flat_dicts() >>> >>> delta2 = Delta(flat_dict_list=flat_dicts) diff --git a/requirements-cli.txt b/requirements-cli.txt index f487dc50..0ba0c7e6 100644 --- a/requirements-cli.txt +++ b/requirements-cli.txt @@ -1,2 +1,2 @@ -click==8.1.3 +click==8.1.7 pyyaml==6.0.1 diff --git a/requirements-dev-3.7.txt b/requirements-dev-3.7.txt deleted file mode 100644 index ba33dbbb..00000000 --- a/requirements-dev-3.7.txt +++ /dev/null @@ -1,15 +0,0 @@ --r requirements.txt --r requirements-cli.txt -bump2version==1.0.1 -jsonpickle==2.2.0 -ipdb==0.13.9 -numpy==1.21.6 -pytest==7.1.2 -python-dotenv==0.20.0 -python-dateutil==2.8.2 -wheel==0.38.1 -tomli==2.0.0 -tomli-w==1.0.0 -pydantic==1.10.8 -python_dateutil==2.8.2 -tomli_w==1.0.0 diff --git a/requirements-dev.txt b/requirements-dev.txt index f7dff137..909a263f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,21 +1,21 @@ -r requirements.txt -r requirements-cli.txt bump2version==1.0.1 -jsonpickle==3.0.2 -coverage==6.5.0 +jsonpickle==3.0.3 +coverage==7.4.4 ipdb==0.13.13 -numpy==1.24.4 -pytest==7.4.2 -pytest-cov==4.1.0 -python-dotenv==0.21.0 -watchdog==2.2.0 +numpy>=1.24.4,<2.0.0 +pytest==8.1.1 +pytest-cov==5.0.0 +python-dotenv==1.0.1 +watchdog>=2.2.0 Sphinx==6.2.1 # We use the html style that is not supported in Sphinx 7 anymore. 
sphinx-sitemap==2.5.1 -sphinxemoji==0.2.0 -flake8==6.1.0 -python-dateutil==2.8.2 -orjson==3.9.7 -wheel==0.41.2 +sphinxemoji>=0.2.0 +flake8==7.0.0 +python-dateutil==2.9.0.post0 +orjson==3.10.0 +wheel==0.43.0 tomli==2.0.1 tomli-w==1.0.0 -pydantic==2.4.2 +pydantic==2.6.4 diff --git a/requirements.txt b/requirements.txt index c8de6a12..6bfbf09f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -ordered-set>=4.0.2,<4.2.0 +ordered-set>=4.1.0,<4.2.0 diff --git a/setup.cfg b/setup.cfg index 5630d3ad..518ad74b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 6.7.1 +current_version = 7.0.0 commit = True tag = True tag_name = {new_version} @@ -15,6 +15,8 @@ exclude = ./data,./src,.svn,CVS,.bzr,.hg,.git,__pycache__ [bumpversion:file:README.md] +[bumpversion:file:CITATION.cff] + [bumpversion:file:docs/index.rst] [bumpversion:file:docs/conf.py] diff --git a/setup.py b/setup.py index 2660a668..dd90d576 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ if os.environ.get('USER', '') == 'vagrant': del os.link -version = '6.7.1' +version = '7.0.0' def get_reqs(filename): @@ -43,7 +43,7 @@ def get_reqs(filename): long_description=long_description, long_description_content_type='text/markdown', install_requires=reqs, - python_requires='>=3.7', + python_requires='>=3.8', extras_require={ "cli": cli_reqs, "optimize": optimize_reqs, @@ -52,11 +52,11 @@ def get_reqs(filename): "Intended Audience :: Developers", "Operating System :: OS Independent", "Topic :: Software Development", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: Implementation :: PyPy", "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License" diff --git a/tests/test_delta.py b/tests/test_delta.py 
index d3a614da..b03b9e60 100644 --- a/tests/test_delta.py +++ b/tests/test_delta.py @@ -1,3 +1,5 @@ +import copy + import pytest import os import io @@ -7,7 +9,7 @@ from unittest import mock from ordered_set import OrderedSet from deepdiff import Delta, DeepDiff -from deepdiff.helper import np, number_to_string, TEXT_VIEW, DELTA_VIEW, CannotCompare +from deepdiff.helper import np, number_to_string, TEXT_VIEW, DELTA_VIEW, CannotCompare, FlatDeltaRow from deepdiff.path import GETATTR, GET from deepdiff.delta import ( ELEM_NOT_FOUND_TO_ADD_MSG, @@ -69,10 +71,10 @@ def test_list_difference_add_delta(self): assert delta + t1 == t2 assert t1 + delta == t2 - flat_result1 = delta.to_flat_dicts() + flat_result1 = delta.to_flat_rows() flat_expected1 = [ - {'path': [3], 'value': 5, 'action': 'iterable_item_added'}, - {'path': [2], 'value': 3, 'action': 'iterable_item_added'}, + FlatDeltaRow(path=[3], value=5, action='iterable_item_added'), + FlatDeltaRow(path=[2], value=3, action='iterable_item_added'), ] assert flat_expected1 == flat_result1 @@ -229,7 +231,7 @@ def test_identical_delta(self): t1 = [1, 3] assert t1 + delta == t1 - flat_result1 = delta.to_flat_dicts() + flat_result1 = delta.to_flat_rows() flat_expected1 = [] assert flat_expected1 == flat_result1 @@ -287,11 +289,11 @@ def test_list_difference3_delta(self): assert delta + t1 == t2 assert t1 + delta == t2 - flat_result1 = delta.to_flat_dicts() + flat_result1 = delta.to_flat_rows() flat_expected1 = [ - {'path': [4, 'b', 2], 'action': 'values_changed', 'value': 2, 'old_value': 5}, - {'path': [4, 'b', 1], 'action': 'values_changed', 'value': 3, 'old_value': 2}, - {'path': [4, 'b', 3], 'value': 5, 'action': 'iterable_item_added'}, + FlatDeltaRow(path=[4, 'b', 2], action='values_changed', value=2, old_value=5), + FlatDeltaRow(path=[4, 'b', 1], action='values_changed', value=3, old_value=2), + FlatDeltaRow(path=[4, 'b', 3], value=5, action='iterable_item_added'), ] assert flat_expected1 == flat_result1 @@ -328,11 
+330,11 @@ def test_list_difference_delta_raises_error_if_prev_value_does_not_match(self): delta2 = Delta(diff, bidirectional=False) assert delta2 + t1 == t2 - flat_result2 = delta2.to_flat_dicts() + flat_result2 = delta2.to_flat_rows() flat_expected2 = [ - {'path': [2], 'action': 'values_changed', 'value': 2, 'old_value': 5}, - {'path': [1], 'action': 'values_changed', 'value': 3, 'old_value': 2}, - {'path': [3], 'value': 5, 'action': 'iterable_item_added'}, + FlatDeltaRow(path=[2], action='values_changed', value=2, old_value=5), + FlatDeltaRow(path=[1], action='values_changed', value=3, old_value=2), + FlatDeltaRow(path=[3], value=5, action='iterable_item_added'), ] assert flat_expected2 == flat_result2 @@ -359,10 +361,10 @@ def test_list_difference_delta1(self): assert delta + t1 == t2 - flat_result = delta.to_flat_dicts() + flat_result = delta.to_flat_rows() flat_expected = [ - {'path': [4, 'b', 2], 'value': 'to_be_removed', 'action': 'iterable_item_removed'}, - {'path': [4, 'b', 3], 'value': 'to_be_removed2', 'action': 'iterable_item_removed'}, + FlatDeltaRow(path=[4, 'b', 2], value='to_be_removed', action='iterable_item_removed'), + FlatDeltaRow(path=[4, 'b', 3], value='to_be_removed2', action='iterable_item_removed'), ] assert flat_expected == flat_result @@ -461,6 +463,154 @@ def test_delta_dict_items_added_retain_order(self): delta2 = Delta(diff=diff, bidirectional=True) assert t1 == t2 - delta2 + def test_delta_constr_flat_dict_list_param_preserve(self): + """ + Issue: https://github.com/seperman/deepdiff/issues/457 + + Scenario: + We found that when a flat_rows_list was provided as a constructor + parameter for instantiating a new delta, the provided flat_rows_list + is unexpectedly being mutated/changed, which can be troublesome for the + caller if they were expecting the flat_rows_list to be used BY COPY + rather than BY REFERENCE. 
+ + Intent: + Preserve the original value of the flat_rows_list variable within the + calling module/function after instantiating the new delta. + """ + + t1 = { + "individualNames": [ + { + "firstName": "Johnathan", + "lastName": "Doe", + "prefix": "COLONEL", + "middleName": "A", + "primaryIndicator": True, + "professionalDesignation": "PHD", + "suffix": "SR", + "nameIdentifier": "00001" + }, + { + "firstName": "John", + "lastName": "Doe", + "prefix": "", + "middleName": "", + "primaryIndicator": False, + "professionalDesignation": "", + "suffix": "SR", + "nameIdentifier": "00002" + } + ] + } + + t2 = { + "individualNames": [ + { + "firstName": "Johnathan", + "lastName": "Doe", + "prefix": "COLONEL", + "middleName": "A", + "primaryIndicator": True, + "professionalDesignation": "PHD", + "suffix": "SR", + "nameIdentifier": "00001" + }, + { + "firstName": "Johnny", + "lastName": "Doe", + "prefix": "", + "middleName": "A", + "primaryIndicator": False, + "professionalDesignation": "", + "suffix": "SR", + "nameIdentifier": "00003" + } + ] + } + + def compare_func(item1, item2, level=None): + print("*** inside compare ***") + it1_keys = item1.keys() + + try: + + # --- individualNames --- + if 'nameIdentifier' in it1_keys and 'lastName' in it1_keys: + match_result = item1['nameIdentifier'] == item2['nameIdentifier'] + print("individualNames - matching result:", match_result) + return match_result + else: + print("Unknown list item...", "matching result:", item1 == item2) + return item1 == item2 + except Exception: + raise CannotCompare() from None + # ---------------------------- End of nested function + + # This diff should show: + # 1 - list item (with an index on the path) being added + # 1 - list item (with an index on the path) being removed + diff = DeepDiff(t1, t2, report_repetition=True, + ignore_order=True, iterable_compare_func=compare_func, cutoff_intersection_for_pairs=1) + + # Now create a flat_rows_list from a delta instantiated from the diff... 
+ temp_delta = Delta(diff, always_include_values=True, bidirectional=True, raise_errors=True) + flat_rows_list = temp_delta.to_flat_rows() + + # Note: the list index is provided on the path value... + assert flat_rows_list == [FlatDeltaRow(path=['individualNames', 1], + value={'firstName': 'Johnny', + 'lastName': 'Doe', + 'prefix': '', + 'middleName': 'A', + 'primaryIndicator': False, + 'professionalDesignation': '', + 'suffix': 'SR', + 'nameIdentifier': '00003'}, + action='unordered_iterable_item_added'), + FlatDeltaRow(path=['individualNames', 1], + value={'firstName': 'John', + 'lastName': 'Doe', + 'prefix': '', + 'middleName': '', + 'primaryIndicator': False, + 'professionalDesignation': '', + 'suffix': 'SR', + 'nameIdentifier': '00002'}, + action='unordered_iterable_item_removed')] + + preserved_flat_dict_list = copy.deepcopy(flat_rows_list) # Use this later for assert comparison + + # Now use the flat_rows_list to instantiate a new delta... + delta = Delta(flat_rows_list=flat_rows_list, + always_include_values=True, bidirectional=True, raise_errors=True) + + # if the flat_rows_list is (unexpectedly) mutated, it will be missing the list index number on the path value. + old_mutated_list_missing_indexes_on_path = [FlatDeltaRow(path=['individualNames'], + value={'firstName': 'Johnny', + 'lastName': 'Doe', + 'prefix': '', + 'middleName': 'A', + 'primaryIndicator': False, + 'professionalDesignation': '', + 'suffix': 'SR', + 'nameIdentifier': '00003'}, + action='unordered_iterable_item_added'), + FlatDeltaRow(path=['individualNames'], + value={'firstName': 'John', + 'lastName': 'Doe', + 'prefix': '', + 'middleName': '', + 'primaryIndicator': False, + 'professionalDesignation': '', + 'suffix': 'SR', + 'nameIdentifier': '00002'}, + action='unordered_iterable_item_removed')] + + # Verify that our fix in the delta constructor worked... 
+ assert flat_rows_list != old_mutated_list_missing_indexes_on_path + assert flat_rows_list == preserved_flat_dict_list + picklalbe_obj_without_item = PicklableClass(11) del picklalbe_obj_without_item.item @@ -761,8 +911,9 @@ def test_delta_cases(self, test_name, t1, t2, deepdiff_kwargs, to_delta_kwargs, 'expected_delta_dict': { 'values_changed': { 'root[6]': { - 'new_value': 5 - } + 'new_value': 5, + 'new_path': 'root[3]', + }, }, 'iterable_items_removed_at_indexes': { 'root': { @@ -785,8 +936,9 @@ def test_delta_cases(self, test_name, t1, t2, deepdiff_kwargs, to_delta_kwargs, 'expected_delta_dict': { 'values_changed': { 'root[3]': { - 'new_value': 4 - } + 'new_value': 4, + 'new_path': 'root[6]', + }, }, 'iterable_items_added_at_indexes': { 'root': { @@ -809,10 +961,12 @@ def test_delta_cases(self, test_name, t1, t2, deepdiff_kwargs, to_delta_kwargs, 'expected_delta_dict': { 'values_changed': { 'root[4]': { - 'new_value': 7 + 'new_value': 7, + 'new_path': 'root[0]' }, 'root[0]': { - 'new_value': 8 + 'new_value': 8, + 'new_path': 'root[4]' } } }, @@ -829,10 +983,12 @@ def test_delta_cases(self, test_name, t1, t2, deepdiff_kwargs, to_delta_kwargs, 'expected_delta_dict': { 'values_changed': { 'root[6]': { - 'new_value': 7 + 'new_value': 7, + 'new_path': 'root[0]' }, 'root[0]': { - 'new_value': 8 + 'new_value': 8, + 'new_path': 'root[6]' } }, 'iterable_items_added_at_indexes': { @@ -864,10 +1020,12 @@ def test_delta_cases(self, test_name, t1, t2, deepdiff_kwargs, to_delta_kwargs, }, 'values_changed': { 'root[6]': { - 'new_value': 7 + 'new_value': 7, + 'new_path': 'root[0]', }, 'root[0]': { - 'new_value': 8 + 'new_value': 8, + 'new_path': 'root[6]', } } }, @@ -1166,6 +1324,15 @@ def test_ignore_order_delta_cases( }, 'expected_result': 't2' }, + 'delta_with_null_as_key': { + 't1': { None: [1, 2], 'foo': [1, 2] }, + 't2': { None: [1], 'foo': [1] }, + 'deepdiff_kwargs': {}, + 'to_delta_kwargs': {}, + 'expected_delta_dict': { + }, + 'expected_result': 't2' + }, } @@ 
-1236,7 +1403,7 @@ def test_list_ignore_order_various_deltas2(self): t1_plus_delta2 = t1 + delta2 assert t1_plus_delta2 == (8, 4, 4, 1, 3, 4, 1, 7) - flat_result1 = delta1.to_flat_dicts() + flat_result1 = delta1.to_flat_rows() flat_expected1 = [ {'path': [0], 'value': 7, 'action': 'unordered_iterable_item_added'}, {'path': [6], 'value': 8, 'action': 'unordered_iterable_item_added'}, @@ -1246,13 +1413,14 @@ def test_list_ignore_order_various_deltas2(self): {'path': [6], 'value': 6, 'action': 'unordered_iterable_item_removed'}, {'path': [0], 'value': 5, 'action': 'unordered_iterable_item_removed'}, ] + flat_expected1 = [FlatDeltaRow(**i) for i in flat_expected1] assert flat_expected1 == flat_result1 - delta1_again = Delta(flat_dict_list=flat_expected1) + delta1_again = Delta(flat_rows_list=flat_expected1) assert t1_plus_delta1 == t1 + delta1_again assert delta1.diff == delta1_again.diff - flat_result2 = delta2.to_flat_dicts() + flat_result2 = delta2.to_flat_rows() flat_expected2 = [ {'path': [1], 'value': 4, 'action': 'unordered_iterable_item_added'}, {'path': [2], 'value': 4, 'action': 'unordered_iterable_item_added'}, @@ -1260,9 +1428,10 @@ def test_list_ignore_order_various_deltas2(self): {'path': [6], 'action': 'values_changed', 'value': 7}, {'path': [0], 'action': 'values_changed', 'value': 8}, ] + flat_expected2 = [FlatDeltaRow(**i) for i in flat_expected2] assert flat_expected2 == flat_result2 - delta2_again = Delta(flat_dict_list=flat_expected2) + delta2_again = Delta(flat_rows_list=flat_expected2) assert delta2.diff == delta2_again.diff def test_delta_view_and_to_delta_dict_are_equal_when_parameteres_passed(self): @@ -1395,19 +1564,23 @@ def test_apply_delta_to_incompatible_object6_value_change(self): t4 = delta2 + t3 assert [] == t4 - flat_result2 = delta2.to_flat_dicts() + flat_result2 = delta2.to_flat_rows() flat_expected2 = [{'path': [1, 2, 0], 'action': 'values_changed', 'value': 5}] + flat_expected2 = [FlatDeltaRow(**i) for i in flat_expected2] + 
assert flat_expected2 == flat_result2 - delta2_again = Delta(flat_dict_list=flat_expected2) + delta2_again = Delta(flat_rows_list=flat_expected2) assert delta2.diff == delta2_again.diff delta3 = Delta(diff, raise_errors=False, bidirectional=True) - flat_result3 = delta3.to_flat_dicts() + flat_result3 = delta3.to_flat_rows() flat_expected3 = [{'path': [1, 2, 0], 'action': 'values_changed', 'value': 5, 'old_value': 4}] + flat_expected3 = [FlatDeltaRow(**i) for i in flat_expected3] + assert flat_expected3 == flat_result3 - delta3_again = Delta(flat_dict_list=flat_expected3) + delta3_again = Delta(flat_rows_list=flat_expected3) assert delta3.diff == delta3_again.diff def test_apply_delta_to_incompatible_object7_type_change(self): @@ -1511,11 +1684,13 @@ def test_delta_to_dict(self): expected = {'iterable_items_removed_at_indexes': {'root': {2: 'B'}}} assert expected == result - flat_result = delta.to_flat_dicts() + flat_result = delta.to_flat_rows() flat_expected = [{'action': 'unordered_iterable_item_removed', 'path': [2], 'value': 'B'}] + flat_expected = [FlatDeltaRow(**i) for i in flat_expected] + assert flat_expected == flat_result - delta_again = Delta(flat_dict_list=flat_expected) + delta_again = Delta(flat_rows_list=flat_expected) assert delta.diff == delta_again.diff def test_class_type_change(self): @@ -1566,38 +1741,44 @@ def test_none_in_delta_object(self): delta = Delta(dump) assert t2 == delta + t1 - flat_result = delta.to_flat_dicts() + flat_result = delta.to_flat_rows() flat_expected = [{'path': ['a'], 'action': 'type_changes', 'value': 1, 'type': int, 'old_type': type(None)}] + flat_expected = [FlatDeltaRow(**i) for i in flat_expected] + assert flat_expected == flat_result - delta_again = Delta(flat_dict_list=flat_expected) + delta_again = Delta(flat_rows_list=flat_expected) assert delta.diff == delta_again.diff with pytest.raises(ValueError) as exc_info: - delta.to_flat_dicts(report_type_changes=False) + delta.to_flat_rows(report_type_changes=False) 
assert str(exc_info.value).startswith("When converting to flat dictionaries, if report_type_changes=False and there are type") delta2 = Delta(dump, always_include_values=True) - flat_result2 = delta2.to_flat_dicts(report_type_changes=False) + flat_result2 = delta2.to_flat_rows(report_type_changes=False) flat_expected2 = [{'path': ['a'], 'action': 'values_changed', 'value': 1}] + flat_expected2 = [FlatDeltaRow(**i) for i in flat_expected2] + assert flat_expected2 == flat_result2 def test_delta_set_in_objects(self): t1 = [[1, OrderedSet(['A', 'B'])], {1}] t2 = [[2, OrderedSet([10, 'C', 'B'])], {1}] delta = Delta(DeepDiff(t1, t2)) - flat_result = delta.to_flat_dicts() + flat_result = delta.to_flat_rows() flat_expected = [ {'path': [0, 1], 'value': 10, 'action': 'set_item_added'}, {'path': [0, 0], 'action': 'values_changed', 'value': 2}, {'path': [0, 1], 'value': 'A', 'action': 'set_item_removed'}, {'path': [0, 1], 'value': 'C', 'action': 'set_item_added'}, ] + flat_expected = [FlatDeltaRow(**i) for i in flat_expected] + # Sorting because otherwise the order is not deterministic for sets, # even though we are using OrderedSet here. It still is converted to set at some point and loses its order. 
- flat_result.sort(key=lambda x: str(x['value'])) + flat_result.sort(key=lambda x: str(x.value)) assert flat_expected == flat_result - delta_again = Delta(flat_dict_list=flat_expected) + delta_again = Delta(flat_rows_list=flat_expected) assert delta.diff == delta_again.diff def test_delta_with_json_serializer(self): @@ -1702,18 +1883,20 @@ def test_compare_func_with_duplicates_removed(self): recreated_t2 = t1 + delta assert t2 == recreated_t2 - flat_result = delta.to_flat_dicts() + flat_result = delta.to_flat_rows() flat_expected = [ {'path': [2], 'value': {'id': 1, 'val': 3}, 'action': 'iterable_item_removed'}, {'path': [0], 'value': {'id': 1, 'val': 3}, 'action': 'iterable_item_removed'}, {'path': [3], 'value': {'id': 3, 'val': 3}, 'action': 'iterable_item_removed'}, - {'path': [0], 'action': 'iterable_item_moved', 'value': {'id': 1, 'val': 3}, 'new_path': [2]}, + {'path': [0], 'action': 'iterable_item_moved', 'value': {'id': 1, 'val': 3}, 'new_path': [2]}, {'path': [3], 'action': 'iterable_item_moved', 'value': {'id': 3, 'val': 3}, 'new_path': [0]}, ] + flat_expected = [FlatDeltaRow(**i) for i in flat_expected] + assert flat_expected == flat_result # Delta.DEBUG = True - delta_again = Delta(flat_dict_list=flat_expected, iterable_compare_func_was_used=True) + delta_again = Delta(flat_rows_list=flat_expected, iterable_compare_func_was_used=True) expected_delta_dict = { 'iterable_item_removed': { 'root[2]': { @@ -1903,14 +2086,15 @@ def test_flatten_dict_with_one_key_added(self): t2 = {"field1": {"joe": "Joe Nobody"}, "field2": {"jimmy": "Jimmy"}} diff = DeepDiff(t1, t2) delta = Delta(diff=diff, always_include_values=True) - flat_result = delta.to_flat_dicts(report_type_changes=False) + flat_result = delta.to_flat_rows(report_type_changes=False) flat_expected = [ {'path': ['field2', 'jimmy'], 'value': 'Jimmy', 'action': 'dictionary_item_added'}, {'path': ['field1', 'joe'], 'action': 'values_changed', 'value': 'Joe Nobody'}, ] + flat_expected = [FlatDeltaRow(**i) 
for i in flat_expected] assert flat_expected == flat_result - delta_again = Delta(flat_dict_list=flat_expected, force=True) # We need to enable force so it creates the dictionary when added to t1 + delta_again = Delta(flat_rows_list=flat_expected, force=True) # We need to enable force so it creates the dictionary when added to t1 expected_data_again_diff = {'dictionary_item_added': {"root['field2']['jimmy']": 'Jimmy'}, 'values_changed': {"root['field1']['joe']": {'new_value': 'Joe Nobody'}}} assert delta.diff != delta_again.diff, "Since a dictionary containing a single field was created, the flat dict acted like one key was added." @@ -1923,14 +2107,15 @@ def test_flatten_dict_with_multiple_keys_added(self): t2 = {"field1": {"joe": "Joe Nobody"}, "field2": {"jimmy": "Jimmy", "sar": "Sarah"}} diff = DeepDiff(t1, t2) delta = Delta(diff=diff, always_include_values=True) - flat_result = delta.to_flat_dicts(report_type_changes=False) + flat_result = delta.to_flat_rows(report_type_changes=False) flat_expected = [ {'path': ['field2'], 'value': {'jimmy': 'Jimmy', 'sar': 'Sarah'}, 'action': 'dictionary_item_added'}, {'path': ['field1', 'joe'], 'action': 'values_changed', 'value': 'Joe Nobody'}, ] + flat_expected = [FlatDeltaRow(**i) for i in flat_expected] assert flat_expected == flat_result - delta_again = Delta(flat_dict_list=flat_expected) + delta_again = Delta(flat_rows_list=flat_expected) assert delta.diff == delta_again.diff def test_flatten_list_with_one_item_added(self): @@ -1939,22 +2124,25 @@ def test_flatten_list_with_one_item_added(self): t3 = {"field1": {"joe": "Joe"}, "field2": ["James", "Jack"]} diff = DeepDiff(t1, t2) delta = Delta(diff=diff, always_include_values=True) - flat_result = delta.to_flat_dicts(report_type_changes=False) + flat_result = delta.to_flat_rows(report_type_changes=False) flat_expected = [{'path': ['field2', 0], 'value': 'James', 'action': 'iterable_item_added'}] + flat_expected = [FlatDeltaRow(**i) for i in flat_expected] assert 
flat_expected == flat_result - delta_again = Delta(flat_dict_list=flat_expected, force=True) + delta_again = Delta(flat_rows_list=flat_expected, force=True) assert {'iterable_item_added': {"root['field2'][0]": 'James'}} == delta_again.diff # delta_again.DEBUG = True assert t2 == t1 + delta_again diff2 = DeepDiff(t2, t3) delta2 = Delta(diff=diff2, always_include_values=True) - flat_result2 = delta2.to_flat_dicts(report_type_changes=False) + flat_result2 = delta2.to_flat_rows(report_type_changes=False) flat_expected2 = [{'path': ['field2', 1], 'value': 'Jack', 'action': 'iterable_item_added'}] + flat_expected2 = [FlatDeltaRow(**i) for i in flat_expected2] + assert flat_expected2 == flat_result2 - delta_again2 = Delta(flat_dict_list=flat_expected2, force=True) + delta_again2 = Delta(flat_rows_list=flat_expected2, force=True) assert {'iterable_item_added': {"root['field2'][1]": 'Jack'}} == delta_again2.diff assert t3 == t2 + delta_again2 @@ -1966,21 +2154,24 @@ def test_flatten_set_with_one_item_added(self): diff = DeepDiff(t1, t2) delta = Delta(diff=diff, always_include_values=True) assert t2 == t1 + delta - flat_result = delta.to_flat_dicts(report_type_changes=False) + flat_result = delta.to_flat_rows(report_type_changes=False) flat_expected = [{'path': ['field2'], 'value': 'James', 'action': 'set_item_added'}] + flat_expected = [FlatDeltaRow(**i) for i in flat_expected] assert flat_expected == flat_result - delta_again = Delta(flat_dict_list=flat_expected, force=True) + delta_again = Delta(flat_rows_list=flat_expected, force=True) assert {'set_item_added': {"root['field2']": {'James'}}} == delta_again.diff assert t2 == t1 + delta_again diff = DeepDiff(t2, t3) delta2 = Delta(diff=diff, always_include_values=True) - flat_result2 = delta2.to_flat_dicts(report_type_changes=False) + flat_result2 = delta2.to_flat_rows(report_type_changes=False) flat_expected2 = [{'path': ['field2'], 'value': 'Jack', 'action': 'set_item_added'}] + flat_expected2 = [FlatDeltaRow(**i) for i 
in flat_expected2] + assert flat_expected2 == flat_result2 - delta_again2 = Delta(flat_dict_list=flat_expected2, force=True) + delta_again2 = Delta(flat_rows_list=flat_expected2, force=True) assert {'set_item_added': {"root['field2']": {'Jack'}}} == delta_again2.diff assert t3 == t2 + delta_again2 @@ -1991,22 +2182,26 @@ def test_flatten_tuple_with_one_item_added(self): diff = DeepDiff(t1, t2) delta = Delta(diff=diff, always_include_values=True) assert t2 == t1 + delta - flat_expected = delta.to_flat_dicts(report_type_changes=False) + flat_expected = delta.to_flat_rows(report_type_changes=False) expected_result = [{'path': ['field2', 0], 'value': 'James', 'action': 'iterable_item_added'}] + expected_result = [FlatDeltaRow(**i) for i in expected_result] + assert expected_result == flat_expected - delta_again = Delta(flat_dict_list=flat_expected, force=True) + delta_again = Delta(flat_rows_list=flat_expected, force=True) assert {'iterable_item_added': {"root['field2'][0]": 'James'}} == delta_again.diff assert {'field1': {'joe': 'Joe'}, 'field2': ['James']} == t1 + delta_again, "We lost the information about tuple when we convert to flat dict." 
diff = DeepDiff(t2, t3) delta2 = Delta(diff=diff, always_include_values=True, force=True) - flat_result2 = delta2.to_flat_dicts(report_type_changes=False) + flat_result2 = delta2.to_flat_rows(report_type_changes=False) expected_result2 = [{'path': ['field2', 1], 'value': 'Jack', 'action': 'iterable_item_added'}] + expected_result2 = [FlatDeltaRow(**i) for i in expected_result2] + assert expected_result2 == flat_result2 assert t3 == t2 + delta2 - delta_again2 = Delta(flat_dict_list=flat_result2) + delta_again2 = Delta(flat_rows_list=flat_result2) assert {'iterable_item_added': {"root['field2'][1]": 'Jack'}} == delta_again2.diff assert t3 == t2 + delta_again2 @@ -2015,15 +2210,17 @@ def test_flatten_list_with_multiple_item_added(self): t2 = {"field1": {"joe": "Joe"}, "field2": ["James", "Jack"]} diff = DeepDiff(t1, t2) delta = Delta(diff=diff, always_include_values=True) - flat_result = delta.to_flat_dicts(report_type_changes=False) + flat_result = delta.to_flat_rows(report_type_changes=False) expected_result = [{'path': ['field2'], 'value': ['James', 'Jack'], 'action': 'dictionary_item_added'}] + expected_result = [FlatDeltaRow(**i) for i in expected_result] + assert expected_result == flat_result delta2 = Delta(diff=diff, bidirectional=True, always_include_values=True) - flat_result2 = delta2.to_flat_dicts(report_type_changes=False) + flat_result2 = delta2.to_flat_rows(report_type_changes=False) assert expected_result == flat_result2 - delta_again = Delta(flat_dict_list=flat_result) + delta_again = Delta(flat_rows_list=flat_result) assert delta.diff == delta_again.diff def test_flatten_attribute_added(self): @@ -2031,11 +2228,13 @@ def test_flatten_attribute_added(self): t2 = PicklableClass(10) diff = DeepDiff(t1, t2) delta = Delta(diff=diff, always_include_values=True) - flat_result = delta.to_flat_dicts(report_type_changes=False) + flat_result = delta.to_flat_rows(report_type_changes=False) expected_result = [{'path': ['item'], 'value': 10, 'action': 
'attribute_added'}] + expected_result = [FlatDeltaRow(**i) for i in expected_result] + assert expected_result == flat_result - delta_again = Delta(flat_dict_list=flat_result) + delta_again = Delta(flat_rows_list=flat_result) assert delta.diff == delta_again.diff def test_flatten_when_simple_type_change(self): @@ -2050,20 +2249,24 @@ def test_flatten_when_simple_type_change(self): assert expected_diff == diff delta = Delta(diff=diff) with pytest.raises(ValueError) as exc_info: - delta.to_flat_dicts(report_type_changes=False) + delta.to_flat_rows(report_type_changes=False) assert str(exc_info.value).startswith("When converting to flat dictionaries") delta2 = Delta(diff=diff, always_include_values=True) - flat_result2 = delta2.to_flat_dicts(report_type_changes=False) + flat_result2 = delta2.to_flat_rows(report_type_changes=False) expected_result2 = [{'path': [2], 'action': 'values_changed', 'value': 3}] + expected_result2 = [FlatDeltaRow(**i) for i in expected_result2] + assert expected_result2 == flat_result2 delta3 = Delta(diff=diff, always_include_values=True, bidirectional=True) - flat_result3 = delta3.to_flat_dicts(report_type_changes=False) + flat_result3 = delta3.to_flat_rows(report_type_changes=False) + expected_result3 = [{'path': [2], 'action': 'values_changed', 'value': 3, 'old_value': '3'}] + expected_result3 = [FlatDeltaRow(**i) for i in expected_result3] assert expected_result3 == flat_result3 - delta_again = Delta(flat_dict_list=flat_result3) + delta_again = Delta(flat_rows_list=flat_result3) assert {'values_changed': {'root[2]': {'new_value': 3, 'old_value': '3'}}} == delta_again.diff def test_subtract_delta1(self): @@ -2082,7 +2285,7 @@ def test_subtract_delta_made_from_flat_dicts1(self): t2 = {'field_name1': []} diff = DeepDiff(t1, t2) delta = Delta(diff=diff, bidirectional=True) - flat_dict_list = delta.to_flat_dicts(include_action_in_path=False, report_type_changes=True) + flat_rows_list = delta.to_flat_rows(include_action_in_path=False, 
report_type_changes=True) expected_flat_dicts = [{ 'path': ['field_name1', 0], 'value': 'xxx', @@ -2092,16 +2295,18 @@ def test_subtract_delta_made_from_flat_dicts1(self): 'value': 'yyy', 'action': 'iterable_item_removed' }] - assert expected_flat_dicts == flat_dict_list + expected_flat_dicts = [FlatDeltaRow(**i) for i in expected_flat_dicts] - delta1 = Delta(flat_dict_list=flat_dict_list, bidirectional=True, force=True) + assert expected_flat_dicts == flat_rows_list + + delta1 = Delta(flat_rows_list=flat_rows_list, bidirectional=True, force=True) assert t1 == t2 - delta1 - delta2 = Delta(flat_dict_list=[flat_dict_list[0]], bidirectional=True, force=True) + delta2 = Delta(flat_rows_list=[flat_rows_list[0]], bidirectional=True, force=True) middle_t = t2 - delta2 assert {'field_name1': ['xxx']} == middle_t - delta3 = Delta(flat_dict_list=[flat_dict_list[1]], bidirectional=True, force=True) + delta3 = Delta(flat_rows_list=[flat_rows_list[1]], bidirectional=True, force=True) assert t1 == middle_t - delta3 def test_subtract_delta_made_from_flat_dicts2(self): @@ -2109,7 +2314,7 @@ def test_subtract_delta_made_from_flat_dicts2(self): t2 = {'field_name1': ['xxx', 'yyy']} diff = DeepDiff(t1, t2) delta = Delta(diff=diff, bidirectional=True) - flat_dict_list = delta.to_flat_dicts(include_action_in_path=False, report_type_changes=True) + flat_rows_list = delta.to_flat_rows(include_action_in_path=False, report_type_changes=True) expected_flat_dicts = [{ 'path': ['field_name1', 0], 'value': 'xxx', @@ -2119,16 +2324,182 @@ def test_subtract_delta_made_from_flat_dicts2(self): 'value': 'yyy', 'action': 'iterable_item_added' }] - assert expected_flat_dicts == flat_dict_list + expected_flat_dicts = [FlatDeltaRow(**i) for i in expected_flat_dicts] + + assert expected_flat_dicts == flat_rows_list - delta1 = Delta(flat_dict_list=flat_dict_list, bidirectional=True, force=True) + delta1 = Delta(flat_rows_list=flat_rows_list, bidirectional=True, force=True) assert t1 == t2 - delta1 # We 
need to subtract the changes in the reverse order if we want to feed the flat dict rows individually to Delta - delta2 = Delta(flat_dict_list=[flat_dict_list[0]], bidirectional=True, force=True) + delta2 = Delta(flat_rows_list=[flat_rows_list[0]], bidirectional=True, force=True) middle_t = t2 - delta2 assert {'field_name1': ['yyy']} == middle_t - delta3 = Delta(flat_dict_list=[flat_dict_list[1]], bidirectional=True, force=True) + delta3 = Delta(flat_rows_list=[flat_rows_list[1]], bidirectional=True, force=True) delta3.DEBUG = True assert t1 == middle_t - delta3 + + def test_list_of_alphabet_and_its_delta(self): + l1 = "A B C D E F G D H".split() + l2 = "B C X D H Y Z".split() + diff = DeepDiff(l1, l2) + + # Problem: The index of values_changed should be either all for AFTER removals or BEFORE removals. + # What we have here is that F & G transformation to Y and Z is not compatible with A and E removal + # it is really meant for the removals to happen first, and then have indexes in L2 for values changing + # rather than indexes in L1. Here what we need to have is: + # A B C D E F G D H + # A B C-X-E + # B C D F G D H # removal + + # What we really need is to report is as it is in difflib for delta specifically: + # A B C D E F G D H + # B C D E F G D H delete t1[0:1] --> t2[0:0] ['A'] --> [] + # B C D E F G D H equal t1[1:3] --> t2[0:2] ['B', 'C'] --> ['B', 'C'] + # B C X D H replace t1[3:7] --> t2[2:3] ['D', 'E', 'F', 'G'] --> ['X'] + # B C X D H equal t1[7:9] --> t2[3:5] ['D', 'H'] --> ['D', 'H'] + # B C X D H Y Z insert t1[9:9] --> t2[5:7] [] --> ['Y', 'Z'] + + # So in this case, it needs to also include information about what stays equal in the delta + # NOTE: the problem is that these operations need to be performed in a specific order. + # DeepDiff removes that order and just buckets all insertions vs. replace vs. delete in their own buckets. 
+ # For times that we use Difflib, we may want to keep the information for the array_change key + # just for the sake of delta, but not for reporting in deepdiff itself. + # that way we can re-apply the changes as they were reported in delta. + + delta = Delta(diff) + assert l2 == l1 + delta + with pytest.raises(ValueError) as exc_info: + l1 == l2 - delta + assert "Please recreate the delta with bidirectional=True" == str(exc_info.value) + + delta2 = Delta(diff, bidirectional=True) + assert l2 == l1 + delta2 + assert l1 == l2 - delta2 + + dump = Delta(diff, bidirectional=True).dumps() + delta3 = Delta(dump, bidirectional=True) + + assert l2 == l1 + delta3 + assert l1 == l2 - delta3 + + dump4 = Delta(diff, bidirectional=True, serializer=json_dumps).dumps() + delta4 = Delta(dump4, bidirectional=True, deserializer=json_loads) + + assert l2 == l1 + delta4 + assert l1 == l2 - delta4 + + def test_delta_flat_rows(self): + t1 = {"key1": "value1"} + t2 = {"field2": {"key2": "value2"}} + diff = DeepDiff(t1, t2, verbose_level=2) + delta = Delta(diff, bidirectional=True) + assert t1 + delta == t2 + flat_rows = delta.to_flat_rows() + # we need to set force=True because when we create flat rows, if a nested + # dictionary with a single key is created, the path in the flat row will be + # the path to the leaf node. 
+ delta2 = Delta(flat_rows_list=flat_rows, bidirectional=True, force=True) + assert t1 + delta2 == t2 + + def test_flat_dict_and_deeply_nested_dict(self): + beforeImage = [ + { + "usage": "Mailing", + "standardization": "YES", + "primaryIndicator": True, + "addressIdentifier": "Z8PDWBG42YC", + "addressLines": ["871 PHILLIPS FERRY RD"], + }, + { + "usage": "Residence", + "standardization": "YES", + "primaryIndicator": False, + "addressIdentifier": "Z8PDWBG42YC", + "addressLines": ["871 PHILLIPS FERRY RD"], + }, + { + "usage": "Mailing", + "standardization": None, + "primaryIndicator": False, + "addressIdentifier": "MHPP3BY0BYC", + "addressLines": ["871 PHILLIPS FERRY RD", "APT RV92"], + }, + ] + allAfterImage = [ + { + "usage": "Residence", + "standardization": "NO", + "primaryIndicator": False, + "addressIdentifier": "Z8PDWBG42YC", + "addressLines": ["871 PHILLIPS FERRY RD"], + }, + { + "usage": "Mailing", + "standardization": None, + "primaryIndicator": False, + "addressIdentifier": "MHPP3BY0BYC", + "addressLines": ["871 PHILLIPS FERRY RD", "APT RV92"], + }, + { + "usage": "Mailing", + "standardization": "NO", + "primaryIndicator": True, + "addressIdentifier": "Z8PDWBG42YC", + "addressLines": ["871 PHILLIPS FERRY RD"], + }, + ] + + diff = DeepDiff( + beforeImage, + allAfterImage, + ignore_order=True, + report_repetition=True, + ) + # reverse_diff = DeepDiff( + # allAfterImage, + # beforeImage, + # ignore_order=True, + # report_repetition=True, + # ) + delta = Delta( + diff, always_include_values=True, bidirectional=True + ) + # reverse_delta = Delta( + # reverse_diff, always_include_values=True, bidirectional=True + # ) + allAfterImageAgain = beforeImage + delta + diff2 = DeepDiff(allAfterImage, allAfterImageAgain, ignore_order=True) + assert not diff2 + + # print("\ndelta.diff") + # pprint(delta.diff) + # print("\ndelta._get_reverse_diff()") + # pprint(delta._get_reverse_diff()) + # print("\nreverse_delta.diff") + # pprint(reverse_delta.diff) + beforeImageAgain = 
allAfterImage - delta + diff3 = DeepDiff(beforeImage, beforeImageAgain, ignore_order=True) + assert not diff3 + + # ------ now let's recreate the delta from flat dicts ------- + + flat_dict_list = delta.to_flat_dicts() + + delta2 = Delta( + flat_dict_list=flat_dict_list, + always_include_values=True, + bidirectional=True, + raise_errors=False, + force=True, + ) + # print("\ndelta from flat dicts") + # pprint(delta2.diff) + allAfterImageAgain2 = beforeImage + delta2 + diff4 = DeepDiff(allAfterImage, allAfterImageAgain2, ignore_order=True) + assert not diff4 + + beforeImageAgain2 = allAfterImage - delta2 + diff4 = DeepDiff(beforeImage, beforeImageAgain2, ignore_order=True) + assert not diff4 diff --git a/tests/test_diff_numpy.py b/tests/test_diff_numpy.py index c971f0b9..ad9ecb94 100644 --- a/tests/test_diff_numpy.py +++ b/tests/test_diff_numpy.py @@ -119,6 +119,12 @@ 'deepdiff_kwargs': {'significant_digits': 3}, 'expected_result': {}, }, + 'numpy_almost_equal2': { + 't1': np.array(['a', 'b'], dtype=object), + 't2': np.array(['a', 'b'], dtype=object), + 'deepdiff_kwargs': {'significant_digits': 6}, + 'expected_result': {}, + }, 'numpy_different_shape': { 't1': np.array([[1, 1], [2, 3]]), 't2': np.array([1]), diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index d1e305a3..84cc5151 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -10,6 +10,7 @@ from deepdiff import DeepDiff from deepdiff.helper import pypy3, PydanticBaseModel from tests import CustomClass +from deepdiff.helper import np_float64 logging.disable(logging.CRITICAL) @@ -297,6 +298,28 @@ def test_string_difference_ignore_case(self): result = {} assert result == ddiff + def test_string_dict_key_ignore_case(self): + t1 = {'User': {'AboutMe': 1, 'ALIAS': 1}} + t2 = {'User': {'Alias': 1, 'AboutMe': 1}} + ddiff = DeepDiff(t1, t2) + result = {'dictionary_item_added': ["root['User']['Alias']"], 'dictionary_item_removed': ["root['User']['ALIAS']"]} + assert result == ddiff + + 
ddiff = DeepDiff(t1, t2, ignore_string_case=True) + result = {} + assert result == ddiff + + def test_string_list_ignore_case(self): + t1 = ['AboutMe', 'ALIAS'] + t2 = ['aboutme', 'alias'] + ddiff = DeepDiff(t1, t2) + result = {'values_changed': {'root[0]': {'new_value': 'aboutme', 'old_value': 'AboutMe'}, 'root[1]': {'new_value': 'alias', 'old_value': 'ALIAS'}}} + assert result == ddiff + + ddiff = DeepDiff(t1, t2, ignore_string_case=True) + result = {} + assert result == ddiff + def test_diff_quote_in_string(self): t1 = { "a']['b']['c": 1 @@ -624,6 +647,27 @@ class MyEnum(Enum): } assert ddiff == result + def test_enum_ignore_type_change(self): + + class MyEnum1(Enum): + book = "book" + cake = "cake" + + class MyEnum2(str, Enum): + book = "book" + cake = "cake" + + diff = DeepDiff("book", MyEnum1.book) + expected = { + 'type_changes': {'root': {'old_type': str, 'new_type': MyEnum1, 'old_value': 'book', 'new_value': MyEnum1.book}}} + assert expected == diff + + diff2 = DeepDiff("book", MyEnum1.book, ignore_type_in_groups=[(Enum, str)]) + assert not diff2 + + diff3 = DeepDiff("book", MyEnum2.book, ignore_type_in_groups=[(Enum, str)]) + assert not diff3 + def test_precompiled_regex(self): pattern_1 = re.compile('foo') @@ -789,11 +833,11 @@ def __repr__(self): obj_a = ClassA(1, 2) obj_c = ClassC(3) - ddiff = DeepDiff(obj_a, obj_c, ignore_type_in_groups=[(ClassA, ClassB)], ignore_type_subclasses=False) + ddiff = DeepDiff(obj_a, obj_c, ignore_type_in_groups=[(ClassA, ClassB)], ignore_type_subclasses=True) result = {'type_changes': {'root': {'old_type': ClassA, 'new_type': ClassC, 'old_value': obj_a, 'new_value': obj_c}}} assert result == ddiff - ddiff = DeepDiff(obj_a, obj_c, ignore_type_in_groups=[(ClassA, ClassB)], ignore_type_subclasses=True) + ddiff = DeepDiff(obj_a, obj_c, ignore_type_in_groups=[(ClassA, ClassB)], ignore_type_subclasses=False) result = {'values_changed': {'root.x': {'new_value': 3, 'old_value': 1}}, 'attribute_removed': ['root.y']} assert result 
== ddiff @@ -1255,6 +1299,7 @@ def test_negative_significant_digits(self): (Decimal('100000.1'), 100000.1, 5, {}), (Decimal('100000'), 100000.1, 0, {}), (Decimal('100000'), 100000.1, 1, {'values_changed': {'root': {'new_value': 100000.1, 'old_value': Decimal('100000')}}}), + (np_float64(123.93420232), 123.93420232, 0, {}), ]) def test_decimal_digits(self, t1, t2, significant_digits, expected_result): ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True, ignore_string_type_changes=True, significant_digits=significant_digits) diff --git a/tests/test_hash.py b/tests/test_hash.py index bbf2c0ef..af6a30fe 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -86,6 +86,27 @@ def test_datetime(self): b_hash = DeepHash(b) assert a_hash[a] == b_hash[b] + def test_date1(self): + date = datetime.date(2024, 2, 1) + date_hash = DeepHash(date) + assert 'd90e95901f85ca09b2536d3cb81a49747c3a4fb14906d6fa0d492713ebb4309c' == date_hash[date] + + def test_date2(self): + item = {'due_date': datetime.date(2024, 2, 1)} + + result = DeepHash( + item, + significant_digits=12, + number_format_notation='f', + ignore_numeric_type_changes=True, + ignore_type_in_groups=[{int, float, complex, datetime.datetime, datetime.date, datetime.timedelta, datetime.time}], + ignore_type_subclasses=False, + ignore_encoding_errors=False, + ignore_repetition=True, + number_to_string_func=number_to_string, + ) + assert 'e0d7ec984a0eda44ceb1e3c595f9b805530d715c779483e63a72c67cbce68615' == result[item] + def test_datetime_truncate(self): a = datetime.datetime(2020, 5, 17, 22, 15, 34, 913070) b = datetime.datetime(2020, 5, 17, 22, 15, 39, 296583) @@ -474,21 +495,20 @@ class ClassC(ClassB): burrito = Burrito() taco = Taco() - @pytest.mark.parametrize("t1, t2, ignore_type_in_groups, ignore_type_subclasses, is_qual", [ - (taco, burrito, [], False, False), - (taco, burrito, [(Taco, Burrito)], False, True), - ([taco], [burrito], [(Taco, Burrito)], False, True), - ([obj_a], [obj_c], [(ClassA, ClassB)], 
False, False), - ([obj_a], [obj_c], [(ClassA, ClassB)], True, True), - ([obj_b], [obj_c], [(ClassB, )], True, True), + @pytest.mark.parametrize("test_num, t1, t2, ignore_type_in_groups, ignore_type_subclasses, is_qual", [ + (1, taco, burrito, [], False, False), + (2, taco, burrito, [(Taco, Burrito)], False, True), + (3, [taco], [burrito], [(Taco, Burrito)], False, True), + (4, [obj_a], [obj_c], [(ClassA, ClassB)], False, True), + (5, [obj_a], [obj_c], [(ClassA, ClassB)], True, False), + (6, [obj_b], [obj_c], [(ClassB, )], True, False), ]) - def test_objects_with_same_content(self, t1, t2, ignore_type_in_groups, ignore_type_subclasses, is_qual): - + def test_objects_with_same_content(self, test_num, t1, t2, ignore_type_in_groups, ignore_type_subclasses, is_qual): t1_result = DeepHashPrep(t1, ignore_type_in_groups=ignore_type_in_groups, ignore_type_subclasses=ignore_type_subclasses) t2_result = DeepHashPrep(t2, ignore_type_in_groups=ignore_type_in_groups, ignore_type_subclasses=ignore_type_subclasses) - assert is_qual == (t1_result[t1] == t2_result[t2]) + assert is_qual == (t1_result[t1] == t2_result[t2]), f"test_objects_with_same_content #{test_num} failed." 
def test_custom_object(self): cc_a = CustomClass2(prop1=["a"], prop2=["b"]) diff --git a/tests/test_ignore_order.py b/tests/test_ignore_order.py index 3385293f..e01e2fad 100644 --- a/tests/test_ignore_order.py +++ b/tests/test_ignore_order.py @@ -1,5 +1,6 @@ import pytest import re +import datetime from unittest import mock from deepdiff.helper import number_to_string, CannotCompare from deepdiff import DeepDiff @@ -149,7 +150,7 @@ def test_nested_list_with_dictionarry_difference_ignore_order(self): result = {} assert result == ddiff - def test_list_difference_ignore_order_report_repetition(self): + def test_list_difference_ignore_order_report_repetition1(self): t1 = [1, 3, 1, 4] t2 = [4, 4, 1] ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) @@ -176,6 +177,59 @@ def test_list_difference_ignore_order_report_repetition(self): } assert result == ddiff + @pytest.mark.skip + def test_list_difference_ignore_order_report_repetition2(self): + t1 = [1, 1, 1] + t2 = [2, 2] + ddiff = DeepDiff(t1, t2, ignore_order=True) + result = {'values_changed': {'root[0]': {'new_value': 2, 'old_value': 1}}} + assert result == ddiff + + ddiff2 = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, cutoff_intersection_for_pairs=1, cutoff_distance_for_pairs=1) + result2 = { + 'iterable_item_removed': { + 'root[0]': 1, + 'root[1]': 1, + 'root[2]': 1 + }, + 'iterable_item_added': { + 'root[0]': 2, + 'root[1]': 2, + }, + } + assert result2 == ddiff2 + + @pytest.mark.skip + def test_list_difference_ignore_order_report_repetition3(self): + t1 = [{"id": 1}, {"id": 1}, {"id": 1}] + t2 = [{"id": 1, "name": 1}] + + ddiff2 = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, cutoff_intersection_for_pairs=1, cutoff_distance_for_pairs=1) + result2 = { + 'iterable_item_removed': { + 'root[1]': {"id": 1}, + 'root[2]': {"id": 1}, + }, + 'dictionary_item_added': ["root[0]['name']"] + } + assert result2 == ddiff2 + + @pytest.mark.skip + def 
test_list_difference_ignore_order_report_repetition4(self): + t1 = [{"id": 1}, {"id": 1}, {"id": 1}, {"name": "Joe"}, {"name": "Joe"}] + t2 = [{"id": 1, "name": 1}, {"id": 1, "name": "Joe"}] + + ddiff2 = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, cutoff_intersection_for_pairs=1, cutoff_distance_for_pairs=1) + result2 = { + 'iterable_item_removed': { + 'root[2]': {"id": 1}, + 'root[3]': {"name": "Joe"}, + 'root[4]': {"name": "Joe"}, + }, + 'dictionary_item_added': ["root[0]['name']", "root[1]['name']"] + } + assert result2 == ddiff2 + def test_nested_list_ignore_order_report_repetition(self): t1 = [1, 2, [3, 4]] t2 = [[4, 3, 3], 2, 1] @@ -594,8 +648,8 @@ def test_bool_vs_number(self): @pytest.mark.parametrize('max_passes, expected', [ (0, {'values_changed': {'root[0]': {'new_value': {'key5': 'CHANGE', 'key6': 'val6'}, 'old_value': {'key3': [[[[[1, 2, 4, 5]]]]], 'key4': [7, 8]}}, 'root[1]': {'new_value': {'key3': [[[[[1, 3, 5, 4]]]]], 'key4': [7, 8]}, 'old_value': {'key5': 'val5', 'key6': 'val6'}}}}), - (1, {'values_changed': {"root[1]['key5']": {'new_value': 'CHANGE', 'old_value': 'val5'}, "root[0]['key3'][0]": {'new_value': [[[[1, 3, 5, 4]]]], 'old_value': [[[[1, 2, 4, 5]]]]}}}), - (22, {'values_changed': {"root[1]['key5']": {'new_value': 'CHANGE', 'old_value': 'val5'}, "root[0]['key3'][0][0][0][0][1]": {'new_value': 3, 'old_value': 2}}}) + (1, {'values_changed': {"root[1]['key5']": {'new_value': 'CHANGE', 'old_value': 'val5', 'new_path': "root[0]['key5']"}, "root[0]['key3'][0]": {'new_value': [[[[1, 3, 5, 4]]]], 'old_value': [[[[1, 2, 4, 5]]]], 'new_path': "root[1]['key3'][0]"}}}), + (22, {'values_changed': {"root[1]['key5']": {'new_value': 'CHANGE', 'old_value': 'val5', 'new_path': "root[0]['key5']"}, "root[0]['key3'][0][0][0][0][1]": {'new_value': 3, 'old_value': 2, 'new_path': "root[1]['key3'][0][0][0][0][1]"}}}) ]) def test_ignore_order_max_passes(self, max_passes, expected): t1 = [ @@ -625,8 +679,8 @@ def test_ignore_order_max_passes(self, 
max_passes, expected): @pytest.mark.parametrize('max_diffs, expected', [ (1, {}), - (65, {'values_changed': {"root[1]['key5']": {'new_value': 'CHANGE', 'old_value': 'val5'}}}), - (80, {'values_changed': {"root[1]['key5']": {'new_value': 'CHANGE', 'old_value': 'val5'}, "root[0]['key3'][0][0][0][0][1]": {'new_value': 3, 'old_value': 2}}}), + (65, {'values_changed': {"root[1]['key5']": {'new_value': 'CHANGE', 'old_value': 'val5', 'new_path': "root[0]['key5']"}}}), + (80, {'values_changed': {"root[1]['key5']": {'new_value': 'CHANGE', 'old_value': 'val5', 'new_path': "root[0]['key5']"}, "root[0]['key3'][0][0][0][0][1]": {'new_value': 3, 'old_value': 2, 'new_path': "root[1]['key3'][0][0][0][0][1]"}}}), ]) def test_ignore_order_max_diffs(self, max_diffs, expected): t1 = [ @@ -923,6 +977,16 @@ def test_math_epsilon_when_ignore_order_in_nested_list(self): expected = {'values_changed': {'root[0]': {'new_value': {'x': 0.0011}, 'old_value': {'x': 0.001}}, 'root[1]': {'new_value': {'y': 2}, 'old_value': {'y': 2.00002}}}} assert expected == diff + def test_datetime_and_ignore_order(self): + diff = DeepDiff( + [{'due_date': datetime.date(2024, 2, 1)}], + [{'due_date': datetime.date(2024, 2, 2)}], + ignore_order=True, + ignore_numeric_type_changes=True + ) + assert {} != diff + + class TestCompareFuncIgnoreOrder: @@ -1072,6 +1136,121 @@ def compare_func(x, y, level=None): assert expected_with_compare_func == ddiff2 assert ddiff != ddiff2 + def test_ignore_order_with_compare_func_with_one_each_hashes_added_hashes_removed(self): + """ + Scenario: + In this example which demonstrates the problem... We have two dictionaries containing lists for + individualNames. Each list contains exactly 2 elements. The effective change is that we are + replacing the 2nd element in the list. 
+ NOTE: This is considered a REPLACEMENT of the second element and not an UPDATE of the element + because we are providing a custom compare_func which will determine matching elements based on + the value of the nameIdentifier field. If the custom compare_func is not used, then + deepdiff.diff will mistakenly treat the difference as being individual field updates for every + field in the second element of the list. + + Intent: + Use our custom compare_func, since we have provided it. + We need to fall into self._precalculate_distance_by_custom_compare_func + To do this, we are proposing a change to deepdiff.diff line 1128: + + Original: + if hashes_added and hashes_removed and self.iterable_compare_func and len(hashes_added) > 1 and len(hashes_removed) > 1: + + Proposed/Updated: + if hashes_added and hashes_removed \ + and self.iterable_compare_func \ + and len(hashes_added) > 0 and len(hashes_removed) > 0: + + NOTE: It is worth mentioning that deepdiff.diff line 1121, might also benefit by changing the length conditions + to evaluate for > 0 (rather than > 1). 
+ """ + + t1 = { + "individualNames": [ + { + "firstName": "Johnathan", + "lastName": "Doe", + "prefix": "COLONEL", + "middleName": "A", + "primaryIndicator": True, + "professionalDesignation": "PHD", + "suffix": "SR", + "nameIdentifier": "00001" + }, + { + "firstName": "John", + "lastName": "Doe", + "prefix": "", + "middleName": "", + "primaryIndicator": False, + "professionalDesignation": "", + "suffix": "SR", + "nameIdentifier": "00002" + } + ] + } + + t2 = { + "individualNames": [ + { + "firstName": "Johnathan", + "lastName": "Doe", + "prefix": "COLONEL", + "middleName": "A", + "primaryIndicator": True, + "professionalDesignation": "PHD", + "suffix": "SR", + "nameIdentifier": "00001" + }, + { + "firstName": "Johnny", + "lastName": "Doe", + "prefix": "", + "middleName": "A", + "primaryIndicator": False, + "professionalDesignation": "", + "suffix": "SR", + "nameIdentifier": "00003" + } + ] + } + def compare_func(item1, item2, level=None): + print("*** inside compare ***") + it1_keys = item1.keys() + + try: + + # --- individualNames --- + if 'nameIdentifier' in it1_keys and 'lastName' in it1_keys: + match_result = item1['nameIdentifier'] == item2['nameIdentifier'] + print("individualNames - matching result:", match_result) + return match_result + else: + print("Unknown list item...", "matching result:", item1 == item2) + return item1 == item2 + except Exception: + raise CannotCompare() from None + # ---------------------------- End of nested function + + actual_diff = DeepDiff(t1, t2, report_repetition=True, + ignore_order=True, iterable_compare_func=compare_func, cutoff_intersection_for_pairs=1) + + old_invalid_diff = { + 'values_changed': {"root['individualNames'][1]['firstName']": {'new_value': 'Johnny', 'old_value': 'John'}, + "root['individualNames'][1]['middleName']": {'new_value': 'A', 'old_value': ''}, + "root['individualNames'][1]['nameIdentifier']": {'new_value': '00003', + 'old_value': '00002'}}} + new_expected_diff = {'iterable_item_added': { + 
"root['individualNames'][1]": {'firstName': 'Johnny', 'lastName': 'Doe', 'prefix': '', 'middleName': 'A', + 'primaryIndicator': False, 'professionalDesignation': '', 'suffix': 'SR', + 'nameIdentifier': '00003'}}, 'iterable_item_removed': { + "root['individualNames'][1]": {'firstName': 'John', 'lastName': 'Doe', 'prefix': '', 'middleName': '', + 'primaryIndicator': False, 'professionalDesignation': '', 'suffix': 'SR', + 'nameIdentifier': '00002'}}} + + assert old_invalid_diff != actual_diff + assert new_expected_diff == actual_diff + class TestDynamicIgnoreOrder: def test_ignore_order_func(self): diff --git a/tests/test_model.py b/tests/test_model.py index cc5390b6..12130e0c 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -3,6 +3,7 @@ import logging import pytest from tests import CustomClass, CustomClassMisleadingRepr +from deepdiff import DeepDiff from deepdiff.model import (DiffLevel, ChildRelationship, DictRelationship, SubscriptableIterableRelationship, AttributeRelationship) @@ -170,6 +171,76 @@ def test_path_when_both_children_empty(self): assert path == 'root' assert down.path(output_format='list') == [] + def test_t2_path_when_nested(self): + t1 = { + "type": "struct", + "fields": [ + {"name": "Competition", "metadata": {}, "nullable": True, "type": "string"}, + {"name": "TeamName", "metadata": {}, "nullable": True, "type": "string"}, + { + "name": "Contents", + "metadata": {}, + "nullable": True, + "type": { + "type": "struct", + "fields": [ + {"name": "Date", "metadata": {}, "nullable": True, "type": "string"}, + {"name": "Player1", "metadata": {}, "nullable": True, "type": "string"} + ] + } + } + ] + } + + t2 = { + "type": "struct", + "fields": [ + {"name": "Competition", "metadata": {}, "nullable": True, "type": "string"}, + {"name": "GlobalId", "metadata": {}, "nullable": True, "type": "string"}, + {"name": "TeamName", "metadata": {}, "nullable": True, "type": "string"}, + { + "name": "Contents", + "metadata": {}, + "nullable": True, + 
"type": { + "type": "struct", + "fields": [ + {"name": "Date", "metadata": {}, "nullable": True, "type": "string"}, + {"name": "Player1", "metadata": {}, "nullable": True, "type": "string"}, + {"name": "Player2", "metadata": {}, "nullable": True, "type": "string"} + ] + } + } + ] + } + + diff = DeepDiff(t1=t1, t2=t2, ignore_order=True, verbose_level=2, view='tree') + + expected_diff = { + "iterable_item_added": { + "root['fields'][1]": { + "name": "GlobalId", + "metadata": {}, + "nullable": True, + "type": "string", + }, + "root['fields'][2]['type']['fields'][2]": { + "name": "Player2", + "metadata": {}, + "nullable": True, + "type": "string", + }, + } + } + + path = diff['iterable_item_added'][1].path() + assert "root['fields'][2]['type']['fields'][2]" == path + + path_t2 = diff['iterable_item_added'][1].path(use_t2=True) + assert "root['fields'][3]['type']['fields'][2]" == path_t2 + + + def test_repr_short(self): level = self.lowest.verbose_level try: diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 8a9c02f5..7122976c 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -4,10 +4,13 @@ import sys import pytest import datetime +import numpy as np +from typing import NamedTuple, Optional from pickle import UnpicklingError from decimal import Decimal +from collections import Counter from deepdiff import DeepDiff -from deepdiff.helper import pypy3 +from deepdiff.helper import pypy3, py_current_version, np_ndarray, Opcode from deepdiff.serialization import ( pickle_load, pickle_dump, ForbiddenModule, ModuleNotFoundError, MODULE_NOT_FOUND_MSG, FORBIDDEN_MODULE_MSG, pretty_print_diff, @@ -23,6 +26,19 @@ t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} +class SomeStats(NamedTuple): + counter: Optional[Counter] + context_aware_counter: Optional[Counter] = None + min_int: Optional[int] = 0 + max_int: Optional[int] = 0 + + +field_stats1 = SomeStats( + counter=Counter(["a", "a", "b"]), + max_int=10 +) + + 
class TestSerialization: """Tests for Serializations.""" @@ -323,10 +339,24 @@ def test_pretty_form_method(self, expected, verbose_level): (5, {1, 2, 10}, set), (6, datetime.datetime(2023, 10, 11), datetime.datetime.fromisoformat), (7, datetime.datetime.utcnow(), datetime.datetime.fromisoformat), + (8, field_stats1, lambda x: SomeStats(**x)), + (9, np.array([[ 101, 3533, 1998, 4532, 2024, 3415, 1012, 102]]), np.array) ]) def test_json_dumps_and_loads(self, test_num, value, func_to_convert_back): + if test_num == 8 and py_current_version < 3.8: + print(f"Skipping test_json_dumps_and_loads #{test_num} on Python {py_current_version}") + return serialized = json_dumps(value) back = json_loads(serialized) if func_to_convert_back: back = func_to_convert_back(back) - assert value == back, f"test_json_dumps_and_loads test #{test_num} failed" + if isinstance(back, np_ndarray): + assert np.array_equal(value, back), f"test_json_dumps_and_loads test #{test_num} failed" + else: + assert value == back, f"test_json_dumps_and_loads test #{test_num} failed" + + def test_namedtuple_seriazliation(self): + op_code = Opcode(tag="replace", t1_from_index=0, t1_to_index=1, t2_from_index=10, t2_to_index=20) + serialized = json_dumps(op_code) + expected = '{"tag":"replace","t1_from_index":0,"t1_to_index":1,"t2_from_index":10,"t2_to_index":20,"old_values":null,"new_values":null}' + assert serialized == expected