From 14c2bfd92961fdd6bc74b3c5f8f979c3be38f781 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 27 Dec 2024 12:08:48 -0800 Subject: [PATCH 01/11] small optimizations so we don't load functions that are not needed --- deepdiff/diff.py | 4 +- deepdiff/helper.py | 1 - deepdiff/serialization.py | 94 +++++++++++++++++---------------------- 3 files changed, 45 insertions(+), 54 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index a6fe06b..461cae7 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -11,11 +11,12 @@ from enum import Enum from copy import deepcopy from math import isclose as is_close -from typing import List, Dict, IO, Callable, Set, Union, Any, Pattern, Tuple, Optional +from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional from collections.abc import Mapping, Iterable, Sequence from collections import defaultdict from inspect import getmembers from itertools import zip_longest +from functools import lru_cache from deepdiff.helper import (strings, bytes_type, numbers, uuids, datetimes, ListItemRemovedOrAdded, notpresent, IndexedHash, unprocessed, add_to_frozen_set, basic_types, convert_item_or_items_into_set_else_none, get_type, @@ -1123,6 +1124,7 @@ def _create_hashtable(self, level, t): return local_hashes @staticmethod + @lru_cache(maxsize=2028) def _get_distance_cache_key(added_hash, removed_hash): key1, key2 = (added_hash, removed_hash) if added_hash > removed_hash else (removed_hash, added_hash) if isinstance(key1, int): diff --git a/deepdiff/helper.py b/deepdiff/helper.py index e0be6a1..36d4362 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -1,7 +1,6 @@ import sys import re import os -import math import datetime import uuid import logging diff --git a/deepdiff/serialization.py b/deepdiff/serialization.py index 4119742..aa56399 100644 --- a/deepdiff/serialization.py +++ b/deepdiff/serialization.py @@ -11,36 +11,6 @@ import decimal # NOQA import orderly_set # NOQA import collections # NOQA -try: - import yaml -except ImportError: # pragma: no cover. - yaml = None # pragma: no cover. -try: - if sys.version_info >= (3, 11): - import tomllib as tomli - else: - import tomli -except ImportError: # pragma: no cover. - tomli = None # pragma: no cover. -try: - import tomli_w -except ImportError: # pragma: no cover. - tomli_w = None # pragma: no cover. -try: - import clevercsv - csv = None -except ImportError: # pragma: no cover. - import csv - clevercsv = None # pragma: no cover. -try: - import orjson -except ImportError: # pragma: no cover. - orjson = None -try: - from pydantic import BaseModel as PydanticBaseModel -except ImportError: # pragma: no cover. - PydanticBaseModel = None - from copy import deepcopy, copy from functools import partial from collections.abc import Mapping @@ -56,16 +26,17 @@ np_ndarray, Opcode, SetOrdered, + pydantic_base_model_type, + PydanticBaseModel, ) from deepdiff.model import DeltaResult -logger = logging.getLogger(__name__) - try: - import jsonpickle -except ImportError: # pragma: no cover. Json pickle is getting deprecated. - jsonpickle = None # pragma: no cover. Json pickle is getting deprecated. + import orjson +except ImportError: # pragma: no cover. + orjson = None +logger = logging.getLogger(__name__) class UnsupportedFormatErr(TypeError): pass @@ -162,10 +133,11 @@ def to_json_pickle(self): :ref:`to_json_pickle_label` Get the json pickle of the diff object. Unless you need all the attributes and functionality of DeepDiff, running to_json() is the safer option that json pickle. 
""" - if jsonpickle: + try: + import jsonpickle copied = self.copy() return jsonpickle.encode(copied) - else: + except ImportError: # pragma: no cover. Json pickle is getting deprecated. logger.error('jsonpickle library needs to be installed in order to run to_json_pickle') # pragma: no cover. Json pickle is getting deprecated. @classmethod @@ -175,9 +147,10 @@ def from_json_pickle(cls, value): Load DeepDiff object with all the bells and whistles from the json pickle dump. Note that json pickle dump comes from to_json_pickle """ - if jsonpickle: + try: + import jsonpickle return jsonpickle.decode(value) - else: + except ImportError: # pragma: no cover. Json pickle is getting deprecated. logger.error('jsonpickle library needs to be installed in order to run from_json_pickle') # pragma: no cover. Json pickle is getting deprecated. def to_json(self, default_mapping: Optional[dict]=None, force_use_builtin_json=False, **kwargs): @@ -483,19 +456,27 @@ def load_path_content(path, file_type=None): """ Loads and deserializes the content of the path. """ + if file_type is None: file_type = path.split('.')[-1] if file_type == 'json': with open(path, 'r') as the_file: content = json_loads(the_file.read()) elif file_type in {'yaml', 'yml'}: - if yaml is None: # pragma: no cover. - raise ImportError('Pyyaml needs to be installed.') # pragma: no cover. + try: + import yaml + except ImportError: # pragma: no cover. + raise ImportError('Pyyaml needs to be installed.') from None # pragma: no cover. with open(path, 'r') as the_file: content = yaml.safe_load(the_file) elif file_type == 'toml': - if tomli is None: # pragma: no cover. - raise ImportError('On python<=3.10 tomli needs to be installed.') # pragma: no cover. + try: + if sys.version_info >= (3, 11): + import tomllib as tomli + else: + import tomli + except ImportError: # pragma: no cover. + raise ImportError('On python<=3.10 tomli needs to be installed.') from None # pragma: no cover. with open(path, 'rb') as the_file: content = tomli.load(the_file) elif file_type == 'pickle': @@ -503,11 +484,14 @@ def load_path_content(path, file_type=None): content = the_file.read() content = pickle_load(content) elif file_type in {'csv', 'tsv'}: - if clevercsv: # pragma: no cover. + try: + import clevercsv content = clevercsv.read_dicts(path) - else: + except ImportError: # pragma: no cover. + import csv with open(path, 'r') as the_file: content = list(csv.DictReader(the_file)) + logger.info(f"NOTE: CSV content was empty in {path}") # Everything in csv is string but we try to automatically convert any numbers we find @@ -554,22 +538,28 @@ def _save_content(content, path, file_type, keep_backup=True): content = json_dumps(content) the_file.write(content) elif file_type in {'yaml', 'yml'}: - if yaml is None: # pragma: no cover. - raise ImportError('Pyyaml needs to be installed.') # pragma: no cover. + try: + import yaml + except ImportError: # pragma: no cover. + raise ImportError('Pyyaml needs to be installed.') from None # pragma: no cover. with open(path, 'w') as the_file: content = yaml.safe_dump(content, stream=the_file) elif file_type == 'toml': - if tomli_w is None: # pragma: no cover. - raise ImportError('Tomli-w needs to be installed.') # pragma: no cover. + try: + import tomli_w + except ImportError: # pragma: no cover. + raise ImportError('Tomli-w needs to be installed.') from None # pragma: no cover. 
with open(path, 'wb') as the_file: content = tomli_w.dump(content, the_file) elif file_type == 'pickle': with open(path, 'wb') as the_file: content = pickle_dump(content, file_obj=the_file) elif file_type in {'csv', 'tsv'}: - if clevercsv: # pragma: no cover. + try: + import clevercsv dict_writer = clevercsv.DictWriter - else: + except ImportError: # pragma: no cover. + import csv dict_writer = csv.DictWriter with open(path, 'w', newline='') as csvfile: fieldnames = list(content[0].keys()) @@ -613,7 +603,7 @@ def _serialize_tuple(value): Mapping: dict, } -if PydanticBaseModel: +if PydanticBaseModel is not pydantic_base_model_type: JSON_CONVERTOR[PydanticBaseModel] = lambda x: x.dict() From 4733bc0259f288079d1eab7ba8a02f219f278ae4 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 27 Dec 2024 12:11:37 -0800 Subject: [PATCH 02/11] fixing workflow --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 4bbcd75..6a21503 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -44,7 +44,7 @@ jobs: ${{ runner.os }}-pip- ${{ runner.os }}- - name: Upgrade setuptools - if: matrix.python-version => 3.12 + if: matrix.python-version >= 3.12 run: | # workaround for 3.13, SEE: https://github.com/pypa/setuptools/issues/3661#issuecomment-1813845177 pip install --upgrade setuptools From 123e770b5d97a33c98fd5c8117c4a0c214c49a25 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 27 Dec 2024 12:13:41 -0800 Subject: [PATCH 03/11] upgrading to cachev4 for github actions --- .github/workflows/main.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 6a21503..8717ae9 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -23,7 +23,7 @@ jobs: architecture: ${{ matrix.architecture }} - name: Cache pip 3.8 if: matrix.python-version == 3.8 - uses: actions/cache@v2 + uses: actions/cache@v4 with: # This path is specific to Ubuntu path: ~/.cache/pip @@ -34,7 +34,7 @@ jobs: ${{ runner.os }}- - name: Cache pip if: matrix.python-version != 3.8 - uses: actions/cache@v2 + uses: actions/cache@v4 with: # This path is specific to Ubuntu path: ~/.cache/pip From 8f5f35d75a51e76a65fea83da9ab78f2f3e86200 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 3 Feb 2025 09:57:12 -0800 Subject: [PATCH 04/11] updating version of orderly-set --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8270bf8..7fc4bb4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -orderly-set>=5.2.3,<6 +orderly-set>=5.3.0,<6 From 2f2a4b827cf0858943650b3576d5fc5f1f5dbeac Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 3 Feb 2025 16:51:42 -0800 Subject: [PATCH 05/11] moving requirements for docs to a separate file because they are not needed when runnin the tests --- requirements-cli.txt | 2 +- requirements-dev.txt | 19 ++++++++----------- requirements-dev3.8.txt | 4 ---- requirements-docs.txt | 3 +++ 4 files changed, 12 insertions(+), 16 deletions(-) create mode 100644 requirements-docs.txt diff --git a/requirements-cli.txt b/requirements-cli.txt index 5f1275e..3ed6361 100644 --- a/requirements-cli.txt +++ b/requirements-cli.txt @@ -1,2 +1,2 @@ -click==8.1.7 +click==8.1.8 pyyaml==6.0.2 diff --git a/requirements-dev.txt b/requirements-dev.txt index 9bde159..495ebc9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt 
@@ -1,24 +1,21 @@ -r requirements.txt -r requirements-cli.txt bump2version==1.0.1 -jsonpickle==4.0.0 -coverage==7.6.9 +jsonpickle==4.0.1 +coverage==7.6.10 ipdb==0.13.13 -numpy==2.1.3 +numpy==2.2.2 pytest==8.3.4 pytest-cov==6.0.0 python-dotenv==1.0.1 -Sphinx==6.2.1 # We use the html style that is not supported in Sphinx 7 anymore. -sphinx-sitemap==2.6.0 -sphinxemoji==0.3.1 flake8==7.1.1 python-dateutil==2.9.0.post0 -orjson==3.10.12 +orjson==3.10.15 wheel==0.45.1 tomli==2.2.1 -tomli-w==1.1.0 -pydantic==2.10.3 +tomli-w==1.2.0 +pydantic==2.10.6 pytest-benchmark==5.1.0 pandas==2.2.3 -polars==1.16.0 -setuptools==75.6.0 +polars==1.21.0 +setuptools==75.8.0 diff --git a/requirements-dev3.8.txt b/requirements-dev3.8.txt index b39b7fe..b4f8405 100644 --- a/requirements-dev3.8.txt +++ b/requirements-dev3.8.txt @@ -8,10 +8,6 @@ numpy>=1.24.4,<2.0.0 pytest==8.2.2 pytest-cov==5.0.0 python-dotenv==1.0.1 -watchdog>=4.0.1 -Sphinx==6.2.1 # We use the html style that is not supported in Sphinx 7 anymore. -sphinx-sitemap==2.6.0 -sphinxemoji==0.2.0 flake8==7.1.0 python-dateutil==2.9.0.post0 orjson==3.10.12 diff --git a/requirements-docs.txt b/requirements-docs.txt new file mode 100644 index 0000000..9a03684 --- /dev/null +++ b/requirements-docs.txt @@ -0,0 +1,3 @@ +Sphinx==6.2.1 # We use the html style that is not supported in Sphinx 7 anymore. +sphinx-sitemap==2.6.0 +sphinxemoji==0.3.1 From 6476effd48c8b9a156495426b807c23351fccf29 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 3 Feb 2025 18:06:04 -0800 Subject: [PATCH 06/11] trying maturin to see if that fixes pyo3 problem --- .github/workflows/main.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 8717ae9..ca9d06c 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -48,6 +48,7 @@ jobs: run: | # workaround for 3.13, SEE: https://github.com/pypa/setuptools/issues/3661#issuecomment-1813845177 pip install --upgrade setuptools + pip install maturin - name: Install dependencies if: matrix.python-version > 3.9 run: pip install -r requirements-dev.txt From 7965b51df2c4b3bc5ebfa4e5a75b46008e8c176b Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 3 Feb 2025 18:16:53 -0800 Subject: [PATCH 07/11] last try to use have py3.13 run on github actions --- .github/workflows/main.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index ca9d06c..4b8dd7e 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -23,7 +23,7 @@ jobs: architecture: ${{ matrix.architecture }} - name: Cache pip 3.8 if: matrix.python-version == 3.8 - uses: actions/cache@v4 + uses: actions/cache@v5 with: # This path is specific to Ubuntu path: ~/.cache/pip @@ -34,7 +34,9 @@ jobs: ${{ runner.os }}- - name: Cache pip if: matrix.python-version != 3.8 - uses: actions/cache@v4 + env: + PYO3_USE_ABI3_FORWARD_COMPATIBILITY: "1" + uses: actions/cache@v5 with: # This path is specific to Ubuntu path: ~/.cache/pip @@ -48,7 +50,6 @@ jobs: run: | # workaround for 3.13, SEE: https://github.com/pypa/setuptools/issues/3661#issuecomment-1813845177 pip install --upgrade setuptools - pip install maturin - name: Install dependencies if: matrix.python-version > 3.9 run: pip install -r requirements-dev.txt From 311ff096a8edf5ba81df33f9506d4ad614e377a9 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 3 Feb 2025 18:18:02 -0800 Subject: [PATCH 08/11] github actions v4 --- .github/workflows/main.yaml | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 4b8dd7e..201fb0b 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -23,7 +23,7 @@ jobs: architecture: ${{ matrix.architecture }} - name: Cache pip 3.8 if: matrix.python-version == 3.8 - uses: actions/cache@v5 + uses: actions/cache@v4 with: # This path is specific to Ubuntu path: ~/.cache/pip @@ -36,7 +36,7 @@ jobs: if: matrix.python-version != 3.8 env: PYO3_USE_ABI3_FORWARD_COMPATIBILITY: "1" - uses: actions/cache@v5 + uses: actions/cache@v4 with: # This path is specific to Ubuntu path: ~/.cache/pip From 83dcad712e7d5c22845586409ef37c3b9526652a Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 3 Feb 2025 18:25:36 -0800 Subject: [PATCH 09/11] forget about python3.13 on github actions --- .github/workflows/main.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 201fb0b..345ee9f 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] architecture: ["x64"] steps: - uses: actions/checkout@v2 @@ -48,7 +48,7 @@ jobs: - name: Upgrade setuptools if: matrix.python-version >= 3.12 run: | - # workaround for 3.13, SEE: https://github.com/pypa/setuptools/issues/3661#issuecomment-1813845177 + # workaround for 3.12, SEE: https://github.com/pypa/setuptools/issues/3661#issuecomment-1813845177 pip install --upgrade setuptools - name: Install dependencies if: matrix.python-version > 3.9 @@ -57,23 +57,23 @@ jobs: if: matrix.python-version <= 3.9 run: pip install -r requirements-dev3.8.txt - name: Lint with flake8 - if: matrix.python-version == 3.13 + if: matrix.python-version == 3.12 run: | # stop the build if there are Python syntax errors or undefined names flake8 deepdiff --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 deepdiff --count --exit-zero --max-complexity=26 --max-line-lengt=250 --statistics - name: Test with pytest and get the coverage - if: matrix.python-version == 3.13 + if: matrix.python-version == 3.12 run: | pytest --benchmark-disable --cov-report=xml --cov=deepdiff tests/ --runslow - name: Test with pytest and no coverage report - if: matrix.python-version != 3.13 + if: matrix.python-version != 3.12 run: | pytest --benchmark-disable - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 - if: matrix.python-version == 3.13 + if: matrix.python-version == 3.12 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: From 000ec0b2dcf765a7be641bafed5fd5f23a57247a Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 3 Feb 2025 23:31:26 -0800 Subject: [PATCH 10/11] handling timezone. We assume any timezone naive datetime is in UTC. 
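In rough terms, the rule this patch applies is: an aware datetime is converted to UTC, and a naive datetime is assumed to already be in UTC. The sketch below only illustrates that rule; the helper names here (is_timezone_aware, normalize_to_utc) are illustrative and not DeepDiff's API — the real logic lives in datetime_normalize and the new has_timezone helper in deepdiff/helper.py.

    import datetime

    def is_timezone_aware(dt: datetime.datetime) -> bool:
        # tzinfo alone is not enough; it must also report a real UTC offset.
        return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None

    def normalize_to_utc(dt: datetime.datetime) -> datetime.datetime:
        if is_timezone_aware(dt):
            # Aware datetimes are converted, so equal instants compare equal.
            return dt.astimezone(datetime.timezone.utc)
        # Naive datetimes are assumed to already be in UTC; just attach the tzinfo.
        return dt.replace(tzinfo=datetime.timezone.utc)

    naive = datetime.datetime(2020, 8, 31, 13, 14, 1)
    aware = datetime.datetime(2020, 8, 31, 13, 14, 1, tzinfo=datetime.timezone.utc)
    assert naive != aware                                      # plain Python says these differ
    assert normalize_to_utc(naive) == normalize_to_utc(aware)  # after normalization they match
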
--- deepdiff/diff.py | 18 +++++++++++++++--- deepdiff/helper.py | 19 ++++++++++++++++++- tests/test_diff_datetime.py | 31 ++++++++++++++++++++++++++++--- tests/test_diff_text.py | 19 ++++++++++--------- tests/test_hash.py | 13 ++++++++++++- 5 files changed, 83 insertions(+), 17 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 461cae7..76f186b 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -8,6 +8,7 @@ import difflib import logging import types +import datetime from enum import Enum from copy import deepcopy from math import isclose as is_close @@ -1487,7 +1488,15 @@ def _diff_numbers(self, level, local_tree=None, report_type_change=True): if t1_s != t2_s: self._report_result('values_changed', level, local_tree=local_tree) - def _diff_datetimes(self, level, local_tree=None): + def _diff_datetime(self, level, local_tree=None): + """Diff DateTimes""" + level.t1 = datetime_normalize(self.truncate_datetime, level.t1) + level.t2 = datetime_normalize(self.truncate_datetime, level.t2) + + if level.t1 != level.t2: + self._report_result('values_changed', level, local_tree=local_tree) + + def _diff_time(self, level, local_tree=None): """Diff DateTimes""" if self.truncate_datetime: level.t1 = datetime_normalize(self.truncate_datetime, level.t1) @@ -1670,8 +1679,11 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree= elif isinstance(level.t1, strings): self._diff_str(level, local_tree=local_tree) - elif isinstance(level.t1, datetimes): - self._diff_datetimes(level, local_tree=local_tree) + elif isinstance(level.t1, datetime.datetime): + self._diff_datetime(level, local_tree=local_tree) + + elif isinstance(level.t1, (datetime.date, datetime.timedelta, datetime.time)): + self._diff_time(level, local_tree=local_tree) elif isinstance(level.t1, uuids): self._diff_uuids(level, local_tree=local_tree) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 36d4362..ff6d668 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -623,12 +623,29 @@ def datetime_normalize(truncate_datetime, obj): elif truncate_datetime == 'day': obj = obj.replace(hour=0, minute=0, second=0, microsecond=0) if isinstance(obj, datetime.datetime): - obj = obj.replace(tzinfo=datetime.timezone.utc) + if has_timezone(obj): + obj = obj.astimezone(datetime.timezone.utc) + else: + obj = obj.replace(tzinfo=datetime.timezone.utc) elif isinstance(obj, datetime.time): obj = time_to_seconds(obj) return obj +def has_timezone(dt): + """ + Function to check if a datetime object has a timezone + + Checking dt.tzinfo.utcoffset(dt) ensures that the datetime object is truly timezone-aware + because some datetime objects may have a tzinfo attribute that is not None but still + doesn't provide a valid offset. + + Certain tzinfo objects, such as pytz.timezone(None), can exist but do not provide meaningful UTC offset information. + If tzinfo is present but calling .utcoffset(dt) returns None, the datetime is not truly timezone-aware. 
+ """ + return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None + + def get_truncate_datetime(truncate_datetime): """ Validates truncate_datetime value diff --git a/tests/test_diff_datetime.py b/tests/test_diff_datetime.py index 54555e6..8612f00 100644 --- a/tests/test_diff_datetime.py +++ b/tests/test_diff_datetime.py @@ -1,4 +1,5 @@ -from datetime import date, datetime, time +import pytz +from datetime import date, datetime, time, timezone from deepdiff import DeepDiff @@ -19,8 +20,8 @@ def test_datetime_diff(self): expected = { "values_changed": { "root['a']": { - "new_value": datetime(2023, 7, 5, 11, 11, 12), - "old_value": datetime(2023, 7, 5, 10, 11, 12), + "new_value": datetime(2023, 7, 5, 11, 11, 12, tzinfo=timezone.utc), + "old_value": datetime(2023, 7, 5, 10, 11, 12, tzinfo=timezone.utc), } } } @@ -73,3 +74,27 @@ def test_time_diff(self): } } assert res == expected + + def test_diffs_datetimes_different_timezones(self): + dt_utc = datetime(2025, 2, 3, 12, 0, 0, tzinfo=pytz.utc) # UTC timezone + # Convert it to another timezone (e.g., New York) + dt_ny = dt_utc.astimezone(pytz.timezone('America/New_York')) + assert dt_utc == dt_ny + diff = DeepDiff(dt_utc, dt_ny) + assert not diff + + t1 = [dt_utc, dt_ny] + t2 = [dt_ny, dt_utc] + assert not DeepDiff(t1, t2) + assert not DeepDiff(t1, t2, ignore_order=True) + + t2 = [dt_ny, dt_utc, dt_ny] + assert not DeepDiff(t1, t2, ignore_order=True) + + def test_datetime_within_array_with_timezone_diff(self): + d1 = [datetime(2020, 8, 31, 13, 14, 1)] + d2 = [datetime(2020, 8, 31, 13, 14, 1, tzinfo=timezone.utc)] + + assert not DeepDiff(d1, d2) + assert not DeepDiff(d1, d2, ignore_order=True) + assert not DeepDiff(d1, d2, truncate_datetime='second') diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 63df30a..9b42604 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1446,7 +1446,8 @@ def test_ignore_type_in_groups_str_and_datetime(self): t1 = [1, 2, 3, 'a', now] t2 = [1, 2, 3, 'a', 'now'] ddiff = DeepDiff(t1, t2, ignore_type_in_groups=[(str, bytes, datetime.datetime)]) - result = {'values_changed': {'root[4]': {'new_value': 'now', 'old_value': now}}} + now_utc = now.replace(tzinfo=datetime.timezone.utc) + result = {'values_changed': {'root[4]': {'new_value': 'now', 'old_value': now_utc}}} assert result == ddiff def test_ignore_type_in_groups_float_vs_decimal(self): @@ -2146,20 +2147,20 @@ def test_diffs_rrules(self): assert d == { "values_changed": { "root[0]": { - "new_value": datetime.datetime(2011, 12, 31, 0, 0), - "old_value": datetime.datetime(2014, 12, 31, 0, 0), + "new_value": datetime.datetime(2011, 12, 31, 0, 0, tzinfo=datetime.timezone.utc), + "old_value": datetime.datetime(2014, 12, 31, 0, 0, tzinfo=datetime.timezone.utc), }, "root[1]": { - "new_value": datetime.datetime(2012, 1, 31, 0, 0), - "old_value": datetime.datetime(2015, 1, 31, 0, 0), + "new_value": datetime.datetime(2012, 1, 31, 0, 0, tzinfo=datetime.timezone.utc), + "old_value": datetime.datetime(2015, 1, 31, 0, 0, tzinfo=datetime.timezone.utc), }, "root[2]": { - "new_value": datetime.datetime(2012, 3, 31, 0, 0), - "old_value": datetime.datetime(2015, 3, 31, 0, 0), + "new_value": datetime.datetime(2012, 3, 31, 0, 0, tzinfo=datetime.timezone.utc), + "old_value": datetime.datetime(2015, 3, 31, 0, 0, tzinfo=datetime.timezone.utc), }, "root[3]": { - "new_value": datetime.datetime(2012, 5, 31, 0, 0), - "old_value": datetime.datetime(2015, 5, 31, 0, 0), + "new_value": datetime.datetime(2012, 5, 31, 0, 0, 
tzinfo=datetime.timezone.utc), + "old_value": datetime.datetime(2015, 5, 31, 0, 0, tzinfo=datetime.timezone.utc), }, }, "iterable_item_removed": {"root[4]": datetime.datetime(2015, 7, 31, 0, 0)}, diff --git a/tests/test_hash.py b/tests/test_hash.py index 22a86e2..f5cdc56 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -1,9 +1,10 @@ #!/usr/bin/env python import re import pytest -from pathlib import Path +import pytz import logging import datetime +from pathlib import Path from collections import namedtuple from functools import partial from enum import Enum @@ -896,6 +897,16 @@ def test_list1(self): result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) assert expected_result == result + def test_datetime_hash(self): + dt_utc = datetime.datetime(2025, 2, 3, 12, 0, 0, tzinfo=pytz.utc) # UTC timezone + # Convert it to another timezone (e.g., New York) + dt_ny = dt_utc.astimezone(pytz.timezone('America/New_York')) + assert dt_utc == dt_ny + + result_utc = DeepHash(dt_utc, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) + result_ny = DeepHash(dt_ny, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) + assert result_utc[dt_utc] == result_ny[dt_ny] + def test_dict1(self): string1 = "a" key1 = "key1" From eed7669984f098af7a075fe27c6f6a4f309ca12f Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 3 Feb 2025 23:55:27 -0800 Subject: [PATCH 11/11] updating the docs --- CHANGELOG.md | 4 +++ README.md | 56 ++++--------------------------------- docs/basics.rst | 13 +++++++++ docs/changelog.rst | 6 ++++ docs/faq.rst | 22 +++++++++++++++ tests/test_diff_datetime.py | 1 + 6 files changed, 52 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e091e07..2ecac9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # DeepDiff Change log +- v8-2-0 + - Small optimizations so we don't load functions that are not needed + - Updated the minimum version of Orderly-set + - Normalize all datetimes into UTC. Assume timezone naive datetimes are UTC. - v8-1-0 - Removing deprecated lines from setup.py diff --git a/README.md b/README.md index 69ed188..5872c91 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,12 @@ Tested on Python 3.8+ and PyPy3. Please check the [ChangeLog](CHANGELOG.md) file for the detailed information. +DeepDiff 8-2-0 + +- Small optimizations so we don't load functions that are not needed +- Updated the minimum version of Orderly-set +- Normalize all datetimes into UTC. Assume timezone naive datetimes are UTC. + DeepDiff 8-1-0 - Removing deprecated lines from setup.py @@ -40,56 +46,6 @@ DeepDiff 8-1-0 - Fixes accessing the affected_root_keys property on the diff object returned by DeepDiff fails when one of the dicts is empty - Fixes accessing the affected_root_keys property on the diff object returned by DeepDiff fails when one of the dicts is empty #508 -DeepDiff 8-0-1 - -- Bugfix. Numpy should be optional. - -DeepDiff 8-0-0 - -With the introduction of `threshold_to_diff_deeper`, the values returned are different than in previous versions of DeepDiff. You can still get the older values by setting `threshold_to_diff_deeper=0`. However to signify that enough has changed in this release that the users need to update the parameters passed to DeepDiff, we will be doing a major version update. - -- `use_enum_value=True` makes it so when diffing enum, we use the enum's value. It makes it so comparing an enum to a string or any other value is not reported as a type change. 
-- `threshold_to_diff_deeper=float` is a number between 0 and 1. When comparing dictionaries that have a small intersection of keys, we will report the dictionary as a `new_value` instead of reporting individual keys changed. If you set it to zero, you get the same results as DeepDiff 7.0.1 and earlier, which means this feature is disabled. The new default is 0.33 which means if less that one third of keys between dictionaries intersect, report it as a new object. -- Deprecated `ordered-set` and switched to `orderly-set`. The `ordered-set` package was not being maintained anymore and starting Python 3.6, there were better options for sets that ordered. I forked one of the new implementations, modified it, and published it as `orderly-set`. -- Added `use_log_scale:bool` and `log_scale_similarity_threshold:float`. They can be used to ignore small changes in numbers by comparing their differences in logarithmic space. This is different than ignoring the difference based on significant digits. -- json serialization of reversed lists. -- Fix for iterable moved items when `iterable_compare_func` is used. -- Pandas and Polars support. - -DeepDiff 7-0-1 - -- Fixes the translation between Difflib opcodes and Delta flat rows. - -DeepDiff 7-0-0 - -- DeepDiff 7 comes with an improved delta object. [Delta to flat dictionaries](https://zepworks.com/deepdiff/current/serialization.html#delta-serialize-to-flat-dictionaries) have undergone a major change. We have also introduced [Delta serialize to flat rows](https://zepworks.com/deepdiff/current/serialization.html#delta-serialize-to-flat-rows). -- Subtracting delta objects have dramatically improved at the cost of holding more metadata about the original objects. -- When `verbose=2`, and the "path" of an item has changed in a report between t1 and t2, we include it as `new_path`. -- `path(use_t2=True)` returns the correct path to t2 in any reported change in the [`tree view`](https://zepworks.com/deepdiff/current/view.html#tree-view) -- Python 3.7 support is dropped and Python 3.12 is officially supported. - - -DeepDiff 6-7-1 - -- Support for subtracting delta objects when iterable_compare_func is used. -- Better handling of force adding a delta to an object. -- Fix for [`Can't compare dicts with both single and double quotes in keys`](https://github.com/seperman/deepdiff/issues/430) -- Updated docs for Inconsistent Behavior with math_epsilon and ignore_order = True - -DeepDiff 6-7-0 - -- Delta can be subtracted from other objects now. -- verify_symmetry is deprecated. Use bidirectional instead. -- always_include_values flag in Delta can be enabled to include values in the delta for every change. -- Fix for Delta.__add__ breaks with esoteric dict keys. -- You can load a delta from the list of flat dictionaries. - -DeepDiff 6-6-1 - -- Fix for [DeepDiff raises decimal exception when using significant digits](https://github.com/seperman/deepdiff/issues/426) -- Introducing group_by_sort_key -- Adding group_by 2D. For example `group_by=['last_name', 'zip_code']` - ## Installation diff --git a/docs/basics.rst b/docs/basics.rst index b120303..df734a4 100644 --- a/docs/basics.rst +++ b/docs/basics.rst @@ -148,6 +148,19 @@ Object attribute added: 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} +Datetime + DeepDiff converts all datetimes into UTC. If a datetime is timezone naive, we assume it is in UTC too. + That is different than what Python does. Python assumes your timezone naive datetime is in your local timezone. 
+ >>> from deepdiff import DeepDiff + >>> from datetime import datetime, timezone + >>> d1 = datetime(2020, 8, 31, 13, 14, 1) + >>> d2 = datetime(2020, 8, 31, 13, 14, 1, tzinfo=timezone.utc) + >>> d1 == d2 + False + >>> DeepDiff(d1, d2) + {} + + .. note:: All the examples above use the default :ref:`text_view_label`. If you want traversing functionality in the results, use the :ref:`tree_view_label`. diff --git a/docs/changelog.rst b/docs/changelog.rst index 00f6185..efaf4cb 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -6,6 +6,12 @@ Changelog DeepDiff Changelog +- v8-2-0 + - Small optimizations so we don't load functions that are not needed + - Updated the minimum version of Orderly-set + - Normalize all datetimes into UTC. Assume timezone naive datetimes are UTC. + + - v8-1-0 - Removing deprecated lines from setup.py diff --git a/docs/faq.rst b/docs/faq.rst index 1c57f5a..ce97948 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -148,6 +148,28 @@ Or use the tree view so you can use path(output_format='list'): [4, 'b'] +Q: Why my datetimes are reported in UTC? + +**Answer** + +DeepDiff converts all datetimes into UTC. If a datetime is timezone naive, we assume it is in UTC too. +That is different than what Python does. Python assumes your timezone naive datetime is in your local timezone. + + >>> from deepdiff import DeepDiff + >>> from datetime import datetime, timezone + >>> d1 = datetime(2020, 8, 31, 13, 14, 1) + >>> d2 = datetime(2020, 8, 31, 13, 14, 1, tzinfo=timezone.utc) + >>> d1 == d2 + False + >>> DeepDiff(d1, d2) + {} + + >>> d3 = d2.astimezone(pytz.timezone('America/New_York')) + >>> DeepDiff(d1, d3) + {} + >>> d1 == d3 + False + --------- .. admonition:: A message from `Sep `__, the creator of DeepDiff diff --git a/tests/test_diff_datetime.py b/tests/test_diff_datetime.py index 8612f00..6a8e786 100644 --- a/tests/test_diff_datetime.py +++ b/tests/test_diff_datetime.py @@ -95,6 +95,7 @@ def test_datetime_within_array_with_timezone_diff(self): d1 = [datetime(2020, 8, 31, 13, 14, 1)] d2 = [datetime(2020, 8, 31, 13, 14, 1, tzinfo=timezone.utc)] + assert d1 != d2, "Python doesn't think these are the same datetimes" assert not DeepDiff(d1, d2) assert not DeepDiff(d1, d2, ignore_order=True) assert not DeepDiff(d1, d2, truncate_datetime='second')
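
A note on the serialization changes in PATCH 01: the module-level try/except imports for yaml, tomli, tomli_w, clevercsv and jsonpickle were moved into the functions that actually need them, so `import deepdiff` no longer pays the import cost of optional dependencies that a given run never touches. Below is a minimal sketch of the pattern, mirroring the yaml branch of _save_content; the function name save_yaml is illustrative only, not DeepDiff's API.

    def save_yaml(content, path):
        # Defer the optional dependency to the first call that needs it,
        # keeping module import cheap when PyYAML is not installed.
        try:
            import yaml
        except ImportError:
            raise ImportError('Pyyaml needs to be installed.') from None
        with open(path, 'w') as the_file:
            yaml.safe_dump(content, stream=the_file)

The same motivation is behind the new functools.lru_cache on _get_distance_cache_key: the cache key for a given (added_hash, removed_hash) pair is deterministic, so repeated calls with the same pair during ignore_order diffs reuse the cached result instead of rebuilding it.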