diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5020505..2efd4aa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks -exclude: '^.subtrees/(jarowinkler|rapidfuzz)/' +exclude: '^.subtrees/rapidfuzz/' repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. diff --git a/.subtrees/jarowinkler/.github/FUNDING.yml b/.subtrees/jarowinkler/.github/FUNDING.yml deleted file mode 100644 index 1db8bdd..0000000 --- a/.subtrees/jarowinkler/.github/FUNDING.yml +++ /dev/null @@ -1 +0,0 @@ -github: maxbachmann diff --git a/.subtrees/jarowinkler/.github/workflows/branchbuild.yml b/.subtrees/jarowinkler/.github/workflows/branchbuild.yml deleted file mode 100644 index 22e24b7..0000000 --- a/.subtrees/jarowinkler/.github/workflows/branchbuild.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: Build - -on: - push: - branches-ignore: - - main - -jobs: - tests: - name: "Python ${{ matrix.python-version }}" - runs-on: "ubuntu-latest" - strategy: - matrix: - python-version: ["3.6", "3.9", "3.10"] - os: [ubuntu-latest, windows-latest, macos-latest] - - steps: - - uses: "actions/checkout@v2" - with: - submodules: 'true' - - uses: "actions/setup-python@v2" - with: - python-version: "${{ matrix.python-version }}" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pytest hypothesis mypy - - - name: build - run: | - pip install build; python -m build --sdist - # test whether tarball contains all files required for compiling - pip install dist/jarowinkler-*.tar.gz - - - name: Test type stubs - run: | - python -m mypy.stubtest jarowinkler --ignore-missing-stub - - - name: Test with pytest - run: | - pytest tests diff --git a/.subtrees/jarowinkler/.github/workflows/releasebuild.yml b/.subtrees/jarowinkler/.github/workflows/releasebuild.yml deleted file mode 100644 index 77f1c44..0000000 --- a/.subtrees/jarowinkler/.github/workflows/releasebuild.yml +++ /dev/null @@ -1,223 +0,0 @@ -name: Build - -on: - push: - branches: - - main - release: - types: - - published - -jobs: - build_wheels_windows: - name: Build wheel on windows-latest/${{matrix.arch}}/${{matrix.python_tag}} - runs-on: windows-latest - strategy: - fail-fast: false - matrix: - arch: [auto32, auto64] - python_tag: ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "cp310-*", "pp37-*", "pp38-*", "pp39-*"] - exclude: - # PyPy only supports x86_64 on Windows - - arch: auto32 - python_tag: "pp37-*" - - arch: auto32 - python_tag: "pp38-*" - - arch: auto32 - python_tag: "pp39-*" - - # PyPy Windows is currently broken in scikit-build - - arch: auto64 - python_tag: "pp37-*" - - arch: auto64 - python_tag: "pp38-*" - - arch: auto64 - python_tag: "pp39-*" - env: - CIBW_BUILD: ${{matrix.python_tag}} - CIBW_ARCHS: ${{matrix.arch}} - CIBW_TEST_REQUIRES: pytest hypothesis - CIBW_TEST_COMMAND: pytest {package}/tests - CIBW_BUILD_VERBOSITY: 3 - - steps: - - uses: actions/checkout@v2 - with: - submodules: 'true' - - - uses: actions/setup-python@v2 - - - name: Build wheels - uses: pypa/cibuildwheel@v2.7.0 - with: - output-dir: wheelhouse - - - name: Upload wheels - uses: actions/upload-artifact@v2 - with: - path: ./wheelhouse/*.whl - - build_wheels_macos: - name: Build wheel on macos-latest/${{matrix.arch}}/${{matrix.python_tag}} - runs-on: macos-latest - strategy: - fail-fast: false - matrix: - arch: [x86_64, arm64, universal2] - python_tag: ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "cp310-*", "pp37-*", "pp38-*", "pp39-*"] - exclude: - # MacOS Arm only supported since Python 3.8 - - arch: arm64 - python_tag: "cp36-*" - - arch: arm64 - python_tag: "cp37-*" - - arch: universal2 - python_tag: "cp36-*" - - arch: universal2 - python_tag: "cp37-*" - - # PyPy not supported on MacOS Arm - - arch: arm64 - python_tag: "pp37-*" - - arch: arm64 - python_tag: "pp38-*" - - arch: arm64 - python_tag: "pp39-*" - - arch: universal2 - python_tag: "pp37-*" - - arch: universal2 - python_tag: "pp38-*" - - arch: universal2 - python_tag: "pp39-*" - env: - CIBW_BUILD: ${{matrix.python_tag}} - CIBW_ARCHS: ${{matrix.arch}} - CIBW_TEST_SKIP: "*-macosx_{universal2,arm64}" - CIBW_TEST_REQUIRES: pytest hypothesis - CIBW_TEST_COMMAND: pytest {package}/tests - CIBW_BUILD_VERBOSITY: 3 - - steps: - - uses: actions/checkout@v2 - with: - submodules: 'true' - - - uses: actions/setup-python@v2 - - - name: Build wheels - uses: pypa/cibuildwheel@v2.7.0 - with: - output-dir: wheelhouse - - - name: Upload wheels - uses: actions/upload-artifact@v2 - with: - path: ./wheelhouse/*.whl - - build_wheels_linux: - name: Build wheels on ubuntu-latest/${{matrix.arch}}/${{matrix.python_tag}} - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - arch: [auto, aarch64, ppc64le, s390x] - python_tag: [ "cp36-*", "cp37-*", "cp38-*", "cp39-*", "cp310-*", "pp37-*", "pp38-*", "pp39-*"] - exclude: - # PyPy builds not available for these platforms - - arch: ppc64le - python_tag: "pp37-*" - - arch: ppc64le - python_tag: "pp38-*" - - arch: ppc64le - python_tag: "pp39-*" - - arch: s390x - python_tag: "pp37-*" - - arch: s390x - python_tag: "pp38-*" - - arch: s390x - python_tag: "pp39-*" - env: - CIBW_ARCHS_LINUX: ${{matrix.arch}} - CIBW_BUILD: ${{matrix.python_tag}} - CIBW_TEST_SKIP: "{*_{aarch64,ppc64le,s390x},*musllinux_*}" - CIBW_TEST_REQUIRES: pytest hypothesis - CIBW_TEST_COMMAND: pytest {package}/tests - CIBW_BUILD_VERBOSITY: 3 - - steps: - - uses: actions/checkout@v2 - with: - submodules: 'true' - - - uses: actions/setup-python@v2 - - - uses: docker/setup-qemu-action@v1 - name: Set up QEMU - - - name: Build wheel - uses: pypa/cibuildwheel@v2.7.0 - with: - output-dir: wheelhouse - - - name: Upload wheels - uses: actions/upload-artifact@v2 - with: - path: ./wheelhouse/*.whl - - build_sdist: - name: Build source distribution - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - submodules: 'true' - - - uses: actions/setup-python@v2 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pytest hypothesis mypy rapidfuzz_capi Cython==3.0.0a11 - - # The cythonized files allow installation from the sdist without cython - - name: Generate cython - run: | - chmod +x ./src/jarowinkler/generate.sh - ./src/jarowinkler/generate.sh - - - name: Build sdist - run: | - git apply ./tools/sdist.patch - pip install build; python -m build --sdist - # test whether tarball contains all files required for compiling - pip install dist/jarowinkler-*.tar.gz - - - name: Test type stubs - run: | - python -m mypy.stubtest jarowinkler --ignore-missing-stub - - - name: Test with pytest - run: | - pytest tests - python -m pytest tests - - - uses: actions/upload-artifact@v2 - with: - path: dist/*.tar.gz - - deploy-wheels: - if: github.event_name == 'release' && github.event.action == 'published' - needs: [build_wheels_windows, build_wheels_macos, build_wheels_linux, build_sdist] - name: deploy wheels to pypi - runs-on: ubuntu-18.04 - - steps: - - uses: actions/download-artifact@v2 - with: - name: artifact - path: dist - - - uses: pypa/gh-action-pypi-publish@master - with: - user: __token__ - password: ${{ secrets.pypi_password }} diff --git a/.subtrees/jarowinkler/.gitignore b/.subtrees/jarowinkler/.gitignore deleted file mode 100644 index 27d0b7a..0000000 --- a/.subtrees/jarowinkler/.gitignore +++ /dev/null @@ -1,30 +0,0 @@ -.vscode/ -__pycache__/ -.idea/ -build/ -_skbuild/ -*.egg-info/ -dist/ -*.data -*.so -*.o -*.out -test.py -src/*.html -.coverage -coverage.xml -.venv/ -.coveragerc - -# Sphinx documentation -site/ - -# benchmark results -bench_results/ - -# Hypothesis results -.hypothesis/ - -# Cython -*.cxx - diff --git a/.subtrees/jarowinkler/.gitmodules b/.subtrees/jarowinkler/.gitmodules deleted file mode 100644 index ae1cad3..0000000 --- a/.subtrees/jarowinkler/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "extern/jarowinkler-cpp"] - path = extern/jarowinkler-cpp - url = https://github.com/maxbachmann/jarowinkler-cpp.git diff --git a/.subtrees/jarowinkler/CHANGELOG.md b/.subtrees/jarowinkler/CHANGELOG.md deleted file mode 100644 index 9308c04..0000000 --- a/.subtrees/jarowinkler/CHANGELOG.md +++ /dev/null @@ -1,49 +0,0 @@ -## Changelog - -### [1.2.0] - 2022-07-19 -#### Changed -- added in-tree build backend to install cmake and ninja only when it is not installed yet - and only when wheels are available - -### [1.1.2] - 2022-07-11 -#### Fixed -- remove incorrect module import - -### [1.1.1] - 2022-07-09 -#### Fixed -- fix missing type stubs - -### [1.1.0] - 2022-07-04 -#### Changed -- change src layout to make package import from root directory possible -- added pure python fallback for all implementations with the following exceptions: - - no support for sequences of hashables. Only strings supported so far - -#### Fixed -- fixed type hints of jarowinkler_similarity - -### [1.0.5] - 2022-06-29 -#### Fixed -- treat hash for -1 and -2 as different - -### [1.0.4] - 2022-06-23 -#### Changed -- add fallback implementations of `jarowinkler-cpp` back to wheel, - since some package building systems like piwheels can't clone sources - -## [1.0.3] - 2022-06-11 -#### Added -- add wheels for PyPy3.9 -- added tests to sdist - -#### Changed -- Allow installation from system installed version of jarowinkler-cpp -- use system version of cmake on arm platforms, since the cmake package fails to compile - -## [1.0.2] - 2022-03-13 -#### Fixed -- only depend on cython when it is actually required - -## [1.0.1] - 2022-03-06 -#### Fixed -- type hints are now correctly packaged in the wheels diff --git a/.subtrees/jarowinkler/CMakeLists.txt b/.subtrees/jarowinkler/CMakeLists.txt deleted file mode 100644 index 709c81f..0000000 --- a/.subtrees/jarowinkler/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -cmake_minimum_required(VERSION 3.12.0) - -cmake_policy(SET CMP0054 NEW) -set(SKBUILD_LINK_LIBRARIES_KEYWORD PRIVATE) - -set(THREADS_PREFER_PTHREAD_FLAG ON) -if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version") -endif() - -project(jarowinkler LANGUAGES C CXX) - -find_package(PythonExtensions REQUIRED) -find_package(Python COMPONENTS Interpreter Development) -include(FetchContent) - -set(JW_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) - -find_package(jaro_winkler 1.0.2 QUIET) -if (jaro_winkler_FOUND) - message("Using system supplied version of jaro_winkler") -else() - message("Using packaged version of jaro_winkler") - add_subdirectory(extern/jarowinkler-cpp) -endif() - -add_subdirectory(src/jarowinkler) diff --git a/.subtrees/jarowinkler/LICENSE b/.subtrees/jarowinkler/LICENSE deleted file mode 100644 index 5b55a39..0000000 --- a/.subtrees/jarowinkler/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Copyright © 2020 maxbachmann -Copyright © 2011 Adam Cohen - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/.subtrees/jarowinkler/MANIFEST.in b/.subtrees/jarowinkler/MANIFEST.in deleted file mode 100644 index 10c95b2..0000000 --- a/.subtrees/jarowinkler/MANIFEST.in +++ /dev/null @@ -1,16 +0,0 @@ -include MANIFEST.in -include setup.py -include CMakeLists.txt -include README.md -include LICENSE -include pyproject.toml -include _custom_build/backend.py -include src/jarowinkler/py.typed - -recursive-include src/jarowinkler CMakeLists.txt -recursive-include src/jarowinkler *.hpp *.pyx *.pxd *.cxx *.pyi -recursive-include tests * - -include extern/jarowinkler-cpp/LICENSE -include extern/jarowinkler-cpp/CMakeLists.txt -recursive-include extern/jarowinkler-cpp/jaro_winkler *.hpp diff --git a/.subtrees/jarowinkler/README.md b/.subtrees/jarowinkler/README.md deleted file mode 100644 index c4ffae1..0000000 --- a/.subtrees/jarowinkler/README.md +++ /dev/null @@ -1,117 +0,0 @@ - -
- -
- -## ⚙️ Installation - -You can install this library from [PyPI](https://pypi.org/project/jarowinkler/) with pip: -``` -pip install jarowinkler -``` -JaroWinkler provides binary wheels for all common platforms. - -### Source builds - -For a source build (for example from a SDist packaged) you only require a C++14 compatible compiler. You can install directly from GitHub if you would like. -``` -pip install git+https://github.com/maxbachmann/JaroWinkler.git@main -``` - -## 📖 Usage - -Any algorithms in JaroWinkler can not only be used with strings, but with any arbitary sequences of hashable objects: -```python -from jarowinkler import jarowinkler_similarity - - -jarowinkler_similarity("this is an example".split(), ["this", "is", "a", "example"]) -# 0.8666666666666667 -``` - -So as long as two objects have the same hash they are treated as similar. You can provide a `__hash__` method for your own object instances. - -```python -class MyObject: - def __init__(self, hash): - self.hash = hash - - def __hash__(self): - return self.hash - -jarowinkler_similarity([MyObject(1), MyObject(2)], [MyObject(1), MyObject(2), MyObject(3)]) -# 0.9111111111111111 -``` - -All algorithms provide a `score_cutoff` parameter. This parameter can be used to filter out bad matches. Internally this allows JaroWinkler to select faster implementations in some places: - -```python -jaro_similarity("Johnathan", "Jonathan", score_cutoff=0.9) -# 0.0 - -jaro_similarity("Johnathan", "Jonathan", score_cutoff=0.85) -# 0.8796296296296297 -``` - -JaroWinkler can be used with RapidFuzz, which provides multiple methods to compute string metrics on collections of inputs. JaroWinkler implements the RapidFuzz C-API which allows RapidFuzz to call the functions without any of the usual overhead of python, which makes this even faster. - -```python -from rapidfuzz import process - -process.cdist(["Johnathan", "Jonathan"], ["Johnathan", "Jonathan"], scorer=jarowinkler_similarity) -array([[1. , 0.9037037], - [0.9037037, 1. ]], dtype=float32) -``` - -## 👍 Contributing - -PRs are welcome! -- Found a bug? Report it in form of an [issue](https://github.com/maxbachmann/JaroWinkler/issues) or even better fix it! -- Can make something faster? Great! Just avoid external dependencies and remember that existing functionality should still work. -- Something else that do you think is good? Do it! Just make sure that CI passes and everything from the README is still applicable (interface, features, and so on). -- Have no time to code? Tell your friends and subscribers about JaroWinkler. More users, more contributions, more amazing features. - -Thank you :heart: - -## ⚠️ License -Copyright 2021 - present [maxbachmann](https://github.com/maxbachmann). `JaroWinkler` is free and open-source software licensed under the [MIT License](https://github.com/maxbachmann/JaroWinkler/blob/main/LICENSE). diff --git a/.subtrees/jarowinkler/_custom_build/backend.py b/.subtrees/jarowinkler/_custom_build/backend.py deleted file mode 100644 index 5cd1ba5..0000000 --- a/.subtrees/jarowinkler/_custom_build/backend.py +++ /dev/null @@ -1,90 +0,0 @@ -from setuptools import build_meta as _orig -from packaging import version as _version -from packaging.tags import sys_tags as _sys_tags -from skbuild.exceptions import SKBuildError as _SKBuildError -from skbuild.cmaker import get_cmake_version as _get_cmake_version -import subprocess as _subprocess -import platform as _platform - -prepare_metadata_for_build_wheel = _orig.prepare_metadata_for_build_wheel -build_wheel = _orig.build_wheel -build_sdist = _orig.build_sdist -get_requires_for_build_sdist = _orig.get_requires_for_build_sdist - -cmake_wheels = { - "win_amd64", - "win32", - "musllinux_1_1_x86_64", - "musllinux_1_1_s390x", - "musllinux_1_1_ppc64le", - "musllinux_1_1_i686", - "musllinux_1_1_aarch64", - "manylinux_2_17_s390x", - "manylinux_2_17_ppc64le", - "manylinux_2_17_aarch64", - "manylinux_2_17_x86_64", - "manylinux_2_17_i686", - "manylinux_2_5_x86_64", - "manylinux_2_5_i686", - "macosx_10_10_universal2", -} - -ninja_wheels = { - "win_amd64", - "win32.whl", - "musllinux_1_1_x86_64", - "musllinux_1_1_s390x", - "musllinux_1_1_ppc64le", - "musllinux_1_1_i686", - "musllinux_1_1_aarch64", - "manylinux_2_17_s390x", - "manylinux_2_17_ppc64le", - "manylinux_2_17_aarch64", - "manylinux_2_5_x86_64", - "manylinux_2_5_i686", - "macosx_10_9_universal2", -} - -def _cmake_required(): - try: - if _version.parse(_get_cmake_version()) >= _version.parse("3.12"): - print("Using System version of cmake") - return False - except _SKBuildError: - pass - - for tag in _sys_tags(): - if tag.platform in cmake_wheels: - return True - - print("No Cmake wheel available on platform") - return False - -def _ninja_required(): - if _platform.system() == "Windows": - print("Ninja is part of the MSVC installation on Windows") - return False - - for generator in ("ninja", "make"): - try: - _subprocess.check_output([generator, '--version']) - print(f"Using System version of {generator}") - return False - except (OSError, _subprocess.CalledProcessError): - pass - - for tag in _sys_tags(): - if tag.platform in ninja_wheels: - return True - - print("No Ninja wheel available on platform") - return False - -def get_requires_for_build_wheel(self, config_settings=None): - packages = [] - if _cmake_required(): - packages.append('cmake') - if _ninja_required(): - packages.append('ninja') - - return _orig.get_requires_for_build_wheel(config_settings) + packages diff --git a/.subtrees/jarowinkler/bench/benchmark_jaro_winkler.py b/.subtrees/jarowinkler/bench/benchmark_jaro_winkler.py deleted file mode 100644 index 56907d6..0000000 --- a/.subtrees/jarowinkler/bench/benchmark_jaro_winkler.py +++ /dev/null @@ -1,52 +0,0 @@ -# todo combine benchmarks of scorers into common code base -import timeit -import pandas -import numpy as np - -def benchmark(name, func, setup, lengths, count): - print(f"starting {name}") - start = timeit.default_timer() - results = [] - for length in lengths: - test = timeit.Timer(func, setup=setup.format(length, count)) - results.append(min(test.timeit(number=1) for _ in range(7)) / count) - stop = timeit.default_timer() - print(f"finished {name}, Runtime: ", stop - start) - return results - -setup =""" -from jarowinkler import JaroWinkler -import jellyfish -import Levenshtein -import string -import random -random.seed(18) -characters = string.ascii_letters + string.digits + string.whitespace + string.punctuation -a = ''.join(random.choice(characters) for _ in range({0})) -b_list = [''.join(random.choice(characters) for _ in range({0})) for _ in range({1})] -""" - -lengths = list(range(1,512,4)) -count = 4000 - -time_jarowinkler = benchmark("jarowinkler", - '[JaroWinkler.similarity(a, b) for b in b_list]', - setup, lengths, count) - -# this gets very slow, so only benchmark it for smaller values -time_jellyfish = benchmark("jellyfish", - '[jellyfish.jaro_winkler(a, b) for b in b_list]', - setup, list(range(1,128,4)), count) + [np.NaN] * 96 - -time_python_levenshtein = benchmark("python-Levenshtein", - '[Levenshtein.jaro_winkler(a, b) for b in b_list]', - setup, list(range(1,256,4)), count) + [np.NaN] * 64 - -df = pandas.DataFrame(data={ - "length": lengths, - "jarowinkler": time_jarowinkler, - "jellyfish": time_jellyfish, - "python-Levenshtein": time_python_levenshtein -}) - -df.to_csv("results/jaro_winkler.csv", sep=',',index=False) diff --git a/.subtrees/jarowinkler/bench/benchmark_visualize.py b/.subtrees/jarowinkler/bench/benchmark_visualize.py deleted file mode 100644 index 089d2e6..0000000 --- a/.subtrees/jarowinkler/bench/benchmark_visualize.py +++ /dev/null @@ -1,22 +0,0 @@ -import pandas as pd -import matplotlib.pyplot as plt - -df=pd.read_csv("results/jaro_winkler.csv") - -df *= 1000 * 1000 -df["length"] /= 1000 * 1000 - - -ax=df.plot(x="length") - -plt.xticks(list(range(0, 513, 64))) - -plt.title("Performance comparision of the \nJaro-Winkler similarity in different libraries") -plt.xlabel("string length [in characters]") -plt.ylabel("runtime [μs]") -ax.set_xlim(xmin=0) -ax.set_ylim(bottom=0) -plt.grid() -plt.show() - - diff --git a/.subtrees/jarowinkler/bench/results/JaroWinkler.svg b/.subtrees/jarowinkler/bench/results/JaroWinkler.svg deleted file mode 100644 index 1b840f2..0000000 --- a/.subtrees/jarowinkler/bench/results/JaroWinkler.svg +++ /dev/null @@ -1,1587 +0,0 @@ - - - diff --git a/.subtrees/jarowinkler/bench/results/jaro_winkler.csv b/.subtrees/jarowinkler/bench/results/jaro_winkler.csv deleted file mode 100644 index bb53364..0000000 --- a/.subtrees/jarowinkler/bench/results/jaro_winkler.csv +++ /dev/null @@ -1,129 +0,0 @@ -length,jarowinkler,jellyfish,python-Levenshtein -1,1.0242649997849184e-07,2.1970424999722126e-06,1.3773950001905178e-07 -5,1.604507500019281e-07,2.2241952499939545e-06,1.7559025002356065e-07 -9,1.723850000132643e-07,2.262624000024971e-06,2.1795374999555862e-07 -13,1.8368225002518557e-07,2.374377749958967e-06,2.877064999893264e-07 -17,1.924025000050733e-07,2.5461917500138043e-06,3.8818949997221353e-07 -21,2.0168775000684037e-07,2.744041250025475e-06,5.088235000130226e-07 -25,2.1474375000707368e-07,3.0330870000057074e-06,6.272479999438474e-07 -29,2.2143399999663415e-07,3.3364647500206955e-06,7.628200000340257e-07 -33,2.312827500077219e-07,3.6793797499967697e-06,9.097077499973238e-07 -37,2.425382499779971e-07,4.034564999983559e-06,1.1261084999887317e-06 -41,2.527402500049902e-07,4.3864649999818535e-06,1.257153750032103e-06 -45,2.6626899997950205e-07,4.817342999956509e-06,1.474185999995825e-06 -49,2.7837799999019806e-07,5.273319750017435e-06,1.6941224999982296e-06 -53,2.916282500109446e-07,5.771962249980334e-06,1.9098797499736977e-06 -57,3.0220974997519077e-07,6.267949749997115e-06,2.1186800000236873e-06 -61,3.139089999990574e-07,6.878165500040723e-06,2.3666472499712653e-06 -65,9.577017499964314e-07,7.4521482500244925e-06,2.59658500004889e-06 -69,1.02525724997804e-06,8.081028749984397e-06,2.834802999984731e-06 -73,1.0829539999974712e-06,8.788844749972213e-06,3.129796999985501e-06 -77,1.2351502499825528e-06,9.51282099998707e-06,3.3736514999986865e-06 -81,1.2876245000086329e-06,1.024160924998796e-05,3.654816749985912e-06 -85,1.3457082499996887e-06,1.1041964499952429e-05,3.954588499993861e-06 -89,1.4153912499921263e-06,1.1803611000004821e-05,4.2408792500054916e-06 -93,1.45458750000671e-06,1.2555690750048142e-05,4.554326999993919e-06 -97,1.5235607500017068e-06,1.3453278249983214e-05,4.865760500024408e-06 -101,1.5784222499917177e-06,1.4144099250017917e-05,5.180090250007652e-06 -105,1.6366962500171667e-06,1.5205634249980448e-05,5.503461500040885e-06 -109,1.71132099998772e-06,1.5863534250001975e-05,5.833295749994249e-06 -113,1.783151500006852e-06,1.6970079249972512e-05,6.203173500011871e-06 -117,1.8469757499985918e-06,1.781968174998383e-05,6.561579749984503e-06 -121,1.8918727500079059e-06,1.84527600000024e-05,6.9218770000247784e-06 -125,1.9274875000121483e-06,1.9624498999974093e-05,7.310718749977241e-06 -129,2.2159554999916508e-06,,7.5819754999884025e-06 -133,2.2954330000004573e-06,,7.969897750001564e-06 -137,2.3665685000082704e-06,,8.366741500026364e-06 -141,2.4165595000056327e-06,,8.765191750001123e-06 -145,2.4681579999992208e-06,,9.184628750006141e-06 -149,2.5228457500077185e-06,,9.602934249983264e-06 -153,2.5684847499860553e-06,,1.0019458500039491e-05 -157,2.6178302499886286e-06,,1.0451530250009e-05 -161,2.6563587499879304e-06,,1.0880893499972898e-05 -165,2.708121250009299e-06,,1.1332152499960557e-05 -169,2.758271999994122e-06,,1.1775580749997517e-05 -173,2.8185682500065923e-06,,1.2239040999986628e-05 -177,2.860736249999718e-06,,1.2703667250036687e-05 -181,2.9110177500228927e-06,,1.3161808250004014e-05 -185,2.9579790000013874e-06,,1.3631287750001775e-05 -189,3.0070002499940073e-06,,1.4119006749979234e-05 -193,3.2074334999947496e-06,,1.4624213249987861e-05 -197,3.290538500010598e-06,,1.5134493499999735e-05 -201,3.3515060000013364e-06,,1.5658886499977598e-05 -205,3.412287250000645e-06,,1.6156184999999822e-05 -209,3.459391749998986e-06,,1.668725200005383e-05 -213,3.50881224997579e-06,,1.7207881249987622e-05 -217,3.5567982500026573e-06,,1.7732209249970764e-05 -221,3.591963499985695e-06,,1.82662612500053e-05 -225,3.6219195000057878e-06,,1.88169255000048e-05 -229,3.662653750012623e-06,,1.9391362500016384e-05 -233,3.703763749996369e-06,,1.995370349999348e-05 -237,3.755599750007832e-06,,2.05113547499991e-05 -241,3.767430000010563e-06,,2.1101369500001966e-05 -245,3.8017152500060546e-06,,2.1704576999979962e-05 -249,3.850303250004572e-06,,2.2287643250024303e-05 -253,3.896969250007487e-06,,2.2908058750033435e-05 -257,4.140366999990874e-06,, -261,4.22011474998385e-06,, -265,4.2900737500133345e-06,, -269,4.329841250012123e-06,, -273,4.39095650000354e-06,, -277,4.449307999976781e-06,, -281,4.502229749988373e-06,, -285,4.560061000006499e-06,, -289,4.669512500015571e-06,, -293,4.663764999975229e-06,, -297,4.724741250015541e-06,, -301,4.7662102500112265e-06,, -305,4.83254349995832e-06,, -309,4.873275249963171e-06,, -313,4.932502999963617e-06,, -317,4.978358999949251e-06,, -321,5.282012499947086e-06,, -325,5.36726349997707e-06,, -329,5.430304250012341e-06,, -333,5.472211750031874e-06,, -337,5.5281757499869855e-06,, -341,5.582512750038404e-06,, -345,5.626182500009236e-06,, -349,5.66335949997665e-06,, -353,5.646911749977335e-06,, -357,5.685996249951586e-06,, -361,5.725557999994635e-06,, -365,5.744895250018089e-06,, -369,5.78226274996041e-06,, -373,5.792189250030333e-06,, -377,5.839759249965937e-06,, -381,5.896312250001756e-06,, -385,6.1217295000233206e-06,, -389,6.182883750000201e-06,, -393,6.2012852499719885e-06,, -397,6.2434312499703995e-06,, -401,6.280881999998655e-06,, -405,6.341878749992702e-06,, -409,6.389777749973291e-06,, -413,6.441894000033699e-06,, -417,6.478261499978544e-06,, -421,6.519561999994039e-06,, -425,6.581959500010726e-06,, -429,6.62881125003878e-06,, -433,6.693353749994912e-06,, -437,6.717007500014915e-06,, -441,6.778519750014311e-06,, -445,6.792850250008087e-06,, -449,7.10868499999151e-06,, -453,7.1913857499907865e-06,, -457,7.262821499978145e-06,, -461,7.324349999976221e-06,, -465,7.3540757499586105e-06,, -469,7.43951474998994e-06,, -473,7.4477272499962055e-06,, -477,7.495162249995246e-06,, -481,7.522579749945635e-06,, -485,7.55803875000538e-06,, -489,7.57794699995884e-06,, -493,7.66633949996276e-06,, -497,7.68676900003129e-06,, -501,7.716344000016307e-06,, -505,7.767418250011816e-06,, -509,7.779927999990831e-06,, diff --git a/.subtrees/jarowinkler/extern/jarowinkler-cpp b/.subtrees/jarowinkler/extern/jarowinkler-cpp deleted file mode 160000 index 655c259..0000000 --- a/.subtrees/jarowinkler/extern/jarowinkler-cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 655c25926250f6a73a9380a862a603c70e016aec diff --git a/.subtrees/jarowinkler/pyproject.toml b/.subtrees/jarowinkler/pyproject.toml deleted file mode 100644 index 2ea8ca1..0000000 --- a/.subtrees/jarowinkler/pyproject.toml +++ /dev/null @@ -1,9 +0,0 @@ -[build-system] -requires = [ - "setuptools>=42", - "scikit-build>=0.13.0", - "Cython==3.0.0a11", - "rapidfuzz_capi==1.0.5" -] -build-backend = "backend" -backend-path = ["_custom_build"] diff --git a/.subtrees/jarowinkler/setup.py b/.subtrees/jarowinkler/setup.py deleted file mode 100644 index 45d2009..0000000 --- a/.subtrees/jarowinkler/setup.py +++ /dev/null @@ -1,77 +0,0 @@ -import os - -def show_message(*lines): - print("=" * 74) - for line in lines: - print(line) - print("=" * 74) - -with open('README.md', 'rt', encoding="utf8") as f: - readme = f.read() - -setup_args = { - "name": "jarowinkler", - "version": "1.2.0", - "url": "https://github.com/maxbachmann/JaroWinkler", - "author": "Max Bachmann", - "author_email": "pypi@maxbachmann.de", - "description": "library for fast approximate string matching using Jaro and Jaro-Winkler similarity", - "long_description": readme, - "long_description_content_type": "text/markdown", - - "license": "MIT", - "classifiers": [ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "License :: OSI Approved :: MIT License" - ], - - "packages": ["jarowinkler"], - "package_dir": {'': 'src'}, - "package_data": {"jarowinkler": ["*.pyi", "py.typed"]}, - "python_requires": ">=3.6" -} - -def run_setup(with_binary): - if with_binary: - from skbuild import setup - import rapidfuzz_capi - - setup( - **setup_args, - cmake_args=[ - f'-DRF_CAPI_PATH:STRING={rapidfuzz_capi.get_include()}' - ] - ) - else: - from setuptools import setup - setup(**setup_args) - -# when packaging only build wheels which include the C extension -packaging = "1" in { - os.environ.get("CIBUILDWHEEL", "0"), - os.environ.get("CONDA_BUILD", "0"), - os.environ.get("JAROWINKLER_BUILD_EXTENSION", "0") -} -if packaging: - run_setup(True) -else: - try: - run_setup(True) - except: - show_message( - "WARNING: The C extension could not be compiled, speedups" - " are not enabled.", - "Failure information, if any, is above.", - "Retrying the build without the C extension now.", - ) - run_setup(False) - show_message( - "WARNING: The C extension could not be compiled, speedups" - " are not enabled.", - "Plain-Python build succeeded.", - ) diff --git a/.subtrees/jarowinkler/src/jarowinkler/CMakeLists.txt b/.subtrees/jarowinkler/src/jarowinkler/CMakeLists.txt deleted file mode 100644 index 8caf71a..0000000 --- a/.subtrees/jarowinkler/src/jarowinkler/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -function(create_cython_target _name) - if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx) - set(${_name} ${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx PARENT_SCOPE) - else() - find_package(Cython REQUIRED) - # should use target_include_directories once this is supported by scikit-build - include_directories(${JW_BASE_DIR}/jarowinkler) - add_cython_target(${_name} CXX) - set(${_name} ${_name} PARENT_SCOPE) - endif() -endfunction(create_cython_target) - -create_cython_target(_initialize_cpp) -add_library(_initialize_cpp MODULE ${_initialize_cpp}) -target_compile_features(_initialize_cpp PUBLIC cxx_std_14) -target_include_directories(_initialize_cpp PRIVATE ${RF_CAPI_PATH} ${JW_BASE_DIR}/jarowinkler) -target_link_libraries(_initialize_cpp PRIVATE jaro_winkler::jaro_winkler) -python_extension_module(_initialize_cpp) -install(TARGETS _initialize_cpp LIBRARY DESTINATION src/jarowinkler) diff --git a/.subtrees/jarowinkler/src/jarowinkler/__init__.py b/.subtrees/jarowinkler/src/jarowinkler/__init__.py deleted file mode 100644 index de157f6..0000000 --- a/.subtrees/jarowinkler/src/jarowinkler/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -__author__: str = "Max Bachmann" -__license__: str = "MIT" -__version__: str = "1.2.0" - -def _fallback_import(module: str, name: str): - import importlib - import os - - impl = os.environ.get("JAROWINKLER_IMPLEMENTATION") - - if impl == "cpp": - mod = importlib.import_module(module + "_cpp") - elif impl == "python": - mod = importlib.import_module(module + "_py") - else: - try: - mod = importlib.import_module(module + "_cpp") - except ModuleNotFoundError: - mod = importlib.import_module(module + "_py") - - func = getattr(mod, name) - if not func: - raise ImportError( - f"cannot import name '{name}' from '{mod.__name}' ({mod.__file__})" - ) - return func - -jaro_similarity = _fallback_import("jarowinkler._initialize", "jaro_similarity") -jarowinkler_similarity = _fallback_import("jarowinkler._initialize", "jarowinkler_similarity") \ No newline at end of file diff --git a/.subtrees/jarowinkler/src/jarowinkler/__init__.pyi b/.subtrees/jarowinkler/src/jarowinkler/__init__.pyi deleted file mode 100644 index a9a2ccf..0000000 --- a/.subtrees/jarowinkler/src/jarowinkler/__init__.pyi +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Callable, Hashable, Sequence, Optional, Union, TypeVar - -__author__: str -__license__: str -__version__: str - -_StringType = Sequence[Hashable] -_S1 = TypeVar("_S1") -_S2 = TypeVar("_S2") - -def jaro_similarity( - s1: _S1, s2: _S2, *, - processor: Optional[Callable[[Union[_S1, _S2]], _StringType]] = None, - score_cutoff: Optional[float] = 0) -> float: ... - -def jarowinkler_similarity( - s1: _S1, s2: _S2, *, - prefix_weight: float = 0.1, - processor: Optional[Callable[[Union[_S1, _S2]], _StringType]] = None, - score_cutoff: Optional[float] = 0) -> float: ... diff --git a/.subtrees/jarowinkler/src/jarowinkler/_initialize_cpp.pyx b/.subtrees/jarowinkler/src/jarowinkler/_initialize_cpp.pyx deleted file mode 100644 index 013ede5..0000000 --- a/.subtrees/jarowinkler/src/jarowinkler/_initialize_cpp.pyx +++ /dev/null @@ -1,165 +0,0 @@ -# distutils: language=c++ -# cython: language_level=3, binding=True, linetrace=True - -from array import array - -from rapidfuzz_capi cimport ( - RF_String, RF_Scorer, RF_Kwargs, RF_ScorerFunc, RF_Preprocess, RF_KwargsInit, - SCORER_STRUCT_VERSION, RF_Preprocessor, - RF_ScorerFlags, - RF_SCORER_FLAG_RESULT_F64, RF_SCORER_FLAG_SYMMETRIC -) -from common cimport RF_StringWrapper, conv_sequence - -from libcpp cimport bool -from libc.stdint cimport int64_t -from libc.stdlib cimport malloc, free -from cpython.pycapsule cimport PyCapsule_New, PyCapsule_IsValid, PyCapsule_GetPointer -from cython.operator cimport dereference - -cdef extern from "scorer.hpp": - double jaro_similarity_func( const RF_String&, const RF_String&, double) nogil except + - bool JaroSimilarityInit( RF_ScorerFunc*, const RF_Kwargs*, int64_t, const RF_String*) nogil except False - double jaro_winkler_similarity_func(const RF_String &, const RF_String &, double, double) nogil except + - bool JaroWinklerSimilarityInit(RF_ScorerFunc *, const RF_Kwargs *, int64_t, const RF_String *) nogil except False - -cdef inline void preprocess_strings(s1, s2, processor, RF_StringWrapper* s1_proc, RF_StringWrapper* s2_proc) except *: - cdef RF_Preprocessor* preprocess_context = NULL - - if processor is None: - s1_proc[0] = RF_StringWrapper(conv_sequence(s1)) - s2_proc[0] = RF_StringWrapper(conv_sequence(s2)) - else: - processor_capsule = getattr(processor, '_RF_Preprocess', processor) - if PyCapsule_IsValid(processor_capsule, NULL): - preprocess_context =