From 7f833016154bc2cd22c73c3a39204c94c9290571 Mon Sep 17 00:00:00 2001 From: Hsiang-Fu Yu Date: Mon, 26 Apr 2021 20:42:01 +0000 Subject: [PATCH] Initial commit --- .github/ISSUE_TEMPLATE/bug_report.md | 44 + .github/ISSUE_TEMPLATE/feature_request.md | 17 + .github/PULL_REQUEST_TEMPLATE.md | 6 + .github/build_pypi_wheel.sh | 35 + .github/workflows/build_pypi.yml | 54 + .github/workflows/codeql.yml | 50 + .github/workflows/pytest.yml | 31 + .github/workflows/style_type_check.yml | 32 + .style_type.cfg | 7 + CODE_OF_CONDUCT.md | 4 + CONTRIBUTING.md | 89 + LICENSE | 201 + MANIFEST.in | 8 + NOTICE | 1 + README.md | 147 + THIRD-PARTY-LICENSES.txt | 123 + pecos/__init__.py | 100 + pecos/apps/__init__.py | 10 + pecos/apps/text2text/README.md | 88 + pecos/apps/text2text/__init__.py | 11 + pecos/apps/text2text/evaluate.py | 130 + pecos/apps/text2text/model.py | 488 + pecos/apps/text2text/predict.py | 194 + pecos/apps/text2text/train.py | 308 + pecos/core/__init__.py | 21 + pecos/core/base.py | 1300 + pecos/core/libpecos.cpp | 276 + pecos/core/third_party/nlohmann_json/json.hpp | 22875 ++++++++++++++++ .../robin_hood_hashing/robin_hood.h | 2665 ++ pecos/core/utils/clustering.hpp | 371 + pecos/core/utils/file_util.hpp | 129 + pecos/core/utils/matrix.hpp | 1038 + pecos/core/utils/parallel.hpp | 75 + pecos/core/utils/random.hpp | 47 + pecos/core/utils/scipy_loader.hpp | 436 + pecos/core/utils/tfidf.hpp | 1465 + pecos/core/xmc/inference.hpp | 1971 ++ pecos/core/xmc/linear_solver.hpp | 585 + pecos/utils/__init__.py | 10 + pecos/utils/cli.py | 59 + pecos/utils/cluster_util.py | 350 + pecos/utils/featurization/__init__.py | 10 + pecos/utils/featurization/text/README.md | 86 + pecos/utils/featurization/text/__init__.py | 10 + pecos/utils/featurization/text/preprocess.py | 377 + .../text/sentencepiece/__init__.py | 11 + .../featurization/text/sentencepiece/model.py | 355 + .../text/sentencepiece/predict.py | 71 + .../featurization/text/sentencepiece/train.py | 113 + pecos/utils/featurization/text/vectorizers.py | 814 + pecos/utils/logging_util.py | 37 + pecos/utils/parallel_util.py | 32 + pecos/utils/smat_util.py | 823 + pecos/utils/torch_util.py | 67 + pecos/xmc/__init__.py | 18 + pecos/xmc/base.py | 1685 ++ pecos/xmc/xlinear/README.md | 100 + pecos/xmc/xlinear/__init__.py | 11 + pecos/xmc/xlinear/evaluate.py | 63 + pecos/xmc/xlinear/model.py | 388 + pecos/xmc/xlinear/predict.py | 159 + pecos/xmc/xlinear/train.py | 328 + pecos/xmc/xtransformer/README.md | 59 + pecos/xmc/xtransformer/__init__.py | 12 + pecos/xmc/xtransformer/matcher.py | 1439 + pecos/xmc/xtransformer/model.py | 529 + pecos/xmc/xtransformer/module.py | 114 + pecos/xmc/xtransformer/network.py | 394 + pecos/xmc/xtransformer/predict.py | 181 + pecos/xmc/xtransformer/train.py | 567 + setup.cfg | 46 + setup.py | 174 + test/pecos/apps/text2text/test_text2text.py | 194 + test/pecos/test_pecos.py | 15 + .../text/sentencepiece/test_sentencepiece.py | 92 + .../featurization/text/test_preprocessor.py | 159 + .../featurization/text/test_vectorizer.py | 281 + test/pecos/utils/test_cluster_util.py | 74 + test/pecos/utils/test_smat_utils.py | 163 + test/pecos/utils/test_torch_util.py | 31 + test/pecos/utils/test_utils.py | 17 + test/pecos/xmc/test_xmc.py | 245 + test/pecos/xmc/xlinear/test_xlinear.py | 745 + test/pecos/xmc/xtransformer/test_model.py | 17 + .../xmc/xtransformer/test_xtransformer.py | 121 + test/tst-data/apps/text2text/query_text.txt | 20 + .../apps/text2text/query_to_keep_text.txt | 5 + .../sklearn_tfidf_true_pred_items.txt | 2 + 
.../sklearn_tfidf_true_pred_items_keep.txt | 2 + test/tst-data/apps/text2text/test.txt | 2 + test/tst-data/apps/text2text/train.txt | 8 + .../apps/text2text/true_pred_items.txt | 2 + test/tst-data/apps/text2text/truth_items.txt | 2 + .../utils/data/dummy_transformer_config.json | 36 + .../utils/data/dummy_transformer_model.bin | Bin 0 -> 15759 bytes .../utils/data/hashing_vectorizer_config.json | 7 + test/tst-data/utils/data/label_vocab.txt | 2 + .../utils/data/sentencepiece_train.src | 20 + .../utils/data/sentencepiece_train.tgt | 20 + .../data/sentencepiece_word_tokenized.src | 20 + .../data/sklearn_tfidf_vectorizer_config.json | 7 + .../utils/data/tfidf_vectorizer_config.json | 9 + .../tst-data/utils/data/train.hashing.tgt.npz | Bin 0 -> 1007 bytes .../utils/data/train.sklearn_tfidf.tgt.npz | Bin 0 -> 998 bytes test/tst-data/utils/data/train.src | 2 + test/tst-data/utils/data/train.tfidf.tgt.npz | Bin 0 -> 1241 bytes .../utils/data/train.transformer.tgt.npy | Bin 0 -> 192 bytes test/tst-data/utils/data/train_text.src | 2 + .../data/transformer_vectorizer_config.json | 10 + test/tst-data/utils/data/vocab.txt | 15 + test/tst-data/xmc/xlinear/L.pifa.npz | Bin 0 -> 1002 bytes test/tst-data/xmc/xlinear/P:nr_splits=2.npz | Bin 0 -> 956 bytes test/tst-data/xmc/xlinear/P:nr_splits=4.npz | Bin 0 -> 936 bytes test/tst-data/xmc/xlinear/X.npz | Bin 0 -> 1001 bytes test/tst-data/xmc/xlinear/Xt.npz | Bin 0 -> 1001 bytes test/tst-data/xmc/xlinear/Y.npz | Bin 0 -> 866 bytes test/tst-data/xmc/xlinear/Yt.npz | Bin 0 -> 866 bytes test/tst-data/xmc/xlinear/Yt_pred.npz | Bin 0 -> 1067 bytes .../xmc/xlinear/Yt_pred_with_tfn+man.npz | Bin 0 -> 948 bytes test/tst-data/xmc/xtransformer/clusters.npz | Bin 0 -> 1309 bytes .../xmc/xtransformer/dense_train_feat.npy | Bin 0 -> 248 bytes .../xmc/xtransformer/saved_model/C.npz | Bin 0 -> 1309 bytes .../saved_model/encoder/config.json | 37 + .../saved_model/encoder/pytorch_model.bin | Bin 0 -> 16473 bytes .../saved_model/text_encoder/config.json | 37 + .../text_encoder/pytorch_model.bin | Bin 0 -> 16473 bytes .../saved_model/text_tokenizer/vocab.txt | 15 + .../saved_model/tokenizer/vocab.txt | 15 + test/tst-data/xmc/xtransformer/train.txt | 3 + test/tst-data/xmc/xtransformer/train_feat.npz | Bin 0 -> 911 bytes .../tst-data/xmc/xtransformer/train_label.npz | Bin 0 -> 1337 bytes 131 files changed, 47378 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100755 .github/build_pypi_wheel.sh create mode 100644 .github/workflows/build_pypi.yml create mode 100644 .github/workflows/codeql.yml create mode 100644 .github/workflows/pytest.yml create mode 100644 .github/workflows/style_type_check.yml create mode 100644 .style_type.cfg create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 NOTICE create mode 100644 README.md create mode 100644 THIRD-PARTY-LICENSES.txt create mode 100644 pecos/__init__.py create mode 100644 pecos/apps/__init__.py create mode 100644 pecos/apps/text2text/README.md create mode 100644 pecos/apps/text2text/__init__.py create mode 100644 pecos/apps/text2text/evaluate.py create mode 100644 pecos/apps/text2text/model.py create mode 100644 pecos/apps/text2text/predict.py create mode 100644 pecos/apps/text2text/train.py create mode 100644 pecos/core/__init__.py create mode 100644 pecos/core/base.py create mode 100644 
pecos/core/libpecos.cpp create mode 100644 pecos/core/third_party/nlohmann_json/json.hpp create mode 100644 pecos/core/third_party/robin_hood_hashing/robin_hood.h create mode 100644 pecos/core/utils/clustering.hpp create mode 100644 pecos/core/utils/file_util.hpp create mode 100644 pecos/core/utils/matrix.hpp create mode 100644 pecos/core/utils/parallel.hpp create mode 100644 pecos/core/utils/random.hpp create mode 100644 pecos/core/utils/scipy_loader.hpp create mode 100644 pecos/core/utils/tfidf.hpp create mode 100644 pecos/core/xmc/inference.hpp create mode 100644 pecos/core/xmc/linear_solver.hpp create mode 100644 pecos/utils/__init__.py create mode 100644 pecos/utils/cli.py create mode 100644 pecos/utils/cluster_util.py create mode 100644 pecos/utils/featurization/__init__.py create mode 100644 pecos/utils/featurization/text/README.md create mode 100644 pecos/utils/featurization/text/__init__.py create mode 100644 pecos/utils/featurization/text/preprocess.py create mode 100644 pecos/utils/featurization/text/sentencepiece/__init__.py create mode 100644 pecos/utils/featurization/text/sentencepiece/model.py create mode 100644 pecos/utils/featurization/text/sentencepiece/predict.py create mode 100644 pecos/utils/featurization/text/sentencepiece/train.py create mode 100644 pecos/utils/featurization/text/vectorizers.py create mode 100644 pecos/utils/logging_util.py create mode 100644 pecos/utils/parallel_util.py create mode 100644 pecos/utils/smat_util.py create mode 100644 pecos/utils/torch_util.py create mode 100644 pecos/xmc/__init__.py create mode 100644 pecos/xmc/base.py create mode 100644 pecos/xmc/xlinear/README.md create mode 100644 pecos/xmc/xlinear/__init__.py create mode 100644 pecos/xmc/xlinear/evaluate.py create mode 100644 pecos/xmc/xlinear/model.py create mode 100644 pecos/xmc/xlinear/predict.py create mode 100644 pecos/xmc/xlinear/train.py create mode 100644 pecos/xmc/xtransformer/README.md create mode 100644 pecos/xmc/xtransformer/__init__.py create mode 100644 pecos/xmc/xtransformer/matcher.py create mode 100644 pecos/xmc/xtransformer/model.py create mode 100644 pecos/xmc/xtransformer/module.py create mode 100644 pecos/xmc/xtransformer/network.py create mode 100644 pecos/xmc/xtransformer/predict.py create mode 100644 pecos/xmc/xtransformer/train.py create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 test/pecos/apps/text2text/test_text2text.py create mode 100644 test/pecos/test_pecos.py create mode 100644 test/pecos/utils/featurization/text/sentencepiece/test_sentencepiece.py create mode 100644 test/pecos/utils/featurization/text/test_preprocessor.py create mode 100644 test/pecos/utils/featurization/text/test_vectorizer.py create mode 100644 test/pecos/utils/test_cluster_util.py create mode 100644 test/pecos/utils/test_smat_utils.py create mode 100644 test/pecos/utils/test_torch_util.py create mode 100644 test/pecos/utils/test_utils.py create mode 100644 test/pecos/xmc/test_xmc.py create mode 100644 test/pecos/xmc/xlinear/test_xlinear.py create mode 100644 test/pecos/xmc/xtransformer/test_model.py create mode 100644 test/pecos/xmc/xtransformer/test_xtransformer.py create mode 100644 test/tst-data/apps/text2text/query_text.txt create mode 100644 test/tst-data/apps/text2text/query_to_keep_text.txt create mode 100644 test/tst-data/apps/text2text/sklearn_tfidf_true_pred_items.txt create mode 100644 test/tst-data/apps/text2text/sklearn_tfidf_true_pred_items_keep.txt create mode 100644 test/tst-data/apps/text2text/test.txt create mode 100644 
test/tst-data/apps/text2text/train.txt create mode 100644 test/tst-data/apps/text2text/true_pred_items.txt create mode 100644 test/tst-data/apps/text2text/truth_items.txt create mode 100644 test/tst-data/utils/data/dummy_transformer_config.json create mode 100644 test/tst-data/utils/data/dummy_transformer_model.bin create mode 100644 test/tst-data/utils/data/hashing_vectorizer_config.json create mode 100644 test/tst-data/utils/data/label_vocab.txt create mode 100644 test/tst-data/utils/data/sentencepiece_train.src create mode 100644 test/tst-data/utils/data/sentencepiece_train.tgt create mode 100644 test/tst-data/utils/data/sentencepiece_word_tokenized.src create mode 100644 test/tst-data/utils/data/sklearn_tfidf_vectorizer_config.json create mode 100644 test/tst-data/utils/data/tfidf_vectorizer_config.json create mode 100644 test/tst-data/utils/data/train.hashing.tgt.npz create mode 100644 test/tst-data/utils/data/train.sklearn_tfidf.tgt.npz create mode 100644 test/tst-data/utils/data/train.src create mode 100644 test/tst-data/utils/data/train.tfidf.tgt.npz create mode 100644 test/tst-data/utils/data/train.transformer.tgt.npy create mode 100644 test/tst-data/utils/data/train_text.src create mode 100644 test/tst-data/utils/data/transformer_vectorizer_config.json create mode 100644 test/tst-data/utils/data/vocab.txt create mode 100644 test/tst-data/xmc/xlinear/L.pifa.npz create mode 100644 test/tst-data/xmc/xlinear/P:nr_splits=2.npz create mode 100644 test/tst-data/xmc/xlinear/P:nr_splits=4.npz create mode 100644 test/tst-data/xmc/xlinear/X.npz create mode 100644 test/tst-data/xmc/xlinear/Xt.npz create mode 100644 test/tst-data/xmc/xlinear/Y.npz create mode 100644 test/tst-data/xmc/xlinear/Yt.npz create mode 100644 test/tst-data/xmc/xlinear/Yt_pred.npz create mode 100644 test/tst-data/xmc/xlinear/Yt_pred_with_tfn+man.npz create mode 100644 test/tst-data/xmc/xtransformer/clusters.npz create mode 100644 test/tst-data/xmc/xtransformer/dense_train_feat.npy create mode 100644 test/tst-data/xmc/xtransformer/saved_model/C.npz create mode 100644 test/tst-data/xmc/xtransformer/saved_model/encoder/config.json create mode 100644 test/tst-data/xmc/xtransformer/saved_model/encoder/pytorch_model.bin create mode 100644 test/tst-data/xmc/xtransformer/saved_model/text_encoder/config.json create mode 100644 test/tst-data/xmc/xtransformer/saved_model/text_encoder/pytorch_model.bin create mode 100644 test/tst-data/xmc/xtransformer/saved_model/text_tokenizer/vocab.txt create mode 100644 test/tst-data/xmc/xtransformer/saved_model/tokenizer/vocab.txt create mode 100644 test/tst-data/xmc/xtransformer/train.txt create mode 100644 test/tst-data/xmc/xtransformer/train_feat.npz create mode 100644 test/tst-data/xmc/xtransformer/train_label.npz diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..9c500921 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,44 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: 'bug' +assignees: '' + +--- +## Description +(A clear and concise description of what the bug is.) + +## How to Reproduce? +(If you developed your own code, please provide a short script that reproduces the error. For existing examples, please provide a link.) + +### Steps to reproduce +(Please provide a minimal code snippet that reproduces the error. For existing examples, please provide a link.) + +```python +put code here +``` + +(Paste the commands you ran that produced the error.)
+ +1. +2. + +## What have you tried to solve it? + +1. +2. + +## Error message or code output +(Paste the complete error message, including stack trace, or the undesired output that the above snippet produces.) + +``` +put error or undesired output here +``` + +## Environment +- Operating system: +- Python version: +- PECOS version: + +(Add as much information about your environment as possible, e.g., dependency versions.) diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..366743f0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,17 @@ +--- +name: Feature request +about: Suggest an enhancement, feature request, or idea for this project +title: '' +labels: 'enhancement' +assignees: '' + +--- + +## Description +(A clear and concise description of what the feature is.) +- If the proposal is about a new model, provide a description of what the model is. +- If the proposal is about an API, provide mock examples if possible. + +## References +- list references and related literature +- list known implementations diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..632000da --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,6 @@ +*Issue #, if available:* + +*Description of changes:* + + +By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. \ No newline at end of file diff --git a/.github/build_pypi_wheel.sh b/.github/build_pypi_wheel.sh new file mode 100755 index 00000000..4fb0fb4b --- /dev/null +++ b/.github/build_pypi_wheel.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -e + +# Get pip +echo "Build wheel using Python version $PIP_VER..." +PIP=$(ls /opt/python/cp${PIP_VER//./}-cp*/bin/pip) +if [ -z "$PIP" ]; then + echo "No pip found for version $PIP_VER, exit" + exit 1 +fi +echo "pip: $($PIP --version)" + + +# Install dependencies +echo "Install dependencies..." +$PIP install setuptools wheel twine auditwheel +yum install -y openblas-devel + + +# Build wheel +PECOS_SOURCE=$DOCKER_MNT/ +WHEEL_OUTPUT_FOLDER=$DOCKER_MNT/$WHEEL_DIR + +$PIP wheel $PECOS_SOURCE --no-deps -w $WHEEL_OUTPUT_FOLDER +WHEEL_NAME=$(ls $WHEEL_OUTPUT_FOLDER) + +echo "Temporary wheel: $(ls $WHEEL_OUTPUT_FOLDER)" +auditwheel show $WHEEL_OUTPUT_FOLDER/$WHEEL_NAME + +echo "Auditing wheel for platform $PLAT..." +auditwheel repair $WHEEL_OUTPUT_FOLDER/$WHEEL_NAME -w $WHEEL_OUTPUT_FOLDER +rm $WHEEL_OUTPUT_FOLDER/$WHEEL_NAME + +echo "Audited wheel: $(ls $WHEEL_OUTPUT_FOLDER)" +auditwheel show $WHEEL_OUTPUT_FOLDER/$(ls $WHEEL_OUTPUT_FOLDER) \ No newline at end of file diff --git a/.github/workflows/build_pypi.yml b/.github/workflows/build_pypi.yml new file mode 100644 index 00000000..fb584e67 --- /dev/null +++ b/.github/workflows/build_pypi.yml @@ -0,0 +1,54 @@ +name: Pypi Build and Release + +on: [push, pull_request] + +jobs: + build: + name: Build PyPI Wheel + strategy: + max-parallel: 4 + fail-fast: false + matrix: + python-version: [3.6, 3.7, 3.8, 3.9] + build_platform: ["manylinux2014_x86_64"] + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + + - name: Build Wheel + env: + DOCKER_IMG: quay.io/pypa/${{ matrix.build_platform }} + DOCKER_MNT: pypi_build + WHEEL_DIR: wheel # wheel location + run: | + echo "Build wheel for PECOS using Python version ${{ matrix.python-version }}..." 
+ docker run --rm -v $(pwd):/$DOCKER_MNT \ + --env PLAT=${{ matrix.build_platform }} --env PIP_VER=${{ matrix.python-version }} \ + --env DOCKER_MNT=$DOCKER_MNT --env WHEEL_DIR=$WHEEL_DIR \ + $DOCKER_IMG /bin/bash /$DOCKER_MNT/.github/build_pypi_wheel.sh + + - name: Check Version Tag + id: check-ver-tag + run: | + if [[ "${{ github.event_name }}" == "push" ]] && \ + [[ "${{ github.event.ref }}" =~ ^refs/tags/v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo ::set-output name=match::true + fi + + - name: Upload to PyPI + if: steps.check-ver-tag.outputs.match == 'true' # Only upload for tags v*.*.* + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + WHEEL_DIR: wheel # same as Build-Wheel/WHEEL_DIR + run: | + WHEEL=$(ls $(pwd)/$WHEEL_DIR) + if [ -z "$WHEEL" ]; then + echo "Wheel build not successful, exit" + exit 1 + fi + echo "Will upload $WHEEL to PyPI..." + pip install twine --upgrade + python3 -m twine upload $(pwd)/$WHEEL_DIR/$WHEEL --verbose \ No newline at end of file diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000..2e273978 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,50 @@ +name: "Code scanning - action" + +on: + push: + pull_request: + schedule: + - cron: '0 19 * * 0' + +jobs: + CodeQL-Build: + name: CodeQL + # CodeQL runs on ubuntu-latest + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + language: [ 'python','cpp' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] + # Learn more: + # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: ${{ matrix.language }} + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v1 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 https://git.io/JvXDl + + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 00000000..0e7bdefd --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,31 @@ +name: Python Unit Tests + +on: [push, pull_request] + +jobs: + build: + name: Python Unit Tests + strategy: + max-parallel: 4 + fail-fast: false + matrix: + python-version: [3.6, 3.7, 3.8, 3.9] + platform: [ubuntu-16.04, ubuntu-18.04, ubuntu-20.04] + + runs-on: ${{ matrix.platform }} + + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest pytest-coverage + pip install -v --editable . 
+ - name: Test with pytest + run: | + pytest diff --git a/.github/workflows/style_type_check.yml b/.github/workflows/style_type_check.yml new file mode 100644 index 00000000..f8ad498c --- /dev/null +++ b/.github/workflows/style_type_check.yml @@ -0,0 +1,32 @@ +name: Style and type checks + +on: [push, pull_request] + +jobs: + build: + name: Style and Type Checks + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: 3.7 + - name: Install dependencies + run: | + python -m pip install pip + python -m pip install --upgrade pip + - name: Style checks + run: | + pip install black + python -m black -v --check --config .style_type.cfg --line-length=100 --diff ./pecos + python -m black -v --check --config .style_type.cfg --line-length=100 --diff ./test + - name: Type checks + run: | + pip install mypy + python -m mypy -v --config-file .style_type.cfg -p pecos + python -m mypy -v --config-file .style_type.cfg `find ./test/ -type f -name "*.py"` + if: ${{ success() || failure() }} + # - name: Check license headers + # run: python .devtools/license check src test \ No newline at end of file diff --git a/.style_type.cfg b/.style_type.cfg new file mode 100644 index 00000000..5fbe0b45 --- /dev/null +++ b/.style_type.cfg @@ -0,0 +1,7 @@ +[isort] +line_length = 100 +multi_line_output = 3 +include_trailing_comma = true + +[mypy] +ignore_missing_imports = true \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..5b627cfa --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,4 @@ +## Code of Conduct +This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). +For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact +opensource-codeofconduct@amazon.com with any additional questions or comments. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..e823b511 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,89 @@ +# Contributing Guidelines + +Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional +documentation, we greatly value feedback and contributions from our community. + +Please read through this document before submitting any issues or pull requests to ensure we have all the necessary +information to effectively respond to your bug report or contribution. + + +## Reporting Bugs/Feature Requests + +We welcome you to use the GitHub issue tracker to report bugs or suggest features. + +When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already +reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: + +* A reproducible test case or series of steps +* The version of our code being used +* Any modifications you've made relevant to the bug +* Anything unusual about your environment or deployment + + +## Contributing via Pull Requests +Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: + +1. You are working against the latest source on the `mainline` branch. +2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. +3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 
If there is an existing issue you are interested in working on, you should comment on it so others don't start working on the same issue. + +To send us a pull request, please: + +1. Create a personal fork of the project on the GitHub website. Clone the fork to your local machine. +2. Add the original repository as a remote called `upstream`. Your remote repo on GitHub is called `origin`. + ``` + git remote add upstream https://github.com/amzn/pecos.git + git remote -v # Display all remotes to double-check + ``` +3. If you created your fork a while ago, be sure to pull upstream changes into your local repository. + ``` + git checkout mainline + git pull -r upstream + ``` +4. Create a new branch from `mainline` to work on. +5. Implement your code on the new branch: + * Follow the code style of the project. + * Write or adapt tests as needed. + * Add or change the documentation as needed. +6. **Ensure local style/type checks and tests pass.** +7. Commit using clear messages. **Squash your commits into a single commit** (see the appendix at the end of this guide). +8. Push your branch to **your fork** `origin` on GitHub. + ``` + git push --set-upstream origin + ``` +9. On the GitHub website's pull request panel, open a pull request into the **original repository** `upstream` from **your fork**. *(NOTE: this is the default option if one does not make changes when creating the PR)* + * Carefully fill out the PR template + * Click "Draft PR" in the drop-down menu to double-check the changes yourself first + * When ready, click on “Ready for review” +10. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. +11. If you need to make changes to the pull request, simply modify your branch by **amending** the commit, and then *force-push* to your `origin` branch. The pull request will automatically update its timeline: + ``` + # Do modification + git commit -a --amend + git push -f + ``` +12. Once the pull request is approved and merged, pull the changes from upstream to your local repo and delete your extra branch(es). + +GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and +[creating a pull request](https://help.github.com/articles/creating-a-pull-request/). + + +## Finding contributions to work on +Looking at the existing issues is a great way to find something to contribute to. Since our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'Call for Contribution' or 'good first issue' issues is a great place to start. + + +## Code of Conduct +This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). +For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact +opensource-codeofconduct@amazon.com with any additional questions or comments. + + +## Security issue notifications +If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. + + +## Licensing + +See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. + +We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 
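+
+## Appendix: Squashing commits
+
+A minimal sketch of the squash-and-push flow from steps 7-8 above (assuming your feature branch was created from `mainline`; adjust branch names to your setup):
+```
+# Interactively rebase the commits made since mainline; in the editor, keep the
+# first commit as "pick" and mark the rest as "squash" (or "fixup")
+git rebase -i mainline
+# Push the rewritten branch to your fork; -f is required if it was pushed before
+git push -f origin
+```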
diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..0a6974bc --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,8 @@ +include LICENSE +include NOTICE +include CODE_OF_CONDUCT.md +include CONTRIBUTING.md +include THIRD-PARTY-LICENSES.txt + +recursive-include pecos/core *.hpp *.h *.cpp *.c +recursive-include test * \ No newline at end of file diff --git a/NOTICE b/NOTICE new file mode 100644 index 00000000..546b777f --- /dev/null +++ b/NOTICE @@ -0,0 +1 @@ +Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. diff --git a/README.md b/README.md new file mode 100644 index 00000000..a00c1f3f --- /dev/null +++ b/README.md @@ -0,0 +1,147 @@ +# PECOS - Predictions for Enormous and Correlated Output Spaces + +[![PyPi Latest Release](https://img.shields.io/pypi/v/libpecos)](https://img.shields.io/pypi/v/libpecos) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](./LICENSE) + +PECOS is a versatile and modular machine learning (ML) framework for fast learning and inference on problems with large output spaces, such as extreme multi-label ranking (XMR) and large-scale retrieval. +PECOS' design is intentionally agnostic to the specific nature of the inputs and outputs, as it is envisioned to be a general-purpose framework for multiple distinct applications. + +Given an input, PECOS identifies a small set (10-100) of relevant outputs from amongst an extremely large (~100MM) candidate set and ranks these outputs in terms of relevance. + + +### Features + +#### Extreme Multi-label Ranking and Classification +* X-Linear ([`pecos.xmc.xlinear`](pecos/xmc/xlinear/README.md)): recursive linear models that learn to traverse a hierarchical label tree from the root down to a few leaf-node clusters for each input, returning the top-k relevant labels within those clusters as predictions. See more details in the [PECOS paper (Yu et al., 2020)](https://arxiv.org/pdf/2010.05878.pdf). + + fast real-time inference in C++ + + can handle a 100MM output space + +* X-Transformer ([`pecos.xmc.xtransformer`](pecos/xmc/xtransformer/README.md)): a Transformer matcher that learns to traverse a hierarchical label tree from the root down to a few leaf-node clusters for each input, returning the top-k relevant labels within those clusters via a linear ranker. See technical details in the [X-Transformer paper (Chang et al., 2020)](https://arxiv.org/pdf/1905.02331.pdf) and the latest SOTA results in the [PECOS paper (Yu et al., 2020)](https://arxiv.org/pdf/2010.05878.pdf). + + easy to extend with many pre-trained Transformer models from [huggingface transformers](https://github.com/huggingface/transformers). + + one of the state-of-the-art deep-learning-based XMC methods. + +* text2text application ([`pecos.apps.text2text`](pecos/apps/text2text/README.md)): an easy-to-use text classification pipeline (with an X-Linear backend) that supports n-gram TFIDF vectorization, classification, and ensemble predictions. 
+ + +## Requirements and Installation + +* Python (>=3.6) +* Pip (>=19.3) + +See other dependencies in [`setup.py`](https://github.com/amzn/pecos/blob/mainline/setup.py#L135). +You should install PECOS in a [virtual environment](https://docs.python.org/3/library/venv.html). +If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). + +### Supported Platforms +* Ubuntu 16.04, 18.04, and 20.04 +* Amazon Linux 2 + +### Installation from Wheel + + +PECOS can be installed using pip as follows: +```bash +pip3 install libpecos +``` + +### Installation from Source + +#### Prerequisite builder tools +* For Ubuntu (16.04, 18.04, 20.04): +``` bash +apt-get update && apt-get install -y build-essential git python3 python3-distutils python3-venv +``` +* For Amazon Linux 2: +``` bash +yum -y install python3 python3-devel python3-distutils python3-venv && yum -y groupinstall 'Development Tools' +``` + +#### Install and develop locally +```bash +git clone https://github.com/amzn/pecos +cd pecos +pip3 install --editable ./ +``` + + +## Quick Tour +To get a glimpse of how PECOS works, here is a quick tour of using the PECOS API for the XMR problem. + +### Toy Example +The eXtreme Multi-label Ranking (XMR) problem is defined by two matrices: +* instance-to-feature matrix `X`, of shape `N by D` in [`SciPy CSR format`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html) +* instance-to-label matrix `Y`, of shape `N by L` in [`SciPy CSR format`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html) + +Some toy data matrices are available in the [`tst-data`](https://github.com/amzn/pecos/tree/mainline/test/tst-data/xmc/xlinear) folder. + +PECOS constructs a hierarchical label tree and learns linear models recursively (e.g., XR-Linear): +```python +>>> from pecos.xmc.xlinear.model import XLinearModel +>>> from pecos.xmc import Indexer, LabelEmbeddingFactory + +# Build hierarchical label tree and train an XR-Linear model +>>> label_feat = LabelEmbeddingFactory.create(Y, X) +>>> cluster_chain = Indexer.gen(label_feat) +>>> model = XLinearModel.train(X, Y, C=cluster_chain) +>>> model.save("./save-models") +``` + +After training the model, we can run prediction and evaluation: +```python +>>> from pecos.utils import smat_util +>>> Yt_pred = model.predict(Xt) +# print precision and recall at k=10 +>>> print(smat_util.Metrics.generate(Yt, Yt_pred)) +``` + +PECOS also offers an optimized C++ implementation for fast real-time inference: +```python +>>> model = XLinearModel.load("./save-models", is_predict_only=True) +>>> for i in range(Xt.shape[0]): +>>> Yt_pred = model.predict(Xt[i], threads=1) +```
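+
+Putting the tour together, below is a hedged end-to-end sketch on the bundled toy data (assuming the repository root as the working directory, and that `pecos.utils.smat_util.load_matrix` is used to load the `.npz` matrices):
+```python
+from pecos.utils import smat_util
+from pecos.xmc import Indexer, LabelEmbeddingFactory
+from pecos.xmc.xlinear.model import XLinearModel
+
+# Load the toy CSR matrices shipped under test/tst-data/xmc/xlinear
+X = smat_util.load_matrix("test/tst-data/xmc/xlinear/X.npz")  # instance-to-feature, N x D
+Y = smat_util.load_matrix("test/tst-data/xmc/xlinear/Y.npz")  # instance-to-label, N x L
+Xt = smat_util.load_matrix("test/tst-data/xmc/xlinear/Xt.npz")  # test features
+Yt = smat_util.load_matrix("test/tst-data/xmc/xlinear/Yt.npz")  # test labels
+
+# Build label embeddings, cluster them into a hierarchical label tree, and train
+label_feat = LabelEmbeddingFactory.create(Y, X)
+cluster_chain = Indexer.gen(label_feat)
+model = XLinearModel.train(X, Y, C=cluster_chain)
+
+# Predict on the test split and report ranking metrics at k=10
+Yt_pred = model.predict(Xt)
+print(smat_util.Metrics.generate(Yt, Yt_pred, 10))
+```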
+ + +## Citation + +If you find PECOS useful, please consider citing our papers. + +* H. Yu, K. Zhong, I. Dhillon, [PECOS: Prediction for Enormous and Correlated Output Spaces](https://arxiv.org/pdf/2010.05878.pdf), arXiv 2020. +```bibtex +@article{yu2020pecos, + title={PECOS: Prediction for Enormous and Correlated Output Spaces}, + author={Yu, Hsiang-Fu and Zhong, Kai and Dhillon, Inderjit S}, + journal={arXiv preprint arXiv:2010.05878}, + year={2020} +} +``` + +* W. Chang, H. Yu, K. Zhong, Y. Yang, I. Dhillon, [Taming pretrained transformers for extreme multi-label text classification](https://arxiv.org/pdf/1905.02331.pdf), KDD 2020. +```bibtex +@inproceedings{chang2020taming, + title={Taming pretrained transformers for extreme multi-label text classification}, + author={Chang, Wei-Cheng and Yu, Hsiang-Fu and Zhong, Kai and Yang, Yiming and Dhillon, Inderjit S}, + booktitle={Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining}, + pages={3163--3171}, + year={2020} +} +``` + +## License + +Copyright (2021) Amazon.com, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + diff --git a/THIRD-PARTY-LICENSES.txt b/THIRD-PARTY-LICENSES.txt new file mode 100644 index 00000000..b2970c39 --- /dev/null +++ b/THIRD-PARTY-LICENSES.txt @@ -0,0 +1,123 @@ +The Amazon PECOS Product includes the following third-party software/licensing: + +** exp-trmf-nips16; version 0.1.0 -- https://github.com/rofuyu/exp-trmf-nips16 +Copyright (c) 2015-2019 The TRMF Project. +All rights reserved. +** liblinear; version 2.43 -- https://github.com/cjlin1/liblinear +Copyright (c) 2007-2021 The LIBLINEAR Project. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------ + +** X-Transformer; version 0.1.0 -- +https://github.com/OctoberChang/X-Transformer +Copyright (c) 2020 The X-Transformer Project. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------ + +** Robin Hood Hashing; version 3.9.0 -- +https://github.com/martinus/robin-hood-hashing +Copyright (c) 2018-2019 Martin Ankerl + +MIT License + +Copyright (c) 2018-2019 Martin Ankerl + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +------ + +** nlohmann-json; version 3.7.3 -- https://nlohmann.github.io/json/ +Copyright © 2013-2019 Niels Lohmann. The code is licensed under the MIT +License. + +MIT License + +Copyright (c) + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/pecos/__init__.py b/pecos/__init__.py new file mode 100644 index 00000000..69801d3f --- /dev/null +++ b/pecos/__init__.py @@ -0,0 +1,100 @@ +# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions +# and limitations under the License. +try: + from ._version import __version__ # noqa +except ImportError: + # For raw code without installing, use a dummy version + __version__ = "0.0.0" + +import dataclasses as dc +import copy + + +# Registry mapping class fullnames to classes; lets BaseParams.from_dict rebuild nested params from "__meta__" tags +_class_mapping_ = {} + + +class MetaClass(type): + @staticmethod + def class_fullname(cls): + return f"{cls.__module__}###{cls.__qualname__}" + + def __new__(cls, name, bases, attr): + cls = super().__new__(cls, name, bases, attr) + _class_mapping_[MetaClass.class_fullname(cls)] = cls + return cls + + +class BaseClass(metaclass=MetaClass): + @classmethod + def class_fullname(cls): + return MetaClass.class_fullname(cls) + + @classmethod + def append_meta(cls, d: dict = None): + meta = {"__meta__": {"class_fullname": cls.class_fullname()}} + if d is not None: + meta.update(d) + return meta + + +@dc.dataclass +class BaseParams(BaseClass): + @classmethod + def from_dict(cls, param=None, recursive=False): + def get_param(x, type_hint=None): + if isinstance(x, BaseParams): + return copy.deepcopy(x) + elif isinstance(x, dict): + meta = x.get("__meta__", None) + if meta: + cls = _class_mapping_[meta["class_fullname"]] + x = cls.from_dict(x) + elif type_hint and issubclass(type_hint, BaseParams): + x = type_hint.from_dict(x) + elif isinstance(x, (list, tuple)): + x = [get_param(y, type_hint=type_hint) for y in x] + else: + x = copy.deepcopy(x) + return x + + if param is None: + return cls() + elif isinstance(param, cls): + return copy.deepcopy(param) + elif isinstance(param, dict): + d = {} + for f in dc.fields(cls): + if f.name not in param: + if recursive and issubclass(f.type, BaseParams): + d[f.name] = f.type.from_dict(param, recursive=recursive) + continue + d[f.name] = get_param(param[f.name], type_hint=f.type) + return cls(**d) + raise ValueError(f"{param} is not a valid parameter dictionary for {cls.__name__}") + + def to_dict(self, with_meta=True): + d = {} + for f in dc.fields(self): + value = getattr(self, f.name) + if isinstance(value, BaseParams): + d[f.name] = value.to_dict(with_meta) + elif isinstance(value, (tuple, list)): + d[f.name] = [ + x.to_dict(with_meta) if isinstance(x, BaseParams) else x for x in value + ] + elif isinstance(value, dict): + d[f.name] = { + k: v.to_dict(with_meta) if isinstance(v, BaseParams) else v + for k, v in value.items() + } + else: + d[f.name] = value + return self.append_meta(d) if with_meta else d
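+
+# A hedged usage sketch of the params machinery above (illustration only;
+# `InnerParams`/`OuterParams` are hypothetical and not part of the library):
+#
+#     @dc.dataclass
+#     class InnerParams(BaseParams):
+#         threads: int = 8
+#
+#     @dc.dataclass
+#     class OuterParams(BaseParams):
+#         inner: InnerParams = dc.field(default_factory=InnerParams)
+#         bias: float = 1.0
+#
+#     d = OuterParams(inner=InnerParams(threads=4)).to_dict()
+#     # each nested dict carries a "__meta__" entry recording its class fullname
+#     p = OuterParams.from_dict(d)
+#     # the nested InnerParams is rebuilt via the _class_mapping_ registry
+#     assert isinstance(p.inner, InnerParams) and p.inner.threads == 4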
diff --git a/pecos/apps/__init__.py b/pecos/apps/__init__.py new file mode 100644 index 00000000..695dd79a --- /dev/null +++ b/pecos/apps/__init__.py @@ -0,0 +1,10 @@ +# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions +# and limitations under the License. diff --git a/pecos/apps/text2text/README.md b/pecos/apps/text2text/README.md new file mode 100644 index 00000000..a705a412 --- /dev/null +++ b/pecos/apps/text2text/README.md @@ -0,0 +1,88 @@ +# PECOS for text2text Applications + +Given an input text, pecos.apps.text2text can generate a subset of labels relevant to this input from a fixed set of output labels. +The input should be a text sequence, while the output labels can be either text-based or symbol-based (although the symbols are usually represented in text format). +In text classification, for example, the input text can be a paragraph and the relevant labels can be categories tagged to this paragraph. +Another example is retrieval, where the input text can be a natural question and the relevant labels can be paragraphs that contain the answer span for that question. + +## Getting started +### Usage +```bash + > python3 -m pecos.apps.text2text.train --help + > python3 -m pecos.apps.text2text.predict --help + > python3 -m pecos.apps.text2text.evaluate --help +``` + + +### Usage example: Multi-label Category Tagging for Web Documents +This toy example demonstrates how to run the training and prediction of PECOS text2text applications. +Note that we use utf-8 encoding for all text files. + +First, consider the following input text file `training-data.txt`: +``` +0,1,2<TAB>Alan Turing is widely considered to be the father of theoretical computer science and artificial intelligence. +0,2,3<TAB>Hinton was co-author of a highly cited paper published in 1986 that popularized the backpropagation algorithm for training multi-layer neural networks. +3,4,5<TAB>Hinton received the 2018 Turing Award, together with Yoshua Bengio and Yann LeCun, for their work on artificial intelligence and deep learning. +3,4,5<TAB>In 1989, Yann LeCun et al. applied the standard backpropagation algorithm on neural networks for hand digit recognition. +``` +Each line contains two fields separated by a tab character (rendered as `<TAB>` above); the former is the relevant output label ids and the latter is the input text: +``` +OUTPUT_ID1,OUTPUT_ID2,OUTPUT_ID3,...<TAB>INPUT_TEXT +``` + +The output ids are zero-based and correspond to the line numbers in the output label file. +In particular, the corresponding output label file `output-labels.txt` takes the format of: +``` +Artificial intelligence researchers +Computability theorists +British computer scientists +Machine learning researchers +Turing Award laureates +Deep Learning +``` +Each line in the file represents an output label; here, categories in Wikipedia. + +Optionally, we can provide a vectorization JSON config for finer control of the n-gram TF-IDF features. +We will use the example config `config.json` from the README.md of `pecos/utils/featurization/text/`. + +Now we train the text2text model, which includes feature vectorization and PECOS model learning: 
+``` +python3 -m pecos.apps.text2text.train \ + --input-text-path ./training-data.txt \ + --vectorizer-config-path ./config.json \ + --output-item-path ./output-labels.txt \ + --model-folder ./pecos-text2text-model +``` +The models are saved in the `./pecos-text2text-model` folder. + +For batch prediction, the user should provide an input text file `test-data.txt`, which has the same format as `training-data.txt`: +``` +python3 -m pecos.apps.text2text.predict \ + --input-text-path ./test-data.txt \ + --model-folder ./pecos-text2text-model \ + --predicted-output-item-path ./test-prediction.txt +``` +The predictions are saved in `./test-prediction.txt`. +Each line contains the generated output labels and scores as a JSON-format dictionary for the corresponding input from the input file, as in the sketch below.
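+A hypothetical prediction line (the labels and scores are made up; the `schema`/`data` keys follow `Text2Text.print_predictions` in pecos/apps/text2text/model.py):
+```
+{"schema": ["output_item", "score"], "data": [["Deep Learning", 0.63352], ["Machine learning researchers", 0.40171]]}
+```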
" + ) + + parser.add_argument( + "-p", + "--pred-path", + type=str, + required=True, + metavar="PATH", + help="path to the file of predicted output. Format follows the output from pecos.apps.text2text.predict", + ) + + parser.add_argument( + "-y", + "--truth-path", + type=str, + required=True, + metavar="PATH", + help="path to the file of with ground truth output. \ + Format 1 (Only has ground truth text items): each line is a list of tab-separated sorted text items. \ + Format 2 (Same as training input format): each line is OUTPUT_ID1,OUTPUT_ID2,OUTPUT_ID3,...\t INPUT_TEXT. \ + where OUTPUT_IDs are the zero-based output text item indices corresponding to the line numbers of TEXT_ITEM_PATH.", + ) + + parser.add_argument( + "-q", + "--text-item-path", + type=str, + default=None, + metavar="TEXT_ITEM_PATH", + help="Text item file name. Format: each line corresponds to a text item. If this path is given, we assume TRUTH_PATH uses Format 2. Otherwise, TRUTH_PATH uses Format 1", + ) + + parser.add_argument("-k", "--topk", type=int, default=10, metavar="INT", help="evaluate @k") + + return parser + + +def do_evaluation(args): + """Evaluate predicted labels for Text2Text model + + Args: + args (argparse.Namespace): Command line arguments parsed by `parser.parse_args()` + """ + + # Initialize an item-to-index dictionary, item_dict + if args.text_item_path is None: + item_dict = {} + else: + item_dict = { + line.strip("\n"): i + for i, line in enumerate(open(args.text_item_path, "r", encoding="utf-8")) + } + + # parse the ground truth items + col_id_t = [] + row_id_t = [] + val_t = [] + num_samples_t = 0 + with open(args.truth_path, "r", encoding="utf-8") as fg: + for i, line in enumerate(fg): + num_samples_t = num_samples_t + 1 + # If text_item_path is not given, use Format 1 for truth_path: each line is a list of tab-separated sorted text items + if args.text_item_path is None: + for item in line.strip().split("\t"): + if item not in item_dict: + item_dict[item] = len(item_dict) + row_id_t.append(i) + col_id_t.append(item_dict[item]) + val_t.append(1.0) + # If text_item_path is given, use Format 2 for truth_path: each line is OUTPUT_ID1,OUTPUT_ID2,OUTPUT_ID3,...\t INPUT_TEXT + else: + for idx in line.strip().split("\t")[0].split(","): + row_id_t.append(i) + col_id_t.append(int(idx)) + val_t.append(1.0) + + # parse the predicted items + col_id_p = [] + row_id_p = [] + val_p = [] + num_samples_p = 0 + with open(args.pred_path, "r", encoding="utf-8") as fp: + for i, line in enumerate(fp): + num_samples_p = num_samples_p + 1 + item_scores = json.loads(line.strip())["data"] + for j, x in enumerate(item_scores): + item = x[0] + if item not in item_dict: + item_dict[item] = len(item_dict) + row_id_p.append(i) + col_id_p.append(item_dict[item]) + val_p.append(1.0 / (j + 1)) + + assert num_samples_t == num_samples_p + Y_true = smat.csr_matrix((val_t, (row_id_t, col_id_t)), shape=(num_samples_t, len(item_dict))) + Y_pred = smat.csr_matrix((val_p, (row_id_p, col_id_p)), shape=(num_samples_p, len(item_dict))) + + metric = smat_util.Metrics.generate(Y_true, Y_pred, args.topk) + print("==== evaluation results ====") + print(metric) + + +if __name__ == "__main__": + parser = parse_arguments() + args = parser.parse_args() + do_evaluation(args) diff --git a/pecos/apps/text2text/model.py b/pecos/apps/text2text/model.py new file mode 100644 index 00000000..e0fc0911 --- /dev/null +++ b/pecos/apps/text2text/model.py @@ -0,0 +1,488 @@ +# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
diff --git a/pecos/apps/text2text/model.py b/pecos/apps/text2text/model.py new file mode 100644 index 00000000..e0fc0911 --- /dev/null +++ b/pecos/apps/text2text/model.py @@ -0,0 +1,488 @@ +# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions +# and limitations under the License. +import gc +import hashlib +import itertools +import json +import logging +import pathlib +import tempfile +from os import makedirs, path + +import numpy as np +from pecos.utils import smat_util +from pecos.utils.cluster_util import ClusterChain +from pecos.utils.featurization.text.preprocess import Preprocessor +from pecos.xmc import Indexer, LabelEmbeddingFactory +from pecos.xmc.xlinear import XLinearModel + +LOGGER = logging.getLogger(__name__) + + +class CachedWorkspace(object): + """Generate a folder name for caching intermediate variables""" + + def __init__(self, ws=None): + """Initialization + + Args: + ws (str, optional): Workspace folder. + If not given, will use a temporary folder. + """ + if ws is None: + self.tmp_folder = tempfile.TemporaryDirectory() + ws = self.tmp_folder.name + self.ws = ws + + def get_path_for_name_and_kwargs(self, name, kwargs): + """Generate a hashed path in the workspace (self.ws) + + Args: + name (str): a basename (e.g., the intermediate variable's name) for this folder. + kwargs (dict): the args that generate the variable + + Returns: + A str: a hashed path for the given name and kwargs + """ + folder = path.join(self.ws, name) + makedirs(folder, exist_ok=True) + # mapping.json is kept for debugging purposes + mapping_path = path.join(folder, "mapping.json") + if path.exists(mapping_path): + with open(mapping_path, "r", encoding="utf-8") as f_map: + mapping = json.loads(f_map.read()) + else: + mapping = {} + + key = hashlib.sha224(json.dumps(kwargs, sort_keys=True).encode("utf-8")).hexdigest() + mapping[key] = kwargs + + with open(mapping_path, "w", encoding="utf-8") as fout: + fout.write(json.dumps(mapping, sort_keys=True, indent=2)) + return path.join(folder, key) + + +class Text2Text(object): + """Given an input text, generate a subset of items relevant to this input from a fixed set of output items. + + The input should be a text sequence, while the output items can be either text-based or symbol-based + (although the symbols are usually represented in text format). + """ + + def __init__(self, preprocessor=None, xlinear_models=None, output_items=None): + """Initialization + + Args: + preprocessor (Preprocessor): Text preprocessor. + xlinear_models (list): List of XLinear models. + output_items (list): List of output item names.
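+ Note: a Text2Text object is usually obtained via the classmethods below, e.g. Text2Text.train(...) or Text2Text.load(...), rather than by calling this constructor directly.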
+ """ + self.preprocessor = preprocessor + self.xlinear_models = xlinear_models + self.output_items = output_items + + def save(self, model_folder): + """Save the Text2Text model + + Args: + model_folder (str): folder name to save + """ + + LOGGER.info("Saving the model...") + self.preprocessor.save(path.join(model_folder, "preprocessor")) + xlinear_folder = path.join(model_folder, "xlinear_ensemble") + ensemble_config = {"nr_ensembles": len(self.xlinear_models), "kwargs": []} + for i, (m, kwargs) in enumerate(self.xlinear_models): + ensemble_config["kwargs"] += [kwargs] + folder = path.join(xlinear_folder, "{}".format(i)) + m.save(folder) + with open(path.join(xlinear_folder, "config.json"), "w", encoding="utf-8") as fout: + fout.write(json.dumps(ensemble_config, indent=True)) + with open(path.join(model_folder, "output_items.json"), "w", encoding="utf-8") as fp: + json.dump(self.output_items, fp) + + @classmethod + def load(cls, model_folder, is_predict_only=False, **kwargs): + """Load the Text2Text model + + Args: + model_folder (str): folder name to load + is_predict_only (bool): if the loaded model will be used for prediction only in the batch mode. + + Returns: + A Text2Text object + """ + + preprocessor = Preprocessor.load(path.join(model_folder, "preprocessor")) + xlinear_folder = path.join(model_folder, "xlinear_ensemble") + with open(path.join(xlinear_folder, "config.json"), "r", encoding="utf-8") as fin: + ensemble_config = json.loads(fin.read()) + xlinear_models = [] + for i, model_kwargs in enumerate(ensemble_config["kwargs"]): + folder = path.join(xlinear_folder, "{}".format(i)) + xlinear_models += [(XLinearModel.load(folder, is_predict_only, **kwargs), model_kwargs)] + output_items = None + folder_path = pathlib.Path(model_folder) + json_output_items_filepath = folder_path / "output_items.json" + with open(str(json_output_items_filepath), "r", encoding="utf-8") as fin: + output_items = json.load(fin) + if not output_items: + raise ValueError("Could not read output items saved in json format") + + return cls(preprocessor, xlinear_models, output_items) + + @classmethod + def train( + cls, + input_text_path, + output_text_path, + vectorizer_config=None, + dtype=np.float32, + label_embed_type=["pifa"], + indexer_algo=["hierarchicalkmeans"], + imbalanced_ratio=0.0, + imbalanced_depth=100, + spherical=True, + nr_splits=2, + max_leaf_size=[100], + seed=[0], + max_iter=[20], + solver_type=["L2R_L2LOSS_SVC_DUAL"], + Cp=[1.0], + Cn=[1.0], + bias=1.0, + threshold=[0.1], + negative_sampling_scheme="tfn", + pred_kwargs=None, + threads=-1, + workspace_folder=None, + ): + """Train a Text2Text model + + Args: + + input_text_path (str): Text input file name. + Format: in each line, OUTPUT_ID1,OUTPUT_ID2,OUTPUT_ID3,...\t INPUT_TEXT + where OUTPUT_IDs are the zero-based output item indices + corresponding to the line numbers of OUTPUT_ITEM_PATH. + We assume utf-8 encoding for text. + output_text_path (str): The file path for output text items. + Format: each line corresponds to a representation + of the output item. We assume utf-8 encoding for text. + vectorizer_config_json (str): Json_format string for vectorizer config (default None) + dtype (float32 | float64): data type (default float32) + label_embed_type (list of str): Label embedding types. (default pifa). + Multiple values will lead to different individual models for ensembling. + indexer_algo (list of str): Indexer algorithm (default ["hierarchicalkmeans"]). + imbalanced_ratio (float): Value between 0.0 and 0.5 (inclusive). 
Indicates how relaxed the balancedness + constraint of 2_means can be. Specifically, if an iteration of 2_means is clustering L labels, + the size of the output 2 clusters will be within approx imbalanced_ratio * 2 * L of each other. + (default 0.0) + imbalanced_depth (int): After hierarchical 2_means clustering has reached this depth, + it will continue clustering as if imbalanced_ratio is set to 0.0. (default 100) + spherical (bool): Do l2_normalize cluster centers while clustering (default True). + nr_splits (int): number of splits used to construct hierarchy (a power of 2 is recommended, default 2) + max_leaf_size (list of int): The max size of the leaf nodes of hierarchical 2_means clustering. + Multiple values (separated by comma) are supported and will lead to different + individual models for ensembling. (default [100]) + seed (list of int): Random seeds (default [0]). Multiple values will lead to different individual + models for ensembling. + max_iter (int): The max iteration for indexing (default 20) + solver_type (list of string): solver type for ranking (default ["L2R_L2LOSS_SVC_DUAL"]) + Cp (float): Coefficient for positive class in the loss function (default 1.0) + Cn (float): Coefficient for negative class in the loss function (default 1.0) + bias (float): bias for the ranking model (default=1.0) + threshold (float): Threshold to sparsify the model weights (default 0.1) + negative_sampling (str, choices=[tfn, man, tfn+man]): Negative Sampling Schemes (default tfn) + pred_kwargs (dict): kwargs for prediction used in matching-aware training + only_topk (int): the default number of top labels used in the prediction + beam_size (int): the default size of beam search used in the prediction + post_processor (str): the default post processor used in the prediction + workspace_folder: (str, default=None): A folder name for storing intermediate + variables during training + + Returns: + A Text2Text object + """ + + ws = CachedWorkspace(workspace_folder) + + # Train Preprocessor and obtain X, Y + XY_kwargs = dict( + input_text_path=input_text_path, + output_text_path=output_text_path, + vectorizer_config=vectorizer_config, + dtype=str(dtype), + ) + + # Prepare Preprocessor + preprocessor_path = ws.get_path_for_name_and_kwargs("preprocessor", XY_kwargs) + if path.exists(preprocessor_path): + LOGGER.info("Loading existing preprocessor...") + preprocessor = Preprocessor.load(preprocessor_path) + else: + LOGGER.info("Parsing text files...") + Y, corpus = Preprocessor.load_data_from_file(input_text_path, output_text_path) + LOGGER.info( + f"Training {vectorizer_config['type']} vectorizer on {len(corpus)} input texts..." + ) + preprocessor = Preprocessor.train(corpus, vectorizer_config, dtype=dtype) + preprocessor.save(preprocessor_path) + + # Prepare X, X could be dense or sparse + X_path = ws.get_path_for_name_and_kwargs("X", XY_kwargs) + + if path.exists(X_path): + X = XLinearModel.load_feature_matrix(X_path) + else: + if "corpus" not in locals(): + Y, corpus = Preprocessor.load_data_from_file(input_text_path, output_text_path) + LOGGER.info(f"Vectorizing {len(corpus)} texts...") + X = preprocessor.predict(corpus) + XLinearModel.save_feature_matrix(X_path, X) + LOGGER.info( + f"{vectorizer_config['type']} input X loaded: {X.shape[0]} samples with {X.shape[1]} features." 
+ ) + + # Prepare Y, Y is always sparse + Y_path = ws.get_path_for_name_and_kwargs("Y", XY_kwargs) + ".npz" + if path.exists(Y_path): + Y = smat_util.load_matrix(Y_path) + else: + if "Y" not in locals(): + Y, corpus = Preprocessor.load_data_from_file(input_text_path, output_text_path) + smat_util.save_matrix(Y_path, Y) + LOGGER.info(f"Output label Y loaded: {Y.shape[0]} samples with {Y.shape[1]} labels.") + + # Grid Parameters for XLinearModel + ranker_param_names = [ + "bias", + "Cp", + "Cn", + "solver_type", + "threshold", + "negative_sampling_scheme", + "pred_kwargs", + ] + + ranker_grid_params = {} + for name in ranker_param_names: + tmp = locals()[name] + ranker_grid_params[name] = tmp if isinstance(tmp, (list, tuple)) else [tmp] + + indexer_param_names = [ + "indexer_algo", + "imbalanced_ratio", + "imbalanced_depth", + "spherical", + "seed", + "max_iter", + "max_leaf_size", + "nr_splits", + "label_embed_type", + ] + + indexer_grid_params = {} + for name in indexer_param_names: + tmp = locals()[name] + indexer_grid_params[name] = tmp if isinstance(tmp, (list, tuple)) else [tmp] + + # Generate various label features + label_feat_set = {} + for embed_type in indexer_grid_params["label_embed_type"]: + label_embed_kwargs = dict( + input_text_path=input_text_path, + output_text_path=output_text_path, + dtype=str(dtype), + vectorizer_config=vectorizer_config, + embed_type=embed_type, + ) + label_embed_path = ws.get_path_for_name_and_kwargs("L", label_embed_kwargs) + if path.exists(label_embed_path): + LOGGER.info(f"Loading existing {embed_type} features for {Y.shape[1]} labels...") + label_feat_set[embed_type] = XLinearModel.load_feature_matrix(label_embed_path) + else: + LOGGER.info(f"Generating {embed_type} features for {Y.shape[1]} labels...") + # Create label features + label_feat_set[embed_type] = LabelEmbeddingFactory.create(Y, X, method=embed_type) + XLinearModel.save_feature_matrix(label_embed_path, label_feat_set[embed_type]) + + for indexer_values in itertools.product( + *[indexer_grid_params[k] for k in indexer_param_names] + ): + # Indexing + indexer_kwargs = dict(zip(indexer_param_names, indexer_values)) + indexer_kwargs_local = indexer_kwargs.copy() + C_path = ws.get_path_for_name_and_kwargs("C", indexer_kwargs_local) + if path.exists(C_path): + LOGGER.info(f"Loading existing clustering code with params {indexer_kwargs_local}") + C = ClusterChain.load(C_path) + else: + label_embed_type = indexer_kwargs.pop( + "label_embed_type", None + ) # as label_embed_type is not a valid argument for XLinearModel.train + LOGGER.info(f"Clustering with params {indexer_kwargs_local}...") + C = Indexer.gen( + label_feat_set[indexer_kwargs_local["label_embed_type"]], + indexer_kwargs.pop("indexer_algo"), + threads=threads, + **indexer_kwargs, + ) + LOGGER.info(f"Created {C[-1].shape[1]} clusters.") + C.save(C_path) + + # Ensemble Models + for ranker_values in itertools.product( + *[ranker_grid_params[k] for k in ranker_param_names] + ): + ranker_kwargs = dict(zip(ranker_param_names, ranker_values)) + ranker_kwargs_local = ranker_kwargs.copy() + # Model Training + ranker_kwargs_local.update(indexer_kwargs_local) + + model_path = ws.get_path_for_name_and_kwargs("model", ranker_kwargs_local) + if path.exists(model_path): + LOGGER.info(f"Model with params {ranker_kwargs_local} exists") + else: + LOGGER.info(f"Training model with params {ranker_kwargs_local}...") + m = XLinearModel.train( + X, + Y, + C, + threads=threads, + **ranker_kwargs, + ) + m.save(model_path) + del m + gc.collect() + + del C + 
gc.collect() + + del X, Y, label_feat_set + gc.collect() + + xlinear_models = [] + for indexer_values in itertools.product( + *[indexer_grid_params[k] for k in indexer_param_names] + ): + indexer_kwargs = dict(zip(indexer_param_names, indexer_values)) + indexer_kwargs_local = indexer_kwargs.copy() + for ranker_values in itertools.product( + *[ranker_grid_params[k] for k in ranker_param_names] + ): + ranker_kwargs = dict(zip(ranker_param_names, ranker_values)) + ranker_kwargs_local = ranker_kwargs.copy() + ranker_kwargs_local.update(indexer_kwargs_local) + model_path = ws.get_path_for_name_and_kwargs("model", ranker_kwargs_local) + xlinear_models += [(XLinearModel.load(model_path), ranker_kwargs_local)] + + # Load output items + with open(output_text_path, "r", encoding="utf-8") as f: + output_items = [q.strip() for q in f] + + return cls(preprocessor, xlinear_models, output_items) + + def predict( + self, corpus, topk=10, beam_size=None, post_processor=None, threshold=None, **kwargs + ): + """Predict labels for given inputs + + Args: + corpus (list of strings): input strings. + topk (int, optional): the number of top output items to return for each input. + Default 10. + beam_size (int, optional): override the beam size specified in the model. + Default None to disable overriding + post_processor (str, optional): override the post_processor specified in the model. + Default None to disable overriding + threshold (float, optional): Drop output items with scores less than this threshold among top-k items. + Default None to not threshold + kwargs: + threads (int, optional): the number of threads to use for predicting. + Default to -1 to use all. + Returns: + csr_matrix: predicted label matrix (num_samples x num_labels) + """ + + X = self.preprocessor.predict(corpus) + Y_pred = smat_util.CsrEnsembler.average( + *[ + m.predict( + X, only_topk=topk, beam_size=beam_size, post_processor=post_processor, **kwargs + ) + for m, _ in self.xlinear_models + ] + ) + + if threshold is not None: + Y_pred.data[Y_pred.data <= threshold] = 0 + Y_pred.eliminate_zeros() + + return smat_util.sorted_csr(Y_pred, topk) + + def set_output_constraint(self, output_items_to_keep): + """Prune the model trees so that only the given output items can be predicted + + Args: + output_items_to_keep (list of strings): A list of output items to be kept in the tree. The rest will be pruned. + """ + output_items = dict() + for i, item in enumerate(self.output_items): + output_items[item] = i + output_labels_to_keep = set() + for item in output_items_to_keep: + if item in output_items: + output_labels_to_keep.add(output_items[item]) + for xlm, _ in self.xlinear_models: + xlm.set_output_constraint(output_labels_to_keep) + + def get_output_item(self, output_id): + """Get output item given an output id + + Args: + output_id (int): output index + + Returns: + A string: the output item corresponding to the given index + """ + return self.output_items[output_id] + + def print_predictions(self, Y, fout, meta_info=None): + """Dump predicted items + + Args: + Y (csr_matrix): predicted label matrix + fout (a file object): The file (or stdout) to dump + meta_info (list of string, optional): meta_info to be incorporated into the output for each input.
+ Default None + + """ + if meta_info is not None: + assert Y.shape[0] == len(meta_info), "meta_info and Y should have the same sample size" + + for i in range(Y.shape[0]): + output_data = {} + output_data["schema"] = ["output_item", "score"] + item_score = [] + idx = slice(Y.indptr[i], Y.indptr[i + 1]) + for output_id, score in zip(Y.indices[idx], Y.data[idx]): + item_score.append( + [self.get_output_item(output_id), float(format(float(score), ".5g"))] + ) + output_data["data"] = item_score + if meta_info is not None: + output_data["meta_info"] = meta_info[i] + ss = json.dumps(output_data) + fout.write(ss + "\n") + fout.flush() diff --git a/pecos/apps/text2text/predict.py b/pecos/apps/text2text/predict.py new file mode 100644 index 00000000..dcb10d86 --- /dev/null +++ b/pecos/apps/text2text/predict.py @@ -0,0 +1,194 @@ +# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions +# and limitations under the License. +import argparse +import itertools +import sys + +from pecos.xmc import PostProcessor + +from .model import Text2Text + + +def parse_arguments(): + """Parse Text2Text model prediction arguments""" + + parser = argparse.ArgumentParser(description="Text2Text: online prediction or batch prediction") + + parser.add_argument( + "-m", + "--model-folder", + type=str, + required=True, + metavar="MODEL_FOLDER", + help="model folder name", + ) + + parser.add_argument( + "-i", + "--input-text-path", + type=str, + default="-", + metavar="INPUT_TEXT_PATH", + help='path to text input file name. (default "-" to denote stdin). We assume utf-8 encoding for text.', + ) + + parser.add_argument( + "-o", + "--predicted-output-item-path", + type=str, + default="-", + metavar="OUTPUT_PATH", + help='path to the predicted output item file name. (default "-" to denote stdout). We assume utf-8 encoding for text.', + ) + + parser.add_argument( + "--output-to-keep-path", + type=str, + default=None, + metavar="OUTPUT_TO_KEEP_PATH", + help="path to valid output texts file name. (default None to keep all output items). 
We assume utf-8 encoding for text.", + ) + + parser.add_argument( + "-k", + "--only-topk", + type=int, + default=20, + help="Output top-k items for each input (default 20)", + ) + + parser.add_argument( + "-b", + "--beam-size", + type=int, + default=None, + metavar="INT", + help="Override the beam size specified in the model (default None to disable overriding)", + ) + + parser.add_argument( + "-pp", + "--post-processor", + type=str, + choices=PostProcessor.valid_list(), + default=None, + metavar="STR", + help="Override the post processor specified in the model (default None to disable overriding)", + ) + + parser.add_argument( + "-B", + "--batch-size", + type=int, + default=2048, + help="Batch size for prediction (default 2048)", + ) + + parser.add_argument( + "-T", + "--threshold", + type=float, + default=None, + help="Drop output items with scores less than this threshold among top-k items (default None)", + ) + + parser.add_argument( + "--meta-info-path", + type=str, + default=None, + help="Path to the file of meta info for each line in input_text_path. (default None)", + ) + + return parser + + +def predict(args): + """Predict labels for given text inputs using Text2Text model + + Args: + args (argparse.Namespace): Command line arguments parsed by `parser.parse_args()` + """ + + t2t_model = Text2Text.load( + args.model_folder, is_predict_only=(args.output_to_keep_path is None) + ) + + if args.output_to_keep_path is not None: + outputs_to_keep = [] + with open(args.output_to_keep_path, "r", encoding="utf-8") as f: + for line in f: + outputs_to_keep += [line.strip()] + t2t_model.set_output_constraint(outputs_to_keep) + + if args.predicted_output_item_path != "-": + fout = open(args.predicted_output_item_path, "w", encoding="utf-8") + else: + fout = sys.stdout + + if args.input_text_path != "-": + if args.meta_info_path is not None: + fmeta = open(args.meta_info_path, "r", encoding="utf-8") + else: + fmeta = [] + + with open(args.input_text_path, "r", encoding="utf-8") as fin: + # Divide the test file into small batches to avoid out-of-memory issues. + corpus = [] + meta_info = [] + for input_line, meta_line in itertools.zip_longest(fin, fmeta, fillvalue=""): + corpus += [input_line.strip("\n").split("\t")[-1]] + meta_info += [meta_line.strip("\n")] + if len(corpus) == args.batch_size: + Y = t2t_model.predict( + corpus, + topk=args.only_topk, + beam_size=args.beam_size, + post_processor=args.post_processor, + threshold=args.threshold, + ) + if args.meta_info_path is None: + t2t_model.print_predictions(Y, fout) + else: + t2t_model.print_predictions(Y, fout, meta_info=meta_info) + corpus = [] + meta_info = [] + + if len(corpus) > 0: + Y = t2t_model.predict( + corpus, + topk=args.only_topk, + beam_size=args.beam_size, + post_processor=args.post_processor, + threshold=args.threshold, + ) + if args.meta_info_path is None: + t2t_model.print_predictions(Y, fout) + else: + t2t_model.print_predictions(Y, fout, meta_info=meta_info) + else: + fin = sys.stdin + for line in fin: + Y = t2t_model.predict( + [line.strip()], + topk=args.only_topk, + beam_size=args.beam_size, + post_processor=args.post_processor, + threshold=args.threshold, + ) + t2t_model.print_predictions(Y, fout) + + fin.close() + fout.close() + + +if __name__ == "__main__": + parser = parse_arguments() + args = parser.parse_args() + predict(args) diff --git a/pecos/apps/text2text/train.py b/pecos/apps/text2text/train.py new file mode 100644 index 00000000..0619533d --- /dev/null +++ b/pecos/apps/text2text/train.py @@ -0,0 +1,308 @@ +# Copyright 2021 Amazon.com, Inc.
or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions +# and limitations under the License. +import argparse +import logging +import sys + +import numpy as np +from pecos.core import XLINEAR_SOLVERS +from pecos.utils import cli, logging_util +from pecos.utils.featurization.text.vectorizers import Vectorizer +from pecos.xmc import Indexer, PostProcessor + +from .model import Text2Text + + +def parse_arguments(args): + """Parse Text2Text model training arguments""" + + parser = argparse.ArgumentParser( + description="Text2Text: Read input text training files, output item files and train a model" + ) + + parser.add_argument( + "-i", + "--input-text-path", + type=str, + required=True, + metavar="INPUT_TEXT_PATH", + help="Text input file name. Format: in each line, OUTPUT_ID1,OUTPUT_ID2,OUTPUT_ID3,...\t INPUT_TEXT \ + where OUTPUT_IDs are the zero-based output item indices corresponding to the line numbers of OUTPUT_ITEM_PATH. We assume utf-8 encoding for text.", + ) + + parser.add_argument( + "-q", + "--output-item-path", + type=str, + required=True, + metavar="OUTPUT_ITEM_PATH", + help="Output item file name. Format: each line corresponds to a representation of the output item. We assume utf-8 encoding for text.", + ) + + parser.add_argument( + "-m", + "--model-folder", + type=str, + required=True, + metavar="MODEL_FOLDER", + help="Output model folder name", + ) + + parser.add_argument( + "--workspace-folder", + type=str, + default=None, + metavar="WORKSPACE_FOLDER", + help="A folder name for storing intermediate variables during training", + ) + + vectorizer_config_group_parser = parser.add_mutually_exclusive_group() + vectorizer_config_group_parser.add_argument( + "--vectorizer-config-path", + type=str, + default=None, + metavar="VECTORIZER_CONFIG_PATH", + help="Json file for vectorizer config (default tfidf vectorizer)", + ) + + vectorizer_config_group_parser.add_argument( + "--vectorizer-config-json", + type=str, + default='{"type":"tfidf", "kwargs":{}}', + metavar="VECTORIZER_CONFIG_JSON", + help='Json-format string for vectorizer config (default {"type":"tfidf", "kwargs":{}})', + ) + + parser.add_argument( + "--dtype", + type=lambda x: np.float32 if "32" in x else np.float64, + default=np.float32, + help="data type for the csr matrix. float32 | float64. (default float32)", + ) + + parser.add_argument( + "--max-leaf-size", + type=cli.comma_separated_type(int), + default=[100], + metavar="INT-LIST", + help="The max size of the leaf nodes of hierarchical 2-means clustering. Multiple values (separated by comma) are supported and will lead to different individual models for ensembling. (default [100])", + ) + + parser.add_argument( + "--nr-splits", + type=int, + default=2, + metavar="INT", + help="number of splits used to construct hierarchy (a power of 2 is recommended, default 2)", + ) + + parser.add_argument( + "--imbalanced-ratio", + type=float, + default=0.0, + metavar="FLOAT", + help="Value between 0.0 and 0.5 (inclusive). Indicates how relaxed the balancedness constraint of 2-means can be. 
Specifically, if an iteration of 2-means is clustering L labels, the size of the output 2 clusters will be within approx imbalanced_ratio * 2 * L of each other. (default 0.0)", + ) + + parser.add_argument( + "--imbalanced-depth", + type=int, + default=100, + metavar="INT", + help="After hierarchical 2-means clustering has reached this depth, it will continue clustering as if --imbalanced-ratio is set to 0.0. (default 100)", + ) + + parser.add_argument( + "--label-embed-type", + type=cli.comma_separated_type(str), + default="pifa", + metavar="STR-LIST", + help="Label embedding types. (default pifa). Multiple values (separated by comma) are supported and will lead to different individual models for ensembling.", + ) + + parser.add_argument( + "--indexer", + choices=Indexer.indexer_dict.keys(), + default="hierarchicalkmeans", + metavar="STR", + help=f"Indexer algorithm (default hierarchicalkmeans). Available choices are {', '.join(Indexer.indexer_dict.keys())}", + ) + + parser.add_argument( + "--no-spherical", + action="store_true", + default=False, + help="Do not l2-normalize cluster centers while clustering", + ) + + parser.add_argument( + "--seed", + type=cli.comma_separated_type(int), + default=[0], + metavar="INT-LIST", + help="Random seeds (default 0). Multiple values (separated by comma) are supported and will lead to different individual models for ensembling.", + ) + + parser.add_argument( + "--max-iter", + type=int, + default=20, + metavar="INT", + help="The max iteration for indexing (default 20)", + ) + + parser.add_argument( + "-n", + "--threads", + type=int, + default=-1, + metavar="INT", + help="Number of threads to use (default -1 to denote all the CPUs)", + ) + + # Linear matching/ranking parameters + parser.add_argument( + "-s", + "--solver-type", + type=str, + default="L2R_L2LOSS_SVC_DUAL", + metavar="STR", + help="{} (default L2R_L2LOSS_SVC_DUAL)".format(" | ".join(XLINEAR_SOLVERS.keys())), + ) + + parser.add_argument( + "--Cp", + type=float, + default=1.0, + metavar="VAL", + help="Coefficient for positive class in the loss function (default 1.0)", + ) + + parser.add_argument( + "--Cn", + type=float, + default=1.0, + metavar="VAL", + help="Coefficient for negative class in the loss function (default 1.0)", + ) + + parser.add_argument( + "--bias", type=float, default=1.0, metavar="VAL", help="bias term (default 1.0)" + ) + + parser.add_argument( + "-ns", + "--negative-sampling", + type=str, + choices=["tfn", "man", "tfn+man"], + default="tfn", + metavar="STR", + help="Negative Sampling Schemes", + ) + + parser.add_argument( + "-t", + "--threshold", + type=float, + default=0.1, + metavar="VAL", + help="Threshold to sparsify the model weights (default 0.1)", + ) + + # Prediction kwargs + parser.add_argument( + "-k", + "--only-topk", + type=int, + default=20, + metavar="INT", + help="the default number of top labels used in the prediction", + ) + + parser.add_argument( + "-b", + "--beam-size", + type=int, + default=10, + metavar="INT", + help="the default size of beam search used in the prediction", + ) + + parser.add_argument( + "-pp", + "--post-processor", + type=str, + choices=PostProcessor.valid_list(), + default="l3-hinge", + metavar="STR", + help="the default post processor used in the prediction", + ) + + parser.add_argument( + "--verbose-level", + type=int, + choices=logging_util.log_levels.keys(), + default=1, + metavar="INT", + help=f"the verbose level, {', '.join([str(k) + ' for ' + logging.getLevelName(v) for k, v in logging_util.log_levels.items()])}, default 1", 
+ ) + + parsed_args = parser.parse_args(args) + return parsed_args + + +def train(args): + """Train Text2Text model + + Args: + args (argparse.Namespace): Command line arguments parsed by `parser.parse_args()` + """ + + pred_kwargs = { + "beam_size": args.beam_size, + "only_topk": args.only_topk, + "post_processor": args.post_processor, + } + + vectorizer_config = Vectorizer.load_config_from_args(args) + + t2t_model = Text2Text.train( + args.input_text_path, + args.output_item_path, + label_embed_type=args.label_embed_type, + max_leaf_size=args.max_leaf_size, + nr_splits=args.nr_splits, + vectorizer_config=vectorizer_config, + dtype=args.dtype, + indexer_algo=[args.indexer], + imbalanced_ratio=args.imbalanced_ratio, + imbalanced_depth=args.imbalanced_depth, + spherical=not args.no_spherical, + seed=args.seed, + max_iter=args.max_iter, + threads=args.threads, + solver_type=args.solver_type, + Cp=args.Cp, + Cn=args.Cn, + bias=args.bias, + threshold=args.threshold, + negative_sampling_scheme=args.negative_sampling, + pred_kwargs=pred_kwargs, + workspace_folder=args.workspace_folder, + ) + + t2t_model.save(args.model_folder) + + +if __name__ == "__main__": + args = parse_arguments(sys.argv[1:]) + logging_util.setup_logging_config(level=args.verbose_level) + train(args) diff --git a/pecos/core/__init__.py b/pecos/core/__init__.py new file mode 100644 index 00000000..f47a8e4d --- /dev/null +++ b/pecos/core/__init__.py @@ -0,0 +1,21 @@ +# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions +# and limitations under the License. +from .base import ( # noqa + XLINEAR_SOLVERS, # noqa + XLINEAR_INFERENCE_MODEL_TYPES, # noqa + ScipyCscF32, # noqa + ScipyCsrF32, # noqa + ScipyDrmF32, # noqa + ScipyDcmF32, # noqa + ScipyCoordinateSparseAllocator, # noqa + ScipyCompressedSparseAllocator, # noqa + clib, # noqa +) # noqa diff --git a/pecos/core/base.py b/pecos/core/base.py new file mode 100644 index 00000000..f76e5db0 --- /dev/null +++ b/pecos/core/base.py @@ -0,0 +1,1300 @@ +# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions +# and limitations under the License. 
+import copy +import ctypes +import logging +import os +from ctypes import ( + CDLL, + CFUNCTYPE, + POINTER, + byref, + c_bool, + c_char_p, + c_double, + c_float, + c_int, + c_int32, + c_uint32, + c_uint64, + c_void_p, + cast, +) +from glob import glob +from subprocess import check_output + +import numpy as np +import pecos +import scipy.sparse as smat +from pecos.utils import smat_util + +LOGGER = logging.getLogger(__name__) + +XLINEAR_SOLVERS = {"L2R_L2LOSS_SVC_DUAL": 1, "L2R_L1LOSS_SVC_DUAL": 3, "L2R_LR_DUAL": 7} +XLINEAR_INFERENCE_MODEL_TYPES = {"CSC": 0, "HASH_CHUNKED": 1, "BINARY_SEARCH_CHUNKED": 2} +TFIDF_TOKENIZER_CODES = {"word": 10, "char": 20, "char_wb": 30} + + +class TfidfBaseVectorizerParam(ctypes.Structure): + """ + Python class for handling struct TfidfBaseVectorizerParam in tfidf.hpp + """ + + _fields_ = [ + ("min_ngram", c_int32), + ("max_ngram", c_int32), + ("max_length", c_int32), + ("max_feature", c_int32), + ("min_df_ratio", c_float), + ("max_df_ratio", c_float), + ("min_df_cnt", c_int32), + ("max_df_cnt", c_int32), + ("binary", c_bool), + ("use_idf", c_bool), + ("smooth_idf", c_bool), + ("sublinear_tf", c_bool), + ("keep_frequent_feature", c_bool), + ("norm_p", c_int32), + ("tok_type", c_int32), + ] + + DEFAULTS = { + "min_ngram": 1, + "max_ngram": 1, + "max_length": -1, + "max_feature": 0, + "min_df_ratio": 0.0, + "max_df_ratio": 1.0, + "min_df_cnt": 0, + "max_df_cnt": -1, + "binary": False, + "use_idf": True, + "smooth_idf": True, + "sublinear_tf": False, + "keep_frequent_feature": True, + "norm_p": 2, + "tok_type": TFIDF_TOKENIZER_CODES["word"], + } + + @classmethod + def get_default(cls, name): + return copy.deepcopy(cls.DEFAULTS[name]) + + def __init__(self, config_dict=None): + if config_dict is None: + config_dict = {} + + def extract_dict_key(config_dict, key, alias): + return config_dict.get(key, config_dict.get(alias, self.get_default(key))) + + config_dict["norm_p"] = extract_dict_key(config_dict, "norm_p", "norm") + # to support norm_p being "l1" or "l2" + if isinstance(config_dict["norm_p"], str): + config_dict["norm_p"] = int(config_dict["norm_p"][1:]) + if not (config_dict["norm_p"] == 1 or config_dict["norm_p"] == 2): + raise NotImplementedError("norm_p only supports 1 or 2") + + config_dict["tok_type"] = extract_dict_key(config_dict, "tok_type", "analyzer") + if isinstance(config_dict["tok_type"], str): + config_dict["tok_type"] = TFIDF_TOKENIZER_CODES[config_dict["tok_type"]] + + config_dict["max_length"] = extract_dict_key(config_dict, "max_length", "truncate_length") + + if "ngram_range" in config_dict: + config_dict["min_ngram"] = config_dict["ngram_range"][0] + config_dict["max_ngram"] = config_dict["ngram_range"][1] + + name2type = dict(TfidfBaseVectorizerParam._fields_) + for name in name2type: + setattr(self, name, name2type[name](config_dict.get(name, self.get_default(name)))) + + +class TfidfVectorizerParam(ctypes.Structure): + """ + Python class for handling struct TfidfVectorizerParam in tfidf.hpp + """ + + _fields_ = [ + ("base_param_ptr", POINTER(TfidfBaseVectorizerParam)), + ("num_base_vect", c_int32), + ("norm_p", c_int32), + ] + + def __init__(self, base_vect_param_list, norm_p): + + self.num_base_vect = len(base_vect_param_list) + self.c_base_params = (TfidfBaseVectorizerParam * self.num_base_vect)() + for i, base_vect_param in enumerate(base_vect_param_list): + self.c_base_params[i] = base_vect_param + + self.base_param_ptr = cast(self.c_base_params, POINTER(TfidfBaseVectorizerParam)) + self.num_base_vect =
c_int32(self.num_base_vect) + self.norm_p = c_int32(norm_p) + + +class ScipyCscF32(ctypes.Structure): + """ + PyMatrix for scipy.sparse.csc_matrix + """ + + _fields_ = [ + ("rows", c_uint32), + ("cols", c_uint32), + ("col_ptr", POINTER(c_uint64)), + ("row_idx", POINTER(c_uint32)), + ("val", POINTER(c_float)), + ] + + def __init__(self, A): + assert isinstance(A, smat.csc_matrix) + assert A.dtype == np.float32 + self.py_buf = { + "col_ptr": A.indptr.astype(np.uint64, copy=False), + "row_idx": A.indices.astype(np.uint32, copy=False), + "val": A.data.astype(np.float32, copy=False), + } + + self.rows = c_uint32(A.shape[0]) + self.cols = c_uint32(A.shape[1]) + name2type = dict(ScipyCscF32._fields_) + for name in self.py_buf: + setattr(self, name, self.py_buf[name].ctypes.data_as(name2type[name])) + self.buf = A + + @property + def dtype(self): + return self.buf.dtype + + @property + def shape(self): + return self.buf.shape + + @classmethod + def init_from(cls, A): + if A is None: + return None + elif isinstance(A, cls): + return A + else: + return cls(A) + + +class ScipyCsrF32(ctypes.Structure): + """ + PyMatrix for scipy.sparse.csr_matrix + """ + + _fields_ = [ + ("rows", c_uint32), + ("cols", c_uint32), + ("row_ptr", POINTER(c_uint64)), + ("col_idx", POINTER(c_uint32)), + ("val", POINTER(c_float)), + ] + + def __init__(self, A): + assert isinstance(A, smat.csr_matrix) + assert A.dtype == np.float32 + self.py_buf = { + "row_ptr": A.indptr.astype(np.uint64, copy=False), + "col_idx": A.indices.astype(np.uint32, copy=False), + "val": A.data.astype(np.float32, copy=False), + } + + self.rows = c_uint32(A.shape[0]) + self.cols = c_uint32(A.shape[1]) + name2type = dict(ScipyCsrF32._fields_) + for name in self.py_buf: + setattr(self, name, self.py_buf[name].ctypes.data_as(name2type[name])) + self.buf = A + + @classmethod + def init_from(cls, A): + if A is None: + return None + elif isinstance(A, cls): + return A + else: + return cls(A) + + @property + def dtype(self): + return self.buf.dtype + + @property + def shape(self): + return self.buf.shape + + def dot(self, other): + return self.buf.dot(other) + + +class ScipyDrmF32(ctypes.Structure): + """ + PyMatrix for row-major scipy.ndarray + """ + + _fields_ = [("rows", c_uint32), ("cols", c_uint32), ("val", POINTER(c_float))] + + def __init__(self, A): + assert isinstance(A, np.ndarray) + assert A.dtype == np.float32 + assert A.flags.c_contiguous is True + self.py_buf = {"val": A} + + self.rows = c_uint32(A.shape[0]) + self.cols = c_uint32(A.shape[1]) + name2type = dict(ScipyDrmF32._fields_) + for name in self.py_buf: + setattr(self, name, self.py_buf[name].ctypes.data_as(name2type[name])) + self.buf = A + + @classmethod + def init_from(cls, A): + if A is None: + return None + elif isinstance(A, cls): + return A + else: + return cls(A) + + @property + def dtype(self): + return self.buf.dtype + + @property + def shape(self): + return self.buf.shape + + def dot(self, other): + if isinstance(other, smat.spmatrix): + return other.T.dot(self.buf.T).T + else: + return self.buf.dot(other) + + +class ScipyDcmF32(ctypes.Structure): + """ + PyMatrix for col-major scipy.ndarray + """ + + _fields_ = [("rows", c_uint32), ("cols", c_uint32), ("val", POINTER(c_float))] + + def __init__(self, A): + assert isinstance(A, np.ndarray) + assert A.dtype == np.float32 + assert A.flags.f_contiguous is True + self.py_buf = {"val": A} + + self.rows = c_uint32(A.shape[0]) + self.cols = c_uint32(A.shape[1]) + name2type = dict(ScipyDcmF32._fields_) + for name in self.py_buf: + 
setattr(self, name, self.py_buf[name].ctypes.data_as(name2type[name])) + self.buf = A + + @classmethod + def init_from(cls, A): + if A is None: + return None + elif isinstance(A, cls): + return A + else: + return cls(A) + + @property + def dtype(self): + return self.buf.dtype + + @property + def shape(self): + return self.buf.shape + + def dot(self, other): + if isinstance(other, smat.spmatrix): + return other.T.dot(self.buf.T).T + else: + return self.buf.dot(other) + + +class ScipyCoordinateSparseAllocator(object): + """ + Scipy Coordinate Sparse Matrix Allocator for C++/C code + """ + + CFUNCTYPE = CFUNCTYPE(None, c_uint32, c_uint32, c_uint64, c_void_p, c_void_p, c_void_p) + + def __init__(self, rows=0, cols=0, dtype=np.float64): + self.rows = rows + self.cols = cols + self.row_idx = None + self.col_idx = None + self.data = None + self.dtype = dtype + assert dtype == np.float32 or dtype == np.float64 + + def __call__(self, rows, cols, nnz, row_ptr, col_ptr, val_ptr): + self.rows = rows + self.cols = cols + self.row_idx = np.zeros(nnz, dtype=np.uint64) + self.col_idx = np.zeros(nnz, dtype=np.uint64) + self.data = np.zeros(nnz, dtype=self.dtype) + cast(row_ptr, POINTER(c_uint64)).contents.value = self.row_idx.ctypes.data_as( + c_void_p + ).value + cast(col_ptr, POINTER(c_uint64)).contents.value = self.col_idx.ctypes.data_as( + c_void_p + ).value + cast(val_ptr, POINTER(c_uint64)).contents.value = self.data.ctypes.data_as(c_void_p).value + + def tocoo(self): + return smat.coo_matrix( + (self.data, (self.row_idx, self.col_idx)), shape=(self.rows, self.cols) + ) + + def tocsr(self): + return smat.csr_matrix( + (self.data, (self.row_idx, self.col_idx)), shape=(self.rows, self.cols) + ) + + def tocsc(self): + return smat.csc_matrix( + (self.data, (self.row_idx, self.col_idx)), shape=(self.rows, self.cols) + ) + + @property + def cfunc(self): + return self.CFUNCTYPE(self) + + +class ScipyCompressedSparseAllocator(object): + """ + Scipy Compressed Sparse Matrix Allocator for C++/C code, + which supports both smat.csr_matrix and smat.csc_matrix. + + Whether it is row or column major is controlled by self.is_col_major, + which is passed in by the first argument in the __call__(). + + Attributes: + CFUNCTYPE (ctypes.CFUNCTYPE): a function prototype creates functions that uses the standard C calling convention + """ + + CFUNCTYPE = CFUNCTYPE(None, c_bool, c_uint64, c_uint64, c_uint64, c_void_p, c_void_p, c_void_p) + + def __init__(self, rows=0, cols=0, dtype=np.float32): + self.cols = cols + self.rows = rows + self.indices = None + self.indptr = None + self.data = None + self.dtype = dtype + self.is_col_major = None + assert dtype == np.float32 + + def __call__(self, is_col_major, rows, cols, nnz, indices_ptr, indptr_ptr, data_ptr): + """ + Allocate memory for the members + + Parameters: + is_col_major (bool): specifying whether the to-be allocated matrix is row-majored or col-majored. + rows (int): the number of rows of the sparse matrix. + cols (int): the number of cols of the sparse matrix. + nnz (int): the number of non-zeros of the sparse matrix. + indptr_ptr (pointer): the pointer to the nnz array, of length (rows+1) or (cols+1). + indices_ptr (pointer): the pointer to the row/col indices array, of length nnz. + data_ptr (pointer): the pointer to the non-zero values array, of length nnz. 
+ + Returns: + None + """ + + self.cols = cols + self.rows = rows + self.is_col_major = is_col_major + if is_col_major: + self.indptr = np.zeros(cols + 1, dtype=np.uint64) + else: + self.indptr = np.zeros(rows + 1, dtype=np.uint64) + self.indices = np.zeros(nnz, dtype=np.uint32) + self.data = np.zeros(nnz, dtype=self.dtype) + + cast(indices_ptr, POINTER(c_uint64)).contents.value = self.indices.ctypes.data_as( + c_void_p + ).value + cast(indptr_ptr, POINTER(c_uint64)).contents.value = self.indptr.ctypes.data_as( + c_void_p + ).value + cast(data_ptr, POINTER(c_uint64)).contents.value = self.data.ctypes.data_as(c_void_p).value + + def get(self): + if self.is_col_major: + return smat_util.csc_matrix( + (self.data, self.indices, self.indptr), shape=(self.rows, self.cols) + ) + else: + return smat_util.csr_matrix( + (self.data, self.indices, self.indptr), shape=(self.rows, self.cols) + ) + + @property + def cfunc(self): + return self.CFUNCTYPE(self) + + +class corelib(object): + """ + The core functions for linear problems + """ + + @staticmethod + def fillprototype(f, restype, argtypes): + """ + Specify corelib function's return type and argument types. + + Args: + restype (single or list of ctypes): The return type. + argtypes (list of ctypes): The argument types. + """ + f.restype = restype + f.argtypes = argtypes + + @staticmethod + def load_dynamic_library(dirname, soname, forced_rebuild=False): + """ + Load compiled C library into Python. + If not found, will build upon loading. + + Args: + dirname (str): The directory of C library. + soname (str): The name of C library. + force_rebuild (bool, optional): Whether to force rebuild C library upon calling. + + Return: + c_lib (CDLL): Ctypes CDLL library. + """ + try: + if forced_rebuild: + check_output("make -C {} clean lib".format(dirname), shell=True) + path_to_so = glob(os.path.join(dirname, soname) + "*.so")[0] + _c_lib = CDLL(path_to_so) + except BaseException: + try: + check_output("make -C {} clean lib".format(dirname), shell=True) + path_to_so = glob(os.path.join(dirname, soname) + "*.so")[0] + _c_lib = CDLL(path_to_so) + except BaseException: + raise Exception("{soname} library cannot be found and built.".format(soname=soname)) + return _c_lib + + def __init__(self, dirname, soname, forced_rebuild=False): + self.clib_float32 = corelib.load_dynamic_library( + dirname, soname + "_float32", forced_rebuild=forced_rebuild + ) + self.link_xlinear_methods() + self.link_sparse_operations() + self.link_clustering() + self.link_tfidf_vectorizer() + + def link_xlinear_methods(self): + """ + Specify C-lib's Xlinear methods argument and return type. 
+ """ + arg_list = [ + POINTER(ScipyCsrF32), # CSR X + POINTER(ScipyCscF32), # CSC Y + POINTER(ScipyCscF32), # CSC C + POINTER(ScipyCscF32), # CSC M + POINTER(ScipyCscF32), # CSC R + ScipyCoordinateSparseAllocator.CFUNCTYPE, # py_coo_allocator + c_double, # threshold + c_uint32, # max_nonzeros_per_label + c_int, # solver_type + c_double, # Cp + c_double, # Cn + c_uint64, # max_iter + c_double, # eps + c_double, # bias + c_int, # threads + ] + corelib.fillprototype( + self.clib_float32.c_xlinear_single_layer_train_csr_f32, + None, + [POINTER(ScipyCsrF32)] + arg_list[1:], + ) + corelib.fillprototype( + self.clib_float32.c_xlinear_single_layer_train_drm_f32, + None, + [POINTER(ScipyDrmF32)] + arg_list[1:], + ) + + arg_list = [c_void_p] + corelib.fillprototype(self.clib_float32.c_xlinear_destruct_model, None, arg_list) + + # Interface for sparse prediction + arg_list = [ + c_void_p, + POINTER(ScipyCsrF32), + c_uint32, + c_char_p, + c_uint32, + c_int, + ScipyCompressedSparseAllocator.CFUNCTYPE, + ] + corelib.fillprototype(self.clib_float32.c_xlinear_predict_csr_f32, None, arg_list) + + # Interface for dense prediction + arg_list = [ + c_void_p, + POINTER(ScipyDrmF32), + c_uint32, + c_char_p, + c_uint32, + c_int, + ScipyCompressedSparseAllocator.CFUNCTYPE, + ] + corelib.fillprototype(self.clib_float32.c_xlinear_predict_drm_f32, None, arg_list) + + # c interface for loading just model tree directly (no tfidf) + res_list = c_void_p + arg_list = [c_char_p] + corelib.fillprototype(self.clib_float32.c_xlinear_load_model_from_disk, res_list, arg_list) + + res_list = c_void_p + arg_list = [c_char_p, c_int] + corelib.fillprototype( + self.clib_float32.c_xlinear_load_model_from_disk_ext, res_list, arg_list + ) + + # c interface for per-layer prediction + arg_list = [ + POINTER(ScipyCsrF32), + POINTER(ScipyCsrF32), + POINTER(ScipyCscF32), + POINTER(ScipyCscF32), + c_char_p, + c_uint32, + c_int, + c_float, + ScipyCompressedSparseAllocator.CFUNCTYPE, + ] + corelib.fillprototype( + self.clib_float32.c_xlinear_single_layer_predict_csr_f32, None, arg_list + ) + + arg_list = [ + POINTER(ScipyDrmF32), + POINTER(ScipyCsrF32), + POINTER(ScipyCscF32), + POINTER(ScipyCscF32), + c_char_p, + c_uint32, + c_int, + c_float, + ScipyCompressedSparseAllocator.CFUNCTYPE, + ] + corelib.fillprototype( + self.clib_float32.c_xlinear_single_layer_predict_drm_f32, None, arg_list + ) + + res_list = c_uint32 + arg_list = [c_void_p, c_char_p] + corelib.fillprototype(self.clib_float32.c_xlinear_get_int_attr, res_list, arg_list) + + def xlinear_load_predict_only( + self, + folder, + weight_matrix_type="BINARY_SEARCH_CHUNKED", + ): + """ + Load xlinear model in predict only mode. + + Args: + folder (str): The folder path for xlinear model. + weight_matrix_type (str, optional): The xlinear inference model types. + + Return: + cmodel (ptr): The pointer to xlinear model. + """ + weight_matrix_type_id = XLINEAR_INFERENCE_MODEL_TYPES[weight_matrix_type] + cmodel = self.clib_float32.c_xlinear_load_model_from_disk_ext( + c_char_p(folder.encode("utf-8")), c_int(int(weight_matrix_type_id)) + ) + return cmodel + + def xlinear_destruct_model(self, c_model): + """ + Destruct xlinear model. + + Args: + cmodel (ptr): The pointer to xlinear model. + """ + self.clib_float32.c_xlinear_destruct_model(c_model) + + def xlinear_predict( + self, + c_model, + X, + overriden_beam_size, + overriden_post_processor_str, + overriden_only_topk, + threads, + pred_alloc, + ): + """ + Performs a full prediction using the given model and queries. 
+ + Args: + c_model (c_pointer): A C pointer to the model to use for prediction. This pointer + is returned by the c_load_xlinear_model_from_disk and + c_load_xlinear_model_from_disk_ext functions in corelib.clib_float32. + X: The query matrix (admissible formats are smat.csr_matrix, + np.ndarray, ScipyCsrF32, or ScipyDrmF32). Note that if this is smat.csr_matrix, + the matrix must have sorted indices. You can call sort_indices() to ensure this. + overriden_beam_size (uint): Overrides the beam size to use for prediction. Use None for + model defaults. + overriden_post_processor_str (string): Overrides the post processor to use by name. Use + None for model defaults. + overriden_only_topk (uint): Overrides the number of results to return for each query. Use + None for model defaults. + threads (int): Sets the number of threads to use in computation. Use + -1 to use the maximum amount of available threads. + pred_alloc (ScipyCompressedSparseAllocator): The allocator to store the result in. + """ + clib = self.clib_float32 + + if isinstance(X, smat.csr_matrix): + if not X.has_sorted_indices: + raise ValueError("Query matrix does not have sorted indices!") + X = ScipyCsrF32.init_from(X) + elif isinstance(X, np.ndarray): + X = ScipyDrmF32.init_from(X) + + if isinstance(X, ScipyCsrF32): + c_predict = clib.c_xlinear_predict_csr_f32 + elif isinstance(X, ScipyDrmF32): + c_predict = clib.c_xlinear_predict_drm_f32 + else: + raise NotImplementedError("type(X) = {} not implemented".format(type(X))) + + c_predict( + c_model, + byref(X), + overriden_beam_size if overriden_beam_size else 0, + overriden_post_processor_str.encode("utf-8") if overriden_post_processor_str else None, + overriden_only_topk if overriden_only_topk else 0, + threads, + pred_alloc.cfunc, + ) + + def xlinear_single_layer_predict( + self, + X, + csr_codes, + W, + C, + post_processor_str, + only_topk, + num_threads, + bias, + pred_alloc, + ): + """ + Performs a single layer prediction in C++ using matrices owned by Python. + + Args: + X (csr_matrix): The query matrix. + Note that if this is smat.csr_matrix, the matrix must have sorted indices. + You can call sort_indices() to ensure this. + csr_codes (smat.csr_matrix or ScipyCsrF32): The prediction for the previous layer, None if this is the first layer. + W (smat.csc_matrix, ScipyCscF32): The weight matrix for this layer. + C (smat.csc_matrix, ScipyCscF32): The child/parent map for this layer. + post_processor_str (str): A string specifying which post processor to use. + only_topk (uint): How many results to return for each query. + num_threads (uint): How many threads to use in this computation. Set to -1 to use defaults. + bias (float): The bias of the model. + pred_alloc (ScipyCompressedSparseAllocator): The allocator to store the result in. 
+ """ + clib = self.clib_float32 + + post_processor_str = post_processor_str.encode("utf-8") + + W = ScipyCscF32.init_from(W) + + if isinstance(X, smat.csr_matrix): + if not X.has_sorted_indices: + raise ValueError("Query matrix does not have sorted indices!") + X = ScipyCsrF32.init_from(X) + elif isinstance(X, np.ndarray): + X = ScipyDrmF32.init_from(X) + + if isinstance(X, ScipyCsrF32): + c_single_layer_predict = clib.c_xlinear_single_layer_predict_csr_f32 + elif isinstance(X, ScipyDrmF32): + c_single_layer_predict = clib.c_xlinear_single_layer_predict_drm_f32 + else: + raise NotImplementedError("type(X) = {} not implemented".format(type(X))) + + # csr_codes and pC might be null + if csr_codes is not None: + csr_codes = ScipyCsrF32.init_from(csr_codes) + + if C is None: + C = smat.csc_matrix(np.ones((W.shape[1], 1), dtype=W.dtype)) + C = ScipyCscF32.init_from(C) + + c_single_layer_predict( + byref(X), + byref(csr_codes) if csr_codes is not None else None, + byref(W), + byref(C), + post_processor_str, + only_topk, + num_threads, + bias, + pred_alloc.cfunc, + ) + + def xlinear_single_layer_train( + self, + pX, + pY, + pC, + pM, + pR, + threshold=0.1, + max_nonzeros_per_label=None, + solver_type="L2R_L2LOSS_SVC_DUAL", + Cp=1.0, + Cn=1.0, + max_iter=1000, + eps=0.1, + bias=1.0, + threads=-1, + verbose=0, + **kwargs, + ): + """ + Performs a single layer training in C++ using matrices owned by Python. + + Args: + pX (ScipyCsrF32 or ScipyDrmF32): Instance feature matrix of shape (nr_inst, nr_feat). + pY (ScipyCscF32): Label matrix of shape (nr_inst, nr_labels). + pC (ScipyCscF32): Single matrix from clustering chain, representing a hierarchical clustering. + pM (ScipyCsrF32): Single matrix from matching chain. + pR (ScipyCscF32): Relevance matrix for cost-sensitive learning, of shape (nr_inst, nr_labels). + threshold (float, optional): sparsify the final model by eliminating all entrees with abs value less than threshold. + Default to 0.1. + max_nonzeros_per_label (int, optional): keep at most NONZEROS weight parameters per label in model. + Default None to set to (nr_feat + 1) + solver_type (string, optional): backend linear solver type. + Options: L2R_L2LOSS_SVC_DUAL(default), L2R_L1LOSS_SVC_DUAL. + Cp (float, optional): positive penalty parameter. Defaults to 1.0 + Cn (float, optional): negative penalty parameter. Defaults to 1.0 + max_iter (int, optional): maximum iterations. Defaults to 100 + eps (float, optional): epsilon. Defaults to 0.1 + bias (float, optional): if >0, append the bias value to each instance feature. Defaults to 1.0 + threads (int, optional): the number of threads to use for training. Defaults to -1 to use all + verbose (int, optional): verbose level. Defaults to 0 + + Return: + layer_train_res (smat.csc_matrix): The layer training result. 
+ """ + clib = self.clib_float32 + coo_alloc = ScipyCoordinateSparseAllocator(dtype=np.float32) + if isinstance(pX, ScipyCsrF32): + c_xlinear_single_layer_train = clib.c_xlinear_single_layer_train_csr_f32 + elif isinstance(pX, ScipyDrmF32): + c_xlinear_single_layer_train = clib.c_xlinear_single_layer_train_drm_f32 + else: + raise NotImplementedError("type(pX) = {} not implemented".format(type(pX))) + + c_xlinear_single_layer_train( + byref(pX), + byref(pY), + byref(pC) if pC is not None else None, + byref(pM) if pM is not None else None, + byref(pR) if pR is not None else None, + coo_alloc.cfunc, + threshold, + 0 if max_nonzeros_per_label is None else max_nonzeros_per_label, + XLINEAR_SOLVERS[solver_type], + Cp, + Cn, + max_iter, + eps, + bias, + threads, + ) + return coo_alloc.tocsc().astype(np.float32) + + def xlinear_get_int_attr(self, c_model, attr): + """ + Get int attribute from C xlinear model. + + Args: + c_model (ptr): The C xlinear model pointer. + attr (str): The attribute name to get. + + Return: + int_attr (int): The int attribute under given name. + """ + assert attr in { + "depth", + "nr_features", + "nr_labels", + "nr_codes", + }, f"attr {attr} not implemented" + return self.clib_float32.c_xlinear_get_int_attr(c_model, c_char_p(attr.encode("utf-8"))) + + def link_sparse_operations(self): + """ + Specify C-lib's sparse matrix operation methods argument and return type. + """ + arg_list = [ + POINTER(ScipyCscF32), # pX (should support both CSC and CSR) + POINTER(ScipyCscF32), # pY (should support both CSC and CSR) + ScipyCompressedSparseAllocator.CFUNCTYPE, # allocator for pZ + c_bool, # eliminate_zeros + c_bool, # sorted_indices + c_int, # threads + ] + corelib.fillprototype( + self.clib_float32.c_sparse_matmul_csc_f32, + None, + [POINTER(ScipyCscF32), POINTER(ScipyCscF32)] + arg_list[2:], + ) + corelib.fillprototype( + self.clib_float32.c_sparse_matmul_csr_f32, + None, + [POINTER(ScipyCsrF32), POINTER(ScipyCsrF32)] + arg_list[2:], + ) + + arg_list = [ + POINTER(ScipyCsrF32), # pX + POINTER(ScipyCscF32), # pW + c_uint64, # len + POINTER(c_uint32), # X_row_idx + POINTER(c_uint32), # W_col_idx + POINTER(c_float), # val + c_int, # threads + ] + corelib.fillprototype( + self.clib_float32.c_sparse_inner_products_csr_f32, + None, + [POINTER(ScipyCsrF32)] + arg_list[1:], + ) + corelib.fillprototype( + self.clib_float32.c_sparse_inner_products_drm_f32, + None, + [POINTER(ScipyDrmF32)] + arg_list[1:], + ) + + def sparse_matmul(self, X, Y, eliminate_zeros=False, sorted_indices=True, threads=-1): + """ + Sparse-Sparse matrix multiplication with multithreading (shared-memory). + + Args: + X (smat.csc_matrix, smat.csr_matrix, ScipyCscF32, ScipyCsrF32): The first sparse matrix. + Y (smat.csc_matrix, smat.csr_matrix, ScipyCscF32, ScipyCsrF32): The second sparse matrix. + eliminate_zeros (bool, optional): if true, then eliminate (potential) zeros created by maxnnz in output matrix Z. Default is false. + sorted_indices (bool, optional): if true, then sort the Z.indices for the output matrix Z. Default is true. + threads (int, optional): The number of threads. Default -1 to use all cores. 
+
+        Return:
+            matmul_res (smat.csc_matrix or smat.csr_matrix): The matrix multiplication result of X and Y.
+        """
+
+        if X.shape[1] != Y.shape[0]:
+            raise ValueError("X.shape[1]={} != Y.shape[0]={}".format(X.shape[1], Y.shape[0]))
+
+        clib = self.clib_float32
+        pred_alloc = ScipyCompressedSparseAllocator()
+
+        def is_col_major(X):
+            return isinstance(X, smat.csc_matrix) or isinstance(X, ScipyCscF32)
+
+        def is_row_major(X):
+            return isinstance(X, smat.csr_matrix) or isinstance(X, ScipyCsrF32)
+
+        if is_col_major(X) and is_col_major(Y):
+            pX = ScipyCscF32.init_from(X)
+            pY = ScipyCscF32.init_from(Y)
+            clib.c_sparse_matmul_csc_f32(
+                pX, pY, pred_alloc.cfunc, eliminate_zeros, sorted_indices, threads
+            )
+        elif is_row_major(X) and is_row_major(Y):
+            pX = ScipyCsrF32.init_from(X)
+            pY = ScipyCsrF32.init_from(Y)
+            clib.c_sparse_matmul_csr_f32(
+                pX, pY, pred_alloc.cfunc, eliminate_zeros, sorted_indices, threads
+            )
+        elif is_col_major(X) and is_row_major(Y):
+            if X.nnz > Y.nnz:
+                Y = Y.tocsc()
+                pX = ScipyCscF32.init_from(X)
+                pY = ScipyCscF32.init_from(Y)
+                clib.c_sparse_matmul_csc_f32(
+                    pX, pY, pred_alloc.cfunc, eliminate_zeros, sorted_indices, threads
+                )
+            else:
+                X = X.tocsr()
+                pX = ScipyCsrF32.init_from(X)
+                pY = ScipyCsrF32.init_from(Y)
+                clib.c_sparse_matmul_csr_f32(
+                    pX, pY, pred_alloc.cfunc, eliminate_zeros, sorted_indices, threads
+                )
+        elif is_row_major(X) and is_col_major(Y):
+            if X.nnz > Y.nnz:
+                Y = Y.tocsr()
+                pX = ScipyCsrF32.init_from(X)
+                pY = ScipyCsrF32.init_from(Y)
+                clib.c_sparse_matmul_csr_f32(
+                    pX, pY, pred_alloc.cfunc, eliminate_zeros, sorted_indices, threads
+                )
+            else:
+                X = X.tocsc()
+                pX = ScipyCscF32.init_from(X)
+                pY = ScipyCscF32.init_from(Y)
+                clib.c_sparse_matmul_csc_f32(
+                    pX, pY, pred_alloc.cfunc, eliminate_zeros, sorted_indices, threads
+                )
+        else:
+            raise ValueError(
+                "X and Y should be either csr_matrix/csc_matrix/ScipyCscF32/ScipyCsrF32 !"
+            )
+
+        return pred_alloc.get()
+
+    def sparse_inner_products(self, pX, pW, X_row_idx, W_col_idx, pred_values=None, threads=-1):
+        """
+        Sparse-Sparse matrix batch inner product with multithreading (shared-memory).
+        Computes inner products between the rows of `pX` selected by `X_row_idx` and the
+        columns of `pW` selected by `W_col_idx`.
+        Results will be written into `pred_values` if provided; otherwise, a new array is created for the results.
+
+        Args:
+            pX (ScipyCsrF32 or ScipyDrmF32): The feature matrix (sparse CSR or dense row-major).
+            pW (ScipyCscF32): The second matrix, in column-major form.
+            X_row_idx (ndarray): Row indexes for `pX`.
+            W_col_idx (ndarray): Column indexes for `pW`.
+            pred_values (ndarray, optional): The inner product result array.
+            threads (int, optional): The number of threads. Default -1 to use all cores.
+
+        Return:
+            pred_values (ndarray): The matrix batch inner product results.
+                If `pred_values` not given, return a new allocated ndarray, dtype same as `pW`.
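+
+        Example:
+            A minimal illustrative sketch (shapes and indexes are hypothetical)::
+
+                import numpy as np
+                import scipy.sparse as smat
+
+                X = smat.random(4, 8, density=0.5, format="csr", dtype=np.float32)
+                W = smat.random(8, 3, density=0.5, format="csc", dtype=np.float32)
+                pX = ScipyCsrF32.init_from(X.sorted_indices())
+                pW = ScipyCscF32.init_from(W)
+                rows = np.array([0, 1, 3], dtype=np.uint32)
+                cols = np.array([2, 0, 1], dtype=np.uint32)
+                # vals[i] corresponds to the inner product of X[rows[i], :] and W[:, cols[i]]
+                vals = clib.sparse_inner_products(pX, pW, rows, cols)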
+ """ + clib = self.clib_float32 + + nnz = len(X_row_idx) + assert nnz == len(W_col_idx) + + if not isinstance(pW, ScipyCscF32): + raise NotImplementedError("type(pW) = {} no implemented".format(type(pW))) + + if isinstance(pX, ScipyCsrF32): + c_sparse_inner_products = clib.c_sparse_inner_products_csr_f32 + elif isinstance(pX, ScipyDrmF32): + c_sparse_inner_products = clib.c_sparse_inner_products_drm_f32 + else: + raise NotImplementedError("type(pX) = {} no implemented".format(type(pX))) + + if pred_values is None or len(pred_values) != nnz or pred_values.dtype != np.float32: + pred_values = np.zeros(nnz, pW.dtype) + + c_sparse_inner_products( + byref(pX), + byref(pW), + nnz, + X_row_idx.ctypes.data_as(POINTER(c_uint32)), + W_col_idx.ctypes.data_as(POINTER(c_uint32)), + pred_values.ctypes.data_as(POINTER(c_float)), + threads, + ) + return pred_values + + def link_clustering(self): + """ + Specify C-lib's clustering method argument and return type. + """ + arg_list = [ + POINTER(ScipyCsrF32), + c_uint32, + c_uint32, + c_int, + c_uint32, + c_int, + POINTER(c_uint32), + ] + corelib.fillprototype( + self.clib_float32.c_run_clustering_csr_f32, None, [POINTER(ScipyCsrF32)] + arg_list[1:] + ) + corelib.fillprototype( + self.clib_float32.c_run_clustering_drm_f32, None, [POINTER(ScipyDrmF32)] + arg_list[1:] + ) + + def run_clustering(self, py_feat_mat, depth, algo, seed, codes=None, max_iter=10, threads=-1): + """ + Run clustering with given label embedding matrix and parameters in C++. + + Args: + py_feat_mat (ScipyCsrF32, ScipyDrmF32): label embedding matrix. (num_labels x num_features). + depth (int): Depth of K-means clustering N-nary tree. + algo (str): The algorithm for clustering, either `KMEANS` or `SKMEANS`. + seed (int): Randoms seed. + codes (ndarray, optional): Label clustering results. + max_iter (int, optional): Maximum number of iter for reordering each node based on score. + threads (int, optional): The number of threads. Default -1 to use all cores. + + Return: + codes (ndarray): The clustering result. + If `codes` not given, return a new allocated ndarray, dtype `np.uint32`. + """ + clib = self.clib_float32 + if isinstance(py_feat_mat, ScipyCsrF32): + run_clustering = clib.c_run_clustering_csr_f32 + elif isinstance(py_feat_mat, ScipyDrmF32): + run_clustering = clib.c_run_clustering_drm_f32 + else: + raise NotImplementedError( + "type(py_feat_mat) = {} no implemented".format(type(py_feat_mat)) + ) + + if codes is None or len(codes) != py_feat_mat.shape[0] or codes.dtype != np.uint32: + codes = np.zeros(py_feat_mat.rows, dtype=np.uint32) + run_clustering( + byref(py_feat_mat), + depth, + algo, + seed, + max_iter, + threads, + codes.ctypes.data_as(POINTER(c_uint32)), + ) + return codes + + def link_tfidf_vectorizer(self): + """ + Specify C-lib's Tfidf vectorizer method argument and return type. 
+ """ + res_list = c_void_p + arg_list = [c_char_p] + corelib.fillprototype(self.clib_float32.c_tfidf_load, res_list, arg_list) + + arg_list = [c_void_p, c_char_p] + corelib.fillprototype(self.clib_float32.c_tfidf_save, None, arg_list) + + arg_list = [c_void_p] + corelib.fillprototype(self.clib_float32.c_tfidf_destruct, None, arg_list) + + arg_list = [ + c_int, # threads + ScipyCompressedSparseAllocator.CFUNCTYPE, # pred_alloc for result + ] + + # model, fname, fname_len, buffer_size + corelib.fillprototype( + self.clib_float32.c_tfidf_predict_from_file, + None, + [c_void_p, c_void_p, c_uint64, c_uint64] + arg_list, + ) + + # model, corpus, doc_lens, nr_docs + corelib.fillprototype( + self.clib_float32.c_tfidf_predict, + None, + [c_void_p, c_void_p, POINTER(c_uint64), c_uint64] + arg_list, + ) + + res_list = c_void_p + + # file-list, fname_lens, nr_files, param, buffer_size, threads + corelib.fillprototype( + self.clib_float32.c_tfidf_train_from_file, + res_list, + [c_void_p, POINTER(c_uint64), c_uint64, POINTER(TfidfVectorizerParam), c_uint64, c_int], + ) + # corpus, doc_lens, nr_docs, params, threads + corelib.fillprototype( + self.clib_float32.c_tfidf_train, + res_list, + [c_void_p, POINTER(c_uint64), c_uint64, POINTER(TfidfVectorizerParam), c_int], + ) + + def tfidf_destruct(self, model): + """ + Destruct Tfdif model. + + Args: + model (ptr): Pointer to C Tfdif model. + """ + if type(model) == c_void_p: + self.clib_float32.c_tfidf_destruct(model) + + def tfidf_save(self, model, save_dir): + """ + Save trained tfidf vectorizer to disk. + + Args: + save_dir (str): Folder to save the model. + """ + self.clib_float32.c_tfidf_save(model, c_char_p(save_dir.encode("utf-8"))) + + def tfidf_load(self, load_dir): + """ + Load a CppTfidf vectorizer from disk. + + Args: + load_dir (str): Folder inside which the model is loaded. + + Returns: + pointer to C instance tfidf::Vectorizer + """ + return self.clib_float32.c_tfidf_load(c_char_p(load_dir.encode("utf-8"))) + + def tfidf_train(self, trn_corpus, config=None): + """ + Train on a corpus. + + Args: + trn_corpus (list of str or str): Training corpus in the form of a list of strings or path to corpus file/folder. + config (dict): Dict with keyword arguments to pass to C++ class tfidf::Vectorizer. None to use default in TfidfVectorizerParam. + For TfidfVectorizerParam, the config should contain + base_vect_configs (List(Dict)): list of config (list[TfidfBaseVectorizerParam]) to be used for TfidfBaseVectorizerParam. + norm_p (int): after ensembling feature sub matrices, do row-wise normalization with norm_p. + buffer_size (int): if train from file, number of bytes allocated for file I/O. Set to 0 to use default value. + threads (int): number of threads to use, set to negative to use all + For TfidfBaseVectorizerParam, the config should contain + ngram_range (tuple of int): (min_ngram, max_ngram) + truncate_length (int): sequence truncation length, set to negative to disable + max_feature (int): maximum number of features allowed, set to 0 to disable + min_df_ratio (float, [0, max_df_ratio)): min ratio for document frequency truncation + max_df_ratio (float, (min_df_ratio, 1]): max ratio for document frequency truncation + min_df_cnt (int, [0, max_df_cnt)): min count for document frequency truncation + max_df_cnt (float, (min_df_cnt, Inf)): max count for document frequency truncation. Default -1 to disable. 
+                    binary (bool): whether to binarize term frequency, default False
+                    use_idf (bool): whether to use inverse document frequency, default True
+                    smooth_idf (bool): whether to smooth IDF by adding 1 to all DF counts, default True
+                    sublinear_tf (bool): whether to use sublinear mapping (log) on term frequency, default False
+                    keep_frequent_feature (bool): if max_feature > 0, will only keep max_feature features by
+                        ignoring features with low document frequency (if True, default),
+                        ignoring features with high document frequency (if False)
+                    norm (str, 'l1' or 'l2'): feature vector will have unit l1 or l2 norm
+                    analyzer (str, 'word', 'char' or 'char_wb'): Whether to use word or character n-grams.
+                        Option 'char_wb' creates character n-grams only from text inside word boundaries;
+                        n-grams at the edges of words are padded with space.
+                    buffer_size (int): if train from file, number of bytes allocated for file I/O. Set to 0 to use default value.
+                    threads (int): number of threads to use, set to negative to use all
+
+        Returns:
+            pointer to C instance tfidf::Vectorizer
+        """
+
+        # Check whether "base_vect_configs" is in config.keys()
+        # If not, this config is for TfidfBaseVectorizerParam.
+        # Otherwise, this config is for TfidfVectorizerParam.
+        if "base_vect_configs" not in config:
+            base_vect_param_list = [TfidfBaseVectorizerParam(config)]
+            norm_p = base_vect_param_list[0].norm_p
+        else:
+            base_vect_param_list = [
+                TfidfBaseVectorizerParam(base_vect_config)
+                for base_vect_config in config["base_vect_configs"]
+            ]
+            norm_p = config["norm_p"]
+        params = TfidfVectorizerParam(base_vect_param_list, norm_p)
+
+        if isinstance(trn_corpus, str):
+            if os.path.isfile(trn_corpus):  # train from a single corpus file
+                corpus_files = [trn_corpus]
+            elif os.path.isdir(trn_corpus):  # train from a folder of corpus files
+                corpus_files = [
+                    os.path.join(trn_corpus, f)
+                    for f in sorted(os.listdir(trn_corpus))
+                    if os.path.isfile(os.path.join(trn_corpus, f))
+                ]
+            else:
+                raise Exception("Failed to load training corpus from {}".format(trn_corpus))
+            nr_files = len(corpus_files)
+            c_corpusf_arr = (c_char_p * nr_files)()
+            c_corpusf_arr[:] = [line.encode("utf-8") for line in corpus_files]
+            fname_lens = np.array([len(line) for line in c_corpusf_arr], dtype=np.uint64)
+
+            model = self.clib_float32.c_tfidf_train_from_file(
+                c_corpusf_arr,
+                fname_lens.ctypes.data_as(POINTER(c_uint64)),
+                nr_files,
+                params,
+                config["buffer_size"],
+                config["threads"],
+            )
+        else:
+            nr_doc = len(trn_corpus)
+            c_corpus_arr = (c_char_p * nr_doc)()
+            c_corpus_arr[:] = [line.encode("utf-8") for line in trn_corpus]
+            doc_lens = np.array([len(line) for line in c_corpus_arr], dtype=np.uint64)
+
+            model = self.clib_float32.c_tfidf_train(
+                c_corpus_arr,
+                doc_lens.ctypes.data_as(POINTER(c_uint64)),
+                nr_doc,
+                params,
+                config["threads"],
+            )
+
+        return model
+
+    def tfidf_predict(self, model, corpus, buffer_size=0, threads=-1):
+        """
+        Vectorize a corpus.
+
+        Args:
+            model (ctypes.c_void_p): pointer to tfidf::Vectorizer model
+            corpus (list of str or str): List of strings to vectorize, or path to a text file to vectorize.
+            buffer_size (int, default 0): number of bytes used for file I/O when predicting from file, set to 0 to use default value
+            threads (int, default -1): number of threads to use for predict, set to negative to use all
+
+        Returns:
+            scipy.sparse.csr.csr_matrix: Matrix of features.
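+
+        Example:
+            A minimal illustrative sketch (the config keys are a hypothetical
+            subset of those documented in ``tfidf_train`` above)::
+
+                corpus = ["this is a sentence", "this is another sentence"]
+                config = {"ngram_range": (1, 1), "threads": -1}
+                model = clib.tfidf_train(corpus, config=config)
+                feat_mat = clib.tfidf_predict(model, corpus)  # csr_matrix, one row per document
+                clib.tfidf_destruct(model)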
+ """ + pred_alloc = ScipyCompressedSparseAllocator() + if isinstance(corpus, str): + # train from file + assert os.path.isfile(corpus), "Cannot predict from {}!".format(corpus) + corpus_utf8 = corpus.encode("utf-8") + + self.clib_float32.c_tfidf_predict_from_file( + model, + c_char_p(corpus_utf8), + len(corpus_utf8), + buffer_size, + threads, + pred_alloc.cfunc, + ) + + else: + # in memory predict + nr_doc = len(corpus) + c_corpus_arr = (c_char_p * nr_doc)() + c_corpus_arr[:] = [line.encode("utf-8") for line in corpus] + doc_lens = np.array([len(line) for line in c_corpus_arr], dtype=np.uint64) + + self.clib_float32.c_tfidf_predict( + model, + c_corpus_arr, + doc_lens.ctypes.data_as(POINTER(c_uint64)), + nr_doc, + threads, + pred_alloc.cfunc, + ) + return pred_alloc.get() + + +clib = corelib(os.path.join(os.path.dirname(os.path.abspath(pecos.__file__)), "core"), "libpecos") diff --git a/pecos/core/libpecos.cpp b/pecos/core/libpecos.cpp new file mode 100644 index 00000000..35281c58 --- /dev/null +++ b/pecos/core/libpecos.cpp @@ -0,0 +1,276 @@ +/* + * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ + +#include "utils/clustering.hpp" +#include "utils/matrix.hpp" +#include "utils/tfidf.hpp" +#include "xmc/inference.hpp" +#include "xmc/linear_solver.hpp" + +// ===== C Interface of Functions ====== +// C Interface of Types/Structures can be found in utils/matrix.hpp + +extern "C" { + // ==== C Interface of XMC Models ==== + void* c_xlinear_load_model_from_disk(const char* model_path) { + auto model = new pecos::HierarchicalMLModel(model_path); + return static_cast(model); + } + + void* c_xlinear_load_model_from_disk_ext(const char* model_path, + int weight_matrix_type) { + pecos::layer_type_t type = static_cast(weight_matrix_type); + auto model = new pecos::HierarchicalMLModel(model_path, type); + return static_cast(model); + } + + void c_xlinear_destruct_model(void* ptr) { + pecos::HierarchicalMLModel* mc = static_cast(ptr); + delete mc; + } + + // Obtain attribute values of the model. 
+    // Allowed attr: depth, nr_features, nr_labels, nr_codes
+    uint32_t c_xlinear_get_int_attr(void* ptr, const char* attr) {
+        pecos::HierarchicalMLModel* mc = static_cast<pecos::HierarchicalMLModel*>(ptr);
+        return mc->get_int_attr(attr);
+    }
+
+    #define C_XLINEAR_PREDICT(SUFFIX, PY_MAT, C_MAT) \
+    void c_xlinear_predict ## SUFFIX( \
+        void* ptr, \
+        const PY_MAT* input_x, \
+        const uint32_t overridden_beam_size, \
+        const char* overridden_post_processor_str, \
+        const uint32_t overridden_only_topk, \
+        const int threads, \
+        py_sparse_allocator_t pred_alloc) { \
+        pecos::HierarchicalMLModel* mc = static_cast<pecos::HierarchicalMLModel*>(ptr); \
+        C_MAT X(input_x); \
+        pecos::csr_t result; \
+        mc->predict(X, result, overridden_beam_size, overridden_post_processor_str, \
+                    overridden_only_topk, threads); \
+        result.create_pycsr(pred_alloc); \
+        result.free_underlying_memory(); \
+    }
+    C_XLINEAR_PREDICT(_csr_f32, ScipyCsrF32, pecos::csr_t)
+    C_XLINEAR_PREDICT(_drm_f32, ScipyDrmF32, pecos::drm_t)
+
+
+    #define C_XLINEAR_SINGLE_LAYER_PREDICT(SUFFIX, PY_MAT, C_MAT) \
+    void c_xlinear_single_layer_predict ## SUFFIX( \
+        const PY_MAT* input_x, \
+        const ScipyCsrF32* csr_codes, \
+        ScipyCscF32* W, \
+        ScipyCscF32* C, \
+        const char* post_processor_str, \
+        const uint32_t only_topk, \
+        const int num_threads, \
+        const float bias, \
+        py_sparse_allocator_t pred_alloc) { \
+        C_MAT X(input_x); \
+        pecos::csr_t last_layer_pred; \
+        bool is_first_layer; \
+        if (csr_codes) { \
+            last_layer_pred = pecos::csr_t(csr_codes).deep_copy(); \
+            is_first_layer = false; \
+        } else { \
+            last_layer_pred.fill_ones(X.rows, 1); \
+            is_first_layer = true; \
+        } \
+        pecos::csc_t C_; \
+        C_ = pecos::csc_t(C); \
+        pecos::csr_t cur_layer_pred; \
+        pecos::csc_t W_ = pecos::csc_t(W); \
+        pecos::MLModelMetadata metadata(bias, only_topk, post_processor_str); \
+        pecos::MLModel layer(W_, C_, 0, false, metadata); \
+        layer.predict(X, last_layer_pred, is_first_layer, only_topk, \
+                      post_processor_str, cur_layer_pred, num_threads); \
+        cur_layer_pred.create_pycsr(pred_alloc); \
+        cur_layer_pred.free_underlying_memory(); \
+    }
+    C_XLINEAR_SINGLE_LAYER_PREDICT(_csr_f32, ScipyCsrF32, pecos::csr_t)
+    C_XLINEAR_SINGLE_LAYER_PREDICT(_drm_f32, ScipyDrmF32, pecos::drm_t)
+
+
+    #define C_XLINEAR_SINGLE_LAYER_TRAIN(SUFFIX, PY_MAT, C_MAT) \
+    void c_xlinear_single_layer_train ## SUFFIX( \
+        const PY_MAT *pX, \
+        const ScipyCscF32 *pY, \
+        const ScipyCscF32 *pC, \
+        const ScipyCscF32 *pM, \
+        const ScipyCscF32 *pR, \
+        py_coo_allocator_t coo_alloc, \
+        double threshold, \
+        uint32_t max_nonzeros_per_label, \
+        int solver_type, \
+        double Cp, \
+        double Cn, \
+        size_t max_iter, \
+        double eps, \
+        double bias, \
+        int threads) { \
+        const C_MAT feat_mat(pX); \
+        const pecos::csc_t Y(pY); \
+        const pecos::csc_t& C = (pC == NULL) ? pecos::csc_t() : pecos::csc_t(pC); \
+        const pecos::csc_t& M = (pM == NULL) ? pecos::csc_t() : pecos::csc_t(pM); \
+        const pecos::csc_t& R = (pR == NULL) ? pecos::csc_t() : pecos::csc_t(pR); \
+        pecos::linear_solver::SVMParameter param(solver_type, Cp, Cn, max_iter, eps, bias); \
+        pecos::coo_t model; \
+        pecos::linear_solver::multilabel_train_with_codes( \
+            &feat_mat, \
+            &Y, \
+            (pC == NULL) ? NULL : &C, \
+            (pM == NULL) ? NULL : &M, \
+            (pR == NULL) ? NULL : &R, \
+            &model, \
+            threshold, \
+            max_nonzeros_per_label, \
+            &param, \
+            threads \
+        ); \
+        model.create_pycoo(coo_alloc); \
+    }
+    C_XLINEAR_SINGLE_LAYER_TRAIN(_csr_f32, ScipyCsrF32, pecos::csr_t)
+    C_XLINEAR_SINGLE_LAYER_TRAIN(_drm_f32, ScipyDrmF32, pecos::drm_t)
+
+    // ==== C Interface of Sparse Matrix/Vector Operations ====
+
+    #define C_SPARSE_MATMUL(SUFFIX, PY_MAT, C_MAT) \
+    void c_sparse_matmul ## SUFFIX( \
+        const PY_MAT* pX, \
+        const PY_MAT* pY, \
+        py_sparse_allocator_t pred_alloc, \
+        const bool eliminate_zeros, \
+        const bool sorted_indices, \
+        int threads) { \
+        C_MAT X(pX); \
+        C_MAT Y(pY); \
+        pecos::spmm_mat_t<C_MAT> Z(pred_alloc); \
+        smat_x_smat(X, Y, Z, eliminate_zeros, sorted_indices, threads); \
+    }
+    C_SPARSE_MATMUL(_csc_f32, ScipyCscF32, pecos::csc_t)
+    C_SPARSE_MATMUL(_csr_f32, ScipyCsrF32, pecos::csr_t)
+
+
+    #define C_SPARSE_INNER_PRODUCTS(SUFFIX, PY_MAT, C_MAT) \
+    void c_sparse_inner_products ## SUFFIX( \
+        const PY_MAT *pX, \
+        const ScipyCscF32 *pW, \
+        uint64_t len, \
+        uint32_t *X_row_idx, \
+        uint32_t *W_col_idx, \
+        float32_t *val, \
+        int threads) { \
+        C_MAT X(pX); \
+        pecos::csc_t W(pW); \
+        compute_sparse_entries_from_rowmajored_X_and_colmajored_M( \
+            X, W, len, X_row_idx, W_col_idx, val, threads \
+        ); \
+    }
+    C_SPARSE_INNER_PRODUCTS(_csr_f32, ScipyCsrF32, pecos::csr_t)
+    C_SPARSE_INNER_PRODUCTS(_drm_f32, ScipyDrmF32, pecos::drm_t)
+
+    // ==== C Interface of Clustering ====
+
+    #define C_RUN_CLUSTERING(SUFFIX, PY_MAT, C_MAT) \
+    void c_run_clustering ## SUFFIX( \
+        const PY_MAT* py_mat_ptr, \
+        uint32_t depth, \
+        uint32_t partition_algo, \
+        int seed, \
+        uint32_t max_iter, \
+        int threads, \
+        uint32_t* label_codes) { \
+        C_MAT feat_mat(py_mat_ptr); \
+        pecos::clustering::Tree tree(depth); \
+        tree.run_clustering(feat_mat, partition_algo, seed, label_codes, max_iter, threads); \
+    }
+    C_RUN_CLUSTERING(_csr_f32, ScipyCsrF32, pecos::csr_t)
+    C_RUN_CLUSTERING(_drm_f32, ScipyDrmF32, pecos::drm_t)
+
+    // ==== C Interface of TFIDF vectorizer ====
+
+    void* c_tfidf_train_from_file(
+        void* corpus_files_ptr,
+        const size_t* fname_lens,
+        size_t nr_files,
+        const pecos::tfidf::TfidfVectorizerParam* param_ptr,
+        size_t buffer_size,
+        int threads) {
+        const char** corpus_files = static_cast<const char**>(corpus_files_ptr);
+        pecos::tfidf::Vectorizer* vect = new pecos::tfidf::Vectorizer(param_ptr);
+        vect->train_from_file(corpus_files, fname_lens, nr_files, buffer_size, threads);
+        return static_cast<void*>(vect);
+    }
+
+    void* c_tfidf_train(
+        void* corpus_ptr,
+        const size_t* doc_lens,
+        size_t nr_doc,
+        const pecos::tfidf::TfidfVectorizerParam* param_ptr,
+        int threads) {
+        const char** corpus = static_cast<const char**>(corpus_ptr);
+        pecos::tfidf::Vectorizer* vect = new pecos::tfidf::Vectorizer(param_ptr);
+        vect->train(corpus, doc_lens, nr_doc, threads);
+        return static_cast<void*>(vect);
+    }
+
+    void* c_tfidf_load(const char* model_dir) {
+        pecos::tfidf::Vectorizer* vect = new pecos::tfidf::Vectorizer(model_dir);
+        return static_cast<void*>(vect);
+    }
+
+    void c_tfidf_save(void* ptr, const char* model_dir) {
+        pecos::tfidf::Vectorizer* vect = static_cast<pecos::tfidf::Vectorizer*>(ptr);
+        vect->save(model_dir);
+    }
+
+    void c_tfidf_destruct(void* ptr) {
+        pecos::tfidf::Vectorizer* vect = static_cast<pecos::tfidf::Vectorizer*>(ptr);
+        delete vect;
+    }
+
+    void c_tfidf_predict_from_file(
+        void* ptr,
+        void* corpus_fname_ptr,
+        size_t fname_len,
+        size_t buffer_size,
+        int threads,
+        py_sparse_allocator_t pred_alloc) {
+        pecos::tfidf::Vectorizer* vect = static_cast<pecos::tfidf::Vectorizer*>(ptr);
+        const char* corpus = static_cast<const char*>(corpus_fname_ptr);
+        pecos::spmm_mat_t<pecos::csr_t> feat_mat(pred_alloc);
+        vect->predict_from_file(corpus, fname_len, feat_mat, buffer_size, threads);
+    }
+
+    void c_tfidf_predict(
+        void* ptr,
+        void* corpus_ptr,
+        const size_t* doc_lens,
+        size_t nr_doc,
+        int threads,
+        py_sparse_allocator_t pred_alloc) {
+        pecos::tfidf::Vectorizer* vect = static_cast<pecos::tfidf::Vectorizer*>(ptr);
+        const char** corpus = static_cast<const char**>(corpus_ptr);
+        pecos::spmm_mat_t<pecos::csr_t> feat_mat(pred_alloc);
+        if(nr_doc > 1) {
+            vect->predict(corpus, doc_lens, nr_doc, feat_mat, threads);
+        } else if(nr_doc == 1) {
+            std::string_view cur_doc(corpus[0], doc_lens[0]);
+            vect->predict(cur_doc, feat_mat);
+        } else {
+            throw std::runtime_error("Invalid nr_doc " + std::to_string(nr_doc));
+        }
+    }
+}
diff --git a/pecos/core/third_party/nlohmann_json/json.hpp b/pecos/core/third_party/nlohmann_json/json.hpp
new file mode 100644
index 00000000..7fc88726
--- /dev/null
+++ b/pecos/core/third_party/nlohmann_json/json.hpp
@@ -0,0 +1,22875 @@
+/*
+    __ _____ _____ _____
+ __|  |   __|     |   | |  JSON for Modern C++
+|  |  |__   |  |  | | | |  version 3.7.3
+|_____|_____|_____|_|___|  https://github.com/nlohmann/json
+
+Licensed under the MIT License <http://opensource.org/licenses/MIT>.
+SPDX-License-Identifier: MIT
+Copyright (c) 2013-2019 Niels Lohmann <http://nlohmann.me>.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#ifndef INCLUDE_NLOHMANN_JSON_HPP_
+#define INCLUDE_NLOHMANN_JSON_HPP_
+
+#define NLOHMANN_JSON_VERSION_MAJOR 3
+#define NLOHMANN_JSON_VERSION_MINOR 7
+#define NLOHMANN_JSON_VERSION_PATCH 3
+
+#include <algorithm> // all_of, find, for_each
+#include <cassert> // assert
+#include <ciso646> // and, not, or
+#include <cstddef> // nullptr_t, ptrdiff_t, size_t
+#include <functional> // hash, less
+#include <initializer_list> // initializer_list
+#include <iosfwd> // istream, ostream
+#include <iterator> // random_access_iterator_tag
+#include <memory> // unique_ptr
+#include <numeric> // accumulate
+#include <string> // string, stoi, to_string
+#include <utility> // declval, forward, move, pair, swap
+#include <vector> // vector
+
+// #include <nlohmann/adl_serializer.hpp>
+
+
+#include <utility>
+
+// #include <nlohmann/detail/conversions/from_json.hpp>
+
+
+#include <algorithm> // transform
+#include <array> // array
+#include <ciso646> // and, not
+#include <forward_list> // forward_list
+#include <iterator> // inserter, front_inserter, end
+#include <map> // map
+#include <string> // string
+#include <tuple> // tuple, make_tuple
+#include <type_traits> // is_arithmetic, is_same, is_enum, underlying_type, is_convertible
+#include <unordered_map> // unordered_map
+#include <utility> // pair, declval
+#include <valarray> // valarray
+
+// #include <nlohmann/detail/exceptions.hpp>
+
+
+#include <exception> // exception
+#include <stdexcept> // runtime_error
+#include <string> // to_string
+
+// #include <nlohmann/detail/input/position_t.hpp>
+
+
+#include <cstddef> // size_t
+
+namespace nlohmann
+{
+namespace detail
+{
+/// struct to capture the start position of the current token
+struct position_t
+{
+    /// the total number of characters read
+    std::size_t chars_read_total = 0;
+    /// the number of characters read in the current line
+    std::size_t chars_read_current_line = 0;
+    /// the number of lines read
+    std::size_t lines_read = 0;
+
+    /// conversion to size_t to preserve SAX interface
+    constexpr operator size_t() const
+    {
+        return chars_read_total;
+    }
+};
+
+} // namespace detail
+} // namespace nlohmann
+
+// #include <nlohmann/detail/macro_scope.hpp>
+
+
+#include <utility> // pair
+// #include <nlohmann/thirdparty/hedley/hedley.hpp>
+/* Hedley - https://nemequ.github.io/hedley
+ * Created by Evan Nemerson <evan@nemerson.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to
+ * the public domain worldwide. This software is distributed without
+ * any warranty.
+ *
+ * For details, see <https://creativecommons.org/publicdomain/zero/1.0/>.
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 11) +#if defined(JSON_HEDLEY_VERSION) + #undef JSON_HEDLEY_VERSION +#endif +#define JSON_HEDLEY_VERSION 11 + +#if defined(JSON_HEDLEY_STRINGIFY_EX) + #undef JSON_HEDLEY_STRINGIFY_EX +#endif +#define JSON_HEDLEY_STRINGIFY_EX(x) #x + +#if defined(JSON_HEDLEY_STRINGIFY) + #undef JSON_HEDLEY_STRINGIFY +#endif +#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x) + +#if defined(JSON_HEDLEY_CONCAT_EX) + #undef JSON_HEDLEY_CONCAT_EX +#endif +#define JSON_HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(JSON_HEDLEY_CONCAT) + #undef JSON_HEDLEY_CONCAT +#endif +#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b) + +#if defined(JSON_HEDLEY_VERSION_ENCODE) + #undef JSON_HEDLEY_VERSION_ENCODE +#endif +#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MAJOR) + #undef JSON_HEDLEY_VERSION_DECODE_MAJOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR) + #undef JSON_HEDLEY_VERSION_DECODE_MINOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION) + #undef JSON_HEDLEY_VERSION_DECODE_REVISION +#endif +#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(JSON_HEDLEY_GNUC_VERSION) + #undef JSON_HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK) + #undef JSON_HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GNUC_VERSION) + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION) + #undef JSON_HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK) + #undef JSON_HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(_MSC_VER) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION) + #undef JSON_HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_VERSION) + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION) + #undef JSON_HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) + #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION_CHECK) + #undef JSON_HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PGI_VERSION) + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #undef JSON_HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK) + #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION) + #undef JSON_HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 
1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION_CHECK) + #undef JSON_HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_ARM_VERSION) + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION) + #undef JSON_HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION_CHECK) + #undef JSON_HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IBM_VERSION) + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_VERSION) + #undef JSON_HEDLEY_TI_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) + #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_VERSION_CHECK) + #undef JSON_HEDLEY_TI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_VERSION) + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION) + #undef JSON_HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) + #if defined(_RELEASE_PATCHLEVEL) + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) + #else + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK) + #undef JSON_HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_CRAY_VERSION) + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION) + #undef JSON_HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) + #if __VER__ > 1000 + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) + #else + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(VER / 100, __VER__ % 100, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION_CHECK) + #undef JSON_HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IAR_VERSION) + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define 
JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION) + #undef JSON_HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) + #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION_CHECK) + #undef JSON_HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION) + #undef JSON_HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) + #define JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION_CHECK) + #undef JSON_HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_DMC_VERSION) + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #undef JSON_HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) + #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK) + #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION) + #undef JSON_HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) + #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK) + #undef JSON_HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PELLES_VERSION) + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION) + #undef JSON_HEDLEY_GCC_VERSION +#endif +#if \ + defined(JSON_HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(JSON_HEDLEY_INTEL_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_ARM_VERSION) && \ + !defined(JSON_HEDLEY_TI_VERSION) && \ + !defined(__COMPCERT__) + #define JSON_HEDLEY_GCC_VERSION JSON_HEDLEY_GNUC_VERSION +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE) + #undef 
JSON_HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) __has_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) __has_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_BUILTIN) + #undef JSON_HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else + #define JSON_HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN) + #undef JSON_HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN) + #undef JSON_HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_FEATURE) + #undef JSON_HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_HAS_FEATURE(feature) 
__has_feature(feature) +#else + #define JSON_HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE) + #undef JSON_HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_FEATURE) + #undef JSON_HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_EXTENSION) + #undef JSON_HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else + #define JSON_HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION) + #undef JSON_HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION) + #undef JSON_HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_WARNING) + #undef JSON_HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else + #define JSON_HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_WARNING) + #undef JSON_HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_WARNING) + #undef JSON_HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) + #define 
JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) && JSON_HEDLEY_HAS_WARNING("-Wc++98-compat") +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +#else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(6,0,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) + #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_PRAGMA(value) __pragma(value) +#else + #define JSON_HEDLEY_PRAGMA(value) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH) + #undef JSON_HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(JSON_HEDLEY_DIAGNOSTIC_POP) + #undef JSON_HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) + #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif JSON_HEDLEY_TI_VERSION_CHECK(8,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_PUSH + #define JSON_HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) + #define 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(JSON_HEDLEY_DEPRECATED) + #undef JSON_HEDLEY_DEPRECATED +#endif +#if defined(JSON_HEDLEY_DEPRECATED_FOR) + #undef JSON_HEDLEY_DEPRECATED_FOR +#endif +#if defined(__cplusplus) && (__cplusplus >= 201402L) + #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,3,0) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_TI_VERSION_CHECK(7,3,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated") + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else + #define JSON_HEDLEY_DEPRECATED(since) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(JSON_HEDLEY_UNAVAILABLE) + #undef JSON_HEDLEY_UNAVAILABLE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else + #define JSON_HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT +#endif +#if 
defined(__cplusplus) && (__cplusplus >= 201703L) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_TI_VERSION_CHECK(7,3,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +#elif defined(_Check_return_) /* SAL */ + #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_ +#else + #define JSON_HEDLEY_WARN_UNUSED_RESULT +#endif + +#if defined(JSON_HEDLEY_SENTINEL) + #undef JSON_HEDLEY_SENTINEL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) + #define JSON_HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else + #define JSON_HEDLEY_SENTINEL(position) +#endif + +#if defined(JSON_HEDLEY_NO_RETURN) + #undef JSON_HEDLEY_NO_RETURN +#endif +#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NO_RETURN __noreturn +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + #define JSON_HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) + #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(18,0,0) || \ + (JSON_HEDLEY_TI_VERSION_CHECK(17,3,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#elif JSON_HEDLEY_TI_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NO_RETURN __attribute((noreturn)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#else + #define JSON_HEDLEY_NO_RETURN +#endif + +#if defined(JSON_HEDLEY_NO_ESCAPE) + #undef JSON_HEDLEY_NO_ESCAPE +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape) + #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else + #define JSON_HEDLEY_NO_ESCAPE +#endif + +#if defined(JSON_HEDLEY_UNREACHABLE) + #undef JSON_HEDLEY_UNREACHABLE +#endif +#if defined(JSON_HEDLEY_UNREACHABLE_RETURN) + #undef JSON_HEDLEY_UNREACHABLE_RETURN +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) + #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define JSON_HEDLEY_UNREACHABLE() __assume(0) +#elif JSON_HEDLEY_TI_VERSION_CHECK(6,0,0) + #if 
defined(__cplusplus) + #define JSON_HEDLEY_UNREACHABLE() std::_nassert(0) + #else + #define JSON_HEDLEY_UNREACHABLE() _nassert(0) + #endif + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return value +#elif defined(EXIT_FAILURE) + #define JSON_HEDLEY_UNREACHABLE() abort() +#else + #define JSON_HEDLEY_UNREACHABLE() + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return value +#endif +#if !defined(JSON_HEDLEY_UNREACHABLE_RETURN) + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE() +#endif + +#if defined(JSON_HEDLEY_ASSUME) + #undef JSON_HEDLEY_ASSUME +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_ASSUME(expr) __assume(expr) +#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume) + #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif JSON_HEDLEY_TI_VERSION_CHECK(6,0,0) + #if defined(__cplusplus) + #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr) + #else + #define JSON_HEDLEY_ASSUME(expr) _nassert(expr) + #endif +#elif \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && !defined(JSON_HEDLEY_ARM_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) + #define JSON_HEDLEY_ASSUME(expr) ((void) ((expr) ? 1 : (__builtin_unreachable(), 1))) +#else + #define JSON_HEDLEY_ASSUME(expr) ((void) (expr)) +#endif + +JSON_HEDLEY_DIAGNOSTIC_PUSH +#if JSON_HEDLEY_HAS_WARNING("-Wpedantic") + #pragma clang diagnostic ignored "-Wpedantic" +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) + #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) + #if defined(__clang__) + #pragma clang diagnostic ignored "-Wvariadic-macros" + #elif defined(JSON_HEDLEY_GCC_VERSION) + #pragma GCC diagnostic ignored "-Wvariadic-macros" + #endif +#endif +#if defined(JSON_HEDLEY_NON_NULL) + #undef JSON_HEDLEY_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else + #define JSON_HEDLEY_NON_NULL(...) 
+#endif +JSON_HEDLEY_DIAGNOSTIC_POP + +#if defined(JSON_HEDLEY_PRINTF_FORMAT) + #undef JSON_HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(format) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_TI_VERSION_CHECK(7,3,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(JSON_HEDLEY_CONSTEXPR) + #undef JSON_HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) + #endif +#endif +#if !defined(JSON_HEDLEY_CONSTEXPR) + #define JSON_HEDLEY_CONSTEXPR +#endif + +#if defined(JSON_HEDLEY_PREDICT) + #undef JSON_HEDLEY_PREDICT +#endif +#if defined(JSON_HEDLEY_LIKELY) + #undef JSON_HEDLEY_LIKELY +#endif +#if defined(JSON_HEDLEY_UNLIKELY) + #undef JSON_HEDLEY_UNLIKELY +#endif +#if defined(JSON_HEDLEY_UNPREDICTABLE) + #undef JSON_HEDLEY_UNPREDICTABLE +#endif +#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable) + #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable(!!(expr)) +#endif +#if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) +# define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability(expr, value, probability) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1, probability) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0, probability) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#if !defined(JSON_HEDLEY_BUILTIN_UNPREDICTABLE) + #define JSON_HEDLEY_BUILTIN_UNPREDICTABLE(expr) __builtin_expect_with_probability(!!(expr), 1, 0.5) +#endif +#elif \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) +# define JSON_HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? 
__builtin_expect(!!(expr), (expected)) : (((void) (expected)), !!(expr))) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + JSON_HEDLEY_CONSTEXPR double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + JSON_HEDLEY_CONSTEXPR double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define JSON_HEDLEY_PREDICT(expr, expected, probability) (((void) (expected)), !!(expr)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_LIKELY(expr) (!!(expr)) +# define JSON_HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(JSON_HEDLEY_UNPREDICTABLE) + #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(JSON_HEDLEY_MALLOC) + #undef JSON_HEDLEY_MALLOC +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_TI_VERSION_CHECK(7,3,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) + #define JSON_HEDLEY_MALLOC __attribute__((__malloc__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) + #define JSON_HEDLEY_MALLOC __declspec(restrict) +#else + #define JSON_HEDLEY_MALLOC +#endif + +#if defined(JSON_HEDLEY_PURE) + #undef JSON_HEDLEY_PURE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_TI_VERSION_CHECK(7,3,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_PURE __attribute__((__pure__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif JSON_HEDLEY_TI_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else + #define JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_CONST) + #undef JSON_HEDLEY_CONST +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(const) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_TI_VERSION_CHECK(7,3,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_CONST __attribute__((__const__)) +#elif \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_CONST _Pragma("no_side_effect") 
+#else + #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_RESTRICT) + #undef JSON_HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT restrict +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) + #define JSON_HEDLEY_RESTRICT __restrict +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT _Restrict +#else + #define JSON_HEDLEY_RESTRICT +#endif + +#if defined(JSON_HEDLEY_INLINE) + #undef JSON_HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) + #define JSON_HEDLEY_INLINE inline +#elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0) + #define JSON_HEDLEY_INLINE __inline__ +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_INLINE __inline +#else + #define JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_ALWAYS_INLINE) + #undef JSON_HEDLEY_ALWAYS_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_TI_VERSION_CHECK(7,3,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) + #define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) + #define JSON_HEDLEY_ALWAYS_INLINE __forceinline +#elif JSON_HEDLEY_TI_VERSION_CHECK(7,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else + #define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_NEVER_INLINE) + #undef JSON_HEDLEY_NEVER_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_TI_VERSION_CHECK(7,3,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) + #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif JSON_HEDLEY_TI_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define 
JSON_HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#else + #define JSON_HEDLEY_NEVER_INLINE +#endif + +#if defined(JSON_HEDLEY_PRIVATE) + #undef JSON_HEDLEY_PRIVATE +#endif +#if defined(JSON_HEDLEY_PUBLIC) + #undef JSON_HEDLEY_PUBLIC +#endif +#if defined(JSON_HEDLEY_IMPORT) + #undef JSON_HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) + #define JSON_HEDLEY_PRIVATE + #define JSON_HEDLEY_PUBLIC __declspec(dllexport) + #define JSON_HEDLEY_IMPORT __declspec(dllimport) +#else + #if \ + JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_TI_VERSION_CHECK(7,3,0) && defined(__TI_EABI__) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) + #define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) + #define JSON_HEDLEY_PUBLIC __attribute__((__visibility__("default"))) + #else + #define JSON_HEDLEY_PRIVATE + #define JSON_HEDLEY_PUBLIC + #endif + #define JSON_HEDLEY_IMPORT extern +#endif + +#if defined(JSON_HEDLEY_NO_THROW) + #undef JSON_HEDLEY_NO_THROW +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NO_THROW __declspec(nothrow) +#else + #define JSON_HEDLEY_NO_THROW +#endif + +#if defined(JSON_HEDLEY_FALL_THROUGH) + #undef JSON_HEDLEY_FALL_THROUGH +#endif +#if JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(fallthrough,7,0,0) && !defined(JSON_HEDLEY_PGI_VERSION) + #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ + #define JSON_HEDLEY_FALL_THROUGH __fallthrough +#else + #define JSON_HEDLEY_FALL_THROUGH +#endif + +#if defined(JSON_HEDLEY_RETURNS_NON_NULL) + #undef JSON_HEDLEY_RETURNS_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) + #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ + #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else + #define JSON_HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(JSON_HEDLEY_ARRAY_PARAM) + #undef JSON_HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_ARRAY_PARAM(name) (name) +#else + #define JSON_HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(JSON_HEDLEY_IS_CONSTANT) + #undef JSON_HEDLEY_IS_CONSTANT +#endif +#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR) + #undef JSON_HEDLEY_REQUIRE_CONSTEXPR +#endif +/* JSON_HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #undef JSON_HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(6,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +#else + #include <stdint.h> + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +#endif +# elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(JSON_HEDLEY_SUNPRO_VERSION) && !defined(JSON_HEDLEY_PGI_VERSION)) || \ + JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +#else + #include <stdint.h> + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +#endif +# elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + defined(JSON_HEDLEY_INTEL_VERSION) || \ + defined(JSON_HEDLEY_TINYC_VERSION) || \ + defined(JSON_HEDLEY_TI_VERSION) || \ + defined(__clang__) +# define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ +((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ?
(expr) : (-1)) +#else + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) (0) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(JSON_HEDLEY_BEGIN_C_DECLS) + #undef JSON_HEDLEY_BEGIN_C_DECLS +#endif +#if defined(JSON_HEDLEY_END_C_DECLS) + #undef JSON_HEDLEY_END_C_DECLS +#endif +#if defined(JSON_HEDLEY_C_DECL) + #undef JSON_HEDLEY_C_DECL +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" { + #define JSON_HEDLEY_END_C_DECLS } + #define JSON_HEDLEY_C_DECL extern "C" +#else + #define JSON_HEDLEY_BEGIN_C_DECLS + #define JSON_HEDLEY_END_C_DECLS + #define JSON_HEDLEY_C_DECL +#endif + +#if defined(JSON_HEDLEY_STATIC_ASSERT) + #undef JSON_HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + JSON_HEDLEY_HAS_FEATURE(c_static_assert) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + (defined(__cplusplus) && JSON_HEDLEY_TI_VERSION_CHECK(8,3,0)) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(JSON_HEDLEY_CONST_CAST) + #undef JSON_HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_REINTERPRET_CAST) + #undef JSON_HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (*((T*) &(expr))) +#endif + +#if defined(JSON_HEDLEY_STATIC_CAST) + #undef JSON_HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else + #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_CPP_CAST) + #undef JSON_HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_CPP_CAST(T, expr) static_cast<T>(expr) +#else + #define JSON_HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(JSON_HEDLEY_NULL) + #undef JSON_HEDLEY_NULL +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) + #elif defined(NULL) + #define JSON_HEDLEY_NULL NULL + #else + #define JSON_HEDLEY_NULL JSON_HEDLEY_STATIC_CAST(void*, 0) + #endif +#elif defined(NULL) + #define JSON_HEDLEY_NULL NULL +#else + #define JSON_HEDLEY_NULL ((void*) 0) +#endif + +#if defined(JSON_HEDLEY_MESSAGE) + #undef JSON_HEDLEY_MESSAGE +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_MESSAGE(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(message msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP
+#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg) +#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_WARNING) + #undef JSON_HEDLEY_WARNING +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_WARNING(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(clang warning msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_REQUIRE) + #undef JSON_HEDLEY_REQUIRE +#endif +#if defined(JSON_HEDLEY_REQUIRE_MSG) + #undef JSON_HEDLEY_REQUIRE_MSG +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat") +# define JSON_HEDLEY_REQUIRE(expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), #expr, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define JSON_HEDLEY_REQUIRE(expr) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(JSON_HEDLEY_FLAGS) + #undef JSON_HEDLEY_FLAGS +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) + #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__)) +#endif + +#if defined(JSON_HEDLEY_FLAGS_CAST) + #undef JSON_HEDLEY_FLAGS_CAST +#endif +#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(JSON_HEDLEY_EMPTY_BASES) + #undef JSON_HEDLEY_EMPTY_BASES +#endif +#if JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0) + #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else + #define JSON_HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. 
*/ + +#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN) + #undef JSON_HEDLEY_CLANG_HAS_BUILTIN +#endif +#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin) + +#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE) + #undef JSON_HEDLEY_CLANG_HAS_FEATURE +#endif +#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature) + +#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION) + #undef JSON_HEDLEY_CLANG_HAS_EXTENSION +#endif +#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension) + +#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_WARNING) + #undef JSON_HEDLEY_CLANG_HAS_WARNING +#endif +#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */ + + +// This file contains all internal macro definitions +// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them + +// exclude unsupported compilers +#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) + #if defined(__clang__) + #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 + #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 + #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #endif +#endif + +// C++ language standard detection +#if (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 +#elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) + #define JSON_HAS_CPP_14 +#endif + +// disable float-equal warnings on GCC/clang +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +// disable documentation warnings on clang +#if defined(__clang__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdocumentation" +#endif + +// allow to disable exceptions +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) + #define JSON_THROW(exception) throw exception + #define JSON_TRY try + #define JSON_CATCH(exception) catch(exception) + #define JSON_INTERNAL_CATCH(exception) catch(exception) +#else + #include <cstdlib> + #define JSON_THROW(exception)
std::abort() + #define JSON_TRY if(true) + #define JSON_CATCH(exception) if(false) + #define JSON_INTERNAL_CATCH(exception) if(false) +#endif + +// override exception macros +#if defined(JSON_THROW_USER) + #undef JSON_THROW + #define JSON_THROW JSON_THROW_USER +#endif +#if defined(JSON_TRY_USER) + #undef JSON_TRY + #define JSON_TRY JSON_TRY_USER +#endif +#if defined(JSON_CATCH_USER) + #undef JSON_CATCH + #define JSON_CATCH JSON_CATCH_USER + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_CATCH_USER +#endif +#if defined(JSON_INTERNAL_CATCH_USER) + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER +#endif + +/*! +@brief macro to briefly define a mapping between an enum and JSON +@def NLOHMANN_JSON_SERIALIZE_ENUM +@since version 3.4.0 +*/ +#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ + template<typename BasicJsonType> \ + inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [e](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \ + { \ + return ej_pair.first == e; \ + }); \ + j = ((it != std::end(m)) ? it : std::begin(m))->second; \ + } \ + template<typename BasicJsonType> \ + inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [&j](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \ + { \ + return ej_pair.second == j; \ + }); \ + e = ((it != std::end(m)) ? it : std::begin(m))->first; \ + } + +// Ugly macros to avoid uglier copy-paste when specializing basic_json. They +// may be removed in the future once the class is split. + +#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ + template<template<typename, typename, typename...> class ObjectType, \ + template<typename, typename...> class ArrayType, \ + class StringType, class BooleanType, class NumberIntegerType, \ + class NumberUnsignedType, class NumberFloatType, \ + template<typename> class AllocatorType, \ + template<typename, typename = void> class JSONSerializer> + +#define NLOHMANN_BASIC_JSON_TPL \ + basic_json<ObjectType, ArrayType, StringType, BooleanType, \ + NumberIntegerType, NumberUnsignedType, NumberFloatType, \ + AllocatorType, JSONSerializer> + + +namespace nlohmann +{ +namespace detail +{ +//////////////// +// exceptions // +//////////////// + +/*! +@brief general exception of the @ref basic_json class + +This class is an extension of `std::exception` objects with a member @a id for +exception ids. It is used as the base class for all exceptions thrown by the +@ref basic_json class. This class can hence be used as "wildcard" to catch +exceptions. + +Subclasses: +- @ref parse_error for exceptions indicating a parse error +- @ref invalid_iterator for exceptions indicating errors with iterators +- @ref type_error for exceptions indicating executing a member function with + a wrong type +- @ref out_of_range for exceptions indicating access out of the defined range +- @ref other_error for exceptions indicating other library errors + +@internal +@note To have nothrow-copy-constructible exceptions, we internally use + `std::runtime_error` which can cope with arbitrary-length error messages. + Intermediate strings are built with static functions and then passed to + the actual constructor.
+@endinternal + +@liveexample{The following code shows how arbitrary library exceptions can be +caught.,exception} + +@since version 3.0.0 +*/ +class exception : public std::exception +{ + public: + /// returns the explanatory string + JSON_HEDLEY_RETURNS_NON_NULL + const char* what() const noexcept override + { + return m.what(); + } + + /// the id of the exception + const int id; + + protected: + JSON_HEDLEY_NON_NULL(3) + exception(int id_, const char* what_arg) : id(id_), m(what_arg) {} + + static std::string name(const std::string& ename, int id_) + { + return "[json.exception." + ename + "." + std::to_string(id_) + "] "; + } + + private: + /// an exception object as storage for error messages + std::runtime_error m; +}; + +/*! +@brief exception indicating a parse error + +This exception is thrown by the library when a parse error occurs. Parse errors +can occur during the deserialization of JSON text, CBOR, MessagePack, as well +as when using JSON Patch. + +Member @a byte holds the byte index of the last read character in the input +file. + +Exceptions have ids 1xx. + +name / id | example message | description +------------------------------ | --------------- | ------------------------- +json.exception.parse_error.101 | parse error at 2: unexpected end of input; expected string literal | This error indicates a syntax error while deserializing a JSON text. The error message describes that an unexpected token (character) was encountered, and the member @a byte indicates the error position. +json.exception.parse_error.102 | parse error at 14: missing or wrong low surrogate | JSON uses the `\uxxxx` format to describe Unicode characters. Code points above 0xFFFF are split into two `\uxxxx` entries ("surrogate pairs"). This error indicates that the surrogate pair is incomplete or contains an invalid code point. +json.exception.parse_error.103 | parse error: code points above 0x10FFFF are invalid | Unicode supports code points up to 0x10FFFF. Code points above 0x10FFFF are invalid. +json.exception.parse_error.104 | parse error: JSON patch must be an array of objects | [RFC 6902](https://tools.ietf.org/html/rfc6902) requires a JSON Patch document to be a JSON document that represents an array of objects. +json.exception.parse_error.105 | parse error: operation must have string member 'op' | An operation of a JSON Patch document must contain exactly one "op" member, whose value indicates the operation to perform. Its value must be one of "add", "remove", "replace", "move", "copy", or "test"; other values are errors. +json.exception.parse_error.106 | parse error: array index '01' must not begin with '0' | An array index in a JSON Pointer ([RFC 6901](https://tools.ietf.org/html/rfc6901)) may be `0` or any number without a leading `0`. +json.exception.parse_error.107 | parse error: JSON pointer must be empty or begin with '/' - was: 'foo' | A JSON Pointer must be a Unicode string containing a sequence of zero or more reference tokens, each prefixed by a `/` character. +json.exception.parse_error.108 | parse error: escape character '~' must be followed with '0' or '1' | In a JSON Pointer, only `~0` and `~1` are valid escape sequences. +json.exception.parse_error.109 | parse error: array index 'one' is not a number | A JSON Pointer array index must be a number. +json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read.
+json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. +json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. +json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). + +@note For an input with n bytes, 1 is the index of the first character and n+1 + is the index of the terminating null byte or the end of file. This also + holds true when reading a byte vector (CBOR or MessagePack). + +@liveexample{The following code shows how a `parse_error` exception can be +caught.,parse_error} + +@sa - @ref exception for the base class of the library exceptions +@sa - @ref invalid_iterator for exceptions indicating errors with iterators +@sa - @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa - @ref out_of_range for exceptions indicating access out of the defined range +@sa - @ref other_error for exceptions indicating other library errors + +@since version 3.0.0 +*/ +class parse_error : public exception +{ + public: + /*! + @brief create a parse error exception + @param[in] id_ the id of the exception + @param[in] pos the position where the error occurred (or with + chars_read_total=0 if the position cannot be + determined) + @param[in] what_arg the explanatory string + @return parse_error object + */ + static parse_error create(int id_, const position_t& pos, const std::string& what_arg) + { + std::string w = exception::name("parse_error", id_) + "parse error" + + position_string(pos) + ": " + what_arg; + return parse_error(id_, pos.chars_read_total, w.c_str()); + } + + static parse_error create(int id_, std::size_t byte_, const std::string& what_arg) + { + std::string w = exception::name("parse_error", id_) + "parse error" + + (byte_ != 0 ? (" at byte " + std::to_string(byte_)) : "") + + ": " + what_arg; + return parse_error(id_, byte_, w.c_str()); + } + + /*! + @brief byte index of the parse error + + The byte index of the last read character in the input file. + + @note For an input with n bytes, 1 is the index of the first character and + n+1 is the index of the terminating null byte or the end of file. + This also holds true when reading a byte vector (CBOR or MessagePack). + */ + const std::size_t byte; + + private: + parse_error(int id_, std::size_t byte_, const char* what_arg) + : exception(id_, what_arg), byte(byte_) {} + + static std::string position_string(const position_t& pos) + { + return " at line " + std::to_string(pos.lines_read + 1) + + ", column " + std::to_string(pos.chars_read_current_line); + } +}; + +/*! +@brief exception indicating errors with iterators + +This exception is thrown if iterators passed to a library function do not match +the expected semantics. + +Exceptions have ids 2xx. + +name / id | example message | description +----------------------------------- | --------------- | ------------------------- +json.exception.invalid_iterator.201 | iterators are not compatible | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. 
+json.exception.invalid_iterator.202 | iterator does not fit current value | In an erase or insert function, the passed iterator @a pos does not belong to the JSON value for which the function was called. It hence does not define a valid position for the deletion/insertion. +json.exception.invalid_iterator.203 | iterators do not fit current value | Either iterator passed to function @ref erase(IteratorType first, IteratorType last) does not belong to the JSON value from which values shall be erased. It hence does not define a valid range to delete values from. +json.exception.invalid_iterator.204 | iterators out of range | When an iterator range for a primitive type (number, boolean, or string) is passed to a constructor or an erase function, this range has to be exactly (@ref begin(), @ref end()), because this is the only way the single stored value is expressed. All other ranges are invalid. +json.exception.invalid_iterator.205 | iterator out of range | When an iterator for a primitive type (number, boolean, or string) is passed to an erase function, the iterator has to be the @ref begin() iterator, because it is the only way to address the stored value. All other iterators are invalid. +json.exception.invalid_iterator.206 | cannot construct with iterators from null | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) belong to a JSON null value and hence do not define a valid range. +json.exception.invalid_iterator.207 | cannot use key() for non-object iterators | The key() member function can only be used on iterators belonging to a JSON object, because other types do not have a concept of a key. +json.exception.invalid_iterator.208 | cannot use operator[] for object iterators | The operator[] to specify a concrete offset cannot be used on iterators belonging to a JSON object, because JSON objects are unordered. +json.exception.invalid_iterator.209 | cannot use offsets with object iterators | The offset operators (+, -, +=, -=) cannot be used on iterators belonging to a JSON object, because JSON objects are unordered. +json.exception.invalid_iterator.210 | iterators do not fit | The iterator range passed to the insert function is not compatible, meaning the iterators do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. +json.exception.invalid_iterator.211 | passed iterators may not belong to container | The iterator range passed to the insert function must not be a subrange of the container to insert to. +json.exception.invalid_iterator.212 | cannot compare iterators of different containers | When two iterators are compared, they must belong to the same container. +json.exception.invalid_iterator.213 | cannot compare order of object iterators | The order of object iterators cannot be compared, because JSON objects are unordered. +json.exception.invalid_iterator.214 | cannot get value | Cannot get value for iterator: Either the iterator belongs to a null value or it is an iterator to a primitive type (number, boolean, or string), but the iterator is different to @ref begin().
+ +@liveexample{The following code shows how an `invalid_iterator` exception can be +caught.,invalid_iterator} + +@sa - @ref exception for the base class of the library exceptions +@sa - @ref parse_error for exceptions indicating a parse error +@sa - @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa - @ref out_of_range for exceptions indicating access out of the defined range +@sa - @ref other_error for exceptions indicating other library errors + +@since version 3.0.0 +*/ +class invalid_iterator : public exception +{ + public: + static invalid_iterator create(int id_, const std::string& what_arg) + { + std::string w = exception::name("invalid_iterator", id_) + what_arg; + return invalid_iterator(id_, w.c_str()); + } + + private: + JSON_HEDLEY_NON_NULL(3) + invalid_iterator(int id_, const char* what_arg) + : exception(id_, what_arg) {} +}; + +/*! +@brief exception indicating executing a member function with a wrong type + +This exception is thrown in case of a type error; that is, a library function is +executed on a JSON value whose type does not match the expected semantics. + +Exceptions have ids 3xx. + +name / id | example message | description +----------------------------- | --------------- | ------------------------- +json.exception.type_error.301 | cannot create object from initializer list | To create an object from an initializer list, the initializer list must consist only of a list of pairs whose first element is a string. When this constraint is violated, an array is created instead. +json.exception.type_error.302 | type must be object, but is array | During implicit or explicit value conversion, the JSON type must be compatible to the target type. For instance, a JSON string can only be converted into string types, but not into numbers or boolean types. +json.exception.type_error.303 | incompatible ReferenceType for get_ref, actual type is object | To retrieve a reference to a value stored in a @ref basic_json object with @ref get_ref, the type of the reference must match the value type. For instance, for a JSON array, the @a ReferenceType must be @ref array_t &. +json.exception.type_error.304 | cannot use at() with string | The @ref at() member functions can only be executed for certain JSON types. +json.exception.type_error.305 | cannot use operator[] with string | The @ref operator[] member functions can only be executed for certain JSON types. +json.exception.type_error.306 | cannot use value() with string | The @ref value() member functions can only be executed for certain JSON types. +json.exception.type_error.307 | cannot use erase() with string | The @ref erase() member functions can only be executed for certain JSON types. +json.exception.type_error.308 | cannot use push_back() with string | The @ref push_back() and @ref operator+= member functions can only be executed for certain JSON types. +json.exception.type_error.309 | cannot use insert() with | The @ref insert() member functions can only be executed for certain JSON types. +json.exception.type_error.310 | cannot use swap() with number | The @ref swap() member functions can only be executed for certain JSON types. +json.exception.type_error.311 | cannot use emplace_back() with string | The @ref emplace_back() member function can only be executed for certain JSON types. +json.exception.type_error.312 | cannot use update() with string | The @ref update() member functions can only be executed for certain JSON types. 
+json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten function converts an object whose keys are JSON Pointers back into an arbitrary nested JSON value. The JSON Pointers must not overlap, because then the resulting value would not be well defined. +json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. +json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. +json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. | +json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) | + +@liveexample{The following code shows how a `type_error` exception can be +caught.,type_error} + +@sa - @ref exception for the base class of the library exceptions +@sa - @ref parse_error for exceptions indicating a parse error +@sa - @ref invalid_iterator for exceptions indicating errors with iterators +@sa - @ref out_of_range for exceptions indicating access out of the defined range +@sa - @ref other_error for exceptions indicating other library errors + +@since version 3.0.0 +*/ +class type_error : public exception +{ + public: + static type_error create(int id_, const std::string& what_arg) + { + std::string w = exception::name("type_error", id_) + what_arg; + return type_error(id_, w.c_str()); + } + + private: + JSON_HEDLEY_NON_NULL(3) + type_error(int id_, const char* what_arg) : exception(id_, what_arg) {} +}; + +/*! +@brief exception indicating access out of the defined range + +This exception is thrown in case a library function is called on an input +parameter that exceeds the expected range, for instance in case of array +indices or nonexisting object keys. + +Exceptions have ids 4xx. + +name / id | example message | description +------------------------------- | --------------- | ------------------------- +json.exception.out_of_range.401 | array index 3 is out of range | The provided array index @a i is larger than @a size-1. +json.exception.out_of_range.402 | array index '-' (3) is out of range | The special array index `-` in a JSON Pointer never describes a valid element of the array, but the index past the end. That is, it can only be used to add elements at this position, but not to read it. +json.exception.out_of_range.403 | key 'foo' not found | The provided key was not found in the JSON object. +json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. +json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' cannot be applied to the root element of the JSON value. +json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored without changing it to NaN or INF. +json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807.
| +json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as a zero-terminated C string | + +@liveexample{The following code shows how an `out_of_range` exception can be +caught.,out_of_range} + +@sa - @ref exception for the base class of the library exceptions +@sa - @ref parse_error for exceptions indicating a parse error +@sa - @ref invalid_iterator for exceptions indicating errors with iterators +@sa - @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa - @ref other_error for exceptions indicating other library errors + +@since version 3.0.0 +*/ +class out_of_range : public exception +{ + public: + static out_of_range create(int id_, const std::string& what_arg) + { + std::string w = exception::name("out_of_range", id_) + what_arg; + return out_of_range(id_, w.c_str()); + } + + private: + JSON_HEDLEY_NON_NULL(3) + out_of_range(int id_, const char* what_arg) : exception(id_, what_arg) {} +}; + +/*! +@brief exception indicating other library errors + +This exception is thrown in case of errors that cannot be classified with the +other exception types. + +Exceptions have ids 5xx. + +name / id | example message | description +------------------------------ | --------------- | ------------------------- +json.exception.other_error.501 | unsuccessful: {"op":"test","path":"/baz", "value":"bar"} | A JSON Patch operation 'test' failed. The unsuccessful operation is also printed. + +@sa - @ref exception for the base class of the library exceptions +@sa - @ref parse_error for exceptions indicating a parse error +@sa - @ref invalid_iterator for exceptions indicating errors with iterators +@sa - @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa - @ref out_of_range for exceptions indicating access out of the defined range + +@liveexample{The following code shows how an `other_error` exception can be +caught.,other_error} + +@since version 3.0.0 +*/ +class other_error : public exception +{ + public: + static other_error create(int id_, const std::string& what_arg) + { + std::string w = exception::name("other_error", id_) + what_arg; + return other_error(id_, w.c_str()); + } + + private: + JSON_HEDLEY_NON_NULL(3) + other_error(int id_, const char* what_arg) : exception(id_, what_arg) {} +}; +} // namespace detail +} // namespace nlohmann + +// #include + +// #include + + +#include <ciso646> // not +#include <cstddef> // size_t +#include <type_traits> // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type + +namespace nlohmann +{ +namespace detail +{ +// alias templates to reduce boilerplate +template<bool B, typename T = void> +using enable_if_t = typename std::enable_if<B, T>::type; + +template<typename T> +using uncvref_t = typename std::remove_cv<typename std::remove_reference<T>::type>::type; + +// implementation of C++14 index_sequence and affiliates +// source: https://stackoverflow.com/a/32223343 +template<std::size_t... Ints> +struct index_sequence +{ + using type = index_sequence; + using value_type = std::size_t; + static constexpr std::size_t size() noexcept + { + return sizeof...(Ints); + } +}; + +template<class Sequence1, class Sequence2> +struct merge_and_renumber; + +template<std::size_t... I1, std::size_t... I2> +struct merge_and_renumber<index_sequence<I1...>, index_sequence<I2...>> + : index_sequence < I1..., (sizeof...(I1) + I2)...
> {}; + +template<std::size_t N> +struct make_index_sequence + : merge_and_renumber < typename make_index_sequence < N / 2 >::type, + typename make_index_sequence < N - N / 2 >::type > {}; + +template<> struct make_index_sequence<0> : index_sequence<> {}; +template<> struct make_index_sequence<1> : index_sequence<0> {}; + +template<typename... Ts> +using index_sequence_for = make_index_sequence<sizeof...(Ts)>; + +// dispatch utility (taken from ranges-v3) +template<unsigned N> struct priority_tag : priority_tag < N - 1 > {}; +template<> struct priority_tag<0> {}; + +// taken from ranges-v3 +template<typename T> +struct static_const +{ + static constexpr T value{}; +}; + +template<typename T> +constexpr T static_const<T>::value; +} // namespace detail +} // namespace nlohmann + +// #include + + +#include <ciso646> // not +#include <limits> // numeric_limits +#include <type_traits> // false_type, is_constructible, is_integral, is_same, true_type +#include <utility> // declval + +// #include + + +#include <iterator> // random_access_iterator_tag + +// #include + + +namespace nlohmann +{ +namespace detail +{ +template<typename ...Ts> struct make_void +{ + using type = void; +}; +template<typename ...Ts> using void_t = typename make_void<Ts...>::type; +} // namespace detail +} // namespace nlohmann + +// #include + + +namespace nlohmann +{ +namespace detail +{ +template<typename It, typename = void> +struct iterator_types {}; + +template<typename It> +struct iterator_types < + It, + void_t<typename It::difference_type, typename It::value_type, typename It::pointer, + typename It::reference, typename It::iterator_category >> +{ + using difference_type = typename It::difference_type; + using value_type = typename It::value_type; + using pointer = typename It::pointer; + using reference = typename It::reference; + using iterator_category = typename It::iterator_category; +}; + +// This is required as some compilers implement std::iterator_traits in a way that +// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. +template<typename T, typename = void> +struct iterator_traits +{ +}; + +template<typename T> +struct iterator_traits < T, enable_if_t < !std::is_pointer<T>::value >> + : iterator_types<T> +{ +}; + +template<typename T> +struct iterator_traits<T*, enable_if_t<std::is_object<T>::value>> +{ + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = T*; + using reference = T&; +}; +} // namespace detail +} // namespace nlohmann + +// #include + +// #include + +// #include + + +#include <type_traits> + +// #include + + +// http://en.cppreference.com/w/cpp/experimental/is_detected +namespace nlohmann +{ +namespace detail +{ +struct nonesuch +{ + nonesuch() = delete; + ~nonesuch() = delete; + nonesuch(nonesuch const&) = delete; + nonesuch(nonesuch const&&) = delete; + void operator=(nonesuch const&) = delete; + void operator=(nonesuch&&) = delete; +}; + +template<class Default, + class AlwaysVoid, + template<class...> class Op, + class... Args> +struct detector +{ + using value_t = std::false_type; + using type = Default; +}; + +template<class Default, template<class...> class Op, class... Args> +struct detector<Default, void_t<Op<Args...>>, Op, Args...> +{ + using value_t = std::true_type; + using type = Op<Args...>; +}; + +template