From 50230621c423aff2d5cefe880fc4cb087b7d68e3 Mon Sep 17 00:00:00 2001 From: MeikWimm <110353873+MeikWimm@users.noreply.github.com> Date: Sun, 30 Jun 2024 10:52:37 +0000 Subject: [PATCH] Changed scoring with bm25 search() function --- .../baseline-retrieval-system.ipynb | 4564 +---------------- evaluation/initial-evaluation.ipynb | 146 +- 2 files changed, 213 insertions(+), 4497 deletions(-) diff --git a/baseline-retrieval-system/baseline-retrieval-system.ipynb b/baseline-retrieval-system/baseline-retrieval-system.ipynb index c8eed51..dc023db 100644 --- a/baseline-retrieval-system/baseline-retrieval-system.ipynb +++ b/baseline-retrieval-system/baseline-retrieval-system.ipynb @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -33,43 +33,43 @@ "Requirement already satisfied: tira in /usr/local/lib/python3.10/dist-packages (0.0.134)\n", "Requirement already satisfied: ir-datasets in /usr/local/lib/python3.10/dist-packages (0.5.5)\n", "Requirement already satisfied: python-terrier in /usr/local/lib/python3.10/dist-packages (0.10.0)\n", - "Requirement already satisfied: requests==2.*,>=2.26 in /usr/local/lib/python3.10/dist-packages (from tira) (2.31.0)\n", - "Requirement already satisfied: docker==7.*,>=7.1.0 in /usr/local/lib/python3.10/dist-packages (from tira) (7.1.0)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from tira) (2.1.3)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from tira) (4.66.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tira) (23.2)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from tira) (2.1.3)\n", + "Requirement already satisfied: docker==7.*,>=7.1.0 in /usr/local/lib/python3.10/dist-packages (from tira) (7.1.0)\n", "Requirement already satisfied: numpy==1.* in /usr/local/lib/python3.10/dist-packages (from tira) (1.26.2)\n", + "Requirement already satisfied: requests==2.*,>=2.26 in /usr/local/lib/python3.10/dist-packages (from tira) (2.31.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tira) (23.2)\n", "Requirement already satisfied: urllib3>=1.26.0 in /usr/local/lib/python3.10/dist-packages (from docker==7.*,>=7.1.0->tira) (2.1.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (3.6)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (2023.11.17)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (3.3.2)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (6.0.1)\n", - "Requirement already satisfied: lz4>=3.1.10 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.3.2)\n", - "Requirement already satisfied: lxml>=4.5.2 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.9.3)\n", - "Requirement already satisfied: warc3-wet-clueweb09>=0.2.5 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.5)\n", - "Requirement already satisfied: ijson>=3.1.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (3.2.3)\n", - "Requirement already satisfied: unlzw3>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.2)\n", - "Requirement already satisfied: warc3-wet>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.3)\n", "Requirement already satisfied: beautifulsoup4>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.12.2)\n", + "Requirement already satisfied: warc3-wet-clueweb09>=0.2.5 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.5)\n", "Requirement already satisfied: pyautocorpus>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.1.12)\n", - "Requirement already satisfied: zlib-state>=0.1.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.1.6)\n", "Requirement already satisfied: inscriptis>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (2.3.2)\n", + "Requirement already satisfied: warc3-wet>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.3)\n", + "Requirement already satisfied: lz4>=3.1.10 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.3.2)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (6.0.1)\n", "Requirement already satisfied: trec-car-tools>=2.5.4 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (2.6)\n", - "Requirement already satisfied: nptyping==1.4.4 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.4.4)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.11.4)\n", - "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.3.2)\n", + "Requirement already satisfied: unlzw3>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.2)\n", + "Requirement already satisfied: zlib-state>=0.1.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.1.6)\n", + "Requirement already satisfied: lxml>=4.5.2 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.9.3)\n", + "Requirement already satisfied: ijson>=3.1.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (3.2.3)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.3.7)\n", - "Requirement already satisfied: pyjnius>=1.4.2 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.6.1)\n", - "Requirement already satisfied: deprecated in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.2.14)\n", "Requirement already satisfied: ir-measures>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.3.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (3.1.2)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.3.2)\n", "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from python-terrier) (10.1.0)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.11.4)\n", "Requirement already satisfied: wget in /usr/local/lib/python3.10/dist-packages (from python-terrier) (3.2)\n", - "Requirement already satisfied: chest in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.2.3)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.3.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (3.1.2)\n", "Requirement already satisfied: statsmodels in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.14.0)\n", - "Requirement already satisfied: matchpy in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.5.5)\n", + "Requirement already satisfied: nptyping==1.4.4 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.4.4)\n", + "Requirement already satisfied: chest in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.2.3)\n", "Requirement already satisfied: pytrec-eval-terrier>=0.5.3 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.5.6)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.3.2)\n", + "Requirement already satisfied: pyjnius>=1.4.2 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.6.1)\n", + "Requirement already satisfied: deprecated in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.2.14)\n", + "Requirement already satisfied: matchpy in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.5.5)\n", "Requirement already satisfied: typish>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from nptyping==1.4.4->python-terrier) (1.9.3)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4>=4.4.1->ir-datasets) (2.5)\n", "Requirement already satisfied: cwl-eval>=1.0.10 in /usr/local/lib/python3.10/dist-packages (from ir-measures>=0.3.1->python-terrier) (1.0.12)\n", @@ -78,92 +78,92 @@ "Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.10/dist-packages (from deprecated->python-terrier) (1.16.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->python-terrier) (2.1.3)\n", "Requirement already satisfied: multiset<3.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from matchpy->python-terrier) (2.1.1)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->tira) (2023.3.post1)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->tira) (2.8.2)\n", "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->tira) (2023.3)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->tira) (2023.3.post1)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->python-terrier) (3.2.0)\n", "Requirement already satisfied: patsy>=0.5.2 in /usr/local/lib/python3.10/dist-packages (from statsmodels->python-terrier) (0.5.4)\n", "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from patsy>=0.5.2->statsmodels->python-terrier) (1.16.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0mRequirement already satisfied: spacy in /usr/local/lib/python3.10/dist-packages (3.7.5)\n", "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.0.5)\n", - "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.7.4)\n", + "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (8.2.5)\n", + "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.0.12)\n", + "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.4.8)\n", "Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (0.12.3)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.0.9)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.1.2)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.7.4)\n", + "Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.26.2)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.0.8)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.0.10)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (4.66.1)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.0.10)\n", "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.31.0)\n", "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy) (59.6.0)\n", - "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.0.12)\n", "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.1.3)\n", - "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.0.8)\n", "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.4.0)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.1.2)\n", - "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (8.2.5)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (4.66.1)\n", - "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.0.10)\n", - "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.4.8)\n", - "Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.26.2)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (23.2)\n", - "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.0.9)\n", "Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (0.4.1)\n", - "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.0.10)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (23.2)\n", "Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.10/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy) (1.2.0)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (2.18.4)\n", "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (4.8.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (0.7.0)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2.1.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.6)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.3.2)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2023.11.17)\n", "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.1.5)\n", "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.7.11)\n", - "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy) (8.1.7)\n", "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy) (13.7.1)\n", "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy) (1.5.4)\n", - "Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy) (7.0.4)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy) (8.1.7)\n", "Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy) (0.18.1)\n", + "Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy) (7.0.4)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->spacy) (2.1.3)\n", "Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.10/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy) (1.2.0)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (2.17.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (3.0.0)\n", "Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy) (1.16.0)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (0.1.2)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0mCollecting en-core-web-md==3.7.1\n", " Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)\n", "Requirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.10/dist-packages (from en-core-web-md==3.7.1) (3.7.5)\n", + "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.12)\n", "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.0.10)\n", - "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.0.10)\n", - "Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.4.1)\n", - "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.7.4)\n", + "Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.12.3)\n", "Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.26.2)\n", - "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.31.0)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (23.2)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (4.66.1)\n", + "Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.4.1)\n", + "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.1.3)\n", "Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (8.2.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (23.2)\n", + "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.0.5)\n", "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.9)\n", - "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (59.6.0)\n", - "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.1.3)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.1.2)\n", - "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.0.8)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.31.0)\n", "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.4.8)\n", "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.4.0)\n", - "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.12)\n", - "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.0.5)\n", - "Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.12.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.1.2)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (59.6.0)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.7.4)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (4.66.1)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.0.10)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.0.8)\n", "Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.10/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.2.0)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.18.4)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.7.0)\n", "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (4.8.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2023.11.17)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.1.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.6)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2023.11.17)\n", - "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.1.5)\n", "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.7.11)\n", + "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.1.5)\n", "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.5.4)\n", "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (13.7.1)\n", "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (8.1.7)\n", - "Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.18.1)\n", "Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (7.0.4)\n", + "Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.18.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.1.3)\n", "Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.10/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.2.0)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.0)\n", @@ -186,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -201,2791 +201,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8\n", - "\n", - "No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Testing word: retrieval in query: retrieval system improving effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.83578 1.0 0.966667\n", - "Testing word: system in query: retrieval system improving effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.83578 1.0 0.966667\n", - "Testing word: improving in query: retrieval system improving effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.83578 1.0 0.966667\n", - "Testing word: improving in query: retrieval system improving improving\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.271088 0.333333 0.866667\n", - "Testing word: effectiveness in query: retrieval system improving effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.83578 1.0 0.966667\n", - "Testing word: that in query: retrieval system improving that\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.268455 0.333333 0.833333\n", - "Testing word: machine in query: machine learning language identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.158409 0.25 1.0\n", - "Testing word: improving in query: machine improving language identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 1.0\n", - "Testing word: learning in query: machine learning language identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.158409 0.25 1.0\n", - "Testing word: language in query: machine learning language identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.158409 0.25 1.0\n", - "Testing word: effectiveness in query: machine learning language effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001495 0.333333\n", - "Testing word: identification in query: machine learning language identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.158409 0.25 1.0\n", - "Testing word: social in query: social media detect self harm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.710477 0.5 1.0\n", - "Testing word: media in query: social media detect self harm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.710477 0.5 1.0\n", - "Testing word: detect in query: social media detect self harm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.710477 0.5 1.0\n", - "Testing word: self in query: social media detect self harm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.710477 0.5 1.0\n", - "Testing word: harm in query: social media detect self harm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.710477 0.5 1.0\n", - "Testing word: stemming in query: stemming for arabic languages\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.56489 0.5 1.0\n", - "Testing word: for in query: stemming for arabic languages\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.56489 0.5 1.0\n", - "Testing word: language in query: stemming for language languages\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.923077\n", - "Testing word: arabic in query: stemming for arabic languages\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.56489 0.5 1.0\n", - "Testing word: language in query: stemming for arabic language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.56489 0.5 1.0\n", - "Testing word: languages in query: stemming for arabic languages\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.56489 0.5 1.0\n", - "Testing word: arabic in query: stemming for arabic arabic\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.617284 1.0 1.0\n", - "Testing word: audio in query: audio based animal recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.358954 1.0 0.818182\n", - "Testing word: based in query: audio based animal recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.358954 1.0 0.818182\n", - "Testing word: animal in query: audio based animal recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.358954 1.0 0.818182\n", - "Testing word: effectiveness in query: audio based animal effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.358954 1.0 1.0\n", - "Testing word: recognition in query: audio based animal recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.358954 1.0 0.818182\n", - "Testing word: identification in query: audio based animal identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 1.0\n", - "Testing word: effectiveness in query: effectiveness different retrieval models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.347826\n", - "Testing word: comparison in query: comparison different retrieval models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.73913\n", - "Testing word: identification in query: identification different retrieval models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001211 0.130435\n", - "Testing word: these in query: comparison these retrieval models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.73913\n", - "Testing word: different in query: comparison different retrieval models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.73913\n", - "Testing word: comparison in query: comparison comparison retrieval models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.035714 0.565217\n", - "Testing word: retrieval in query: comparison different retrieval models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.73913\n", - "Testing word: comparison in query: comparison different retrieval comparison\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.025641 0.434783\n", - "Testing word: models in query: comparison different retrieval models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.73913\n", - "Testing word: cache in query: cache architecture\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.930569 1.0 1.0\n", - "Testing word: architecture in query: cache architecture\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.930569 1.0 1.0\n", - "Testing word: document in query: document scoping formula\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 1.0\n", - "Testing word: identification in query: identification scoping formula\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.038462 1.0\n", - "Testing word: scoping in query: document scoping formula\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 1.0\n", - "Testing word: formula in query: document scoping formula\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 1.0\n", - "Testing word: pseudo in query: pseudo relevance feedback\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.083725 0.166667 1.0\n", - "Testing word: document in query: pseudo document feedback\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 1.0\n", - "Testing word: improving in query: pseudo improving feedback\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.101229 0.25 1.0\n", - "Testing word: effectiveness in query: pseudo effectiveness feedback\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 1.0\n", - "Testing word: recognition in query: pseudo recognition feedback\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.074149 0.125 1.0\n", - "Testing word: relevance in query: pseudo relevance feedback\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.083725 0.166667 1.0\n", - "Testing word: that in query: pseudo that feedback\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078349 0.142857 1.0\n", - "Testing word: comparison in query: pseudo comparison feedback\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.074149 0.125 1.0\n", - "Testing word: identification in query: pseudo identification feedback\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078349 0.142857 1.0\n", - "Testing word: feedback in query: pseudo relevance feedback\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.083725 0.166667 1.0\n", - "Testing word: why in query: why to represent natural conversations in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.222222\n", - "Testing word: how in query: how to represent natural conversations in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.222222\n", - "Testing word: what in query: what to represent natural conversations in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.222222\n", - "Testing word: to in query: how to represent natural conversations in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.222222\n", - "Testing word: represent in query: how to represent natural conversations in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.222222\n", - "Testing word: natural in query: how to represent natural conversations in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.222222\n", - "Testing word: conversations in query: how to represent natural conversations in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.222222\n", - "Testing word: learning in query: how to represent natural learning in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001647 0.111111\n", - "Testing word: recognition in query: how to represent natural recognition in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001497 0.222222\n", - "Testing word: relevance in query: how to represent natural relevance in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: identification in query: how to represent natural identification in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: and in query: how to represent natural and in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: in in query: how to represent natural conversations in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.222222\n", - "Testing word: word in query: how to represent natural conversations in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.222222\n", - "Testing word: nets in query: how to represent natural conversations in word nets\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.222222\n", - "Testing word: algorithm in query: algorithm acceleration with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.459972 1.0 1.0\n", - "Testing word: improving in query: algorithm improving with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.699215 1.0 1.0\n", - "Testing word: effectiveness in query: algorithm effectiveness with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.598282 1.0 1.0\n", - "Testing word: recognition in query: algorithm recognition with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.361219 0.25 1.0\n", - "Testing word: relevance in query: algorithm relevance with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.629552 1.0 1.0\n", - "Testing word: acceleration in query: algorithm acceleration with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.459972 1.0 1.0\n", - "Testing word: system in query: algorithm system with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.598282 1.0 1.0\n", - "Testing word: identification in query: algorithm identification with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.629552 1.0 1.0\n", - "Testing word: with in query: algorithm acceleration with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.459972 1.0 1.0\n", - "Testing word: and in query: algorithm acceleration and nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.459972 1.0 1.0\n", - "Testing word: nvidia in query: algorithm acceleration with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.459972 1.0 1.0\n", - "Testing word: cuda in query: algorithm acceleration with nvidia cuda\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.459972 1.0 1.0\n", - "Testing word: recognition in query: recognition of algorithm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.018868 0.035714\n", - "Testing word: mention in query: mention of algorithm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.090909 0.392857\n", - "Testing word: identification in query: identification of algorithm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006944 0.071429\n", - "Testing word: of in query: mention of algorithm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.090909 0.392857\n", - "Testing word: algorithm in query: mention of algorithm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.090909 0.392857\n", - "Testing word: at in query: at least three authors\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.29026 0.5 0.666667\n", - "Testing word: least in query: at least three authors\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.29026 0.5 0.666667\n", - "Testing word: three in query: at least three authors\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.29026 0.5 0.666667\n", - "Testing word: authors in query: at least three authors\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.29026 0.5 0.666667\n", - "Testing word: german in query: german domain\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: domain in query: german domain\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: recognition in query: recognition of open source\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.041667 0.764706\n", - "Testing word: mention in query: mention of open source\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.283713 1.0 0.823529\n", - "Testing word: identification in query: identification of open source\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0625 0.764706\n", - "Testing word: of in query: mention of open source\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.283713 1.0 0.823529\n", - "Testing word: open in query: mention of open source\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.283713 1.0 0.823529\n", - "Testing word: source in query: mention of open source\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.283713 1.0 0.823529\n", - "Testing word: document in query: document of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.473684\n", - "Testing word: improving in query: improving of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.526316\n", - "Testing word: conversations in query: conversations of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.026316 0.421053\n", - "Testing word: effectiveness in query: effectiveness of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.043478 0.5\n", - "Testing word: recognition in query: recognition of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.03125 0.421053\n", - "Testing word: mention in query: mention of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.012346 0.447368\n", - "Testing word: inclusion in query: inclusion of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.444097 1.0 0.684211\n", - "Testing word: represent in query: represent of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.5\n", - "Testing word: relevance in query: relevance of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.473684\n", - "Testing word: acceleration in query: acceleration of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.026316 0.421053\n", - "Testing word: comparison in query: comparison of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.04 0.421053\n", - "Testing word: identification in query: identification of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 0.447368\n", - "Testing word: of in query: inclusion of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.444097 1.0 0.684211\n", - "Testing word: text in query: inclusion of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.444097 1.0 0.684211\n", - "Testing word: mining in query: inclusion of text mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.444097 1.0 0.684211\n", - "Testing word: the in query: the ethics of artificial intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.151762 0.166667 0.892857\n", - "Testing word: recognition in query: recognition ethics of artificial intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.129875 0.111111 0.892857\n", - "Testing word: acceleration in query: acceleration ethics of artificial intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.151762 0.166667 0.892857\n", - "Testing word: identification in query: identification ethics of artificial intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.142795 0.142857 0.892857\n", - "Testing word: ethics in query: the ethics of artificial intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.151762 0.166667 0.892857\n", - "Testing word: of in query: the ethics of artificial intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.151762 0.166667 0.892857\n", - "Testing word: natural in query: the ethics of natural intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.058824 0.892857\n", - "Testing word: artificial in query: the ethics of artificial intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.151762 0.166667 0.892857\n", - "Testing word: conversations in query: the ethics of artificial conversations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.857143\n", - "Testing word: effectiveness in query: the ethics of artificial effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.133052 0.125 0.892857\n", - "Testing word: recognition in query: the ethics of artificial recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.785714\n", - "Testing word: intelligence in query: the ethics of artificial intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.151762 0.166667 0.892857\n", - "Testing word: relevance in query: the ethics of artificial relevance\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.857143\n", - "Testing word: acceleration in query: the ethics of artificial acceleration\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.139618 0.142857 0.821429\n", - "Testing word: identification in query: the ethics of artificial identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.785714\n", - "Testing word: machine in query: machine learning for more relevant results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.439389 0.5 1.0\n", - "Testing word: improving in query: machine improving for more relevant results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.400671 0.5 0.85\n", - "Testing word: conversations in query: machine conversations for more relevant results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006135 0.7\n", - "Testing word: learning in query: machine learning for more relevant results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.439389 0.5 1.0\n", - "Testing word: for in query: machine learning for more relevant results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.439389 0.5 1.0\n", - "Testing word: more in query: machine learning for more relevant results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.439389 0.5 1.0\n", - "Testing word: document in query: machine learning for more document results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.033333 0.5\n", - "Testing word: inclusion in query: machine learning for more inclusion results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005181 0.3\n", - "Testing word: relevant in query: machine learning for more relevant results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.439389 0.5 1.0\n", - "Testing word: relevance in query: machine learning for more relevance results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.439389 0.5 1.0\n", - "Testing word: effectiveness in query: machine learning for more relevant effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.286346 1.0 0.9\n", - "Testing word: results in query: machine learning for more relevant results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.439389 0.5 1.0\n", - "Testing word: crawling in query: crawling websites using machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.144904 0.125 0.888889\n", - "Testing word: websites in query: crawling websites using machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.144904 0.125 0.888889\n", - "Testing word: using in query: crawling websites using machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.144904 0.125 0.888889\n", - "Testing word: machine in query: crawling websites using machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.144904 0.125 0.888889\n", - "Testing word: improving in query: crawling websites using machine improving\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078349 0.142857 0.888889\n", - "Testing word: conversations in query: crawling websites using machine conversations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078349 0.142857 0.777778\n", - "Testing word: learning in query: crawling websites using machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.144904 0.125 0.888889\n", - "Testing word: improving in query: improving influence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.017241 0.5\n", - "Testing word: recommenders in query: recommenders influence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 1.0\n", - "Testing word: effectiveness in query: effectiveness influence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.7\n", - "Testing word: inclusion in query: inclusion influence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.019231 0.5\n", - "Testing word: relevant in query: relevant influence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.014706 0.3\n", - "Testing word: relevance in query: relevance influence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.014706 0.3\n", - "Testing word: identification in query: identification influence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.017241 0.5\n", - "Testing word: influence in query: recommenders influence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 1.0\n", - "Testing word: effectiveness in query: recommenders effectiveness on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.9\n", - "Testing word: recognition in query: recommenders recognition on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 0.9\n", - "Testing word: inclusion in query: recommenders inclusion on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.9\n", - "Testing word: intelligence in query: recommenders intelligence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020408 0.9\n", - "Testing word: relevance in query: recommenders relevance on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006711 0.8\n", - "Testing word: on in query: recommenders influence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 1.0\n", - "Testing word: websites in query: recommenders influence on websites\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.249664 0.333333 0.9\n", - "Testing word: users in query: recommenders influence on users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 1.0\n", - "Testing word: search in query: search engine caching effects\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.726413 1.0 1.0\n", - "Testing word: engine in query: search engine caching effects\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.726413 1.0 1.0\n", - "Testing word: improving in query: search engine improving effects\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.019231 0.636364\n", - "Testing word: recommenders in query: search engine recommenders effects\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.003876 0.409091\n", - "Testing word: caching in query: search engine caching effects\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.726413 1.0 1.0\n", - "Testing word: conversations in query: search engine conversations effects\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005181 0.454545\n", - "Testing word: using in query: search engine using effects\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005076 0.727273\n", - "Testing word: acceleration in query: search engine acceleration effects\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006061 0.5\n", - "Testing word: identification in query: search engine identification effects\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005988 0.590909\n", - "Testing word: effects in query: search engine caching effects\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.726413 1.0 1.0\n", - "Testing word: effectiveness in query: search engine caching effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.726413 1.0 1.0\n", - "Testing word: effectiveness in query: effectiveness product reviews\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.6\n", - "Testing word: consumer in query: consumer product reviews\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0625 1.0\n", - "Testing word: product in query: consumer product reviews\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0625 1.0\n", - "Testing word: consumer in query: consumer consumer reviews\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 1.0\n", - "Testing word: reviews in query: consumer product reviews\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0625 1.0\n", - "Testing word: document in query: document machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.003247 0.333333\n", - "Testing word: recommenders in query: recommenders machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00142 0.333333\n", - "Testing word: effects in query: effects machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004926 0.5\n", - "Testing word: the in query: the machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.107789 0.166667 0.666667\n", - "Testing word: caching in query: caching machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004237 0.5\n", - "Testing word: conversations in query: conversations machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001779 0.333333\n", - "Testing word: effectiveness in query: effectiveness machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004926 0.5\n", - "Testing word: recognition in query: recognition machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007092 0.5\n", - "Testing word: mention in query: mention machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004651 0.5\n", - "Testing word: limitations in query: limitations machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.472546 0.5 0.833333\n", - "Testing word: inclusion in query: inclusion machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006061 0.5\n", - "Testing word: relevance in query: relevance machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007874 0.5\n", - "Testing word: acceleration in query: acceleration machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005882 0.5\n", - "Testing word: comparison in query: comparison machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.003984 0.333333\n", - "Testing word: identification in query: identification machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00216 0.333333\n", - "Testing word: and in query: and machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.107789 0.166667 0.666667\n", - "Testing word: machine in query: limitations machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.472546 0.5 0.833333\n", - "Testing word: improving in query: limitations machine improving\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.009009 0.833333\n", - "Testing word: conversations in query: limitations machine conversations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00346 0.5\n", - "Testing word: learning in query: limitations machine learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.472546 0.5 0.833333\n", - "Testing word: medicine in query: medicine related research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.327761 0.25 0.666667\n", - "Testing word: stemming in query: medicine stemming research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.129875 0.111111 0.666667\n", - "Testing word: related in query: medicine related research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.327761 0.25 0.666667\n", - "Testing word: limitations in query: medicine limitations research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.284814 0.166667 0.666667\n", - "Testing word: inclusion in query: medicine inclusion research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.284814 0.166667 0.666667\n", - "Testing word: relevant in query: medicine relevant research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.303062 0.25 0.62963\n", - "Testing word: relevance in query: medicine relevance research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.303062 0.25 0.62963\n", - "Testing word: identification in query: medicine identification research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.504906 1.0 0.555556\n", - "Testing word: research in query: medicine related research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.327761 0.25 0.666667\n", - "Testing word: relevance in query: medicine related relevance\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.322272 0.5 0.592593\n", - "Testing word: identification in query: medicine related identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.432318 1.0 0.518519\n", - "Testing word: natural in query: natural language processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.714286\n", - "Testing word: artificial in query: artificial language processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006623 0.464286\n", - "Testing word: language in query: natural language processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.714286\n", - "Testing word: languages in query: natural languages processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.714286\n", - "Testing word: arabic in query: natural arabic processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001786 0.357143\n", - "Testing word: improving in query: natural language improving\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.019608 0.321429\n", - "Testing word: recommenders in query: natural language recommenders\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.286346 1.0 0.071429\n", - "Testing word: caching in query: natural language caching\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002915 0.357143\n", - "Testing word: effectiveness in query: natural language effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.017544 0.464286\n", - "Testing word: related in query: natural language related\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.012048 0.5\n", - "Testing word: artificial in query: natural language artificial\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.016129 0.392857\n", - "Testing word: using in query: natural language using\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.058824 0.357143\n", - "Testing word: limitations in query: natural language limitations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.008475 0.321429\n", - "Testing word: mining in query: natural language mining\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005587 0.107143\n", - "Testing word: product in query: natural language product\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: relevance in query: natural language relevance\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.014925 0.321429\n", - "Testing word: acceleration in query: natural language acceleration\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.003367 0.357143\n", - "Testing word: processing in query: natural language processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.714286\n", - "Testing word: consumer in query: natural language consumer\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002288 0.285714\n", - "Testing word: system in query: natural language system\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.026316 0.428571\n", - "Testing word: identification in query: natural language identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.045455 0.071429\n", - "Testing word: algorithm in query: algorithm based ranking\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.736842\n", - "Testing word: graph in query: graph based ranking\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.652864 1.0 0.894737\n", - "Testing word: based in query: graph based ranking\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.652864 1.0 0.894737\n", - "Testing word: ranking in query: graph based ranking\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.652864 1.0 0.894737\n", - "Testing word: research in query: research studies that use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.275862\n", - "Testing word: medicine in query: medicine studies that use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.090909 0.344828\n", - "Testing word: medical in query: medical studies that use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: research in query: medical research that use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.503324 1.0 0.965517\n", - "Testing word: studies in query: medical studies that use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: it in query: medical studies it use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: where in query: medical studies where use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: these in query: medical studies these use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: somethin in query: medical studies somethin use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: this in query: medical studies this use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: they in query: medical studies they use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: there in query: medical studies there use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: effectiveness in query: medical studies effectiveness use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.444097 1.0 0.931034\n", - "Testing word: what in query: medical studies what use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: nothin in query: medical studies nothin use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: not in query: medical studies not use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: relevance in query: medical studies relevance use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.542364 1.0 0.931034\n", - "Testing word: that in query: medical studies that use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: use in query: medical studies that use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: using in query: medical studies that using information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.606439 1.0 0.931034\n", - "Testing word: document in query: medical studies that use document retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.931034\n", - "Testing word: websites in query: medical studies that use websites retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.287626 0.5 0.931034\n", - "Testing word: research in query: medical studies that use research retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.67587 1.0 0.965517\n", - "Testing word: recommenders in query: medical studies that use recommenders retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.173667 0.333333 0.931034\n", - "Testing word: information in query: medical studies that use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: conversations in query: medical studies that use conversations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.931034\n", - "Testing word: effectiveness in query: medical studies that use effectiveness retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.444097 1.0 0.931034\n", - "Testing word: recognition in query: medical studies that use recognition retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.931034\n", - "Testing word: limitations in query: medical studies that use limitations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.280882 0.5 0.931034\n", - "Testing word: inclusion in query: medical studies that use inclusion retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.293456 1.0 0.931034\n", - "Testing word: relevant in query: medical studies that use relevant retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.542364 1.0 0.931034\n", - "Testing word: intelligence in query: medical studies that use intelligence retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.252065 0.333333 0.931034\n", - "Testing word: relevance in query: medical studies that use relevance retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.542364 1.0 0.931034\n", - "Testing word: acceleration in query: medical studies that use acceleration retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.293437 0.5 0.931034\n", - "Testing word: processing in query: medical studies that use processing retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.931034\n", - "Testing word: identification in query: medical studies that use identification retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.173187 0.25 0.931034\n", - "Testing word: retrieval in query: medical studies that use information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538411 1.0 0.931034\n", - "Testing word: document in query: document retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.289523 1.0 0.625\n", - "Testing word: websites in query: websites retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.5625\n", - "Testing word: research in query: research retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.625\n", - "Testing word: recommenders in query: recommenders retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.019608 0.46875\n", - "Testing word: information in query: information retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.65625\n", - "Testing word: conversations in query: conversations retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.012048 0.4375\n", - "Testing word: effectiveness in query: effectiveness retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.428385 1.0 0.5625\n", - "Testing word: recognition in query: recognition retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.46875\n", - "Testing word: limitations in query: limitations retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.59375\n", - "Testing word: inclusion in query: inclusion retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.02439 0.59375\n", - "Testing word: relevant in query: relevant retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.4375\n", - "Testing word: intelligence in query: intelligence retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.011236 0.46875\n", - "Testing word: relevance in query: relevance retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.4375\n", - "Testing word: acceleration in query: acceleration retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.021739 0.59375\n", - "Testing word: processing in query: processing retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.59375\n", - "Testing word: identification in query: identification retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.02 0.46875\n", - "Testing word: retrieval in query: information retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.65625\n", - "Testing word: on in query: information retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.65625\n", - "Testing word: these in query: information retrieval on these language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.65625\n", - "Testing word: different in query: information retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.65625\n", - "Testing word: comparison in query: information retrieval on comparison language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.233651 0.5 0.5\n", - "Testing word: language in query: information retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.65625\n", - "Testing word: languages in query: information retrieval on different languages sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.65625\n", - "Testing word: arabic in query: information retrieval on different arabic sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004237 0.21875\n", - "Testing word: information in query: information retrieval on different language information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.16422 0.25 0.59375\n", - "Testing word: sources in query: information retrieval on different language sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.65625\n", - "Testing word: source in query: information retrieval on different language source\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.65625\n", - "Testing word: papers in query: papers that compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: it in query: papers it compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: where in query: papers where compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: these in query: papers these compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: somethin in query: papers somethin compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: this in query: papers this compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: they in query: papers they compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: there in query: papers there compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: effectiveness in query: papers effectiveness compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.548135 0.5 0.848485\n", - "Testing word: what in query: papers what compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: nothin in query: papers nothin compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.486471 0.333333 0.878788\n", - "Testing word: not in query: papers not compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: relevance in query: papers relevance compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.283713 1.0 0.69697\n", - "Testing word: that in query: papers that compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: compare in query: papers that compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: different in query: papers that different multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.60514 1.0 0.818182\n", - "Testing word: comparison in query: papers that comparison multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.437352 1.0 0.818182\n", - "Testing word: three in query: papers that compare three information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.396392 1.0 0.575758\n", - "Testing word: these in query: papers that compare these information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.396392 1.0 0.575758\n", - "Testing word: different in query: papers that compare different information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.396392 1.0 0.575758\n", - "Testing word: multiple in query: papers that compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: document in query: papers that compare multiple document retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.495939 1.0 0.69697\n", - "Testing word: websites in query: papers that compare multiple websites retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.066667 0.848485\n", - "Testing word: research in query: papers that compare multiple research retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.353598 0.333333 0.909091\n", - "Testing word: recommenders in query: papers that compare multiple recommenders retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.727273\n", - "Testing word: information in query: papers that compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: sources in query: papers that compare multiple sources retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469 1.0 0.787879\n", - "Testing word: conversations in query: papers that compare multiple conversations retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.027778 0.787879\n", - "Testing word: effectiveness in query: papers that compare multiple effectiveness retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.548135 0.5 0.848485\n", - "Testing word: recognition in query: papers that compare multiple recognition retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.066667 0.848485\n", - "Testing word: limitations in query: papers that compare multiple limitations retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.848485\n", - "Testing word: inclusion in query: papers that compare multiple inclusion retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.045455 0.878788\n", - "Testing word: relevant in query: papers that compare multiple relevant retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.283713 1.0 0.69697\n", - "Testing word: intelligence in query: papers that compare multiple intelligence retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.848485\n", - "Testing word: relevance in query: papers that compare multiple relevance retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.283713 1.0 0.69697\n", - "Testing word: acceleration in query: papers that compare multiple acceleration retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.878788\n", - "Testing word: processing in query: papers that compare multiple processing retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.309352 0.333333 0.878788\n", - "Testing word: identification in query: papers that compare multiple identification retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.848485\n", - "Testing word: retrieval in query: papers that compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: sources in query: papers that compare multiple information retrieval sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.437352 1.0 0.757576\n", - "Testing word: algorithm in query: papers that compare multiple information retrieval algorithm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.727273\n", - "Testing word: effectiveness in query: papers that compare multiple information retrieval effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.455779 0.5 0.818182\n", - "Testing word: limitations in query: papers that compare multiple information retrieval limitations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.142795 0.142857 0.787879\n", - "Testing word: processing in query: papers that compare multiple information retrieval processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.289977 0.333333 0.848485\n", - "Testing word: methods in query: papers that compare multiple information retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.567682 0.333333 0.878788\n", - "Testing word: models in query: papers that compare multiple information retrieval models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.437352 1.0 0.848485\n", - "Testing word: risks in query: risks of information retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469756 1.0 0.875\n", - "Testing word: of in query: risks of information retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469756 1.0 0.875\n", - "Testing word: document in query: risks of document retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.476749 0.5 0.916667\n", - "Testing word: websites in query: risks of websites retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.297272 0.5 0.875\n", - "Testing word: research in query: risks of research retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.393278 1.0 0.916667\n", - "Testing word: recommenders in query: risks of recommenders retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.393758 1.0 0.875\n", - "Testing word: information in query: risks of information retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469756 1.0 0.875\n", - "Testing word: sources in query: risks of sources retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469756 1.0 0.916667\n", - "Testing word: conversations in query: risks of conversations retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.447932 1.0 0.875\n", - "Testing word: effectiveness in query: risks of effectiveness retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.396392 1.0 0.958333\n", - "Testing word: recognition in query: risks of recognition retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469756 1.0 0.875\n", - "Testing word: limitations in query: risks of limitations retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.495939 1.0 0.916667\n", - "Testing word: inclusion in query: risks of inclusion retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469756 1.0 0.875\n", - "Testing word: relevant in query: risks of relevant retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.384311 1.0 0.916667\n", - "Testing word: intelligence in query: risks of intelligence retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.396392 1.0 0.875\n", - "Testing word: relevance in query: risks of relevance retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.384311 1.0 0.916667\n", - "Testing word: acceleration in query: risks of acceleration retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.46319 1.0 0.875\n", - "Testing word: processing in query: risks of processing retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.545156 1.0 0.916667\n", - "Testing word: identification in query: risks of identification retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.374666 1.0 0.875\n", - "Testing word: retrieval in query: risks of information retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469756 1.0 0.875\n", - "Testing word: in in query: risks of information retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469756 1.0 0.875\n", - "Testing word: social in query: risks of information retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469756 1.0 0.875\n", - "Testing word: media in query: risks of information retrieval in social media\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.469756 1.0 0.875\n", - "Testing word: actual in query: actual experiments that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: this in query: this experiments that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.309352 0.333333 0.774194\n", - "Testing word: relevant in query: relevant experiments that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.286346 1.0 0.677419\n", - "Testing word: relevance in query: relevance experiments that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.286346 1.0 0.677419\n", - "Testing word: that in query: that experiments that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.309352 0.333333 0.774194\n", - "Testing word: research in query: actual research that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.305235 1.0 0.741935\n", - "Testing word: effects in query: actual effects that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.133052 0.125 0.774194\n", - "Testing word: studies in query: actual studies that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.349967 1.0 0.806452\n", - "Testing word: conversations in query: actual conversations that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.677419\n", - "Testing word: effectiveness in query: actual effectiveness that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.133052 0.125 0.774194\n", - "Testing word: experiments in query: actual experiments that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: artificial in query: actual artificial that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.741935\n", - "Testing word: using in query: actual using that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.158507 0.2 0.83871\n", - "Testing word: limitations in query: actual limitations that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.234407 0.25 0.709677\n", - "Testing word: intelligence in query: actual intelligence that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.677419\n", - "Testing word: results in query: actual results that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.278481 0.5 0.806452\n", - "Testing word: relevance in query: actual relevance that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.366703 0.5 0.774194\n", - "Testing word: acceleration in query: actual acceleration that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.774194\n", - "Testing word: processing in query: actual processing that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.741935\n", - "Testing word: comparison in query: actual comparison that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.305235 1.0 0.709677\n", - "Testing word: methods in query: actual methods that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.144652 0.166667 0.741935\n", - "Testing word: identification in query: actual identification that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.136985 0.142857 0.709677\n", - "Testing word: models in query: actual models that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.147829 0.166667 0.806452\n", - "Testing word: it in query: actual experiments it strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: where in query: actual experiments where strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: actual in query: actual experiments actual strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.507718 1.0 0.612903\n", - "Testing word: these in query: actual experiments these strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: somethin in query: actual experiments somethin strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: this in query: actual experiments this strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: they in query: actual experiments they strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: there in query: actual experiments there strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: effectiveness in query: actual experiments effectiveness strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.327761 0.25 0.774194\n", - "Testing word: what in query: actual experiments what strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: nothin in query: actual experiments nothin strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: not in query: actual experiments not strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: relevance in query: actual experiments relevance strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.425208 1.0 0.774194\n", - "Testing word: that in query: actual experiments that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: improving in query: actual experiments that improving theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.261443 0.333333 0.580645\n", - "Testing word: strengthen in query: actual experiments that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: effectiveness in query: actual experiments that effectiveness theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.506784 1.0 0.548387\n", - "Testing word: relevance in query: actual experiments that relevance theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.367921 1.0 0.516129\n", - "Testing word: research in query: actual experiments that strengthen research knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.144652 0.166667 0.483871\n", - "Testing word: studies in query: actual experiments that strengthen studies knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.271914 0.5 0.483871\n", - "Testing word: influence in query: actual experiments that strengthen influence knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.212226 0.5 0.451613\n", - "Testing word: theoretical in query: actual experiments that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: effectiveness in query: actual experiments that strengthen effectiveness knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.363526 0.5 0.483871\n", - "Testing word: recognition in query: actual experiments that strengthen recognition knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.451613\n", - "Testing word: experiments in query: actual experiments that strengthen experiments knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.208294 0.5 0.516129\n", - "Testing word: limitations in query: actual experiments that strengthen limitations knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.203239 0.142857 0.451613\n", - "Testing word: inclusion in query: actual experiments that strengthen inclusion knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.419355\n", - "Testing word: intelligence in query: actual experiments that strengthen intelligence knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.419355\n", - "Testing word: architecture in query: actual experiments that strengthen architecture knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.090909 0.419355\n", - "Testing word: relevance in query: actual experiments that strengthen relevance knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.419355\n", - "Testing word: acceleration in query: actual experiments that strengthen acceleration knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 0.419355\n", - "Testing word: comparison in query: actual experiments that strengthen comparison knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 0.451613\n", - "Testing word: methods in query: actual experiments that strengthen methods knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.357076 1.0 0.483871\n", - "Testing word: identification in query: actual experiments that strengthen identification knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.129875 0.111111 0.419355\n", - "Testing word: research in query: actual experiments that strengthen theoretical research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.434175 1.0 0.677419\n", - "Testing word: information in query: actual experiments that strengthen theoretical information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.356232 0.333333 0.709677\n", - "Testing word: conversations in query: actual experiments that strengthen theoretical conversations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.208273 0.166667 0.645161\n", - "Testing word: influence in query: actual experiments that strengthen theoretical influence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.30316 0.2 0.677419\n", - "Testing word: learning in query: actual experiments that strengthen theoretical learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.368376 0.333333 0.645161\n", - "Testing word: theoretical in query: actual experiments that strengthen theoretical theoretical\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.489648 0.333333 0.677419\n", - "Testing word: effectiveness in query: actual experiments that strengthen theoretical effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.315617 0.25 0.709677\n", - "Testing word: recognition in query: actual experiments that strengthen theoretical recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.236905 0.2 0.677419\n", - "Testing word: relevant in query: actual experiments that strengthen theoretical relevant\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.522496 1.0 0.677419\n", - "Testing word: intelligence in query: actual experiments that strengthen theoretical intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.645161\n", - "Testing word: knowledge in query: actual experiments that strengthen theoretical knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.227938 0.2 0.774194\n", - "Testing word: relevance in query: actual experiments that strengthen theoretical relevance\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.522496 1.0 0.677419\n", - "Testing word: identification in query: actual experiments that strengthen theoretical identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.321951 0.25 0.645161\n", - "Testing word: fake in query: fake news detection\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.965517\n", - "Testing word: news in query: fake news detection\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.965517\n", - "Testing word: information in query: fake news information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.965517\n", - "Testing word: caching in query: fake news caching\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.965517\n", - "Testing word: detection in query: fake news detection\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.965517\n", - "Testing word: effectiveness in query: fake news effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.154574 0.2 0.965517\n", - "Testing word: recognition in query: fake news recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.965517\n", - "Testing word: detect in query: fake news detect\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.965517\n", - "Testing word: experiments in query: fake news experiments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.129875 0.111111 0.965517\n", - "Testing word: limitations in query: fake news limitations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.965517\n", - "Testing word: acceleration in query: fake news acceleration\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.965517\n", - "Testing word: processing in query: fake news processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.965517\n", - "Testing word: comparison in query: fake news comparison\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.965517\n", - "Testing word: identification in query: fake news identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.158507 0.2 0.965517\n", - "Testing word: multimedia in query: multimedia retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.617284 1.0 0.705882\n", - "Testing word: media in query: media retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.034483 0.117647\n", - "Testing word: multiple in query: multiple retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.032258 0.117647\n", - "Testing word: audio in query: audio retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.176471\n", - "Testing word: retrieval in query: multimedia retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.617284 1.0 0.705882\n", - "Testing word: improving in query: improving natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343697 0.5 0.511628\n", - "Testing word: recommenders in query: recommenders natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.188444 0.333333 0.465116\n", - "Testing word: information in query: information natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.327307 0.5 0.581395\n", - "Testing word: caching in query: caching natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00625 0.534884\n", - "Testing word: detection in query: detection natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.44186\n", - "Testing word: effectiveness in query: effectiveness natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.534884\n", - "Testing word: experiments in query: experiments natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.488372\n", - "Testing word: related in query: related natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.327307 0.5 0.581395\n", - "Testing word: artificial in query: artificial natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.511628\n", - "Testing word: using in query: using natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.268553 0.333333 0.55814\n", - "Testing word: limitations in query: limitations natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.041667 0.465116\n", - "Testing word: mining in query: mining natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.511628\n", - "Testing word: product in query: product natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020408 0.418605\n", - "Testing word: relevance in query: relevance natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.011628 0.372093\n", - "Testing word: acceleration in query: acceleration natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.02439 0.534884\n", - "Testing word: processing in query: processing natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.460013 1.0 0.72093\n", - "Testing word: consumer in query: consumer natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.032258 0.465116\n", - "Testing word: system in query: system natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.651163\n", - "Testing word: methods in query: methods natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.488372\n", - "Testing word: identification in query: identification natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.465116\n", - "Testing word: natural in query: processing natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.460013 1.0 0.72093\n", - "Testing word: artificial in query: processing artificial language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0625 0.418605\n", - "Testing word: language in query: processing natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.460013 1.0 0.72093\n", - "Testing word: languages in query: processing natural languages for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.460013 1.0 0.72093\n", - "Testing word: arabic in query: processing natural arabic for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010309 0.511628\n", - "Testing word: for in query: processing natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.460013 1.0 0.72093\n", - "Testing word: document in query: processing natural language for document retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.287626 0.5 0.627907\n", - "Testing word: websites in query: processing natural language for websites retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.168152 0.25 0.697674\n", - "Testing word: research in query: processing natural language for research retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.290625 0.5 0.581395\n", - "Testing word: recommenders in query: processing natural language for recommenders retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.388244 1.0 0.697674\n", - "Testing word: information in query: processing natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.460013 1.0 0.72093\n", - "Testing word: sources in query: processing natural language for sources retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.212226 0.5 0.697674\n", - "Testing word: detection in query: processing natural language for detection retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.393758 1.0 0.674419\n", - "Testing word: conversations in query: processing natural language for conversations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.066667 0.627907\n", - "Testing word: effectiveness in query: processing natural language for effectiveness retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.144652 0.166667 0.767442\n", - "Testing word: recognition in query: processing natural language for recognition retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.651163\n", - "Testing word: limitations in query: processing natural language for limitations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.158507 0.2 0.697674\n", - "Testing word: inclusion in query: processing natural language for inclusion retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 0.72093\n", - "Testing word: relevant in query: processing natural language for relevant retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.604651\n", - "Testing word: intelligence in query: processing natural language for intelligence retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.05 0.72093\n", - "Testing word: knowledge in query: processing natural language for knowledge retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.627907\n", - "Testing word: relevance in query: processing natural language for relevance retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.604651\n", - "Testing word: acceleration in query: processing natural language for acceleration retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.72093\n", - "Testing word: processing in query: processing natural language for processing retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.396392 1.0 0.674419\n", - "Testing word: identification in query: processing natural language for identification retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.217261 0.5 0.697674\n", - "Testing word: retrieval in query: processing natural language for information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.460013 1.0 0.72093\n", - "Testing word: document in query: document systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001218 0.02381\n", - "Testing word: research in query: research systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.190476\n", - "Testing word: recommenders in query: recommenders systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.696162 1.0 0.952381\n", - "Testing word: information in query: information systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.035714 0.285714\n", - "Testing word: detection in query: detection systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: conversations in query: conversations systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.071429\n", - "Testing word: recommendation in query: recommendation systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.696162 1.0 0.952381\n", - "Testing word: effectiveness in query: effectiveness systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002825 0.047619\n", - "Testing word: recognition in query: recognition systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: mention in query: mention systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: limitations in query: limitations systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.05 0.02381\n", - "Testing word: inclusion in query: inclusion systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.011765 0.190476\n", - "Testing word: relevant in query: relevant systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001093 0.02381\n", - "Testing word: relevance in query: relevance systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001093 0.02381\n", - "Testing word: acceleration in query: acceleration systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002128 0.166667\n", - "Testing word: processing in query: processing systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: identification in query: identification systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: detection in query: recommendation detection\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.017857 0.857143\n", - "Testing word: experiments in query: recommendation experiments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.208294 0.5 0.928571\n", - "Testing word: limitations in query: recommendation limitations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.857143\n", - "Testing word: systems in query: recommendation systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.696162 1.0 0.952381\n", - "Testing word: acceleration in query: recommendation acceleration\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.011364 0.857143\n", - "Testing word: processing in query: recommendation processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.135685 0.125 0.857143\n", - "Testing word: system in query: recommendation system\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.696162 1.0 0.952381\n", - "Testing word: methods in query: recommendation methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.904762\n", - "Testing word: identification in query: recommendation identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.035714 0.857143\n", - "Testing word: models in query: recommendation models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.833333\n", - "Testing word: recommenders in query: recommenders search in e commerce\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.55\n", - "Testing word: personalised in query: personalised search in e commerce\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.293456 1.0 0.6\n", - "Testing word: relevant in query: relevant search in e commerce\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.289523 1.0 0.45\n", - "Testing word: search in query: personalised search in e commerce\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.293456 1.0 0.6\n", - "Testing word: in in query: personalised search in e commerce\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.293456 1.0 0.6\n", - "Testing word: e in query: personalised search in e commerce\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.293456 1.0 0.6\n", - "Testing word: commerce in query: personalised search in e commerce\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.293456 1.0 0.6\n", - "Testing word: consumer in query: personalised search in e consumer\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.293456 1.0 0.25\n", - "Testing word: sentiment in query: sentiment analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.047619 0.869565\n", - "Testing word: that in query: that analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010638 0.304348\n", - "Testing word: research in query: sentiment research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.782609\n", - "Testing word: information in query: sentiment information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.032258 0.73913\n", - "Testing word: studies in query: sentiment studies\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.826087\n", - "Testing word: detection in query: sentiment detection\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.058824 0.782609\n", - "Testing word: analysis in query: sentiment analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.047619 0.869565\n", - "Testing word: theoretical in query: sentiment theoretical\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.136985 0.142857 0.73913\n", - "Testing word: effectiveness in query: sentiment effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.011364 0.73913\n", - "Testing word: experiments in query: sentiment experiments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.02381 0.695652\n", - "Testing word: relevance in query: sentiment relevance\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00885 0.73913\n", - "Testing word: processing in query: sentiment processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.695652\n", - "Testing word: comparison in query: sentiment comparison\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.01 0.73913\n", - "Testing word: methods in query: sentiment methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.025 0.782609\n", - "Testing word: identification in query: sentiment identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.015152 0.73913\n", - "Testing word: document in query: document retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.29849 1.0 0.666667\n", - "Testing word: websites in query: websites retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.179931 0.25 0.666667\n", - "Testing word: research in query: research retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.761905\n", - "Testing word: recommenders in query: recommenders retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.047619 0.571429\n", - "Testing word: information in query: information retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.293456 1.0 0.761905\n", - "Testing word: sources in query: sources retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.619048\n", - "Testing word: detection in query: detection retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.619048\n", - "Testing word: conversations in query: conversations retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.058824 0.571429\n", - "Testing word: recommendation in query: recommendation retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.047619 0.571429\n", - "Testing word: analysis in query: analysis retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.330138 1.0 0.571429\n", - "Testing word: effectiveness in query: effectiveness retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.358954 1.0 0.761905\n", - "Testing word: recognition in query: recognition retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.619048\n", - "Testing word: limitations in query: limitations retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.666667\n", - "Testing word: inclusion in query: inclusion retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.714286\n", - "Testing word: relevant in query: relevant retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.239441 0.25 0.571429\n", - "Testing word: intelligence in query: intelligence retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.16422 0.25 0.666667\n", - "Testing word: informational in query: informational retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.305235 1.0 0.761905\n", - "Testing word: knowledge in query: knowledge retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.666667\n", - "Testing word: relevance in query: relevance retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.239441 0.25 0.571429\n", - "Testing word: acceleration in query: acceleration retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.714286\n", - "Testing word: processing in query: processing retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.453743 1.0 0.619048\n", - "Testing word: identification in query: identification retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.571429\n", - "Testing word: retrieval in query: informational retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.305235 1.0 0.761905\n", - "Testing word: use in query: informational retrieval use neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.217261 0.5 0.714286\n", - "Testing word: caching in query: informational retrieval caching neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.714286\n", - "Testing word: experiments in query: informational retrieval experiments neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.202483 0.5 0.809524\n", - "Testing word: using in query: informational retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.305235 1.0 0.761905\n", - "Testing word: processing in query: informational retrieval processing neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.268737 0.5 0.714286\n", - "Testing word: neural in query: informational retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.305235 1.0 0.761905\n", - "Testing word: websites in query: informational retrieval using neural websites\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.666667\n", - "Testing word: networks in query: informational retrieval using neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.305235 1.0 0.761905\n", - "Testing word: caching in query: informational retrieval using neural caching\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.021277 0.666667\n", - "Testing word: multimedia in query: informational retrieval using neural multimedia\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.008197 0.47619\n", - "Testing word: systems in query: informational retrieval using neural systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 0.714286\n", - "Testing word: query in query: query log analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.43463 0.333333 0.965517\n", - "Testing word: log in query: query log analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.43463 0.333333 0.965517\n", - "Testing word: research in query: query log research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.168152 0.25 0.965517\n", - "Testing word: information in query: query log information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.374666 1.0 0.965517\n", - "Testing word: studies in query: query log studies\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.538886 1.0 0.965517\n", - "Testing word: detection in query: query log detection\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.055556 0.931034\n", - "Testing word: analysis in query: query log analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.43463 0.333333 0.965517\n", - "Testing word: theoretical in query: query log theoretical\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.179477 0.333333 0.931034\n", - "Testing word: effectiveness in query: query log effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.437352 1.0 0.965517\n", - "Testing word: experiments in query: query log experiments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.066667 0.965517\n", - "Testing word: informational in query: query log informational\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.965517\n", - "Testing word: relevance in query: query log relevance\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.862069\n", - "Testing word: processing in query: query log processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.284449 0.2 0.931034\n", - "Testing word: comparison in query: query log comparison\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.135685 0.125 0.931034\n", - "Testing word: methods in query: query log methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.154574 0.2 0.931034\n", - "Testing word: identification in query: query log identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.965517\n", - "Testing word: entity in query: entity recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.923077\n", - "Testing word: information in query: entity information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.035714 0.230769\n", - "Testing word: the in query: entity the\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.035714 0.230769\n", - "Testing word: detection in query: entity detection\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007874 0.282051\n", - "Testing word: conversations in query: entity conversations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001266 0.051282\n", - "Testing word: influence in query: entity influence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002083 0.205128\n", - "Testing word: recommendation in query: entity recommendation\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: theoretical in query: entity theoretical\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.003534 0.230769\n", - "Testing word: effectiveness in query: entity effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.055556 0.358974\n", - "Testing word: recognition in query: entity recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.923077\n", - "Testing word: mention in query: entity mention\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.102564\n", - "Testing word: limitations in query: entity limitations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010638 0.25641\n", - "Testing word: inclusion in query: entity inclusion\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005102 0.230769\n", - "Testing word: intelligence in query: entity intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001949 0.205128\n", - "Testing word: informational in query: entity informational\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006897 0.25641\n", - "Testing word: knowledge in query: entity knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004545 0.076923\n", - "Testing word: relevance in query: entity relevance\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.009709 0.25641\n", - "Testing word: acceleration in query: entity acceleration\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.02381 0.25641\n", - "Testing word: identification in query: entity identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.230769\n", - "Testing word: document in query: document assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.705882\n", - "Testing word: improving in query: improving assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.676471\n", - "Testing word: actual in query: actual assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.014706 0.617647\n", - "Testing word: strengthen in query: strengthen assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.026316 0.676471\n", - "Testing word: research in query: research assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.027027 0.647059\n", - "Testing word: recommenders in query: recommenders assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007519 0.294118\n", - "Testing word: information in query: information assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.735294\n", - "Testing word: conversations in query: conversations assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.008621 0.323529\n", - "Testing word: influence in query: influence assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007937 0.588235\n", - "Testing word: recommendation in query: recommendation assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007519 0.294118\n", - "Testing word: analysis in query: analysis assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020408 0.647059\n", - "Testing word: theoretical in query: theoretical assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010753 0.617647\n", - "Testing word: effectiveness in query: effectiveness assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.647059\n", - "Testing word: recognition in query: recognition assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.012346 0.617647\n", - "Testing word: experiments in query: experiments assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.168152 0.25 0.676471\n", - "Testing word: related in query: related assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.735294\n", - "Testing word: limitations in query: limitations assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.014286 0.617647\n", - "Testing word: inclusion in query: inclusion assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.013333 0.617647\n", - "Testing word: relevant in query: relevant assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.371489 1.0 0.852941\n", - "Testing word: intelligence in query: intelligence assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.003413 0.558824\n", - "Testing word: informational in query: informational assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.011905 0.588235\n", - "Testing word: knowledge in query: knowledge assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.01087 0.617647\n", - "Testing word: relevance in query: relevance assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.371489 1.0 0.852941\n", - "Testing word: that in query: that assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.735294\n", - "Testing word: acceleration in query: acceleration assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010989 0.617647\n", - "Testing word: processing in query: processing assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.032258 0.735294\n", - "Testing word: comparison in query: comparison assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.558824\n", - "Testing word: identification in query: identification assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.015152 0.558824\n", - "Testing word: document in query: relevance document\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.205882\n", - "Testing word: improving in query: relevance improving\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.352941\n", - "Testing word: research in query: relevance research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.323529\n", - "Testing word: recommenders in query: relevance recommenders\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: information in query: relevance information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.173667 0.333333 0.735294\n", - "Testing word: studies in query: relevance studies\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.273587 0.333333 0.411765\n", - "Testing word: detection in query: relevance detection\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.003125 0.294118\n", - "Testing word: conversations in query: relevance conversations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: recommendation in query: relevance recommendation\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: learning in query: relevance learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006667 0.323529\n", - "Testing word: analysis in query: relevance analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.009901 0.470588\n", - "Testing word: theoretical in query: relevance theoretical\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002732 0.323529\n", - "Testing word: effectiveness in query: relevance effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.090909 0.352941\n", - "Testing word: recognition in query: relevance recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005025 0.147059\n", - "Testing word: experiments in query: relevance experiments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.273587 0.333333 0.411765\n", - "Testing word: related in query: relevance related\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.173667 0.333333 0.735294\n", - "Testing word: limitations in query: relevance limitations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00463 0.352941\n", - "Testing word: inclusion in query: relevance inclusion\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004255 0.558824\n", - "Testing word: relevant in query: relevance relevant\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.173667 0.333333 0.735294\n", - "Testing word: systems in query: relevance systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.470588\n", - "Testing word: informational in query: relevance informational\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002242 0.264706\n", - "Testing word: knowledge in query: relevance knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002551 0.294118\n", - "Testing word: assessments in query: relevance assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.371489 1.0 0.852941\n", - "Testing word: results in query: relevance results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.5\n", - "Testing word: relevance in query: relevance relevance\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.173667 0.333333 0.735294\n", - "Testing word: acceleration in query: relevance acceleration\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005618 0.647059\n", - "Testing word: processing in query: relevance processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.204834 0.333333 0.382353\n", - "Testing word: comparison in query: relevance comparison\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.05 0.264706\n", - "Testing word: methods in query: relevance methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.066667 0.352941\n", - "Testing word: identification in query: relevance identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001608 0.058824\n", - "Testing word: deep in query: deep neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.690194 1.0 0.941176\n", - "Testing word: neural in query: deep neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.690194 1.0 0.941176\n", - "Testing word: websites in query: deep neural websites\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.369336 0.5 0.941176\n", - "Testing word: networks in query: deep neural networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.690194 1.0 0.941176\n", - "Testing word: caching in query: deep neural caching\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.018182 0.941176\n", - "Testing word: multimedia in query: deep neural multimedia\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.289977 0.333333 0.911765\n", - "Testing word: systems in query: deep neural systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.203239 0.142857 0.941176\n", - "Testing word: document in query: document retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.275\n", - "Testing word: websites in query: websites retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001631 0.375\n", - "Testing word: research in query: research retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.384311 1.0 0.25\n", - "Testing word: recommenders in query: recommenders retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00216 0.025\n", - "Testing word: information in query: information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.768227 1.0 0.525\n", - "Testing word: sources in query: sources retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.01 0.1\n", - "Testing word: detection in query: detection retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006623 0.05\n", - "Testing word: conversations in query: conversations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: recommendation in query: recommendation retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00216 0.025\n", - "Testing word: analysis in query: analysis retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.3\n", - "Testing word: effectiveness in query: effectiveness retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.041667 0.15\n", - "Testing word: recognition in query: recognition retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: limitations in query: limitations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.012821 0.375\n", - "Testing word: inclusion in query: inclusion retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002488 0.425\n", - "Testing word: relevant in query: relevant retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.3\n", - "Testing word: intelligence in query: intelligence retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.125\n", - "Testing word: informational in query: informational retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0013 0.275\n", - "Testing word: knowledge in query: knowledge retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.043478 0.1\n", - "Testing word: assessments in query: assessments retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00641 0.05\n", - "Testing word: relevance in query: relevance retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.3\n", - "Testing word: acceleration in query: acceleration retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002941 0.45\n", - "Testing word: processing in query: processing retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002597 0.1\n", - "Testing word: identification in query: identification retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001704 0.025\n", - "Testing word: retrieval in query: information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.768227 1.0 0.525\n", - "Testing word: research in query: research for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.684383 1.0 1.0\n", - "Testing word: information in query: information for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.652371 1.0 1.0\n", - "Testing word: studies in query: studies for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.712552 1.0 1.0\n", - "Testing word: detection in query: detection for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.65948 1.0 1.0\n", - "Testing word: analysis in query: analysis for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.668761 1.0 1.0\n", - "Testing word: theoretical in query: theoretical for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.652371 1.0 1.0\n", - "Testing word: effectiveness in query: effectiveness for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.640591 1.0 1.0\n", - "Testing word: experiments in query: experiments for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.672239 1.0 1.0\n", - "Testing word: informational in query: informational for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.652371 1.0 1.0\n", - "Testing word: assessments in query: assessments for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.652371 1.0 1.0\n", - "Testing word: relevance in query: relevance for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.652371 1.0 1.0\n", - "Testing word: processing in query: processing for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.652371 1.0 1.0\n", - "Testing word: comparison in query: comparison for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.684383 1.0 1.0\n", - "Testing word: methods in query: methods for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.647336 1.0 1.0\n", - "Testing word: identification in query: identification for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.652371 1.0 1.0\n", - "Testing word: for in query: analysis for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.668761 1.0 1.0\n", - "Testing word: android in query: analysis for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.668761 1.0 1.0\n", - "Testing word: websites in query: analysis for android websites\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.293456 1.0 1.0\n", - "Testing word: users in query: analysis for android users\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.358954 1.0 1.0\n", - "Testing word: android in query: analysis for android android\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.217261 0.5 1.0\n", - "Testing word: apps in query: analysis for android apps\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.668761 1.0 1.0\n", - "Testing word: the in query: the university of amsterdam\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.36678 0.2 1.0\n", - "Testing word: recognition in query: recognition university of amsterdam\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.504906 1.0 1.0\n", - "Testing word: limitations in query: limitations university of amsterdam\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.504906 1.0 1.0\n", - "Testing word: acceleration in query: acceleration university of amsterdam\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.36678 0.2 1.0\n", - "Testing word: identification in query: identification university of amsterdam\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.36678 0.2 1.0\n", - "Testing word: research in query: the research of amsterdam\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.199306 0.125 1.0\n", - "Testing word: studies in query: the studies of amsterdam\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.284814 0.166667 1.0\n", - "Testing word: university in query: the university of amsterdam\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.36678 0.2 1.0\n", - "Testing word: of in query: the university of amsterdam\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.36678 0.2 1.0\n", - "Testing word: amsterdam in query: the university of amsterdam\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.36678 0.2 1.0\n", - "Testing word: neural in query: neural ranking for ecommerce product search\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.517645 1.0 1.0\n", - "Testing word: ranking in query: neural ranking for ecommerce product search\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.517645 1.0 1.0\n", - "Testing word: for in query: neural ranking for ecommerce product search\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.517645 1.0 1.0\n", - "Testing word: commerce in query: neural ranking for commerce product search\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.074149 0.125 0.888889\n", - "Testing word: consumer in query: neural ranking for consumer product search\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.26582 0.5 0.888889\n", - "Testing word: ecommerce in query: neural ranking for ecommerce product search\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.517645 1.0 1.0\n", - "Testing word: product in query: neural ranking for ecommerce product search\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.517645 1.0 1.0\n", - "Testing word: processing in query: neural ranking for ecommerce processing search\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.383343 1.0 0.888889\n", - "Testing word: consumer in query: neural ranking for ecommerce consumer search\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.383343 1.0 0.888889\n", - "Testing word: search in query: neural ranking for ecommerce product search\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.517645 1.0 1.0\n", - "Testing word: websites in query: websites pages evolution\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.923077\n", - "Testing word: web in query: web pages evolution\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.681206 1.0 1.0\n", - "Testing word: websites in query: web websites evolution\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.090909 0.923077\n", - "Testing word: pages in query: web pages evolution\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.681206 1.0 1.0\n", - "Testing word: research in query: web pages research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.052632 0.923077\n", - "Testing word: the in query: web pages the\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.066667 0.923077\n", - "Testing word: detection in query: web pages detection\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.055556 0.923077\n", - "Testing word: conversations in query: web pages conversations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.923077\n", - "Testing word: influence in query: web pages influence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.037037 0.923077\n", - "Testing word: recommendation in query: web pages recommendation\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.02439 0.923077\n", - "Testing word: evolution in query: web pages evolution\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.681206 1.0 1.0\n", - "Testing word: analysis in query: web pages analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.923077\n", - "Testing word: theoretical in query: web pages theoretical\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.041667 0.923077\n", - "Testing word: effectiveness in query: web pages effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.923077\n", - "Testing word: recognition in query: web pages recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.038462 0.923077\n", - "Testing word: experiments in query: web pages experiments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.923077\n", - "Testing word: limitations in query: web pages limitations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.923077\n", - "Testing word: inclusion in query: web pages inclusion\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.030303 0.923077\n", - "Testing word: intelligence in query: web pages intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.028571 0.923077\n", - "Testing word: architecture in query: web pages architecture\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.038462 0.923077\n", - "Testing word: knowledge in query: web pages knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.022222 0.923077\n", - "Testing word: relevance in query: web pages relevance\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.023256 0.923077\n", - "Testing word: acceleration in query: web pages acceleration\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.033333 0.923077\n", - "Testing word: processing in query: web pages processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.923077\n", - "Testing word: identification in query: web pages identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.038462 0.923077\n", - "Testing word: exhaustivity in query: exhaustivity of index\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 1.0\n", - "Testing word: of in query: exhaustivity of index\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 1.0\n", - "Testing word: index in query: exhaustivity of index\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 1.0\n", - "Testing word: query in query: query optimization\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_7980/2168277619.py:8: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", - " similarity = token.similarity(vocab_word)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.706544 0.5 0.821429\n", - "Testing word: improving in query: query improving\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.357143\n", - "Testing word: research in query: query research\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.055556 0.107143\n", - "Testing word: recommenders in query: query recommenders\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: information in query: query information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.009709 0.464286\n", - "Testing word: networks in query: query networks\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006329 0.25\n", - "Testing word: caching in query: query caching\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.011905 0.285714\n", - "Testing word: detection in query: query detection\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.012658 0.214286\n", - "Testing word: algorithm in query: query algorithm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.464286\n", - "Testing word: recommendation in query: query recommendation\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: evolution in query: query evolution\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001815 0.214286\n", - "Testing word: analysis in query: query analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006211 0.142857\n", - "Testing word: theoretical in query: query theoretical\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.028571 0.142857\n", - "Testing word: effectiveness in query: query effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.392857\n", - "Testing word: recognition in query: query recognition\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: experiments in query: query experiments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004348 0.107143\n", - "Testing word: related in query: query related\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.009709 0.464286\n", - "Testing word: multimedia in query: query multimedia\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001427 0.107143\n", - "Testing word: using in query: query using\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.011111 0.428571\n", - "Testing word: limitations in query: query limitations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0125 0.25\n", - "Testing word: inclusion in query: query inclusion\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002451 0.25\n", - "Testing word: intelligence in query: query intelligence\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.071429\n", - "Testing word: systems in query: query systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.008621 0.285714\n", - "Testing word: informational in query: query informational\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001447 0.107143\n", - "Testing word: assessments in query: query assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.037037 0.142857\n", - "Testing word: product in query: query product\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: relevance in query: query relevance\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.009615 0.142857\n", - "Testing word: optimization in query: query optimization\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.706544 0.5 0.821429\n", - "Testing word: acceleration in query: query acceleration\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002688 0.321429\n", - "Testing word: processing in query: query processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.090909 0.357143\n", - "Testing word: system in query: query system\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.008621 0.285714\n", - "Testing word: comparison in query: query comparison\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001517 0.071429\n", - "Testing word: methods in query: query methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007042 0.214286\n", - "Testing word: identification in query: query identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: cosine in query: cosine similarity vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.636364\n", - "Testing word: actual in query: cosine actual vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.818182\n", - "Testing word: influence in query: cosine influence vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.727273\n", - "Testing word: evolution in query: cosine evolution vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.818182\n", - "Testing word: theoretical in query: cosine theoretical vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.727273\n", - "Testing word: effectiveness in query: cosine effectiveness vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.727273\n", - "Testing word: similarity in query: cosine similarity vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.636364\n", - "Testing word: different in query: cosine different vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.818182\n", - "Testing word: inclusion in query: cosine inclusion vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.818182\n", - "Testing word: relevance in query: cosine relevance vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.307015 0.5 0.818182\n", - "Testing word: that in query: cosine that vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.818182\n", - "Testing word: comparison in query: cosine comparison vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.330138 1.0 0.727273\n", - "Testing word: graph in query: cosine similarity graph\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.727273\n", - "Testing word: vector in query: cosine similarity vector\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.636364\n", - "Testing word: reverse in query: reverse indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.330138 1.0 0.7\n", - "Testing word: index in query: reverse index\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.330138 1.0 0.7\n", - "Testing word: indexing in query: reverse indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.330138 1.0 0.7\n", - "Testing word: index in query: index compression techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.826333 1.0 0.956522\n", - "Testing word: indexing in query: indexing compression techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.826333 1.0 0.956522\n", - "Testing word: improving in query: index improving techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343697 0.5 0.695652\n", - "Testing word: effects in query: index effects techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.204834 0.333333 0.608696\n", - "Testing word: caching in query: index caching techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.565217\n", - "Testing word: detection in query: index detection techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.565217\n", - "Testing word: recommendation in query: index recommendation techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.565217\n", - "Testing word: evolution in query: index evolution techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.565217\n", - "Testing word: analysis in query: index analysis techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.565217\n", - "Testing word: theoretical in query: index theoretical techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 0.565217\n", - "Testing word: effectiveness in query: index effectiveness techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.204834 0.333333 0.608696\n", - "Testing word: recognition in query: index recognition techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 0.565217\n", - "Testing word: compression in query: index compression techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.826333 1.0 0.956522\n", - "Testing word: experiments in query: index experiments techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.158409 0.25 0.695652\n", - "Testing word: using in query: index using techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.695652\n", - "Testing word: limitations in query: index limitations techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.565217\n", - "Testing word: inclusion in query: index inclusion techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.565217\n", - "Testing word: systems in query: index systems techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.205117 0.5 0.608696\n", - "Testing word: assessments in query: index assessments techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 0.565217\n", - "Testing word: relevance in query: index relevance techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.565217\n", - "Testing word: optimization in query: index optimization techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.642187 1.0 0.565217\n", - "Testing word: acceleration in query: index acceleration techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.066667 0.565217\n", - "Testing word: processing in query: index processing techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.408536 1.0 0.608696\n", - "Testing word: system in query: index system techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.205117 0.5 0.608696\n", - "Testing word: comparison in query: index comparison techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.565217\n", - "Testing word: methods in query: index methods techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.239921 0.333333 0.652174\n", - "Testing word: identification in query: index identification techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.565217\n", - "Testing word: models in query: index models techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.239921 0.333333 0.608696\n", - "Testing word: learning in query: index compression learning\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.765593 1.0 1.0\n", - "Testing word: theoretical in query: index compression theoretical\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.413884 0.333333 0.956522\n", - "Testing word: effectiveness in query: index compression effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.545156 1.0 1.0\n", - "Testing word: compression in query: index compression compression\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.769526 1.0 1.0\n", - "Testing word: experiments in query: index compression experiments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.712552 1.0 1.0\n", - "Testing word: using in query: index compression using\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.535254 1.0 1.0\n", - "Testing word: systems in query: index compression systems\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.716485 1.0 0.956522\n", - "Testing word: knowledge in query: index compression knowledge\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.643768 1.0 0.956522\n", - "Testing word: assessments in query: index compression assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.750336 1.0 0.956522\n", - "Testing word: optimization in query: index compression optimization\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.784617 1.0 0.956522\n", - "Testing word: techniques in query: index compression techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.826333 1.0 0.956522\n", - "Testing word: processing in query: index compression processing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.784982 1.0 1.0\n", - "Testing word: methods in query: index compression methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.767027 1.0 1.0\n", - "Testing word: models in query: index compression models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.838957 1.0 0.956522\n", - "Testing word: search in query: search engine optimization with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343018 0.333333 1.0\n", - "Testing word: engine in query: search engine optimization with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343018 0.333333 1.0\n", - "Testing word: improving in query: search engine improving with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.239441 0.25 1.0\n", - "Testing word: research in query: search engine research with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 1.0\n", - "Testing word: recommenders in query: search engine recommenders with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 1.0\n", - "Testing word: information in query: search engine information with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 1.0\n", - "Testing word: networks in query: search engine networks with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.179477 0.333333 1.0\n", - "Testing word: caching in query: search engine caching with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.027778 1.0\n", - "Testing word: detection in query: search engine detection with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 1.0\n", - "Testing word: algorithm in query: search engine algorithm with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.249664 0.333333 1.0\n", - "Testing word: recommendation in query: search engine recommendation with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 1.0\n", - "Testing word: evolution in query: search engine evolution with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.212226 0.5 1.0\n", - "Testing word: analysis in query: search engine analysis with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 1.0\n", - "Testing word: theoretical in query: search engine theoretical with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 1.0\n", - "Testing word: effectiveness in query: search engine effectiveness with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.358954 1.0 0.923077\n", - "Testing word: recognition in query: search engine recognition with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 1.0\n", - "Testing word: compression in query: search engine compression with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.399569 1.0 1.0\n", - "Testing word: experiments in query: search engine experiments with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 1.0\n", - "Testing word: related in query: search engine related with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 1.0\n", - "Testing word: multimedia in query: search engine multimedia with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.18341 0.333333 1.0\n", - "Testing word: using in query: search engine using with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 1.0\n", - "Testing word: limitations in query: search engine limitations with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.168152 0.25 1.0\n", - "Testing word: inclusion in query: search engine inclusion with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 1.0\n", - "Testing word: intelligence in query: search engine intelligence with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.268553 0.333333 1.0\n", - "Testing word: systems in query: search engine systems with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.212226 0.5 1.0\n", - "Testing word: informational in query: search engine informational with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 1.0\n", - "Testing word: assessments in query: search engine assessments with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.154574 0.2 1.0\n", - "Testing word: product in query: search engine product with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.16422 0.25 1.0\n", - "Testing word: relevance in query: search engine relevance with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.205117 0.5 0.923077\n", - "Testing word: optimization in query: search engine optimization with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343018 0.333333 1.0\n", - "Testing word: acceleration in query: search engine acceleration with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 1.0\n", - "Testing word: techniques in query: search engine techniques with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 1.0\n", - "Testing word: processing in query: search engine processing with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 1.0\n", - "Testing word: system in query: search engine system with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.212226 0.5 1.0\n", - "Testing word: comparison in query: search engine comparison with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.161043 0.25 1.0\n", - "Testing word: methods in query: search engine methods with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.305235 1.0 1.0\n", - "Testing word: identification in query: search engine identification with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 1.0\n", - "Testing word: with in query: search engine optimization with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343018 0.333333 1.0\n", - "Testing word: and in query: search engine optimization and query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343018 0.333333 1.0\n", - "Testing word: query in query: search engine optimization with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343018 0.333333 1.0\n", - "Testing word: log in query: search engine optimization with query log\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343018 0.333333 1.0\n", - "Testing word: logs in query: search engine optimization with query logs\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.343018 0.333333 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_7980/2168277619.py:8: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", + "/tmp/ipykernel_491/3712029078.py:16: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", + " similarity = token.similarity(vocab_word)\n", + "/tmp/ipykernel_491/3712029078.py:16: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", + " similarity = token.similarity(vocab_word)\n", + "/tmp/ipykernel_491/3712029078.py:16: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", " similarity = token.similarity(vocab_word)\n" ] }, @@ -2993,1028 +220,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "Testing word: bm25 in query: bm25\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.413128 0.5 1.0\n", - "Testing word: somethin in query: somethin makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: why in query: why makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: how in query: how makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: what in query: what makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: nothin in query: nothin makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: that in query: that makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: makes in query: what makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: natural in query: what makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: artificial in query: what makes artificial language processing artificial\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004292 0.125\n", - "Testing word: language in query: what makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: languages in query: what makes natural languages processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: arabic in query: what makes natural arabic processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005882 0.375\n", - "Testing word: improving in query: what makes natural language improving natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00578 0.4375\n", - "Testing word: recommenders in query: what makes natural language recommenders natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005618 0.3125\n", - "Testing word: information in query: what makes natural language information natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010101 0.3125\n", - "Testing word: caching in query: what makes natural language caching natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005291 0.3125\n", - "Testing word: detection in query: what makes natural language detection natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004975 0.3125\n", - "Testing word: recommendation in query: what makes natural language recommendation natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005618 0.3125\n", - "Testing word: evolution in query: what makes natural language evolution natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00641 0.375\n", - "Testing word: analysis in query: what makes natural language analysis natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007353 0.375\n", - "Testing word: effectiveness in query: what makes natural language effectiveness natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007042 0.4375\n", - "Testing word: compression in query: what makes natural language compression natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004808 0.3125\n", - "Testing word: experiments in query: what makes natural language experiments natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005525 0.375\n", - "Testing word: related in query: what makes natural language related natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010101 0.3125\n", - "Testing word: artificial in query: what makes natural language artificial natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.038462 0.4375\n", - "Testing word: using in query: what makes natural language using natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010638 0.375\n", - "Testing word: limitations in query: what makes natural language limitations natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.037037 0.375\n", - "Testing word: systems in query: what makes natural language systems natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006494 0.4375\n", - "Testing word: informational in query: what makes natural language informational natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.035714 0.3125\n", - "Testing word: mining in query: what makes natural language mining natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.019231 0.375\n", - "Testing word: assessments in query: what makes natural language assessments natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.047619 0.3125\n", - "Testing word: product in query: what makes natural language product natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005 0.3125\n", - "Testing word: relevance in query: what makes natural language relevance natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.013514 0.3125\n", - "Testing word: optimization in query: what makes natural language optimization natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005405 0.3125\n", - "Testing word: acceleration in query: what makes natural language acceleration natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007576 0.3125\n", - "Testing word: techniques in query: what makes natural language techniques natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.026316 0.375\n", - "Testing word: processing in query: what makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: consumer in query: what makes natural language consumer natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006667 0.3125\n", - "Testing word: system in query: what makes natural language system natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006494 0.4375\n", - "Testing word: methods in query: what makes natural language methods natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006211 0.4375\n", - "Testing word: identification in query: what makes natural language identification natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004673 0.3125\n", - "Testing word: natural in query: what makes natural language processing natural\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.020833 0.4375\n", - "Testing word: artificial in query: what makes artificial language processing artificial\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004292 0.125\n", - "Testing word: principle in query: principle of a information retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: evolution in query: evolution of a information retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.071429 0.8125\n", - "Testing word: theoretical in query: theoretical of a information retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.8125\n", - "Testing word: methods in query: methods of a information retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.9375\n", - "Testing word: of in query: principle of a information retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: a in query: principle of a information retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: document in query: principle of a document retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.151762 0.166667 0.9375\n", - "Testing word: websites in query: principle of a websites retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.217261 0.5 0.875\n", - "Testing word: research in query: principle of a research retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.9375\n", - "Testing word: recommenders in query: principle of a recommenders retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.163541 0.2 0.875\n", - "Testing word: information in query: principle of a information retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: sources in query: principle of a sources retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.233651 0.5 0.875\n", - "Testing word: detection in query: principle of a detection retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.9375\n", - "Testing word: conversations in query: principle of a conversations retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: recommendation in query: principle of a recommendation retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.163541 0.2 0.875\n", - "Testing word: analysis in query: principle of a analysis retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 0.875\n", - "Testing word: effectiveness in query: principle of a effectiveness retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.188444 0.333333 0.9375\n", - "Testing word: recognition in query: principle of a recognition retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.1763 0.333333 0.875\n", - "Testing word: limitations in query: principle of a limitations retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: inclusion in query: principle of a inclusion retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.147829 0.166667 0.875\n", - "Testing word: relevant in query: principle of a relevant retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.8125\n", - "Testing word: intelligence in query: principle of a intelligence retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.168152 0.25 0.875\n", - "Testing word: informational in query: principle of a informational retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.233651 0.5 0.875\n", - "Testing word: knowledge in query: principle of a knowledge retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.330138 1.0 0.875\n", - "Testing word: assessments in query: principle of a assessments retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: relevance in query: principle of a relevance retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.8125\n", - "Testing word: optimization in query: principle of a optimization retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.204834 0.333333 0.875\n", - "Testing word: acceleration in query: principle of a acceleration retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.168152 0.25 0.875\n", - "Testing word: processing in query: principle of a processing retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: identification in query: principle of a identification retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.224006 0.5 0.9375\n", - "Testing word: retrieval in query: principle of a information retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: index in query: principle of a information retrieval index\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: indexing in query: principle of a information retrieval indexing\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.248908 0.5 0.875\n", - "Testing word: evolution in query: evolution of web search engine\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.041667 0.7\n", - "Testing word: theoretical in query: theoretical of web search engine\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.047619 0.7\n", - "Testing word: architecture in query: architecture of web search engine\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.388225 0.5 0.9\n", - "Testing word: of in query: architecture of web search engine\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.388225 0.5 0.9\n", - "Testing word: websites in query: architecture of websites search engine\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.9\n", - "Testing word: web in query: architecture of web search engine\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.388225 0.5 0.9\n", - "Testing word: search in query: architecture of web search engine\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.388225 0.5 0.9\n", - "Testing word: engine in query: architecture of web search engine\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.388225 0.5 0.9\n", - "Testing word: somethin in query: somethin is ahp\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 1.0 1.0 1.0\n", - "Testing word: why in query: why is ahp\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 1.0 1.0 1.0\n", - "Testing word: how in query: how is ahp\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 1.0 1.0 1.0\n", - "Testing word: what in query: what is ahp\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 1.0 1.0 1.0\n", - "Testing word: nothin in query: nothin is ahp\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.693426 0.5 1.0\n", - "Testing word: that in query: that is ahp\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 1.0 1.0 1.0\n", - "Testing word: is in query: what is ahp\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 1.0 1.0 1.0\n", - "Testing word: ahp in query: what is ahp\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 1.0 1.0 1.0\n", - "Testing word: somethin in query: somethin is information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.692308\n", - "Testing word: why in query: why is information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.692308\n", - "Testing word: how in query: how is information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.692308\n", - "Testing word: what in query: what is information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.692308\n", - "Testing word: nothin in query: nothin is information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.692308\n", - "Testing word: that in query: that is information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.692308\n", - "Testing word: is in query: what is information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.692308\n", - "Testing word: document in query: what is document retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.033333 0.153846\n", - "Testing word: websites in query: what is websites retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00152 0.230769\n", - "Testing word: research in query: what is research retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010417 0.461538\n", - "Testing word: recommenders in query: what is recommenders retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: information in query: what is information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.692308\n", - "Testing word: sources in query: what is sources retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001003 0.076923\n", - "Testing word: detection in query: what is detection retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: conversations in query: what is conversations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.02 0.076923\n", - "Testing word: recommendation in query: what is recommendation retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: analysis in query: what is analysis retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004902 0.230769\n", - "Testing word: effectiveness in query: what is effectiveness retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.041667 0.153846\n", - "Testing word: recognition in query: what is recognition retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: limitations in query: what is limitations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004274 0.230769\n", - "Testing word: inclusion in query: what is inclusion retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002415 0.461538\n", - "Testing word: relevant in query: what is relevant retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.009009 0.461538\n", - "Testing word: intelligence in query: what is intelligence retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002273 0.153846\n", - "Testing word: informational in query: what is informational retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00119 0.076923\n", - "Testing word: knowledge in query: what is knowledge retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007576 0.230769\n", - "Testing word: assessments in query: what is assessments retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001247 0.076923\n", - "Testing word: relevance in query: what is relevance retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.009009 0.461538\n", - "Testing word: optimization in query: what is optimization retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.035714 0.076923\n", - "Testing word: acceleration in query: what is acceleration retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002778 0.461538\n", - "Testing word: processing in query: what is processing retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004762 0.384615\n", - "Testing word: identification in query: what is identification retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: retrieval in query: what is information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.692308\n", - "Testing word: improving in query: improving retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.447368\n", - "Testing word: effectiveness in query: effectiveness retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.526316\n", - "Testing word: compression in query: compression retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001957 0.315789\n", - "Testing word: systems in query: systems retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.045455 0.5\n", - "Testing word: efficient in query: efficient retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.347912 0.5 0.842105\n", - "Testing word: optimization in query: optimization retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.043478 0.421053\n", - "Testing word: acceleration in query: acceleration retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.008621 0.473684\n", - "Testing word: processing in query: processing retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.085143 0.2 0.447368\n", - "Testing word: methods in query: methods retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0625 0.368421\n", - "Testing word: retrieval in query: efficient retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.347912 0.5 0.842105\n", - "Testing word: algorithms in query: efficient retrieval algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.347912 0.5 0.842105\n", - "Testing word: algorithm in query: efficient retrieval algorithm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.347912 0.5 0.842105\n", - "Testing word: analysis in query: efficient retrieval analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.066254 0.111111 0.447368\n", - "Testing word: graph in query: efficient retrieval graph\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.263158\n", - "Testing word: optimization in query: efficient retrieval optimization\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.18341 0.333333 0.5\n", - "Testing word: techniques in query: efficient retrieval techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.342105\n", - "Testing word: methods in query: efficient retrieval methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.038462 0.473684\n", - "Testing word: models in query: efficient retrieval models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.473684\n", - "Testing word: why in query: why to avoid spam results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.745302 1.0 1.0\n", - "Testing word: how in query: how to avoid spam results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.745302 1.0 1.0\n", - "Testing word: what in query: what to avoid spam results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.745302 1.0 1.0\n", - "Testing word: to in query: how to avoid spam results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.745302 1.0 1.0\n", - "Testing word: avoid in query: how to avoid spam results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.745302 1.0 1.0\n", - "Testing word: spam in query: how to avoid spam results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.745302 1.0 1.0\n", - "Testing word: effectiveness in query: how to avoid spam effectiveness\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.608317 1.0 1.0\n", - "Testing word: experiments in query: how to avoid spam experiments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.510069 1.0 1.0\n", - "Testing word: assessments in query: how to avoid spam assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.460813 0.5 1.0\n", - "Testing word: results in query: how to avoid spam results\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.745302 1.0 1.0\n", - "Testing word: document in query: document retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.393561 0.5 0.470588\n", - "Testing word: websites in query: websites retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.01 0.588235\n", - "Testing word: research in query: research retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.358954 1.0 0.617647\n", - "Testing word: recommenders in query: recommenders retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.208294 0.5 0.588235\n", - "Testing word: information in query: information retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.61225 1.0 0.647059\n", - "Testing word: sources in query: sources retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.330138 1.0 0.588235\n", - "Testing word: detection in query: detection retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.014286 0.617647\n", - "Testing word: conversations in query: conversations retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006849 0.5\n", - "Testing word: recommendation in query: recommendation retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.208294 0.5 0.588235\n", - "Testing word: analysis in query: analysis retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.139618 0.142857 0.617647\n", - "Testing word: effectiveness in query: effectiveness retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.188444 0.333333 0.588235\n", - "Testing word: recognition in query: recognition retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.558824\n", - "Testing word: limitations in query: limitations retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.090909 0.558824\n", - "Testing word: inclusion in query: inclusion retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.013889 0.588235\n", - "Testing word: relevant in query: relevant retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.470588\n", - "Testing word: intelligence in query: intelligence retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.151762 0.166667 0.558824\n", - "Testing word: informational in query: informational retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.017857 0.558824\n", - "Testing word: knowledge in query: knowledge retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.558824\n", - "Testing word: assessments in query: assessments retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.529412\n", - "Testing word: relevance in query: relevance retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.470588\n", - "Testing word: optimization in query: optimization retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.043478 0.529412\n", - "Testing word: acceleration in query: acceleration retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010753 0.617647\n", - "Testing word: processing in query: processing retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.076923 0.588235\n", - "Testing word: identification in query: identification retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.011628 0.558824\n", - "Testing word: retrieval in query: information retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.61225 1.0 0.647059\n", - "Testing word: with in query: information retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.61225 1.0 0.647059\n", - "Testing word: and in query: information retrieval and algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.61225 1.0 0.647059\n", - "Testing word: algorithms in query: information retrieval with algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.61225 1.0 0.647059\n", - "Testing word: algorithm in query: information retrieval with algorithm\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.61225 1.0 0.647059\n", - "Testing word: analysis in query: information retrieval with analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.005376 0.117647\n", - "Testing word: graph in query: information retrieval with graph\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.009901 0.029412\n", - "Testing word: optimization in query: information retrieval with optimization\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004975 0.029412\n", - "Testing word: techniques in query: information retrieval with techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001362 0.029412\n", - "Testing word: methods in query: information retrieval with methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.011236 0.058824\n", - "Testing word: models in query: information retrieval with models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001013 0.029412\n", - "Testing word: language in query: language in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00303 0.125\n", - "Testing word: misspellings in query: misspellings in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.510383 1.0 0.625\n", - "Testing word: in in query: misspellings in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.510383 1.0 0.625\n", - "Testing word: query in query: misspellings in query\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.510383 1.0 0.625\n", - "Testing word: information in query: misspellings in information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.240252 0.142857 0.625\n", - "Testing word: queries in query: misspellings in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.510383 1.0 0.625\n", - "Testing word: informational in query: misspellings in informational\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.083333 0.625\n", - "Testing word: document in query: document in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00813 0.5\n", - "Testing word: websites in query: websites in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: research in query: research in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: recommenders in query: recommenders in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: information in query: information in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.013158 0.1\n", - "Testing word: sources in query: sources in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00106 0.05\n", - "Testing word: detection in query: detection in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.02439 0.05\n", - "Testing word: conversations in query: conversations in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: recommendation in query: recommendation in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: analysis in query: analysis in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: effectiveness in query: effectiveness in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.010101 0.1\n", - "Testing word: recognition in query: recognition in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: limitations in query: limitations in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001715 0.05\n", - "Testing word: inclusion in query: inclusion in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002088 0.15\n", - "Testing word: relevant in query: relevant in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.021277 0.05\n", - "Testing word: queries in query: queries in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.006211 0.3\n", - "Testing word: intelligence in query: intelligence in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: informational in query: informational in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: knowledge in query: knowledge in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: assessments in query: assessments in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001773 0.05\n", - "Testing word: relevance in query: relevance in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.021277 0.05\n", - "Testing word: optimization in query: optimization in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: acceleration in query: acceleration in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001942 0.1\n", - "Testing word: processing in query: processing in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: identification in query: identification in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: in in query: information in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.013158 0.1\n", - "Testing word: these in query: information in these language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.013158 0.1\n", - "Testing word: compare in query: information in compare language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0125 0.2\n", - "Testing word: similarity in query: information in similarity language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: different in query: information in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.013158 0.1\n", - "Testing word: multiple in query: information in multiple language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: comparison in query: information in comparison language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: language in query: information in different language\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.013158 0.1\n", - "Testing word: languages in query: information in different languages\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.013158 0.1\n", - "Testing word: arabic in query: information in different arabic\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: misspellings in query: information in different misspellings\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: document in query: document in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: information in query: information in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001337 0.1\n", - "Testing word: detection in query: detection in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: recommendation in query: recommendation in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: evolution in query: evolution in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: theoretical in query: theoretical in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: recognition in query: recognition in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: compression in query: compression in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: abbreviations in query: abbreviations in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.9\n", - "Testing word: experiments in query: experiments in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.012987 0.2\n", - "Testing word: limitations in query: limitations in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: inclusion in query: inclusion in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: informational in query: informational in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: optimization in query: optimization in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: acceleration in query: acceleration in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: processing in query: processing in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: comparison in query: comparison in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007092 0.1\n", - "Testing word: identification in query: identification in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: in in query: abbreviations in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.9\n", - "Testing word: query in query: abbreviations in query\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.9\n", - "Testing word: information in query: abbreviations in information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.041667 0.9\n", - "Testing word: queries in query: abbreviations in queries\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.9\n", - "Testing word: informational in query: abbreviations in informational\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.033333 0.9\n", - "Testing word: lemmatization in query: lemmatization algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.414123 0.5 1.0\n", - "Testing word: algorithms in query: lemmatization algorithms\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.414123 0.5 1.0\n", - "Testing word: algorithm in query: lemmatization algorithm\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_7980/2168277619.py:8: UserWarning: [W008] Evaluating Doc.similarity based on empty vectors.\n", - " similarity = token.similarity(vocab_word)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.414123 0.5 1.0\n", - "Testing word: analysis in query: lemmatization analysis\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.33916 1.0 1.0\n", - "Testing word: graph in query: lemmatization graph\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.414123 0.5 1.0\n", - "Testing word: optimization in query: lemmatization optimization\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.414123 0.5 1.0\n", - "Testing word: techniques in query: lemmatization techniques\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.213986 0.5 1.0\n", - "Testing word: methods in query: lemmatization methods\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.621794 1.0 1.0\n", - "Testing word: models in query: lemmatization models\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.120811 0.166667 1.0\n", - "Testing word: filter in query: filter ad rich documents\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.727591 1.0 1.0\n", - "Testing word: ad in query: filter ad rich documents\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.727591 1.0 1.0\n", - "Testing word: rich in query: filter ad rich documents\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.727591 1.0 1.0\n", - "Testing word: document in query: filter ad rich document\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.727591 1.0 1.0\n", - "Testing word: information in query: filter ad rich information\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.616434 1.0 1.0\n", - "Testing word: papers in query: filter ad rich papers\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.752999 1.0 1.0\n", - "Testing word: sources in query: filter ad rich sources\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.458245 0.5 1.0\n", - "Testing word: conversations in query: filter ad rich conversations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.218851 0.166667 1.0\n", - "Testing word: recommendation in query: filter ad rich recommendation\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.213986 0.5 1.0\n", - "Testing word: limitations in query: filter ad rich limitations\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.566965 1.0 1.0\n", - "Testing word: documents in query: filter ad rich documents\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.727591 1.0 1.0\n", - "Testing word: informational in query: filter ad rich informational\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.466003 0.5 1.0\n", - "Testing word: assessments in query: filter ad rich assessments\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.591177 1.0 1.0\n", - "Testing word: identification in query: filter ad rich identification\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.583419 1.0 1.0\n", - "Testing word: improving in query: improving in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001548 0.037037\n", - "Testing word: strengthen in query: strengthen in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001626 0.222222\n", - "Testing word: research in query: research in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.007246 0.518519\n", - "Testing word: recommenders in query: recommenders in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004902 0.037037\n", - "Testing word: information in query: information in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002222 0.222222\n", - "Testing word: the in query: the in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002222 0.222222\n", - "Testing word: advancements in query: advancements in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.888889\n", - "Testing word: detection in query: detection in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: conversations in query: conversations in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.037037\n", - "Testing word: recommendation in query: recommendation in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.004902 0.037037\n", - "Testing word: evolution in query: evolution in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: learning in query: learning in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.289523 1.0 0.074074\n", - "Testing word: theoretical in query: theoretical in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.003257 0.074074\n", - "Testing word: effectiveness in query: effectiveness in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.003226 0.074074\n", - "Testing word: recognition in query: recognition in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: compression in query: compression in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: abbreviations in query: abbreviations in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001445 0.185185\n", - "Testing word: experiments in query: experiments in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001006 0.037037\n", - "Testing word: limitations in query: limitations in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.012658 0.074074\n", - "Testing word: inclusion in query: inclusion in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.00112 0.148148\n", - "Testing word: intelligence in query: intelligence in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: informational in query: informational in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: knowledge in query: knowledge in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.002008 0.037037\n", - "Testing word: assessments in query: assessments in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001387 0.037037\n", - "Testing word: relevance in query: relevance in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001112 0.037037\n", - "Testing word: optimization in query: optimization in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.016393 0.074074\n", - "Testing word: acceleration in query: acceleration in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.003205 0.185185\n", - "Testing word: processing in query: processing in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.001397 0.037037\n", - "Testing word: identification in query: identification in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0 0.0\n", - "Testing word: in in query: advancements in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.888889\n", - "Testing word: document in query: advancements in document retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.814815\n", - "Testing word: websites in query: advancements in websites retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.851852\n", - "Testing word: research in query: advancements in research retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.557978 1.0 0.814815\n", - "Testing word: recommenders in query: advancements in recommenders retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.161043 0.25 0.814815\n", - "Testing word: information in query: advancements in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.888889\n", - "Testing word: sources in query: advancements in sources retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.851852\n", - "Testing word: advancements in query: advancements in advancements retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.888889\n", - "Testing word: detection in query: advancements in detection retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.094788 0.25 0.851852\n", - "Testing word: conversations in query: advancements in conversations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.220092 1.0 0.814815\n", - "Testing word: recommendation in query: advancements in recommendation retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.161043 0.25 0.814815\n", - "Testing word: analysis in query: advancements in analysis retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.303082 0.5 0.851852\n", - "Testing word: effectiveness in query: advancements in effectiveness retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.31834 0.5 0.814815\n", - "Testing word: recognition in query: advancements in recognition retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.063621 0.1 0.888889\n", - "Testing word: abbreviations in query: advancements in abbreviations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.016949 0.888889\n", - "Testing word: limitations in query: advancements in limitations retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.303082 0.5 0.851852\n", - "Testing word: inclusion in query: advancements in inclusion retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.0625 0.888889\n", - "Testing word: relevant in query: advancements in relevant retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.777778\n", - "Testing word: queries in query: advancements in queries retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.195189 0.333333 0.851852\n", - "Testing word: documents in query: advancements in documents retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.110046 0.333333 0.814815\n", - "Testing word: intelligence in query: advancements in intelligence retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.0 0.016667 0.814815\n", - "Testing word: informational in query: advancements in informational retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.888889\n", - "Testing word: knowledge in query: advancements in knowledge retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.851852\n", - "Testing word: assessments in query: advancements in assessments retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.814815\n", - "Testing word: relevance in query: advancements in relevance retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.069431 0.125 0.777778\n", - "Testing word: optimization in query: advancements in optimization retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.437352 1.0 0.851852\n", - "Testing word: acceleration in query: advancements in acceleration retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.073364 0.142857 0.888889\n", - "Testing word: processing in query: advancements in processing retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.18341 0.333333 0.888889\n", - "Testing word: identification in query: advancements in identification retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.078398 0.166667 0.851852\n", - "Testing word: retrieval in query: advancements in information retrieval\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Low Entities 0.138862 0.5 0.888889\n", "Original Query: retrieval system improving effectiveness\n", "Expanded Query: retrieval system improving effectiveness\n", "\n", @@ -4025,31 +230,31 @@ "Expanded Query: social media detect self harm\n", "\n", "Original Query: stemming for arabic languages\n", - "Expanded Query: stemming for arabic arabic\n", + "Expanded Query: stemming for arabic languages\n", "\n", "Original Query: audio based animal recognition\n", - "Expanded Query: audio based animal effectiveness\n", + "Expanded Query: audio based animal recognition\n", "\n", "Original Query: comparison different retrieval models\n", - "Expanded Query: comparison these retrieval models\n", + "Expanded Query: effectiveness different retrieval models\n", "\n", "Original Query: cache architecture\n", "Expanded Query: cache architecture\n", "\n", "Original Query: document scoping formula\n", - "Expanded Query: document scoping formula\n", + "Expanded Query: identification scoping formula\n", "\n", "Original Query: pseudo relevance feedback\n", - "Expanded Query: pseudo improving feedback\n", + "Expanded Query: pseudo relevance feedback\n", "\n", "Original Query: how to represent natural conversations in word nets\n", - "Expanded Query: why to represent natural conversations in word nets\n", + "Expanded Query: how to represent natural conversations in word nets\n", "\n", "Original Query: algorithm acceleration with nvidia cuda\n", - "Expanded Query: algorithm improving with nvidia cuda\n", + "Expanded Query: algorithm identification with nvidia cuda\n", "\n", "Original Query: mention of algorithm\n", - "Expanded Query: mention of algorithm\n", + "Expanded Query: identification of algorithm\n", "\n", "Original Query: at least three authors\n", "Expanded Query: at least three authors\n", @@ -4058,169 +263,167 @@ "Expanded Query: german domain\n", "\n", "Original Query: mention of open source\n", - "Expanded Query: mention of open source\n", + "Expanded Query: recognition of open source\n", "\n", "Original Query: inclusion of text mining\n", - "Expanded Query: inclusion of text mining\n", + "Expanded Query: mention of text mining\n", "\n", "Original Query: the ethics of artificial intelligence\n", - "Expanded Query: the ethics of artificial conversations\n", + "Expanded Query: recognition ethics of artificial intelligence\n", "\n", "Original Query: machine learning for more relevant results\n", - "Expanded Query: machine learning for more relevant effectiveness\n", + "Expanded Query: machine conversations for more document effectiveness\n", "\n", "Original Query: crawling websites using machine learning\n", - "Expanded Query: crawling websites using machine learning\n", + "Expanded Query: crawling websites using machine conversations\n", "\n", "Original Query: recommenders influence on users\n", - "Expanded Query: recommenders effectiveness on websites\n", + "Expanded Query: recommenders effectiveness on users\n", "\n", "Original Query: search engine caching effects\n", "Expanded Query: search engine caching effects\n", "\n", "Original Query: consumer product reviews\n", - "Expanded Query: consumer consumer reviews\n", + "Expanded Query: consumer product reviews\n", "\n", "Original Query: limitations machine learning\n", - "Expanded Query: limitations machine learning\n", + "Expanded Query: recommenders machine conversations\n", "\n", "Original Query: medicine related research\n", "Expanded Query: medicine identification identification\n", "\n", "Original Query: natural language processing\n", - "Expanded Query: natural language recommenders\n", + "Expanded Query: natural arabic processing\n", "\n", "Original Query: graph based ranking\n", "Expanded Query: graph based ranking\n", "\n", "Original Query: medical studies that use information retrieval\n", - "Expanded Query: medical studies relevance using research retrieval\n", + "Expanded Query: medical research relevance using relevant retrieval\n", "\n", "Original Query: information retrieval on different language sources\n", - "Expanded Query: effectiveness retrieval on these language sources\n", + "Expanded Query: document retrieval on comparison arabic sources\n", "\n", "Original Query: papers that compare multiple information retrieval methods\n", - "Expanded Query: papers relevance different three sources retrieval models\n", + "Expanded Query: papers relevance comparison multiple document retrieval sources\n", "\n", "Original Query: risks of information retrieval in social media\n", - "Expanded Query: risks of processing retrieval in social media\n", + "Expanded Query: risks of recommenders retrieval in social media\n", "\n", "Original Query: actual experiments that strengthen theoretical knowledge\n", - "Expanded Query: relevant studies relevance effectiveness methods relevant\n", + "Expanded Query: relevant conversations relevance relevance intelligence conversations\n", "\n", "Original Query: fake news detection\n", - "Expanded Query: fake news comparison\n", + "Expanded Query: fake news detection\n", "\n", "Original Query: multimedia retrieval\n", "Expanded Query: multimedia retrieval\n", "\n", "Original Query: processing natural language for information retrieval\n", - "Expanded Query: processing natural language for information retrieval\n", + "Expanded Query: relevance natural arabic for document retrieval\n", "\n", "Original Query: recommendation systems\n", - "Expanded Query: recommenders systems\n", + "Expanded Query: recommendation systems\n", "\n", "Original Query: personalised search in e commerce\n", - "Expanded Query: personalised search in e commerce\n", + "Expanded Query: recommenders search in e commerce\n", "\n", "Original Query: sentiment analysis\n", - "Expanded Query: sentiment studies\n", + "Expanded Query: sentiment analysis\n", "\n", "Original Query: informational retrieval using neural networks\n", - "Expanded Query: effectiveness retrieval using neural networks\n", + "Expanded Query: recommenders retrieval caching neural networks\n", "\n", "Original Query: query log analysis\n", - "Expanded Query: query log studies\n", + "Expanded Query: query log relevance\n", "\n", "Original Query: entity recognition\n", "Expanded Query: entity recognition\n", "\n", "Original Query: relevance assessments\n", - "Expanded Query: relevant assessments\n", + "Expanded Query: relevance document\n", "\n", "Original Query: deep neural networks\n", "Expanded Query: deep neural networks\n", "\n", "Original Query: information retrieval\n", - "Expanded Query: information retrieval\n", + "Expanded Query: relevant retrieval\n", "\n", "Original Query: analysis for android apps\n", - "Expanded Query: studies for android apps\n", + "Expanded Query: information for android users\n", "\n", "Original Query: the university of amsterdam\n", - "Expanded Query: recognition university of amsterdam\n", + "Expanded Query: identification university of amsterdam\n", "\n", "Original Query: neural ranking for ecommerce product search\n", - "Expanded Query: neural ranking for ecommerce product search\n", + "Expanded Query: neural ranking for commerce product search\n", "\n", "Original Query: web pages evolution\n", - "Expanded Query: web pages evolution\n", + "Expanded Query: web pages relevance\n", "\n", "Original Query: exhaustivity of index\n", "Expanded Query: exhaustivity of index\n", "\n", "Original Query: query optimization\n", - "Expanded Query: query optimization\n", + "Expanded Query: query relevance\n", "\n", "Original Query: cosine similarity vector\n", - "Expanded Query: cosine comparison graph\n", + "Expanded Query: cosine similarity vector\n", "\n", "Original Query: reverse indexing\n", - "Expanded Query: reverse index\n", + "Expanded Query: reverse indexing\n", "\n", "Original Query: index compression techniques\n", - "Expanded Query: index compression models\n", + "Expanded Query: index compression techniques\n", "\n", "Original Query: search engine optimization with query logs\n", - "Expanded Query: search engine compression with query log\n", + "Expanded Query: search engine relevance with query logs\n", "\n", "Original Query: bm25\n", "Expanded Query: bm25\n", "\n", "Original Query: what makes natural language processing natural\n", - "Expanded Query: somethin makes natural language artificial natural\n", + "Expanded Query: nothin makes natural language processing natural\n", "\n", "Original Query: principle of a information retrieval indexing\n", - "Expanded Query: methods of a knowledge retrieval index\n", + "Expanded Query: methods of a document retrieval indexing\n", "\n", "Original Query: architecture of web search engine\n", "Expanded Query: architecture of web search engine\n", "\n", "Original Query: what is ahp\n", - "Expanded Query: somethin is ahp\n", + "Expanded Query: nothin is ahp\n", "\n", "Original Query: what is information retrieval\n", - "Expanded Query: somethin is information retrieval\n", + "Expanded Query: nothin is relevant retrieval\n", "\n", "Original Query: efficient retrieval algorithms\n", - "Expanded Query: efficient retrieval models\n", + "Expanded Query: compression retrieval graph\n", "\n", "Original Query: how to avoid spam results\n", - "Expanded Query: why to avoid spam results\n", + "Expanded Query: how to avoid spam assessments\n", "\n", "Original Query: information retrieval with algorithms\n", - "Expanded Query: information retrieval with algorithms\n", + "Expanded Query: relevant retrieval with graph\n", "\n", "Original Query: misspellings in queries\n", - "Expanded Query: misspellings in query\n", + "Expanded Query: language in information\n", "\n", "Original Query: information in different language\n", - "Expanded Query: document in compare language\n", + "Expanded Query: conversations in comparison misspellings\n", "\n", "Original Query: abbreviations in queries\n", - "Expanded Query: abbreviations in query\n", + "Expanded Query: document in information\n", "\n", "Original Query: lemmatization algorithms\n", - "Expanded Query: lemmatization methods\n", + "Expanded Query: lemmatization graph\n", "\n", "Original Query: filter ad rich documents\n", - "Expanded Query: filter ad rich papers\n", + "Expanded Query: filter ad rich recommendation\n", "\n", "Original Query: advancements in information retrieval\n", - "Expanded Query: advancements in research retrieval\n", - "\n", - " name ndcg_cut_10 recip_rank recall_1000\n", - "0 BM25 - Finaly Entities 0.394885 0.70544 0.780869\n" + "Expanded Query: relevance in queries retrieval\n", + "\n" ] } ], @@ -4228,6 +431,14 @@ "\n", "nlp = en_core_web_md.load()\n", "\n", + "def average_score(scores):\n", + " result = 0\n", + " if len(scores) > 0:\n", + " for score in scores:\n", + " result += score\n", + " result = result / len(scores)\n", + " return result\n", + "\n", "def get_similar_words(word, threshold=0.60):\n", " token = nlp(word)\n", " similar_words = []\n", @@ -4240,26 +451,16 @@ "\n", "def get_best_word(original_word, similar_words, bm25, topic, pt_dataset):\n", " best_word = original_word\n", - " best_score = -float('inf')\n", + " best_score = average_score(bm25.search(topic['query'])['score'])\n", " \n", " for word in similar_words:\n", " topic_copy = topic.copy()\n", " topic_copy['query'] = topic_copy['query'].replace(original_word, word)\n", " \n", - " print(f\"Testing word: {word} in query: {topic_copy['query']}\")\n", - " \n", - " experiment = pt.Experiment(\n", - " [bm25],\n", - " pd.DataFrame([topic_copy]), \n", - " pt_dataset.get_qrels(),\n", - " [\"ndcg_cut_10\", \"recip_rank\", \"recall_1000\"],\n", - " names=[\"BM25 - Low Entities\"]\n", - " )\n", - " \n", - " print(experiment)\n", - " \n", - " score = experiment[['ndcg_cut_10', 'recip_rank', 'recall_1000']].mean().mean()\n", - " \n", + " qr = topic_copy['query']\n", + " result = average_score(bm25.search(qr)['score'])\n", + "\n", + " score = result\n", " if score > best_score:\n", " best_score = score\n", " best_word = word\n", @@ -4284,6 +485,7 @@ "ensure_pyterrier_is_loaded()\n", "tira = Client()\n", "\n", + "\n", "pt_dataset = pt.get_dataset('irds:ir-lab-sose-2024/ir-acl-anthology-20240504-training')\n", "topics = pt_dataset.get_topics(variant='title')\n", "\n", @@ -4292,267 +494,61 @@ "\n", "expanded_topics, original_queries, expanded_queries = queryExpansion(topics, bm25, pt_dataset)\n", "\n", - "\n", - "experiment = pt.Experiment(\n", - " [bm25],\n", - " expanded_topics,\n", - " pt_dataset.get_qrels(),\n", - " [\"ndcg_cut_10\", \"recip_rank\", \"recall_1000\"],\n", - " names=[\"BM25 - Finaly Entities\"]\n", - ")\n", - "\n", "for original, expanded in zip(original_queries, expanded_queries):\n", " print(f\"Original Query: {original}\")\n", " print(f\"Expanded Query: {expanded}\\n\")\n", "\n", - "print(experiment)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a REST client to the TIRA platform for retrieving the pre-indexed data.\n", - "ensure_pyterrier_is_loaded()\n", - "tira = Client()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Step 2: Load the Dataset and the Index\n", "\n", - "The type of the index object that we load is ``, in fact a [Java class](http://terrier.org/docs/v3.6/javadoc/org/terrier/structures/Index.html) wrapped into Python. However, you do not need to worry about this: at this point, we will simply use the provided Index object to run procedures defined in Python." + "# print(experiment)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "IRDSDataset('ir-lab-sose-2024/ir-acl-anthology-20240504-training')\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "Download: 55.0kiB [00:00, 1.48MiB/s]" + "/tmp/ipykernel_491/1660782799.py:1: DeprecationWarning: Coercion of a dataframe into a transformer is deprecated; use a pt.Transformer.from_df() instead\n", + " bm25 = bm25 >> expanded_topics >> bm25\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Download finished. Extract...\n", - "Extraction finished: /root/.tira/extracted_runs/ir-benchmarks/ir-acl-anthology-20240504-training/marcel-gohsen\n", - "{'qid': '2', 'query': 'machine learning language identification', 'original_query': {'query_id': '2', 'title': 'machine learning language identification', 'description': 'What papers are about machine learning for language identification?', 'narrative': 'Relevant papers include research on methods of machine learning for language identification or how to improve those methods. Papers that focus on other methods for language identification or the usaged of machine learning not for language identification are not relevant.'}, 'entities': [{'begin': 17, 'end': 40, 'mention': 'language identification', 'url': 'https://en.wikipedia.org/wiki/Language_identification', 'score': 1.0}, {'begin': 0, 'end': 16, 'mention': 'machine learning', 'url': 'https://en.wikipedia.org/wiki/Machine_learning', 'score': 0.9745664739884391}, {'begin': 8, 'end': 16, 'mention': 'learning', 'url': 'https://en.wikipedia.org/wiki/Learning', 'score': 0.8932038834951451}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/machine', 'score': 0.597355769230769}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identification_(psychology)', 'score': 0.18333333333333302}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identity_document', 'score': 0.12083333333333302}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identification_(information)', 'score': 0.07916666666666601}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Political_machine', 'score': 0.048076923076923}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(producer)', 'score': 0.042067307692307}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Forensic_identification', 'score': 0.041666666666666005}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identification_(biology)', 'score': 0.0375}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(2017_film)', 'score': 0.033653846153846}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Body_identification', 'score': 0.033333333333333}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(Static-X_album)', 'score': 0.032451923076923}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Station_identification', 'score': 0.025}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/System_identification', 'score': 0.020833333333333003}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Animal_identification', 'score': 0.020833333333333003}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Parameter_identification_problem', 'score': 0.020833333333333003}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(band)', 'score': 0.019230769230769003}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identifiability', 'score': 0.016666666666666}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(Imagine_Dragons_song)', 'score': 0.015625}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Eyewitness_identification', 'score': 0.0125}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Organizational_identification', 'score': 0.0125}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(EP)', 'score': 0.010817307692307002}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(Crack_the_Sky_album)', 'score': 0.008413461538461}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identification_friend_or_foe', 'score': 0.008333333333333}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Identification_(literature)', 'score': 0.008333333333333}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(2006_film)', 'score': 0.0072115384615380005}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(patent)', 'score': 0.004807692307692}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Party_identification', 'score': 0.0041666666666660005}, {'begin': 26, 'end': 40, 'mention': 'identification', 'url': 'https://en.wikipedia.org/wiki/Gender_identity', 'score': 0.0041666666666660005}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(novel)', 'score': 0.002403846153846}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/computer', 'score': 0.002403846153846}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Machine_(Higdon)', 'score': 0.002403846153846}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Turing_machine', 'score': 0.002403846153846}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Person_of_Interest_(TV_series)#The_Machine', 'score': 0.001201923076923}, {'begin': 0, 'end': 7, 'mention': 'machine', 'url': 'https://en.wikipedia.org/wiki/Abstract_machine', 'score': 0.001201923076923}]}\n", - "{'qid': '1', 'query': 'retrieval system improving effectiveness', 'original_query': {'query_id': '1', 'title': 'retrieval system improving effectiveness', 'description': 'What papers focus on improving the effectiveness of a retrieval system?', 'narrative': 'Relevant papers include research on what makes a retrieval system effective and what improves the effectiveness of a retrieval system. Papers that focus on improving something else or improving the effectiveness of a system that is not a retrieval system are not relevant.'}, 'entities': [{'begin': 27, 'end': 40, 'mention': 'effectiveness', 'url': 'https://en.wikipedia.org/wiki/Effectiveness', 'score': 0.8193548387096771}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/recall_(memory)', 'score': 0.46212121212121204}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Information_retrieval', 'score': 0.204545454545454}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Retrieval', 'score': 0.09848484848484801}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Retrieval_(film)', 'score': 0.07575757575757501}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Data_retrieval', 'score': 0.045454545454545005}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Knowledge_retrieval', 'score': 0.015151515151515001}, {'begin': 0, 'end': 9, 'mention': 'retrieval', 'url': 'https://en.wikipedia.org/wiki/Document_retrieval', 'score': 0.007575757575757001}]}\n", - "33\n", - "35\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" + "The run file is normalized outside the TIRA sandbox, I will store it at \"../runs\".\n", + "Done. run file is stored under \"../runs/run.txt\".\n" ] } ], "source": [ - "# The dataset: the union of the IR Anthology and the ACL Anthology\n", - "# This line creates an IRDSDataset object and registers it under the name provided as an argument.\n", - "pt_dataset = pt.get_dataset('irds:ir-lab-sose-2024/ir-acl-anthology-20240504-training')\n", - "print(pt_dataset)\n", - "topics = pt_dataset.get_topics(variant='title')\n", - "\n", - "query_entity_linking = tira.pt.transform_queries('ir-benchmarks/marcel-gohsen/entity-linking', pt_dataset)\n", - "print(query_entity_linking(topics).iloc[1].to_dict())\n", - "print(query_entity_linking(topics).iloc[0].to_dict())\n", - "\n", - "lowEntity = []\n", - "highEntity = []\n", - "linked_queries = query_entity_linking(topics)\n", - "\n", - "for i in range(len(linked_queries)):\n", - " entities = linked_queries.iloc[i].to_dict().get('entities')\n", - " if entities is not None and len(entities) < 17:\n", - " lowEntity.append(entities)\n", - " elif entities is not None:\n", - " highEntity.append(entities)\n", - " \n", - "print(len(lowEntity))\n", - "print(len(highEntity))\n", - "\n", - "index = tira.pt.index('ir-lab-sose-2024/tira-ir-starter/Index (tira-ir-starter-pyterrier)', pt_dataset)\n", - "bm25 = pt.BatchRetrieve(index, wmodel=\"BM25\")\n", - "bm25QE = bm25 >> query_entity_linking # Teste mit \"<<\" Operator und der Python 'main.py' Datei wird eine manuelle Methode probiert." + "bm25 = bm25 >> expanded_topics >> bm25\n", + "run = bm25(pt_dataset.get_topics('text'))\n", + "persist_and_normalize_run(run, system_name='bm25-baseline', default_output='../runs')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 3: Define the Retrieval Pipeline\n", - "\n", - "We will define a BM25 retrieval pipeline as baseline. For details, see:\n", + "### Step 2: Load the Dataset and the Index\n", "\n", - "- [https://pyterrier.readthedocs.io](https://pyterrier.readthedocs.io)\n", - "- [https://github.com/terrier-org/ecir2021tutorial](https://github.com/terrier-org/ecir2021tutorial)" + "The type of the index object that we load is ``, in fact a [Java class](http://terrier.org/docs/v3.6/javadoc/org/terrier/structures/Index.html) wrapped into Python. However, you do not need to worry about this: at this point, we will simply use the provided Index object to run procedures defined in Python." ] }, { - "cell_type": "code", - "execution_count": 6, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
namendcg_cut.10recip_rankrecall_1000
0BM250.3740410.5798770.825376
\n", - "
" - ], - "text/plain": [ - " name ndcg_cut.10 recip_rank recall_1000\n", - "0 BM25 0.374041 0.579877 0.825376" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "# Experiment ohne Query Expansion\n", + "### Step 3: Define the Retrieval Pipeline\n", "\n", - "pt.Experiment(\n", - " [bm25],\n", - " pt_dataset.get_topics(),\n", - " pt_dataset.get_qrels(),\n", - " [\"ndcg_cut.10\", \"recip_rank\", \"recall_1000\"],\n", - " names=[\"BM25\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
namendcg_cut.10recip_rankrecall_1000
0BM250.3740410.5798770.825376
\n", - "
" - ], - "text/plain": [ - " name ndcg_cut.10 recip_rank recall_1000\n", - "0 BM25 0.374041 0.579877 0.825376" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Experiment mit Query Expansion\n", + "We will define a BM25 retrieval pipeline as baseline. For details, see:\n", "\n", - "pt.Experiment(\n", - " [bm25QE],\n", - " pt_dataset.get_topics(),\n", - " pt_dataset.get_qrels(),\n", - " [\"ndcg_cut.10\", \"recip_rank\", \"recall_1000\"],\n", - " names=[\"BM25\"]\n", - ")" + "- [https://pyterrier.readthedocs.io](https://pyterrier.readthedocs.io)\n", + "- [https://github.com/terrier-org/ecir2021tutorial](https://github.com/terrier-org/ecir2021tutorial)" ] }, { @@ -4562,263 +558,6 @@ "### Step 4: Create the Run\n" ] }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "First, we have a short look at the first three topics:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
qidquery
01retrieval system improving effectiveness
12machine learning language identification
23social media detect self harm
\n", - "
" - ], - "text/plain": [ - " qid query\n", - "0 1 retrieval system improving effectiveness\n", - "1 2 machine learning language identification\n", - "2 3 social media detect self harm" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print('First, we have a short look at the first three topics:')\n", - "\n", - "pt_dataset.get_topics('text').head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Now we do the retrieval...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Done. Here are the first 10 entries of the run\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
qiddociddocnorankscorequery
01948582004.cikm_conference-2004.47015.681777retrieval system improving effectiveness
111251371989.ipm_journal-ir0volumeA25A4.2115.047380retrieval system improving effectiveness
211258172005.ipm_journal-ir0volumeA41A5.11214.144223retrieval system improving effectiveness
315868W05-0704314.025748retrieval system improving effectiveness
41848762016.ntcir_conference-2016.90413.947994retrieval system improving effectiveness
51824721998.sigirconf_conference-98.15513.901647retrieval system improving effectiveness
61944152008.cikm_conference-2008.183613.808208retrieval system improving effectiveness
7117496O01-2005713.749449retrieval system improving effectiveness
81824901998.sigirconf_conference-98.33813.735541retrieval system improving effectiveness
911248012006.ipm_journal-ir0volumeA42A3.2913.569263retrieval system improving effectiveness
\n", - "
" - ], - "text/plain": [ - " qid docid docno rank score \\\n", - "0 1 94858 2004.cikm_conference-2004.47 0 15.681777 \n", - "1 1 125137 1989.ipm_journal-ir0volumeA25A4.2 1 15.047380 \n", - "2 1 125817 2005.ipm_journal-ir0volumeA41A5.11 2 14.144223 \n", - "3 1 5868 W05-0704 3 14.025748 \n", - "4 1 84876 2016.ntcir_conference-2016.90 4 13.947994 \n", - "5 1 82472 1998.sigirconf_conference-98.15 5 13.901647 \n", - "6 1 94415 2008.cikm_conference-2008.183 6 13.808208 \n", - "7 1 17496 O01-2005 7 13.749449 \n", - "8 1 82490 1998.sigirconf_conference-98.33 8 13.735541 \n", - "9 1 124801 2006.ipm_journal-ir0volumeA42A3.2 9 13.569263 \n", - "\n", - " query \n", - "0 retrieval system improving effectiveness \n", - "1 retrieval system improving effectiveness \n", - "2 retrieval system improving effectiveness \n", - "3 retrieval system improving effectiveness \n", - "4 retrieval system improving effectiveness \n", - "5 retrieval system improving effectiveness \n", - "6 retrieval system improving effectiveness \n", - "7 retrieval system improving effectiveness \n", - "8 retrieval system improving effectiveness \n", - "9 retrieval system improving effectiveness " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print('Now we do the retrieval...')\n", - "run = bm25(pt_dataset.get_topics('text'))\n", - "\n", - "print('Done. Here are the first 10 entries of the run')\n", - "run.head(10)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -4830,11 +569,12 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Auskommentiert, da main.py testen wollte wie die run.txt aussieht\n", + "#run = bm25(pt_dataset.get_topics('text'))\n", "#persist_and_normalize_run(run, system_name='bm25-baseline', default_output='../runs')" ] } @@ -4855,7 +595,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/evaluation/initial-evaluation.ipynb b/evaluation/initial-evaluation.ipynb index c484e75..6ae97f4 100644 --- a/evaluation/initial-evaluation.ipynb +++ b/evaluation/initial-evaluation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -14,9 +14,19 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8\n", + "\n", + "No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.\n" + ] + } + ], "source": [ "# Create a REST client to the TIRA platform for retrieving the pre-indexed data.\n", "ensure_pyterrier_is_loaded()\n", @@ -25,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -36,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -71,6 +81,15 @@ " ndcg_cut.10\n", " recip_rank\n", " recall_1000\n", + " P@5\n", + " P@10\n", + " P@15\n", + " P@20\n", + " P@30\n", + " P@100\n", + " P@200\n", + " P@500\n", + " P@1000\n", " \n", " \n", " \n", @@ -80,105 +99,62 @@ " 0.374041\n", " 0.579877\n", " 0.825376\n", + " 0.376471\n", + " 0.332353\n", + " 0.311765\n", + " 0.270588\n", + " 0.219608\n", + " 0.108382\n", + " 0.063676\n", + " 0.029941\n", + " 0.016191\n", " \n", - " \n", - "\n", - "" - ], - "text/plain": [ - " name ndcg_cut.10 recip_rank recall_1000\n", - "0 BM25 0.374041 0.579877 0.825376" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# run file von Basic BM25 ohne QE\n", - "bm25 = pt.io.read_results('../runs/runBasicQuery.txt')\n", - "\n", - "pt.Experiment(\n", - " [bm25],\n", - " pt_dataset.get_topics(),\n", - " pt_dataset.get_qrels(),\n", - " [\"ndcg_cut.10\", \"recip_rank\", \"recall_1000\"],\n", - " names=[\"BM25\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
namendcg_cut.10recip_rankrecall_1000
0BM250.0066470.0169210.0348041BM25QE0.2412210.3857100.8253760.2352940.2147060.1921570.1676470.1372550.0789710.0512500.0272940.016191
\n", "
" ], "text/plain": [ - " name ndcg_cut.10 recip_rank recall_1000\n", - "0 BM25 0.006647 0.016921 0.034804" + " name ndcg_cut.10 recip_rank recall_1000 P@5 P@10 P@15 \\\n", + "0 BM25 0.374041 0.579877 0.825376 0.376471 0.332353 0.311765 \n", + "1 BM25QE 0.241221 0.385710 0.825376 0.235294 0.214706 0.192157 \n", + "\n", + " P@20 P@30 P@100 P@200 P@500 P@1000 \n", + "0 0.270588 0.219608 0.108382 0.063676 0.029941 0.016191 \n", + "1 0.167647 0.137255 0.078971 0.051250 0.027294 0.016191 " ] }, - "execution_count": 9, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# run.txt aktuell von main.py, da im notebook von baseline.ipynb die Funktion 'persist_and_normalize_run()' auskommentiert wurde\n", - "bm25 = pt.io.read_results('../runs/run.txt')\n", + "# run file von Basic BM25 ohne QE\n", + "bm25 = pt.io.read_results('../runs/runBasicQuery.txt')\n", + "bm25QE = pt.io.read_results('../runs/run.txt')\n", "\n", "pt.Experiment(\n", - " [bm25],\n", + " [bm25, bm25QE],\n", " pt_dataset.get_topics(),\n", " pt_dataset.get_qrels(),\n", - " [\"ndcg_cut.10\", \"recip_rank\", \"recall_1000\"],\n", - " names=[\"BM25\"]\n", + " [\"ndcg_cut.10\", \"recip_rank\", \"recall_1000\", \"P\"],\n", + " names=[\"BM25\", \"BM25QE\"]\n", ")" ] }