diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 14ed6869e..55af5d198 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -556,7 +556,7 @@ jobs: path: | data !data/*.zip - key: test-mldata-000-2ee919d5c0eef34d5a4f40bcf0480c1bf0310417db6921e3a2575c48991f379c2f4ad179f8514390133795614a96fa5b4ece55906c68a90af07c09670b2c3c5b + key: test-mldata-001-2ee919d5c0eef34d5a4f40bcf0480c1bf0310417db6921e3a2575c48991f379c2f4ad179f8514390133795614a96fa5b4ece55906c68a90af07c09670b2c3c5b - name: Download ML data run: | python -m lenskit.data.fetch ml-100k ml-20m @@ -613,7 +613,7 @@ jobs: path: | data !data/*.zip - key: test-mldata-000-cd26f1c44a6962b0936346b346a9b418a3ed04b01a2892269fccd24a6387e943dba6d5e64ab2f8feb1823475601d65c2e6ebbeeeca0c2c210f0d37c00aabf2e9 + key: test-mldata-001-cd26f1c44a6962b0936346b346a9b418a3ed04b01a2892269fccd24a6387e943dba6d5e64ab2f8feb1823475601d65c2e6ebbeeeca0c2c210f0d37c00aabf2e9 - name: Download ML data run: | python -m lenskit.data.fetch ml-100k ml-1m ml-10m ml-20m diff --git a/.vscode/ltex.dictionary.en-US.txt b/.vscode/ltex.dictionary.en-US.txt index b7e0c9ac0..2882dffb6 100644 --- a/.vscode/ltex.dictionary.en-US.txt +++ b/.vscode/ltex.dictionary.en-US.txt @@ -9,3 +9,4 @@ lenskit invoker CUDA subpackages +recomputation diff --git a/conftest.py b/conftest.py index 277df074b..9deb7043d 100644 --- a/conftest.py +++ b/conftest.py @@ -15,6 +15,7 @@ from pytest import fixture, skip from lenskit.parallel import ensure_parallel_init +from lenskit.util.test import ml_100k, ml_ds, ml_ratings # noqa: F401 logging.getLogger("numba").setLevel(logging.INFO) diff --git a/docs/GettingStarted.ipynb b/docs/GettingStarted.ipynb index 9742b32e7..04c933104 100644 --- a/docs/GettingStarted.ipynb +++ b/docs/GettingStarted.ipynb @@ -26,8 +26,8 @@ "metadata": {}, "outputs": [], "source": [ - "from lenskit.datasets import ML100K\n", "from lenskit.data import from_interactions_df\n", + "from lenskit.data.movielens import 
load_movielens_df\n", "from lenskit import batch, topn, util\n", "from lenskit import crossfold as xf\n", "from lenskit.algorithms import Recommender, als, knn\n", @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -156,15 +156,14 @@ "4 166 346 1.0 886397596" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ml100k = ML100K(here('data/ml-100k'))\n", - "ratings = ml100k.ratings\n", - "ratings.head()" + "ml100k = load_movielens_df(here('data/ml-100k.zip'))\n", + "ml100k.head()" ] }, { @@ -178,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -210,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -235,22 +234,22 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/mde48/LensKit/lkpy/lenskit/lenskit/data/matrix.py:152: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /Users/runner/miniforge3/conda-bld/libtorch_1716578890680/work/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)\n", - " matrix = matrix.to_sparse_csr()\n" + "/Users/mde48/LensKit/lkpy/lenskit/lenskit/data/dataset.py:628: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /Users/runner/miniforge3/conda-bld/libtorch_1719361060788/work/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)\n", + " return torch.sparse_csr_tensor(\n" ] } ], "source": [ "all_recs = []\n", "test_data = []\n", - "for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):\n", + "for train, test in xf.partition_users(ml100k[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):\n", " test_data.append(test)\n", " all_recs.append(eval('ItemItem', algo_ii, train, test))\n", " all_recs.append(eval('ALS', algo_als, train, test))" @@ -265,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -299,32 +298,32 @@ " \n", " \n", " 0\n", - " 1125\n", - " 5.014371\n", + " 1449\n", + " 4.994975\n", " 2\n", " 1\n", " ItemItem\n", " \n", " \n", " 1\n", - " 1449\n", - " 4.967544\n", + " 1398\n", + " 4.866851\n", " 2\n", " 2\n", " ItemItem\n", " \n", " \n", " 2\n", - " 427\n", - " 4.863028\n", + " 511\n", + " 4.845399\n", " 2\n", " 3\n", " ItemItem\n", " \n", " \n", " 3\n", - " 483\n", - " 4.855851\n", + " 1512\n", + " 4.805413\n", " 2\n", " 4\n", " ItemItem\n", @@ -332,7 +331,7 @@ " \n", " 4\n", " 1594\n", - " 4.846334\n", + " 4.788468\n", " 2\n", " 5\n", " ItemItem\n", @@ -343,14 +342,14 @@ ], "text/plain": [ " item score user rank Algorithm\n", - "0 1125 5.014371 2 1 ItemItem\n", - "1 1449 4.967544 2 2 ItemItem\n", - "2 427 4.863028 2 3 ItemItem\n", - "3 483 4.855851 2 4 ItemItem\n", - "4 1594 4.846334 2 5 ItemItem" + "0 1449 4.994975 2 1 ItemItem\n", + "1 1398 4.866851 2 2 ItemItem\n", + "2 511 4.845399 2 3 ItemItem\n", + "3 1512 4.805413 2 4 ItemItem\n", + "4 1594 4.788468 2 5 ItemItem" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -369,7 +368,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -387,7 +386,7 @@ }, { 
"cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -427,27 +426,27 @@ " ItemItem\n", " 2\n", " 100\n", - " 0.085382\n", + " 0.081186\n", " \n", " \n", - " 7\n", + " 6\n", " 100\n", - " 0.223133\n", + " 0.288946\n", " \n", " \n", " 8\n", " 100\n", - " 0.097582\n", + " 0.082112\n", " \n", " \n", - " 9\n", + " 10\n", " 100\n", - " 0.063818\n", + " 0.364167\n", " \n", " \n", - " 10\n", + " 14\n", " 100\n", - " 0.211332\n", + " 0.182636\n", " \n", " \n", "\n", @@ -456,14 +455,14 @@ "text/plain": [ " nrecs ndcg\n", "Algorithm user \n", - "ItemItem 2 100 0.085382\n", - " 7 100 0.223133\n", - " 8 100 0.097582\n", - " 9 100 0.063818\n", - " 10 100 0.211332" + "ItemItem 2 100 0.081186\n", + " 6 100 0.288946\n", + " 8 100 0.082112\n", + " 10 100 0.364167\n", + " 14 100 0.182636" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -484,19 +483,19 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Algorithm\n", - "ALS 0.140061\n", - "ItemItem 0.099664\n", + "ALS 0.132649\n", + "ItemItem 0.096963\n", "Name: ndcg, dtype: float64" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -507,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -516,13 +515,13 @@ "" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAiwAAAHhCAYAAABN6eUeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAtn0lEQVR4nO3df1SUdd7/8dfwayZL0ERBXUTw3lW8ScvhXm8wPLVrmHZn3mtFv3BPanvTaTNgve809C5tkzbNmyyB1aDWzqbsru2pVjJpSw8GJ1cC6xS33W0i3jYcg4rRLMDh+v7hcb73NGAOGvMBno9zrnO8PvO+rnlfHad5+bl+jM2yLEsAAAAGCwl2AwAAAN+FwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYLywYDdwsXR1denTTz/V0KFDZbPZgt0OAAA4D5Zl6cSJExozZoxCQnqeRxkwgeXTTz9VXFxcsNsAAAC9cPToUf3gBz/o8fUBE1iGDh0q6cwBR0ZGBrkbAABwPtxut+Li4rzf4z0ZMIHl7GmgyMhIAgsAAP3Md13OwUW3AADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADBerwJLUVGREhIS5HA45HQ6VVVV1WOty+XSHXfcoYkTJyokJEQ5OTnn3Pf27dtls9k0f/783rQGAAAGoIADS3l5uXJycpSfn6+6ujqlp6drzpw5ampq6ra+vb1dI0eOVH5+vqZOnXrOfR85ckTLli1Tenp6oG0BAIABLODAsmHDBi1evFhLlixRUlKSCgsLFRcXp+Li4m7rx48fr6eeekoLFy5UVFRUj/v1eDy68847tXr1aiUmJgbaFgAAGMACCiwdHR2qra1VRkaGz3hGRoaqq6svqJE1a9Zo5MiRWrx48XnVt7e3y+12+ywAAGBgCgukuKWlRR6PRzExMT7jMTExam5u7nUTb7/9tkpLS1VfX3/e2xQUFGj16tW9fs+BZPzyncFuAX2o8fEbgt0CAPS5Xl10a7PZfNYty/IbO18nTpzQXXfdpS1btig6Ovq8t1uxYoXa2tq8y9GjR3v1/gAAwHwBzbBER0crNDTUbzbl+PHjfrMu5+vvf/+7GhsbdeONN3rHurq6zjQXFqZDhw5pwoQJftvZ7XbZ7fZevScAAOhfApphiYiIkNPpVGVlpc94ZWWl0tLSetXApEmT9P7776u+vt67zJs3T9dee63q6+sVFxfXq/0CAICBI6AZFknKy8tTVlaWUlJSlJqaqs2bN6upqUnZ2dmSzpyqOXbsmLZu3erd5uy1KSdPntRnn32m+vp6RUREaPLkyXI4HEpOTvZ5j2HDhkmS3zgAABicAg4smZmZam1t1Zo1a+RyuZScnKyKigrFx8dLOvOguG8/k+Wqq67y/rm2tlYvvvii4uPj1djYeGHdAwCAQcFmWZYV7CYuBrfbraioKLW1tSkyMjLY7fQp7hIaXLhLCMBAcr7f3/yWEAAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGC8XgWWoqIiJSQkyOFwyOl0qqqqqsdal8ulO+64QxMnTlRISIhycnL8arZs2aL09HQNHz5cw4cP16xZs7R///7etAYAAAaggANLeXm5cnJylJ+fr7q6OqWnp2vOnDlqamrqtr69vV0jR45Ufn6+pk6d2m3
Nnj17dPvtt+utt95STU2Nxo0bp4yMDB07dizQ9gAAwABksyzLCmSD6dOna9q0aSouLvaOJSUlaf78+SooKDjnttdcc42uvPJKFRYWnrPO4/Fo+PDheuaZZ7Rw4cJua9rb29Xe3u5dd7vdiouLU1tbmyIjI8//gAaA8ct3BrsF9KHGx28IdgsAcNG43W5FRUV95/d3QDMsHR0dqq2tVUZGhs94RkaGqqure9dpN06dOqXOzk5dfvnlPdYUFBQoKirKu8TFxV209wcAAGYJKLC0tLTI4/EoJibGZzwmJkbNzc0Xranly5dr7NixmjVrVo81K1asUFtbm3c5evToRXt/AABglrDebGSz2XzWLcvyG+utJ554Qtu2bdOePXvkcDh6rLPb7bLb7RflPQEAgNkCCizR0dEKDQ31m005fvy436xLb6xfv15r167VG2+8oSlTplzw/gAAwMAQ0CmhiIgIOZ1OVVZW+oxXVlYqLS3tghpZt26dHn30Ue3atUspKSkXtC8AADCwBHxKKC8vT1lZWUpJSVFqaqo2b96spqYmZWdnSzpzbcmxY8e0detW7zb19fWSpJMnT+qzzz5TfX29IiIiNHnyZElnTgOtWrVKL774osaPH++dwbnssst02WWXXegxAgCAfi7gwJKZmanW1latWbNGLpdLycnJqqioUHx8vKQzD4r79jNZrrrqKu+fa2tr9eKLLyo+Pl6NjY2SzjyIrqOjQzfffLPPdg8//LAeeeSRQFsEAAADTMDPYTHV+d7HPRDxHJbBheewABhIvpfnsAAAAAQDgQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeL0KLEVFRUpISJDD4ZDT6VRVVVWPtS6XS3fccYcmTpyokJAQ5eTkdFu3Y8cOTZ48WXa7XZMnT9af//zn3rQGAAAGoIADS3l5uXJycpSfn6+6ujqlp6drzpw5ampq6ra+vb1dI0eOVH5+vqZOndptTU1NjTIzM5WVlaWDBw8qKytLt956q955551A2wMAAAOQzbIsK5ANpk+frmnTpqm4uNg7lpSUpPnz56ugoOCc215zzTW68sorVVhY6DOemZkpt9ut1157zTt2/fXXa/jw4dq2bVu3+2pvb1d7e7t33e12Ky4uTm1tbYqMjAzkkPq98ct3BrsF9KHGx28IdgsAcNG43W5FRUV95/d3QDMsHR0dqq2tVUZGhs94RkaGqqure9epzsywfHufs2fPPuc+CwoKFBUV5V3i4uJ6/f4AAMBsYYEUt7S0yOPxKCYmxmc8JiZGzc3NvW6iubk54H2uWLFCeXl53vWzMywAMJAwgzq4MIPas4ACy1k2m81n3bIsv7Hve592u112u/2C3hMAAPQPAZ0Sio6OVmhoqN/Mx/Hjx/1mSAIRGxt70fcJAAAGjoACS0REhJxOpyorK33GKysrlZaW1usmUlNT/fa5e/fuC9onAAAYOAI+JZSXl6esrCylpKQoNTVVmzdvVlNTk7KzsyWdubbk2LFj2rp1q3eb+vp6SdLJkyf12Wefqb6+XhEREZo8ebIk6YEHHtDMmTP1m9/8RjfddJNefvllvfHGG9q3b99FOEQAANDfBRxYMjMz1draqjVr1sjlcik5OVkVFRWKj4+XdOZBcd9+JstVV13l/XNtba1efPFFxcfHq7GxUZKUlpam7du3a+XKlVq1apUmTJig8vJyTZ8+/QIODQAADBQBP4fFVOd7H/dAxF0Egwt
3EQwufL4Hl8H4+f5ensMCAAAQDAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOP1KrAUFRUpISFBDodDTqdTVVVV56zfu3evnE6nHA6HEhMTVVJS4ldTWFioiRMn6pJLLlFcXJxyc3P1zTff9KY9AAAwwAQcWMrLy5WTk6P8/HzV1dUpPT1dc+bMUVNTU7f1hw8f1ty5c5Wenq66ujo99NBDWrp0qXbs2OGt+f3vf6/ly5fr4YcfVkNDg0pLS1VeXq4VK1b0/sgAAMCAERboBhs2bNDixYu1ZMkSSWdmRl5//XUVFxeroKDAr76kpETjxo1TYWGhJCkpKUkHDhzQ+vXrtWDBAklSTU2NZsyYoTvuuEOSNH78eN1+++3av39/b48LAAAMIAHNsHR0dKi2tlYZGRk+4xkZGaquru52m5qaGr/62bNn68CBA+rs7JQkXX311aqtrfUGlE8++UQVFRW64YYbeuylvb1dbrfbZwEAAANTQDMsLS0t8ng8iomJ8RmPiYlRc3Nzt9s0Nzd3W3/69Gm1tLRo9OjRuu222/TZZ5/p6quvlmVZOn36tO69914tX768x14KCgq0evXqQNoHAAD9VK8uurXZbD7rlmX5jX1X/f8d37Nnjx577DEVFRXp3Xff1UsvvaS//OUvevTRR3vc54oVK9TW1uZdjh492ptDAQAA/UBAMyzR0dEKDQ31m005fvy43yzKWbGxsd3Wh4WFacSIEZKkVatWKSsry3tdzBVXXKGvvvpKv/jFL5Sfn6+QEP9cZbfbZbfbA2kfAAD0UwHNsERERMjpdKqystJnvLKyUmlpad1uk5qa6le/e/dupaSkKDw8XJJ06tQpv1ASGhoqy7K8szEAAGDwCviUUF5enp599lmVlZWpoaFBubm5ampqUnZ2tqQzp2oWLlzorc/OztaRI0eUl5enhoYGlZWVqbS0VMuWLfPW3HjjjSouLtb27dt1+PBhVVZWatWqVZo3b55CQ0MvwmECAID+LODbmjMzM9Xa2qo1a9bI5XIpOTlZFRUVio+PlyS5XC6fZ7IkJCSooqJCubm52rRpk8aMGaONGzd6b2mWpJUrV8pms2nlypU6duyYRo4cqRtvvFGPPfbYRThEAADQ39msAXLOxe12KyoqSm1tbYqMjAx2O31q/PKdwW4Bfajx8Z5v98fAw+d7cBmMn+/z/f7mt4QAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxutVYCkqKlJCQoIcDoecTqeqqqrOWb937145nU45HA4lJiaqpKTEr+bLL7/Ufffdp9GjR8vhcCgpKUkVFRW9aQ8AAAwwAQeW8vJy5eTkKD8/X3V1dUpPT9ecOXPU1NTUbf3hw4c1d+5cpaenq66uTg899JCWLl2qHTt2eGs6Ojp03XXXqbGxUX/605906NAhbdmyRWPHju39kQEAgAEjLNANNmzYoMWLF2vJkiWSpML
CQr3++usqLi5WQUGBX31JSYnGjRunwsJCSVJSUpIOHDig9evXa8GCBZKksrIyff7556qurlZ4eLgkKT4+vrfHBAAABpiAZlg6OjpUW1urjIwMn/GMjAxVV1d3u01NTY1f/ezZs3XgwAF1dnZKkl555RWlpqbqvvvuU0xMjJKTk7V27Vp5PJ4ee2lvb5fb7fZZAADAwBRQYGlpaZHH41FMTIzPeExMjJqbm7vdprm5udv606dPq6WlRZL0ySef6E9/+pM8Ho8qKiq0cuVKPfnkk3rsscd67KWgoEBRUVHeJS4uLpBDAQAA/UivLrq12Ww+65Zl+Y19V/3/He/q6tKoUaO0efNmOZ1O3XbbbcrPz1dxcXGP+1yxYoXa2tq8y9GjR3tzKAAAoB8I6BqW6OhohYaG+s2mHD9+3G8W5azY2Nhu68PCwjRixAhJ0ujRoxUeHq7Q0FBvTVJSkpqbm9XR0aGIiAi//drtdtnt9kDaBwAA/VRAMywRERFyOp2qrKz0Ga+srFRaWlq326SmpvrV7969WykpKd4LbGfMmKGPP/5YXV1d3pqPPvpIo0eP7jasAACAwSXgU0J5eXl69tlnVVZWpoaGBuXm5qqpqUnZ2dmSzpyqWbhwobc+OztbR44cUV5enhoaGlRWVqbS0lItW7bMW3PvvfeqtbVVDzzwgD766CPt3LlTa9eu1X333XcRDhEAAPR3Ad/WnJmZqdbWVq1Zs0Yul0vJycmqqKjw3obscrl8nsmSkJCgiooK5ebmatOmTRozZow2btzovaVZkuLi4rR7927l5uZqypQpGjt2rB544AE9+OCDF+EQAQBAf2ezzl4B28+53W5FRUWpra1NkZGRwW6nT41fvjPYLaAPNT5+Q7BbQB/i8z24DMbP9/l+f/NbQgAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxehVYioqKlJCQIIfDIafTqaqqqnPW7927V06nUw6HQ4mJiSopKemxdvv27bLZbJo/f35vWgMAAANQwIGlvLxcOTk5ys/PV11dndLT0zVnzhw1NTV1W3/48GHNnTtX6enpqqur00MPPaSlS5dqx44dfrVHjhzRsmXLlJ6eHviRAACAASvgwLJhwwYtXrxYS5YsUVJSkgoLCxUXF6fi4uJu60tKSjRu3DgVFhYqKSlJS5Ys0aJFi7R+/XqfOo/HozvvvFOrV69WYmLid/bR3t4ut9vtswAAgIEpoMDS0dGh2tpaZWRk+IxnZGSourq6221qamr86mfPnq0DBw6os7PTO7ZmzRqNHDlSixcvPq9eCgoKFBUV5V3i4uICORQAANCPBBRYWlpa5PF4FBMT4zMeExOj5ubmbrdpbm7utv706dNqaWmRJL399tsqLS3Vli1bzruXFStWqK2tzbscPXo0kEMBAAD9SFhvNrLZbD7rlmX5jX1X/dnxEydO6K677tKWLVsUHR193j3Y7XbZ7fYAugYAAP1VQIElOjpaoaGhfrMpx48f95tFOSs2Nrbb+rCwMI0YMUIffPCBGhsbdeONN3pf7+rqOtNcWJgOHTqkCRMmBNImAAAYYAI6JRQRESGn06nKykqf8crKSqWlpXW7TWpqql/97t27lZKSovDwcE2aNEnvv/++6uvrvcu8efN07bXXqr6+nmtTAABA4KeE8vLylJWVpZSUFKWmpmrz5s1qampSdna2pDPXlhw7dkxbt26VJGVnZ+uZZ55
RXl6e7rnnHtXU1Ki0tFTbtm2TJDkcDiUnJ/u8x7BhwyTJbxwAAAxOAQeWzMxMtba2as2aNXK5XEpOTlZFRYXi4+MlSS6Xy+eZLAkJCaqoqFBubq42bdqkMWPGaOPGjVqwYMHFOwoAADCg2ayzV8D2c263W1FRUWpra1NkZGSw2+lT45fvDHYL6EONj98Q7BbQh/h8Dy6D8fN9vt/f/JYQAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHi9CixFRUVKSEiQw+GQ0+lUVVXVOev37t0rp9Mph8OhxMRElZSU+Ly+ZcsWpaena/jw4Ro+fLhmzZql/fv396Y1AAAwAAUcWMrLy5WTk6P8/HzV1dUpPT1dc+bMUVNTU7f1hw8f1ty5c5Wenq66ujo99NBDWrp0qXbs2OGt2bNnj26//Xa99dZbqqmp0bhx45SRkaFjx471/sgAAMCAYbMsywpkg+nTp2vatGkqLi72jiUlJWn+/PkqKCjwq3/wwQf1yiuvqKGhwTuWnZ2tgwcPqqamptv38Hg8Gj58uJ555hktXLiw25r29na1t7d7191ut+Li4tTW1qbIyMhADqnfG798Z7BbQB9qfPyGYLeAPsTne3AZjJ9vt9utqKio7/z+DmiGpaOjQ7W1tcrIyPAZz8jIUHV1dbfb1NTU+NXPnj1bBw4cUGdnZ7fbnDp1Sp2dnbr88st77KWgoEBRUVHeJS4uLpBDAQAA/UhAgaWlpUUej0cxMTE+4zExMWpubu52m+bm5m7rT58+rZaWlm63Wb58ucaOHatZs2b12MuKFSvU1tbmXY4ePRrIoQAAgH4krDcb2Ww2n3XLsvzGvqu+u3FJeuKJJ7Rt2zbt2bNHDoejx33a7XbZ7fZA2gYAAP1UQIElOjpaoaGhfrMpx48f95tFOSs2Nrbb+rCwMI0YMcJnfP369Vq7dq3eeOMNTZkyJZDWAADAABbQKaGIiAg5nU5VVlb6jFdWViotLa3bbVJTU/3qd+/erZSUFIWHh3vH1q1bp0cffVS7du1SSkpKIG0BAIABLuDbmvPy8vTss8+qrKxMDQ0Nys3NVVNTk7KzsyWdubbk/97Zk52drSNHjigvL08NDQ0qKytTaWmpli1b5q154okntHLlSpWVlWn8+PFqbm5Wc3OzTp48eREOEQAA9HcBX8OSmZmp1tZWrVmzRi6XS8nJyaqoqFB8fLwkyeVy+TyTJSEhQRUVFcrNzdWmTZs0ZswYbdy4UQsWLPDWFBUVqaOjQzfffLPPez388MN65JFHenloAABgoAj4OSymOt/7uAcintMwuAzG5zQMZny+B5fB+Pn+Xp7DAgAAEAwEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADj9SqwFBUVKSEhQQ6HQ06nU1VVVees37t3r5xOpxwOhxITE1VSUuJXs2PHDk2ePFl2u12TJ0/Wn//85960BgAABqCAA0t5ebl
ycnKUn5+vuro6paena86cOWpqauq2/vDhw5o7d67S09NVV1enhx56SEuXLtWOHTu8NTU1NcrMzFRWVpYOHjyorKws3XrrrXrnnXd6f2QAAGDAsFmWZQWywfTp0zVt2jQVFxd7x5KSkjR//nwVFBT41T/44IN65ZVX1NDQ4B3Lzs7WwYMHVVNTI0nKzMyU2+3Wa6+95q25/vrrNXz4cG3btu28+nK73YqKilJbW5siIyMDOaR+b/zyncFuAX2o8fEbgt0C+hCf78FlMH6+z/f7OyyQnXZ0dKi2tlbLly/3Gc/IyFB1dXW329TU1CgjI8NnbPbs2SotLVVnZ6fCw8NVU1Oj3Nxcv5rCwsIee2lvb1d7e7t3va2tTdKZAx9sutpPBbsF9KHB+Hd8MOPzPbgMxs/32WP+rvmTgAJLS0uLPB6PYmJifMZjYmLU3Nzc7TbNzc3d1p8+fVotLS0aPXp0jzU97VOSCgoKtHr1ar/xuLi48z0coF+KKgx2BwC+L4P5833ixAlFRUX1+HpAgeUsm83ms25Zlt/Yd9V/ezzQfa5YsUJ5eXne9a6uLn3++ecaMWLEObfDwOB2uxUXF6ejR48OulOAwEDH53twsSxLJ06c0JgxY85ZF1BgiY6OVmhoqN/Mx/Hjx/1mSM6KjY3ttj4sLEwjRow4Z01P+5Qku90uu93uMzZs2LDzPRQMEJGRkfwPDRig+HwPHueaWTkroLuEIiIi5HQ6VVlZ6TNeWVmptLS0brdJTU31q9+9e7dSUlIUHh5+zpqe9gkAAAaXgE8J5eXlKSsrSykpKUpNTdXmzZvV1NSk7OxsSWdO1Rw7dkxbt26VdOaOoGeeeUZ5eXm65557VFNTo9LSUp+7fx544AHNnDlTv/nNb3TTTTfp5Zdf1htvvKF9+/ZdpMMEAAD9WcCBJTMzU62trVqzZo1cLpeSk5NVUVGh+Ph4SZLL5fJ5JktCQoIqKiqUm5urTZs2acyYMdq4caMWLFjgrUlLS9P27du1cuVKrVq1ShMmTFB5ebmmT59+EQ4RA5HdbtfDDz/sd1oQQP/H5xvdCfg5LAAAAH2N3xICAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8Agv6pSNHjujDDz9UV1dXsFsBAPQBAguM9rvf/c7vV7t/8YtfKDExUVdccYWSk5N19OjR4DQHAOgzBBYYraSkxOc3Jnbt2qXnnntOW7du1d/+9jcNGzas21/tBtC/fPPNN1q3bp3mzp2rlJQUTZs2zWcBevVrzUBf+eijj5SSkuJdf/nllzVv3jzdeeedkqS1a9fq7rvvDlZ7AC6SRYsWqbKyUjfffLN+/OMfy2azBbslGIbAAqN9/fXXPr/WWl1drUWLFnnXExMT/X7pG0D/s3PnTlVUVGjGjBnBbgWG4pQQjBYfH6/a2lpJUktLiz744ANdffXV3tebm5vP62fJAZht7NixGjp0aLDbgMEILDDawoULdd999+nRRx/VLbfcokmTJsnpdHpfr66uVnJychA7BHAxPPnkk3rwwQd15MiRYLcCQ3FKCEZ78MEHderUKb300kuKjY3VH//4R5/X3377bd1+++1B6g7AxZKSkqJvvvlGiYmJGjJkiMLDw31e//zzz4PUGUzBrzWjX+vs7JTL5dK4ceOC3QqACzBr1iw1NTVp8eLFiomJ8bvo9uc//3mQOoMpCCzo1w4ePKhp06bJ4/EEuxUAF2DIkCGqqanR1KlTg90KDMU1LACAoJs0aZK+/vrrYLcBgxFYAABB9/jjj+tXv/qV9uzZo9bWVrndbp8F4JQQ+jVOCQEDQ0jImX8/f/vaFcuyZLPZ+IyDu4Rgtvfee++crx86dKiPOgHwfXrrrbeC3QIMxwwLjBYSEiKbzaZz/TXlX18AMPAxwwKjHT58+Dtrvvjiiz7oBMD3raqqSr/97W/1ySef6I9//KPGjh2rF154QQkJCT5PuMbgxEW3MFp8fHy3y7Bhw7Rz50797Gc
/83nyLYD+aceOHZo9e7YuueQSvfvuu2pvb5cknThxQmvXrg1ydzABgQX9yptvvqm77rpLo0eP1tNPP605c+bowIEDwW4LwAX69a9/rZKSEm3ZssXnKbdpaWl69913g9gZTMEpIRjvf//3f/X888+rrKxMX331lW699VZ1dnZqx44dmjx5crDbA3ARHDp0SDNnzvQbj4yM1Jdfftn3DcE4zLDAaHPnztXkyZP14Ycf6umnn9ann36qp59+OthtAbjIRo8erY8//thvfN++fUpMTAxCRzANMyww2u7du7V06VLde++9+uEPfxjsdgB8T/7t3/5NDzzwgMrKymSz2fTpp5+qpqZGy5Yt03/+538Guz0YgMACo1VVVamsrEwpKSmaNGmSsrKylJmZGey2AFxk//Ef/6G2tjZde+21+uabbzRz5kzZ7XYtW7ZMv/zlL4PdHgzAc1jQL5w6dUrbt29XWVmZ9u/fL4/How0bNmjRokUaOnRosNsDcJGcOnVKH374obq6ujR58mRddtllwW4JhiCwoN85dOiQSktL9cILL+jLL7/Uddddp1deeSXYbQG4AIsWLdJTTz3l9w+Qr776Svfff7/KysqC1BlMQWBBv+XxePTqq6+qrKyMwAL0c6GhoXK5XBo1apTPeEtLi2JjY3X69OkgdQZTcA0L+q3Q0FDNnz9f8+fPD3YrAHrJ7XbLsixZlqUTJ07I4XB4X/N4PKqoqPALMRicCCwAgKAZNmyYbDabbDabfvSjH/m9brPZtHr16iB0BtNwSggAEDR79+6VZVn6yU9+oh07dujyyy/3vhYREaH4+HiNGTMmiB3CFAQWAEDQHTlyROPGjZPNZgt2KzAUgQUAEDTvvffeedVNmTLle+4EpiOwAACCJiQkRDabTef6KrLZbPJ4PH3YFUzERbcAgKA5fPhwsFtAP8EMCwAAMB4zLAAAI3zzzTd67733dPz4cXV1dfm8Nm/evCB1BVMQWAAAQbdr1y4tXLhQLS0tfq9xDQskKSTYDQAA8Mtf/lK33HKLXC6Xurq6fBbCCiSuYQEAGCAyMlJ1dXWaMGFCsFuBoZhhAQAE3c0336w9e/YEuw0YjBkWAEDQnTp1SrfccotGjhypK664QuHh4T6vL126NEidwRQEFgBA0D377LPKzs7WJZdcohEjRvg8ot9ms+mTTz4JYncwAYEFABB0sbGxWrp0qZYvX66QEK5WgD/+VgAAgq6jo0OZmZmEFfSIvxkAgKD7+c9/rvLy8mC3AYPx4DgAQNB5PB498cQTev311zVlyhS/i243bNgQpM5gCq5hAQAE3bXXXtvjazabTW+++WYfdgMTEVgAAIDxuIYFAGCMjz/+WK+//rq+/vprSRL/psZZBBYAQNC1trbqpz/9qX70ox9p7ty5crlckqQlS5boV7/6VZC7gwkILACAoMvNzVV4eLiampo0ZMgQ73hmZqZ27doVxM5gCu4SAgAE3e7du/X666/rBz/4gc/4D3/4Qx05ciRIXcEkzLAAAILuq6++8plZOaulpUV2uz0IHcE0BBYAQNDNnDlTW7du9a7bbDZ1dXVp3bp157zlGYMHtzUDAILuww8/1DXXXCOn06k333xT8+bN0wcffKDPP/9cb7/9tiZMmBDsFhFkBBYAgBGam5tVXFys2tpadXV1adq0abrvvvs0evToYLcGAxBYAABB19TUpLi4ONlstm5fGzduXBC6gkkILACAoAsNDZXL5dKoUaN8xltbWzVq1Ch5PJ4gdQZTcNEtACDoLMvqdnbl5MmTcjgcQegIpuE5LACAoMnLy5N05q6gVatW+dza7PF49M477+jKK68MUncwCYEFABA0dXV1ks7MsLz//vuKiIjwvhYREaGpU6dq2bJlwWoPBuEaFgBA0N19993auHGjhg4dGuxWYCgCCwAgaH72s5+dV91LL730PXcC03FKCAAQNFFRUcFuAf0EMywAAMB43NYMAACMR2ABAADGI7AAAAD
jEVgAAIDxCCwALtiePXtks9n05ZdfGvNe48ePV2Fh4ffeD4C+QWABcN6qq6sVGhqq66+/Pmg9pKWlyeVyeW+Hff755zVs2LCg9QOgbxBYAJy3srIy3X///dq3b5+ampr6/P07OzsVERGh2NjYbn8oD8DARWABcF6++uor/eEPf9C9996rf/mXf9Hzzz9/zvotW7YoLi5OQ4YM0b/+679qw4YNfjMhxcXFmjBhgiIiIjRx4kS98MILPq/bbDaVlJTopptu0qWXXqpf//rXPqeE9uzZo7vvvlttbW2y2Wyy2Wx65JFHvNufOnVKixYt0tChQzVu3Dht3rzZ+1pjY6NsNpv+8Ic/KD09XZdccon+6Z/+SR999JH+9re/KSUlRZdddpmuv/56ffbZZxf6nw/AhbIA4DyUlpZaKSkplmVZ1quvvmqNHz/e6urqsizLst566y1LkvXFF19YlmVZ+/bts0JCQqx169ZZhw4dsjZt2mRdfvnlVlRUlHd/L730khUeHm5t2rTJOnTokPXkk09aoaGh1ptvvumtkWSNGjXKKi0ttf7+979bjY2NPu/V3t5uFRYWWpGRkZbL5bJcLpd14sQJy7IsKz4+3rr88sutTZs2Wf/zP/9jFRQUWCEhIVZDQ4NlWZZ1+PBhS5I1adIka9euXdaHH35o/fM//7M1bdo065prrrH27dtnvfvuu9Y//MM/WNnZ2X3wXxjAuRBYAJyXtLQ0q7Cw0LIsy+rs7LSio6OtyspKy7L8A0tmZqZ1ww03+Gx/5513+gSWtLQ065577vGpueWWW6y5c+d61yVZOTk5PjXffq/nnnvOZ79nxcfHW3fddZd3vauryxo1apRVXFxsWdb/DyzPPvust2bbtm2WJOuvf/2rd6ygoMCaOHHiuf7TAOgDnBIC8J0OHTqk/fv367bbbpMkhYWFKTMzU2VlZT3W//jHP/YZ+/Z6Q0ODZsyY4TM2Y8YMNTQ0+IylpKT0uu8pU6Z4/2yz2RQbG6vjx4/3WBMTEyNJuuKKK3zGvr0NgL7Hjx8C+E6lpaU6ffq0xo4d6x2zLEvh4eH64osv/Ooty/K7KNbq5mfLuqv59till17a677Dw8P93q+rq6vHmrPv/e2xb28DoO8xwwLgnE6fPq2tW7fqySefVH19vXc5ePCg4uPj9fvf/95vm0mTJmn//v0+YwcOHPBZT0pK0r59+3zGqqurlZSUFFB/ERER8ng8AW0DoP9hhgXAOf3lL3/RF198ocWLF3uffXLWzTffrNLSUv3Xf/2Xz/j999+vmTNnasOGDbrxxhv15ptv6rXXXvOZPfn3f/933XrrrZo2bZp++tOf6tVXX9VLL72kN954I6D+xo8fr5MnT+qvf/2rpk6dqiFDhmjIkCG9P2AARmKGBcA5lZaWatasWX5hRZIWLFig+vp6vfvuuz7jM2bMUElJiTZs2KCpU6dq165dys3NlcPh8NbMnz9fTz31lNatW6d//Md/1G9/+1s999xzuuaaawLqLy0tTdnZ2crMzNTIkSP1xBNP9Oo4AZjNZnV3YhkALrJ77rlH//3f/62qqqpgtwKgH+KUEIDvxfr163Xdddfp0ksv1Wuvvabf/e53KioqCnZbAPopZlgAfC9uvfVW7dmzRydOnFBiYqLuv/9+ZWdnB7stAP0UgQUAABiPi24BAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOP9Pz0IvsSzsXRNAAAAAElFTkSuQmCC", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAiwAAAHhCAYAAABN6eUeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAArW0lEQVR4nO3df1BV953/8dcV+RETwd+gFhHcVnGJpl62Lhgc0xqMZmPcaiS/sBM1XTI2CtRdf6CbxDSSqnGpiUBVSGqmUdpiJ8lKVKzR0cDEStB0Ims2GxXXXMZAEvBHBLyc7x+O97s3F4wXDecDPB8zZ8b7Oe9z7vs4XnnxOT+uw7IsSwAAAAbrYXcDAAAA34bAAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgvJ52N3CrtLS06LPPPlPv3r3lcDjsbgcAANwAy7J0/vx5DRkyRD16tD2P0mUCy2effabIyEi72wAAAO1w5swZfe9732tzfZcJLL1795Z09YBDQ0Nt7gYAANyIhoYGRUZGen6Ot6XLBJZrp4FCQ0MJLAAAdDLfdjkHF90CAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjNfT7gZw84Yv3Wl3C+hAp1683+4WAKDDMcMCAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB47Qosubm5io6OVkhIiJxOpw4ePNhmrcvl0qOPPqqRI0eqR48eSk9P96nZvHmzkpKS1LdvX/Xt21eTJ0/W4cOH29MaAADogvwOLEVFRUpPT1dWVpYqKyuVlJSkqVOnqrq6utX6xsZGDRw4UFlZWRo7dmyrNfv379cjjzyid999V+Xl5Ro2bJiSk5N19uxZf9sDAABdkMOyLMufDcaPH69x48YpLy/PMxYbG6sZM2YoOzv7uttOmjRJd911l3Jycq5b53a71bdvX73yyiuaM2dOqzWNjY1qbGz0vG5oaFBkZKTq6+sVGhp64wfUBQxfutPuFtCBTr14v90tAMAt09DQoLCwsG/9+e3XDEtTU5MqKiqUnJzsNZ6cnKyysrL2ddqKS5cuqbm5Wf369WuzJjs7W2FhYZ4lMjLylr0/AAAwi1+Bpba2Vm63W+Hh4V7j4eHhqqmpuWVNLV26VEOHDtXkyZPbrFm2bJnq6+s9y5kzZ27Z+wMAALP0bM9GDofD67VlWT5j7bVmzRpt27ZN+/fvV0hISJt1wcHBCg4OviXvCQAAzOZXYBkwYIACAgJ8ZlPOnTvnM+vSHuvWrdPq1au1d+9ejRkz5qb3BwAAuga/TgkFBQXJ6XSqtLTUa7y0tFSJiYk31cjatWv1/PPPa9euXYqPj7+pfQEAgK7F71NCmZmZSk1NVXx8vBISErRp0yZVV1crLS1N0tVrS86ePautW7d6tjl69Kgk6cKFC/r888919OhRBQUFafTo0ZKungZauXKl3njjDQ0fPtwzg3PHHXfojjvuuNljBAAAnZzfgSUlJUV1dXVatWqVXC6X4uLiVFJSoqioKElXHxT3zWey/PCHP/T8uaKiQm+88YaioqJ06tQpSVcfRNfU1KRZs2Z5bffMM8/o2Wef9bdFAADQxfj9HBZT3eh93F0Rz2HpXngOC4Cu5Dt5DgsAAIAdCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgA
AjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeO0KLLm5uYqOjlZISIicTqcOHjzYZq3L5dKjjz6qkSNHqkePHkpPT2+1rri4WKNHj1ZwcLBGjx6tP//5z+1pDQAAdEF+B5aioiKlp6crKytLlZWVSkpK0tSpU1VdXd1qfWNjowYOHKisrCyNHTu21Zry8nKlpKQoNTVVx44dU2pqqmbPnq3333/f3/YAAEAX5LAsy/Jng/Hjx2vcuHHKy8vzjMXGxmrGjBnKzs6+7raTJk3SXXfdpZycHK/xlJQUNTQ06J133vGM3Xffferbt6+2bdvW6r4aGxvV2Njoed3Q0KDIyEjV19crNDTUn0Pq9IYv3Wl3C+hAp1683+4WAOCWaWhoUFhY2Lf+/PZrhqWpqUkVFRVKTk72Gk9OTlZZWVn7OtXVGZZv7nPKlCnX3Wd2drbCwsI8S2RkZLvfHwAAmM2vwFJbWyu3263w8HCv8fDwcNXU1LS7iZqaGr/3uWzZMtXX13uWM2fOtPv9AQCA2Xq2ZyOHw+H12rIsn7Hvep/BwcEKDg6+qfcEAACdg18zLAMGDFBAQIDPzMe5c+d8Zkj8ERERccv3CQAAug6/AktQUJCcTqdKS0u9xktLS5WYmNjuJhISEnz2uWfPnpvaJwAA6Dr8PiWUmZmp1NRUxcfHKyEhQZs2bVJ1dbXS0tIkXb225OzZs9q6datnm6NHj0qSLly4oM8//1xHjx5VUFCQRo8eLUlatGiRJk6cqF//+td68MEH9eabb2rv3r06dOjQLThEAADQ2fkdWFJSUlRXV6dVq1bJ5XIpLi5OJSUlioqKknT1QXHffCbLD3/4Q8+fKyoq9MYbbygqKkqnTp2SJCUmJmr79u1asWKFVq5cqREjRqioqEjjx4+/iUMDAABdhd/PYTHVjd7H3RXxHJbuheewAOhKbvTnd7vuEgIAdAx+Iele+IWkbXz5IQAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADBeuwJLbm6uoqOjFRISIqfTqYMHD163/sCBA3I6nQoJCVFMTIzy8/N9anJycjRy5EjddtttioyMVEZGhi5fvtye9gAAQBfjd2ApKipSenq6srKyVFlZqaSkJE2dOlXV1dWt1p88eVLTpk1TUlKSKisrtXz5ci1cuFDFxcWemt///vdaunSpnnnmGVVVVamgoEBFRUVatmxZ+48MAAB0GT393WD9+vWaN2+e5s+fL+nqzMju3buVl5en7Oxsn/r8/HwNGzZMOTk5kqTY2FgdOXJE69at08yZMyVJ5eXlmjBhgh599FFJ0vDhw/XII4/o8OHDbfbR2NioxsZGz+uGhgZ/DwUAAHQSfs2wNDU1qaKiQsnJyV7jycnJKisra3Wb8vJyn/opU6boyJEjam5uliTdfffdqqio8ASUTz/9VCUlJbr//vvb7CU7O1thYWGeJTIy0p9DAQAAnYhfgaW2tlZut1vh4eFe4+Hh4aqpqWl1m5qamlbrr1y5otraWknSww8/rOeff153332
3AgMDNWLECN1zzz1aunRpm70sW7ZM9fX1nuXMmTP+HAoAAOhE/D4lJEkOh8PrtWVZPmPfVv9/x/fv368XXnhBubm5Gj9+vD755BMtWrRIgwcP1sqVK1vdZ3BwsIKDg9vTPgAA6GT8CiwDBgxQQECAz2zKuXPnfGZRromIiGi1vmfPnurfv78kaeXKlUpNTfVcF3PnnXfq4sWL+vnPf66srCz16MHd1wAAdGd+JYGgoCA5nU6VlpZ6jZeWlioxMbHVbRISEnzq9+zZo/j4eAUGBkqSLl265BNKAgICZFmWZzYGAAB0X35PXWRmZmrLli0qLCxUVVWVMjIyVF1drbS0NElXry2ZM2eOpz4tLU2nT59WZmamqqqqVFhYqIKCAi1evNhT88ADDygvL0/bt2/XyZMnVVpaqpUrV2r69OkKCAi4BYcJAAA6M7+vYUlJSVFdXZ1WrVoll8uluLg4lZSUKCoqSpLkcrm8nskSHR2tkpISZWRkaOPGjRoyZIg2bNjguaVZklasWCGHw6EVK1bo7NmzGjhwoB544AG98MILt+AQAQBAZ+ewusg5l4aGBoWFham+vl6hoaF2t9Ohhi/daXcL6ECnXmz7dn90PXy+u5fu+Pm+0Z/fXM0KAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxmtXYMnNzVV0dLRCQkLkdDp18ODB69YfOHBATqdTISEhiomJUX5+vk/NV199pQULFmjw4MEKCQlRbGysSkpK2tMeAADoYvwOLEVFRUpPT1dWVpYqKyuVlJSkqVOnqrq6utX6kydPatq0aUpKSlJlZaWWL1+uhQsXqri42FPT1NSke++9V6dOndKf/vQnnThxQps3b9bQoUPbf2QAAKDL6OnvBuvXr9e8efM0f/58SVJOTo52796tvLw8ZWdn+9Tn5+dr2LBhysnJkSTFxsbqyJEjWrdunWbOnClJKiws1BdffKGysjIFBgZKkqKiotp7TAAAoIvxa4alqalJFRUVSk5O9hpPTk5WWVlZq9uUl5f71E+ZMkVHjhxRc3OzJOmtt95SQkKCFixYoPDwcMXFxWn16tVyu91t9tLY2KiGhgavBQAAdE1+BZba2lq53W6Fh4d7jYeHh6umpqbVbWpqalqtv3LlimprayVJn376qf70pz/J7XarpKREK1as0EsvvaQXXnihzV6ys7MVFhbmWSIjI/05FAAA0Im066Jbh8Ph9dqyLJ+xb6v/v+MtLS0aNGiQNm3aJKfTqYcfflhZWVnKy8trc5/Lli1TfX29Zzlz5kx7DgUAAHQCfl3DMmDAAAUEBPjMppw7d85nFuWaiIiIVut79uyp/v37S5IGDx6swMBABQQEeGpiY2NVU1OjpqYmBQUF+ew3ODhYwcHB/rQPAAA6Kb9mWIKCguR0OlVaWuo1XlpaqsTExFa3SUhI8Knfs2eP4uPjPRfYTpgwQZ988olaWlo8NR9//LEGDx7calgBAADdi9+nhDIzM7VlyxYVFhaqqqpKGRkZqq6uVlpamqSrp2rmzJnjqU9LS9Pp06eVmZmpqqoqFRYWqqCgQIsXL/bUPPXUU6qrq9OiRYv08ccfa+fOnVq9erUWLFhwCw4RAAB0dn7f1pySkqK6ujqtWrVKLpdLcXFxKikp8dyG7HK5vJ7JEh0drZKSEmVkZGjjxo0aMmSINmzY4LmlWZIiIyO1Z88eZWRkaMy
YMRo6dKgWLVqkJUuW3IJDBAAAnZ3DunYFbCfX0NCgsLAw1dfXKzQ01O52OtTwpTvtbgEd6NSL99vdAjoQn+/upTt+vm/05zffJQQAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGa1dgyc3NVXR0tEJCQuR0OnXw4MHr1h84cEBOp1MhISGKiYlRfn5+m7Xbt2+Xw+HQjBkz2tMaAADogvwOLEVFRUpPT1dWVpYqKyuVlJSkqVOnqrq6utX6kydPatq0aUpKSlJlZaWWL1+uhQsXqri42Kf29OnTWrx4sZKSkvw/EgAA0GX5HVjWr1+vefPmaf78+YqNjVVOTo4iIyOVl5fXan1+fr6GDRumnJwcxcbGav78+Zo7d67WrVvnVed2u/XYY4/pueeeU0xMTPuOBgAAdEl+BZampiZVVFQoOTnZazw5OVllZWWtblNeXu5TP2XKFB05ckTNzc2esVWrVmngwIGaN2/eDfXS2NiohoYGrwUAAHRNfgWW2tpaud1uhYeHe42Hh4erpqam1W1qamparb9y5Ypqa2slSe+9954KCgq0efPmG+4lOztbYWFhniUyMtKfQwEAAJ1Iuy66dTgcXq8ty/IZ+7b6a+Pnz5/X448/rs2bN2vAgAE33MOyZctUX1/vWc6cOePHEQAAgM6kpz/FAwYMUEBAgM9syrlz53xmUa6JiIhotb5nz57q37+/PvroI506dUoPPPCAZ31LS8vV5nr21IkTJzRixAif/QYHBys4ONif9gEAQCfl1wxLUFCQnE6nSktLvcZLS0uVmJjY6jYJCQk+9Xv27FF8fLwCAwM1atQo/e1vf9PRo0c9y/Tp03XPPffo6NGjnOoBAAD+zbBIUmZmplJTUxUfH6+EhARt2rRJ1dXVSktLk3T1VM3Zs2e1detWSVJaWppeeeUVZWZm6sknn1R5ebkKCgq0bds2SVJISIji4uK83qNPnz6S5DMOAAC6J78DS0pKiurq6rRq1Sq5XC7FxcWppKREUVFRkiSXy+X1TJbo6GiVlJQoIyNDGzdu1JAhQ7RhwwbNnDnz1h0FAADo0hzWtStgO7mGhgaFhYWpvr5eoaGhdrfToYYv3Wl3C+hAp1683+4W0IH4fHcv3fHzfaM/v/kuIQAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADBeuwJLbm6uoqOjFRISIqfTqYMHD163/sCBA3I6nQoJCVFMTIzy8/O91m/evFlJSUnq27ev+vbtq8mTJ+vw4cPtaQ0AAHRBfgeWoqIipaenKysrS5WVlUpKStLUqVNVXV3dav3Jkyc1bdo0JSUlqbKyUsuXL9fChQtVXFzsqdm/f78eeeQRvfvuuyovL9ewYcOUnJyss2fPtv/IAABAl+GwLMvyZ4Px48dr3LhxysvL84zFxsZqxow
Zys7O9qlfsmSJ3nrrLVVVVXnG0tLSdOzYMZWXl7f6Hm63W3379tUrr7yiOXPm3FBfDQ0NCgsLU319vUJDQ/05pE5v+NKddreADnTqxfvtbgEdiM9399IdP983+vPbrxmWpqYmVVRUKDk52Ws8OTlZZWVlrW5TXl7uUz9lyhQdOXJEzc3NrW5z6dIlNTc3q1+/fm320tjYqIaGBq8FAAB0TX4FltraWrndboWHh3uNh4eHq6amptVtampqWq2/cuWKamtrW91m6dKlGjp0qCZPntxmL9nZ2QoLC/MskZGR/hwKAADoRNp10a3D4fB6bVmWz9i31bc2Lklr1qzRtm3btGPHDoWEhLS5z2XLlqm+vt6znDlzxp9DAAAAnUhPf4oHDBiggIAAn9mUc+fO+cyiXBMREdFqfc+ePdW/f3+v8XXr1mn16tXau3evxowZc91egoODFRwc7E/7AACgk/JrhiUoKEhOp1OlpaVe46WlpUpMTGx1m4SEBJ/6PXv2KD4+XoGBgZ6xtWvX6vnnn9euXbsUHx/vT1sAAKCL8/uUUGZmprZs2aLCwkJVVVUpIyND1dXVSktLk3T1VM3/vbMnLS1Np0+fVmZmpqqqqlRYWKiCggItXrzYU7NmzRqtWLFChYWFGj58uGpqalRTU6MLFy7cgkMEAACdnV+nhCQpJSVFdXV1WrVqlVwul+Li4lRSUqKoqChJksvl8nomS3R0tEpKSpSRkaGNGzdqyJAh2rBhg2bOnOmpyc3NVVNTk2bNmuX1Xs8884yeffbZdh4aAADoKvx+DoupeA4Luovu+JyG7ozPd/fSHT/f38lzWAAAAOxAYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAAADjEVgAAIDxCCwAAMB4BBYAAGA8AgsAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWAAAgPEILAAAwHgEFgAAYDwCCwAAMB6BBQAAGI/AAgAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAYj8ACAACMR2ABAADGa1dgyc3NVXR0tEJCQuR0OnXw4MHr1h84cEBOp1MhISGKiYlRfn6+T01xcbFGjx6t4OBgjR49Wn/+85/b0xoAAOiC/A4sRUVFSk9PV1ZWliorK5WUlKSpU6equrq61fqTJ09q2rRpSkpKUmVlpZYvX66FCxequLjYU1NeXq6UlBSlpqbq2LFjSk1N1ezZs/X++++3/8gAAECX4bAsy/Jng/Hjx2vcuHHKy8vzjMXGxmrGjBnKzs72qV+yZIneeustVVVVecbS0tJ07NgxlZeXS5JSUlLU0NCgd955x1Nz3333qW/fvtq2bdsN9dXQ0KCwsDDV19crNDTUn0Pq9IYv3Wl3C+hAp1683+4W0IH4fHcv3fHzfaM/v3v6s9OmpiZVVFRo6dKlXuPJyckqKytrdZvy8nIlJyd7jU2ZMkUFBQVqbm5WYGCgysvLlZGR4VOTk5PTZi+NjY1qbGz0vK6vr5d09cC7m5bGS3a3gA7UHf+Nd2d8vruX7vj5vnbM3zZ/4ldgqa2tldvtVnh4uNd4eHi4ampqWt2mpqam1forV66otrZWgwcPbrOmrX1KUnZ2tp577jmf8cjIyBs9HKBTCsuxuwMA35Xu/Pk+f/68wsLC2lzvV2C5xuFweL22LMtn7Nvqvznu7z6XLVumzMxMz+uWlhZ98cUX6t+//3W3Q9fQ0NCgyMhInTlzptudAgS6Oj7f3YtlWTp//ryGDBly3Tq/AsuAAQMUEBDgM/Nx7tw5nxmSayIiIlqt79mzp/r373/dmrb2KUnBwcEKDg72GuvTp8+NHgq6iNDQUP5DA7ooPt/dx/VmVq7x6y6hoKAgOZ1OlZaWeo2
XlpYqMTGx1W0SEhJ86vfs2aP4+HgFBgZet6atfQIAgO7F71NCmZmZSk1NVXx8vBISErRp0yZVV1crLS1N0tVTNWfPntXWrVslXb0j6JVXXlFmZqaefPJJlZeXq6CgwOvun0WLFmnixIn69a9/rQcffFBvvvmm9u7dq0OHDt2iwwQAAJ2Z34ElJSVFdXV1WrVqlVwul+Li4lRSUqKoqChJksvl8nomS3R0tEpKSpSRkaGNGzdqyJAh2rBhg2bOnOmpSUxM1Pbt27VixQqtXLlSI0aMUFFRkcaPH38LDhFdUXBwsJ555hmf04IAOj8+32iN389hAQAA6Gh8lxAAADAegQUAABiPwAIAAIxHYAEAAMYjsAAAAOMRWNApnT59WsePH1dLS4vdrQAAOgCBBUb73e9+5/Ot3T//+c8VExOjO++8U3FxcTpz5ow9zQEAOgyBBUbLz8/3+o6JXbt26dVXX9XWrVv117/+VX369Gn1W7sBdC6XL1/W2rVrNW3aNMXHx2vcuHFeC9Cub2sGOsrHH3+s+Ph4z+s333xT06dP12OPPSZJWr16tZ544gm72gNwi8ydO1elpaWaNWuWfvSjH8nhcNjdEgxDYIHRvv76a69vay0rK9PcuXM9r2NiYny+6RtA57Nz506VlJRowoQJdrcCQ3FKCEaLiopSRUWFJKm2tlYfffSR7r77bs/6mpqaG/pacgBmGzp0qHr37m13GzAYgQVGmzNnjhYsWKDnn39eDz30kEaNGiWn0+lZX1ZWpri4OBs7BHArvPTSS1qyZIlOnz5tdyswFKeEYLQlS5bo0qVL2rFjhyIiIvTHP/7Ra/17772nRx55xKbuANwq8fHxunz5smJiYtSrVy8FBgZ6rf/iiy9s6gym4Nua0ak1NzfL5XJp2LBhdrcC4CZMnjxZ1dXVmjdvnsLDw30uuv3Zz35mU2cwBYEFndqxY8c0btw4ud1uu1sBcBN69eql8vJyjR071u5WYCiuYQEA2G7UqFH6+uuv7W4DBiOwAABs9+KLL+qXv/yl9u/fr7q6OjU0NHgtAKeE0KlxSgjoGnr0uPr78zevXbEsSw6Hg884uEsIZvvwww+vu/7EiRMd1AmA79K7775rdwswHDMsMFqPHj3kcDh0vX+m/PYFAF0fMyww2smTJ7+15ssvv+yATgB81w4ePKjf/va3+vTTT/XHP/5RQ4cO1euvv67o6GivJ1yje+KiWxgtKiqq1aVPnz7auXOnfvrTn3o9+RZA51RcXKwpU6botttu0wcffKDGxkZJ0vnz57V69Wqbu4MJCCzoVPbt26fHH39cgwcP1ssvv6ypU6fqyJEjdrcF4Cb96le/Un5+vjZv3uz1lNvExER98MEHNnYGU3BKCMb73//9X7322msqLCzUxYsXNXv2bDU3N6u4uFijR4+2uz0At8CJEyc0ceJEn/HQ0FB99dVXHd8QjMMMC4w2bdo0jR49WsePH9fLL7+szz77TC+//LLdbQG4xQYPHqxPPvnEZ/zQoUOKiYmxoSOYhhkWGG3Pnj1auHChnnrqKX3/+9+3ux0A35F/+Zd/0aJFi1RYWCiHw6HPPvtM5eXlWrx4sf793//d7vZgAAILjHbw4EEVFhYqPj5eo0aNUmpqqlJSUuxuC8At9m//9m+qr6/XPffco8uXL2vixIkKDg7W4sWL9Ytf/MLu9mAAnsOCTuHSpUvavn27CgsLdfjwYbndbq1fv15z585V79697W4PwC1y6dIlHT9+XC0tLRo9erTuuOMOu1uCIQgs6HROnDihgoICvf766/rqq69077336q233rK7LQA3Ye7cufrNb37j8wvIxYsX9fTTT6uwsNCmzmAKAgs6LbfbrbfffluFhYUEFqCTCwgIkMvl0qBBg7zGa2trFRERoStXrtjUGUzBNSzotAICAjRjxgzNmDHD7lYAtFNDQ4Msy5JlWTp//rxCQkI869xut0pKSnxCDLonAgsAwDZ9+vSRw+GQw+H
QD37wA5/1DodDzz33nA2dwTScEgIA2ObAgQOyLEs//vGPVVxcrH79+nnWBQUFKSoqSkOGDLGxQ5iCwAIAsN3p06c1bNgwORwOu1uBoQgsAADbfPjhhzdUN2bMmO+4E5iOwAIAsE2PHj3kcDh0vR9FDodDbre7A7uCibjoFgBgm5MnT9rdAjoJZlgAAIDxmGEBABjh8uXL+vDDD3Xu3Dm1tLR4rZs+fbpNXcEUBBYAgO127dqlOXPmqLa21mcd17BAknrY3QAAAL/4xS/00EMPyeVyqaWlxWshrEDiGhYAgAFCQ0NVWVmpESNG2N0KDMUMCwDAdrNmzdL+/fvtbgMGY4YFAGC7S5cu6aGHHtLAgQN15513KjAw0Gv9woULbeoMpiCwAABst2XLFqWlpem2225T//79vR7R73A49Omnn9rYHUxAYAEA2C4iIkILFy7U0qVL1aMHVyvAF/8qAAC2a2pqUkpKCmEFbeJfBgDAdj/72c9UVFRkdxswGA+OAwDYzu12a82aNdq9e7fGjBnjc9Ht+vXrbeoMpuAaFgCA7e6555421zkcDu3bt68Du4GJCCwAAMB4XMMCADDGJ598ot27d+vrr7+WJPE7Na4hsAAAbFdXV6ef/OQn+sEPfqBp06bJ5XJJkubPn69f/vKXNncHExBYAAC2y8jIUGBgoKqrq9WrVy/PeEpKinbt2mVjZzAFdwkBAGy3Z88e7d69W9/73ve8xr///e/r9OnTNnUFkzDDAgCw3cWLF71mVq6pra1VcHCwDR3BNAQWAIDtJk6cqK1bt3peOxwOtbS0aO3atde95RndB7c1AwBsd/z4cU2aNElOp1P79u3T9OnT9dFHH+mLL77Qe++9pxEjRtjdImxGYAEAGKGmpkZ5eXmqqKhQS0uLxo0bpwULFmjw4MF2twYDEFgAALarrq5WZGSkHA5Hq+uGDRtmQ1cwCYEFAGC7gIAAuVwuDRo0yGu8rq5OgwYNktvttqkzmIKLbgEAtrMsq9XZlQsXLigkJMSGjmAansMCALBNZmampKt3Ba1cudLr1ma32633339fd911l03dwSQEFgCAbSorKyVdnWH529/+pqCgIM+6oKAgjR07VosXL7arPRiEa1gAALZ74okntGHDBvXu3dvuVmAoAgsAwDY//elPb6hux44d33EnMB2nhAAAtgkLC7O7BXQSzLAAAADjcVszAAAwHoEFAAAYj8ACAACMR2ABAADGI7AAuGn79++Xw+HQV199Zcx7DR8+XDk5Od95PwA6BoEFwA0rKytTQECA7rvvPtt6SExMlMvl8twO+9prr6lPnz629QOgYxBYANywwsJCPf300zp06JCqq6s7/P2bm5sVFBSkiIiIVr8oD0DXRWABcEMuXryoP/zhD3rqqaf0T//0T3rttdeuW79582ZFRkaqV69e+ud//metX7/eZyYkLy9PI0aMUFBQkEaOHKnXX3/da73D4VB+fr4efPBB3X777frVr37ldUpo//79euKJJ1RfXy+HwyGHw6Fnn33Ws/2lS5c0d+5c9e7dW8OGDdOmTZs8606dOiWHw6E//OEPSkpK0m233aZ/+Id/0Mcff6y//vWvio+P1x133KH77rtPn3/++c3+9QG4WRYA3ICCggIrPj7esizLevvtt63hw4dbLS0tlmVZ1rvvvmtJsr788kvLsizr0KFDVo8ePay1a9daJ06csDZu3Gj169fPCgsL8+xvx44dVmBgoLVx40brxIkT1ksvvWQFBARY+/bt89RIsgYNGmQVFBRY//M//2OdOnXK670aGxutnJwcKzQ01HK5XJbL5bLOnz9vWZZlRUVFWf369bM2btxo/fd//7eVnZ1t9ejRw6qqqrIsy7JOnjxpSbJGjRpl7dq1yzp+/Lj1j//4j9a4ceOsSZMmWYcOHbI++OAD6+/+7u+stLS0DvgbBnA9BBYANyQxMdHKycmxLMuympubrQEDBlilpaWWZfkGlpSUFOv+++/
32v6xxx7zCiyJiYnWk08+6VXz0EMPWdOmTfO8lmSlp6d71XzzvV599VWv/V4TFRVlPf74457XLS0t1qBBg6y8vDzLsv5/YNmyZYunZtu2bZYk6y9/+YtnLDs72xo5cuT1/moAdABOCQH4VidOnNDhw4f18MMPS5J69uyplJQUFRYWtln/ox/9yGvsm6+rqqo0YcIEr7EJEyaoqqrKayw+Pr7dfY8ZM8bzZ4fDoYiICJ07d67NmvDwcEnSnXfe6TX2zW0AdDy+/BDAtyooKNCVK1c0dOhQz5hlWQoMDNSXX37pU29Zls9FsVYrX1vWWs03x26//fZ29x0YGOjzfi0tLW3WXHvvb459cxsAHY8ZFgDXdeXKFW3dulUvvfSSjh496lmOHTumqKgo/f73v/fZZtSoUTp8+LDX2JEjR7xex8bG6tChQ15jZWVlio2N9au/oKAgud1uv7YB0PkwwwLguv7zP/9TX375pebNm+d59sk1s2bNUkFBgf7jP/7Da/zpp5/WxIkTtX79ej3wwAPat2+f3nnnHa/Zk3/913/V7NmzNW7cOP3kJz/R22+/rR07dmjv3r1+9Td8+HBduHBBf/nLXzR27Fj16tVLvXr1av8BAzASMywArqugoECTJ0/2CSuSNHPmTB09elQffPCB1/iECROUn5+v9evXa+zYsdq1a5cyMjIUEhLiqZkxY4Z+85vfaO3atfr7v/97/fa3v9Wrr76qSZMm+dVfYmKi0tLSlJKSooEDB2rNmjXtOk4AZnNYrZ1YBoBb7Mknn9R//dd/6eDBg3a3AqAT4pQQgO/EunXrdO+99+r222/XO++8o9/97nfKzc21uy0AnRQzLAC+E7Nnz9b+/ft1/vx5xcTE6Omnn1ZaWprdbQHopAgsAADAeFx0CwAAjEdgAQAAxiOwAAAA4xFYAACA8QgsAADAeAQWAABgPAILAAAwHoEFAAAY7/8BsXXWBXROp/wAAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -534,13 +533,6 @@ "source": [ "results.groupby('Algorithm').ndcg.mean().plot.bar()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/datasets.rst b/docs/datasets.rst deleted file mode 100644 index c851faa49..000000000 --- a/docs/datasets.rst +++ /dev/null @@ -1,79 +0,0 @@ -Loading Data -============ - -LensKit can work with any data in a :py:class:`pandas.DataFrame` with the expected -columns. LensKit algorithms expect a ``ratings`` frame to contain the following -columns (in any order): - -* ``user``, containing user identifiers. No requirements are placed on user IDs — - if an algorithm requires something specific, such as contiguous 0-based identifiers - for indexing into an array — it will use a :py:class:`pandas.Index` to map them. -* ``item``, containing item identifiers. The same comments apply as for ``user``. -* ``rating``, containing user ratings (if available). Implicit-feedback code will - not require ratings. - -‘Rating’ data can contain other columns as well, and is a catch-all for any user-item -interaction data. Algorithms will document any non-standard columns they can make -use of. - -:py:meth:`lenskit.algorithms.Recommender.fit` can also accept additional data objects -as keyword arguments, and algorithms that wrap other algorithms will pass this data -through unchanged. Algorithms ignore extra data objects they receive. This allows -you to build algorithms that train on data besides user-item interactions, such as -user metadata or item content. - -Data Loaders ------------- - -.. module:: lenskit.datasets - -The :py:mod:`lenskit.datasets` module provides utilities for reading a variety -of commonly-used LensKit data sets. It does not package or automatically -download them, but loads them from a local directory where you have unpacked -the data set. 
Each data set class or function takes a ``path`` parameter -specifying the location of the data set. - -The normal mode of operation for these utilities is to provide a class for the -data set; this class then exposes the data set's data as attributes. These -attributes are cached internally, so e.g. accessing :py:attr:`MovieLens.ratings` -twice will only load the data file once. - -These data files have normalized column names to fit with LensKit's general -conventions. These are the following: - -- User ID columns are called ``user``. -- Item ID columns are called ``item``. -- Rating columns are called ``rating``. -- Timestamp columns are called ``timestamp``. - -Other column names are unchanged. Data tables that provide information about -specific things, such as a table of movie titles, are indexed by the relevant -ID (e.g. :py:attr:`MovieLens.ratings` is indexed by ``item``). - -Data sets supported: - -* :class:`MovieLens` -* :class:`ML100K` -* :class:`ML1M` -* :class:`ML10M` - -MovieLens Data Sets -------------------- - -The GroupLens research group provides several data sets extracted from the -MovieLens service :cite:p:`movielens`. -These can be downloaded from https://grouplens.org/datasets/movielens/. - -.. autoclass:: MovieLens - :members: - -.. autoclass:: ML100K - :members: - -.. autoclass:: ML1M - :inherited-members: - :members: - -.. autoclass:: ML10M - :inherited-members: - :members: diff --git a/docs/index.rst b/docs/index.rst index 8bf792201..cc85cb3ce 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -39,7 +39,6 @@ Resources :caption: Running Experiments data - datasets crossfold batch evaluation/index diff --git a/docs/releases/2024.rst b/docs/releases/2024.rst index f3ec639b2..c99c8626b 100644 --- a/docs/releases/2024.rst +++ b/docs/releases/2024.rst @@ -31,6 +31,14 @@ Significant Changes * :py:class:`~lenskit.algorithms.als.ImplicitMF` * :py:class:`~lenskit.algorithms.als.BiasedMF` +* :class:`~lenskit.data.Dataset`. 
LensKit now provides an abstraction for + training data instead of working with Pandas data frames directly, that + allows components to reduce code duplication and recomputation, access data + in multiple formats (Pandas, NumPy, and PyTorch), and provided standardized + structures like mappings of user or item IDs to array indices. This also + supersedes the old bespoke dataset loading support, with functions like + :func:`~lenskit.data.load_movielens` to load standard datasets. + * Many LensKit components (batch running, model training, etc.) now report progress with :py:mod:`progress_api`, and can be connected to TQDM or Enlighten. diff --git a/lenskit-funksvd/tests/test_funksvd.py b/lenskit-funksvd/tests/test_funksvd.py index 0c80dfd59..bfa87121d 100644 --- a/lenskit-funksvd/tests/test_funksvd.py +++ b/lenskit-funksvd/tests/test_funksvd.py @@ -13,7 +13,7 @@ from pytest import approx, mark -from lenskit.data.dataset import from_interactions_df +from lenskit.data.dataset import Dataset, from_interactions_df import lenskit.funksvd as svd import lenskit.util.test as lktu @@ -139,16 +139,16 @@ def test_fsvd_predict_bad_user(): @lktu.wantjit @mark.slow -def test_fsvd_save_load(): - ratings = lktu.ml_test.ratings - +def test_fsvd_save_load(ml_ds: Dataset): original = svd.FunkSVD(20, iterations=20) - original.fit(from_interactions_df(ratings)) + original.fit(ml_ds) assert original.bias is not None - assert original.bias.mean_ == approx(ratings.rating.mean()) - assert original.item_features_.shape == (ratings.item.nunique(), 20) - assert original.user_features_.shape == (ratings.user.nunique(), 20) + assert original.bias.mean_ == approx( + ml_ds.interaction_matrix("scipy", field="rating").data.mean() + ) + assert original.item_features_.shape == (ml_ds.item_count, 20) + assert original.user_features_.shape == (ml_ds.user_count, 20) mod = pickle.dumps(original) _log.info("serialized to %d bytes", len(mod)) @@ -165,8 +165,8 @@ def test_fsvd_save_load(): @lktu.wantjit 
@mark.slow -def test_fsvd_train_binary(): - ratings = lktu.ml_test.ratings.drop(columns=["rating", "timestamp"]) +def test_fsvd_train_binary(ml_ratings: pd.DataFrame): + ratings = ml_ratings.drop(columns=["rating", "timestamp"]) original = svd.FunkSVD(20, iterations=20, bias=False) original.fit(from_interactions_df(ratings)) @@ -178,10 +178,10 @@ def test_fsvd_train_binary(): @lktu.wantjit @mark.slow -def test_fsvd_known_preds(): +def test_fsvd_known_preds(ml_ds: Dataset): algo = svd.FunkSVD(15, iterations=125, lrate=0.001) _log.info("training %s on ml data", algo) - algo.fit(from_interactions_df(lktu.ml_test.ratings)) + algo.fit(ml_ds) dir = Path(__file__).parent pred_file = dir / "funksvd-preds.csv" @@ -207,15 +207,12 @@ def test_fsvd_known_preds(): @lktu.wantjit @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") -def test_fsvd_batch_accuracy(): +def test_fsvd_batch_accuracy(ml_100k: pd.DataFrame): import lenskit.crossfold as xf import lenskit.metrics.predict as pm from lenskit import batch from lenskit.algorithms import basic, bias - ratings = lktu.ml100k.ratings - svd_algo = svd.FunkSVD(25, 125, damping=10) algo = basic.Fallback(svd_algo, bias.Bias(damping=10)) @@ -225,7 +222,7 @@ def eval(train, test): _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) - folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) + folds = xf.partition_users(ml_100k, 5, xf.SampleFrac(0.2)) preds = pd.concat(eval(train, test) for (train, test) in folds) mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.74, abs=0.025) diff --git a/lenskit-hpf/tests/test_hpf.py b/lenskit-hpf/tests/test_hpf.py index aa615f647..26d576804 100644 --- a/lenskit-hpf/tests/test_hpf.py +++ b/lenskit-hpf/tests/test_hpf.py @@ -20,10 +20,9 @@ @mark.slow -def test_hpf_train_large(tmp_path): +def test_hpf_train_large(tmp_path, ml_ratings): algo = hpf.HPF(20) - ratings = lktu.ml_test.ratings - ratings = 
ratings.assign(rating=ratings.rating + 0.5) + ratings = ml_ratings.assign(rating=ml_ratings.rating + 0.5) ds = from_interactions_df(ratings) algo.fit(ds) @@ -51,9 +50,9 @@ def test_hpf_train_large(tmp_path): @mark.slow -def test_hpf_train_binary(tmp_path): +def test_hpf_train_binary(tmp_path, ml_ratings): algo = hpf.HPF(20) - ratings = lktu.ml_test.ratings.drop(columns=["timestamp", "rating"]) + ratings = ml_ratings.drop(columns=["timestamp", "rating"]) ds = from_interactions_df(ratings) algo.fit(ds) diff --git a/lenskit-implicit/tests/test_implicit.py b/lenskit-implicit/tests/test_implicit.py index 0a09adf09..15d74017f 100644 --- a/lenskit-implicit/tests/test_implicit.py +++ b/lenskit-implicit/tests/test_implicit.py @@ -19,13 +19,11 @@ @mark.slow -def test_implicit_als_train_rec(): +def test_implicit_als_train_rec(ml_ds): algo = ALS(25) assert algo.factors == 25 - ratings = lktu.ml_test.ratings - ds = from_interactions_df(ratings) - ret = algo.fit(ds) + ret = algo.fit(ml_ds) assert ret is algo recs = algo.recommend(100, n=20) @@ -46,14 +44,11 @@ def test_implicit_als_train_rec(): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason="ML100K not downloaded") @mark.parametrize("n_jobs", [1, None]) -def test_implicit_als_batch_accuracy(n_jobs): +def test_implicit_als_batch_accuracy(ml_100k, n_jobs): import lenskit.crossfold as xf from lenskit import batch, topn - ratings = lktu.ml100k.ratings - algo_t = ALS(25) def eval(train, test): @@ -66,7 +61,7 @@ def eval(train, test): recs = batch.recommend(algo, users, 100, n_jobs=n_jobs) return recs - folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) + folds = list(xf.partition_users(ml_100k, 5, xf.SampleFrac(0.2))) test = pd.concat(f.test for f in folds) recs = pd.concat(eval(train, test) for (train, test) in folds) @@ -81,12 +76,11 @@ def eval(train, test): @mark.slow -def test_implicit_bpr_train_rec(): +def test_implicit_bpr_train_rec(ml_ds): algo = BPR(25, use_gpu=False) assert algo.factors 
== 25 - ratings = lktu.ml_test.ratings - algo.fit(from_interactions_df(ratings)) + algo.fit(ml_ds) recs = algo.recommend(100, n=20) assert len(recs) == 20 diff --git a/lenskit/lenskit/algorithms/basic.py b/lenskit/lenskit/algorithms/basic.py index 4d7bfa0c0..023a7cf36 100644 --- a/lenskit/lenskit/algorithms/basic.py +++ b/lenskit/lenskit/algorithms/basic.py @@ -10,6 +10,7 @@ import logging from collections.abc import Iterable, Sequence +from typing import overload import numpy as np import pandas as pd @@ -112,7 +113,13 @@ class Fallback(Predictor): missing values, and so forth. """ - def __init__(self, algorithms, *others): + algorithms: list[Predictor] + + @overload + def __init__(self, algorithms: Iterable[Predictor]): ... + @overload + def __init__(self, algorithms: Predictor, *others: Predictor): ... + def __init__(self, algorithms: Predictor | Iterable[Predictor], *others): """ Args: algorithms: a list of component algorithms. Each one will be trained. @@ -120,12 +127,11 @@ def __init__(self, algorithms, *others): additional algorithms, in which case ``algorithms`` is taken to be a single algorithm. 
""" - if others: - self.algorithms = [algorithms] + list(others) - elif isinstance(algorithms, Iterable) or isinstance(algorithms, Sequence): - self.algorithms = algorithms + if isinstance(algorithms, Iterable) or isinstance(algorithms, Sequence): + assert not others + self.algorithms = list(algorithms) else: - self.algorithms = [algorithms] + self.algorithms = [algorithms] + list(others) @override def fit(self, data: Dataset, **kwargs): @@ -172,7 +178,7 @@ def fit(self, data: Dataset, **kwarsg): @override def candidates(self, user, ratings=None): - return np.array([], dtype=self.dtype_) + return np.array([], dtype=self.dtype_) # type: ignore class UnratedItemCandidateSelector(CandidateSelector): diff --git a/lenskit/lenskit/algorithms/bias.py b/lenskit/lenskit/algorithms/bias.py index 9ffd3f90a..0e934922b 100644 --- a/lenskit/lenskit/algorithms/bias.py +++ b/lenskit/lenskit/algorithms/bias.py @@ -92,7 +92,7 @@ def fit(self, data: Dataset, **kwargs): """ _logger.info("building bias model for %d ratings", data.interaction_count) ratings = data.interaction_matrix("scipy", layout="coo", field="rating") - nrows, ncols = ratings.shape + nrows, ncols = ratings.shape # type: ignore self.mean_ = float(np.mean(ratings.data)) _logger.info("global mean: %.3f", self.mean_) @@ -242,7 +242,7 @@ def inverse_transform_user(self, user, ratings, user_bias=None): def fit_transform(self, data: Dataset, **kwargs) -> pd.DataFrame: """ - Fit with ratings and return the training data transformed. + Fit with ratings and return the training data matrix transformed. """ # FIXME: make this more efficient, don't rename things. 
self.fit(data) diff --git a/lenskit/lenskit/batch/_predict.py b/lenskit/lenskit/batch/_predict.py index 5fe352f90..4cc64df1c 100644 --- a/lenskit/lenskit/batch/_predict.py +++ b/lenskit/lenskit/batch/_predict.py @@ -42,9 +42,9 @@ def predict(algo, pairs, *, n_jobs=None, **kwargs): >>> from lenskit.algorithms.bias import Bias >>> from lenskit.metrics.predict import rmse - >>> from lenskit import datasets >>> from lenskit.data import from_interactions_df - >>> ratings = datasets.MovieLens('data/ml-latest-small').ratings + >>> from lenskit.data.movielens import load_movielens_df + >>> ratings = load_movielens_df('data/ml-latest-small') >>> bias = Bias() >>> bias.fit(from_interactions_df(ratings[:-1000])) diff --git a/lenskit/lenskit/crossfold.py b/lenskit/lenskit/crossfold.py index e13c0e3dd..817d8477d 100644 --- a/lenskit/lenskit/crossfold.py +++ b/lenskit/lenskit/crossfold.py @@ -67,8 +67,8 @@ def sample_rows(data, partitions, size, disjoint=True, *, rng_spec=None): We can loop over a sequence of train-test pairs:: - >>> from lenskit import datasets - >>> ratings = datasets.MovieLens('data/ml-latest-small').ratings + >>> from lenskit.data.movielens import load_movielens_df + >>> ratings = load_movielens_df('data/ml-latest-small') >>> for train, test in sample_rows(ratings, 5, 1000): ... 
print(len(test)) 1000 diff --git a/lenskit/lenskit/data/fetch.py b/lenskit/lenskit/data/fetch.py index 2bbed6b17..531f7abc9 100644 --- a/lenskit/lenskit/data/fetch.py +++ b/lenskit/lenskit/data/fetch.py @@ -9,20 +9,10 @@ import sys from pathlib import Path from urllib.request import urlopen -from zipfile import ZipFile _log = logging.getLogger("lenskit.data.fetch") ML_LOC = "http://files.grouplens.org/datasets/movielens/" -ML_DATASETS = { - "ml-100k": "ml-100k/u.data", - "ml-1m": "ml-1m/ratings.dat", - "ml-10m": "ml-10M100K/ratings.dat", - "ml-20m": "ml-20m/ratings.csv", - "ml-25m": "ml-25m/ratings.csv", - "ml-latest": "ml-latest/ratings.csv", - "ml-latest-small": "ml-latest-small/ratings.csv", -} def fetch_ml(name: str, base_dir: Path): @@ -41,15 +31,14 @@ def fetch_ml(name: str, base_dir: Path): name: The name of the dataset. base_dir: - The base directory into which data should be extracted. + The base directory into which data should be downloaded. """ zipname = f"{name}.zip" zipfile = base_dir / zipname zipurl = ML_LOC + zipname - test_file = base_dir / ML_DATASETS[name] - if test_file.exists(): - _log.info("%s already exists", test_file) + if zipfile.exists(): + _log.info("%s already exists", zipfile) return _log.info("downloading data set %s", name) @@ -61,10 +50,6 @@ def fetch_ml(name: str, base_dir: Path): zf.write(block) block = res.read(8 * 1024 * 1024) - _log.info("unpacking data set") - with ZipFile(zipfile, "r") as zf: - zf.extractall(base_dir) - def _fetch_main(): logging.basicConfig(stream=sys.stderr, level=logging.INFO) diff --git a/lenskit/lenskit/data/movielens.py b/lenskit/lenskit/data/movielens.py index f62a0c065..e5f13861c 100644 --- a/lenskit/lenskit/data/movielens.py +++ b/lenskit/lenskit/data/movielens.py @@ -10,6 +10,8 @@ import logging import re +from dataclasses import dataclass +from enum import Enum from pathlib import Path from typing import TypeAlias from zipfile import ZipFile @@ -24,6 +26,37 @@ LOC: TypeAlias = Path | 
tuple[ZipFile, str] +class MLVersion(Enum): + ML_100K = "ml-100k" + ML_1M = "ml-1m" + ML_10M = "ml-10m" + ML_20M = "ml-20m" + ML_25M = "ml-25m" + ML_LATEST_SMALL = "ml-latest-small" + ML_LATEST = "ml-latest" + ML_MODERN = "ml-modern" + + +@dataclass +class MLData: + version: MLVersion + source: Path | ZipFile + prefix: str = "" + + def __enter__(self): + return self + + def __exit__(self, *args): + if isinstance(self.source, ZipFile): + self.source.close() + + def open_file(self, name: str): + if isinstance(self.source, Path): + return open(self.source / (self.prefix + name), "r") + else: + return self.source.open(self.prefix + name) + + def load_movielens(path: str | Path) -> Dataset: """ Load a MovieLens dataset. The appropriate MovieLens format is detected @@ -37,10 +70,42 @@ def load_movielens(path: str | Path) -> Dataset: Returns: The dataset. """ + df = load_movielens_df(path) + return from_interactions_df(df) + + +def load_movielens_df(path: str | Path) -> pd.DataFrame: + """ + Load the ratings from a MovieLens dataset as a raw data frame. The + appropriate MovieLens format is detected based on the file contents. + + Args: + path: + The path to the dataset, either as an unpacked directory or a zip + file. + + Returns: + The ratings, with columns ``user``, ``item``, ``rating``, and + ``timestamp``. 
+ """ + with _ml_detect_and_open(path) as ml: + match ml.version: + case MLVersion.ML_100K: + return _load_ml_100k(ml) + case MLVersion.ML_1M | MLVersion.ML_10M: + return _load_ml_million(ml) + case _: + return _load_ml_modern(ml) + + +def _ml_detect_and_open(path: str | Path) -> MLData: loc = Path(path) + ds: MLVersion + if loc.is_file() and loc.suffix == ".zip": _log.debug("opening zip file at %s", loc) - with ZipFile(loc, "r") as zf: + zf = ZipFile(loc, "r") + try: infos = zf.infolist() first = infos[0] if not first.is_dir: @@ -53,88 +118,76 @@ def load_movielens(path: str | Path) -> Dataset: _log.error("%s: invalid directory name %s", loc, first.filename) raise RuntimeError("invalid ML zip file") - ds = dsm.group(1).lower() + ds = MLVersion(dsm.group(1).lower()) _log.debug("%s: found ML data set %s", loc, ds) - return _load_for_type((zf, first.filename), ds) + return MLData(ds, zf, first.filename) + except Exception as e: # pragma nocover + zf.close() + raise e else: _log.debug("loading from directory %s", loc) dsm = re.match(r"^(ml-\d+[MmKk])", loc.name) if dsm: - ds = dsm.group(1) + ds = MLVersion(dsm.group(1)) _log.debug("%s: inferred data set %s from dir name", loc, ds) else: _log.debug("%s: checking contents for data type", loc) if (loc / "u.data").exists(): _log.debug("%s: found u.data, interpreting as 100K") - ds = "ml-100k" + ds = MLVersion.ML_100K elif (loc / "ratings.dat").exists(): if (loc / "tags.dat").exists(): _log.debug("%s: found ratings.dat and tags.dat, interpreting as 10M", loc) - ds = "ml-10m" + ds = MLVersion.ML_10M else: _log.debug("%s: found ratings.dat but no tags, interpreting as 1M", loc) - ds = "ml-1m" + ds = MLVersion.ML_1M elif (loc / "ratings.csv").exists(): _log.debug("%s: found ratings.csv, interpreting as modern (20M and later)", loc) - ds = "ml-modern" + ds = MLVersion.ML_MODERN else: _log.error("%s: could not detect MovieLens data", loc) raise RuntimeError("invalid ML directory") - return _load_for_type(loc, ds) - - -def 
_load_for_type(loc: LOC, ds: str) -> Dataset: - "Load the specified MovieLens data set" - match ds: - case "ml-100k": - return _load_ml_100k(loc) - case "ml-1m" | "ml-10m": - return _load_ml_million(loc) - case _: - return _load_ml_modern(loc) + return MLData(ds, loc) -def _load_ml_100k(loc: LOC) -> Dataset: - with _open_file(loc, "u.data") as data: - rates_df = pd.read_csv( +def _load_ml_100k(ml: MLData) -> pd.DataFrame: + with ml.open_file("u.data") as data: + return pd.read_csv( data, sep="\t", header=None, - names=["user_id", "item_id", "rating", "timestamp"], + names=["user", "item", "rating", "timestamp"], dtype={ - "user_id": np.int32, - "item_id": np.int32, + "user": np.int32, + "item": np.int32, "rating": np.float32, "timestamp": np.int32, }, ) - return from_interactions_df(rates_df) - -def _load_ml_million(loc: LOC) -> Dataset: - with _open_file(loc, "ratings.dat") as data: - rates_df = pd.read_csv( +def _load_ml_million(ml: MLData) -> pd.DataFrame: + with ml.open_file("ratings.dat") as data: + return pd.read_csv( data, sep=":", header=None, - names=["user_id", "_ui", "item_id", "_ir", "rating", "_rt", "timestamp"], + names=["user", "_ui", "item", "_ir", "rating", "_rt", "timestamp"], usecols=[0, 2, 4, 6], dtype={ - "user_id": np.int32, - "item_id": np.int32, + "user": np.int32, + "item": np.int32, "rating": np.float32, "timestamp": np.int32, }, ) - return from_interactions_df(rates_df) - -def _load_ml_modern(loc: LOC) -> Dataset: - with _open_file(loc, "ratings.csv") as data: - rates_df = pd.read_csv( +def _load_ml_modern(ml: MLData) -> pd.DataFrame: + with ml.open_file("ratings.csv") as data: + return pd.read_csv( data, dtype={ "userId": np.int32, @@ -142,14 +195,4 @@ def _load_ml_modern(loc: LOC) -> Dataset: "rating": np.float32, "timestamp": np.int64, }, - ) - - return from_interactions_df(rates_df, item_col="movieId") - - -def _open_file(loc: LOC, name: str): - if isinstance(loc, Path): - return open(loc / name, "r") - else: - zf, root = loc - return 
zf.open(root + name) + ).rename(columns={"userId": "user", "movieId": "item"}) diff --git a/lenskit/lenskit/datasets/__init__.py b/lenskit/lenskit/datasets/__init__.py deleted file mode 100644 index 8171dfcf8..000000000 --- a/lenskit/lenskit/datasets/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# This file is part of LensKit. -# Copyright (C) 2018-2023 Boise State University -# Copyright (C) 2023-2024 Drexel University -# Licensed under the MIT license, see LICENSE.md for details. -# SPDX-License-Identifier: MIT - -from .movielens import * # noqa: F403 diff --git a/lenskit/lenskit/datasets/movielens.py b/lenskit/lenskit/datasets/movielens.py deleted file mode 100644 index 4155cd5f2..000000000 --- a/lenskit/lenskit/datasets/movielens.py +++ /dev/null @@ -1,444 +0,0 @@ -# This file is part of LensKit. -# Copyright (C) 2018-2023 Boise State University -# Copyright (C) 2023-2024 Drexel University -# Licensed under the MIT license, see LICENSE.md for details. -# SPDX-License-Identifier: MIT - -""" -Code to import commonly-used RecSys data sets into LensKit-compatible data frames. -""" - -import logging -import os.path -from pathlib import Path - -import numpy as np -import pandas as pd - -from lenskit.util import cached - -_log = logging.getLogger(__name__) - -__doctest_skip__ = [] -if not os.path.exists("data/ml-100k"): - __doctest_skip__.append("ML100K.*") -if not os.path.exists("data/ml-20m"): - __doctest_skip__.append("MovieLens.tag_genome") -if not os.path.exists("data/ml-1m.*"): - __doctest_skip__.append("ML1M.*") -if not os.path.exists("data/ml-10M100K"): - __doctest_skip__.append("ML10M.*") - __doctest_skip__.append("MLM.*") - -__all__ = ["MovieLens", "ML100K", "ML1M", "ML10M"] - - -class MovieLens: - """ - Code for reading current MovieLens data sets, including ML-20M, ML-Latest, and - ML-Latest-Small. - - Parameters: - path(str or pathlib.Path): Path to the directory containing the data set. 
- """ - - def __init__(self, path="data/ml-20m"): - self.path = Path(path) - - @cached - def ratings(self): - """ - The rating table. - - >>> mlsmall = MovieLens('data/ml-latest-small') - >>> mlsmall.ratings - user item rating timestamp - 0 1 31 2.5 1260759144 - 1 1 1029 3.0 1260759179 - 2 1 1061 3.0 1260759182 - 3 1 1129 2.0 1260759185 - 4 1 1172 4.0 1260759205 - ... - [100004 rows x 4 columns] - """ - - fn = self.path / "ratings.csv" - ratings = pd.read_csv( - fn, - dtype={ - "movieId": np.int32, - "userId": np.int32, - "rating": np.float64, - "timestamp": np.int32, - }, - ) - ratings.rename(columns={"userId": "user", "movieId": "item"}, inplace=True) - _log.debug("loaded %s, takes %d bytes", fn, ratings.memory_usage().sum()) - return ratings - - @cached - def movies(self): - """ - The movie table, with titles and genres. It is indexed by movie ID. - - >>> mlsmall = MovieLens('data/ml-latest-small') - >>> mlsmall.movies - title genres - item - 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy - 2 Jumanji (1995) Adventure|Children|Fantasy - 3 Grumpier Old Men (1995) Comedy|Romance - 4 Waiting to Exhale (1995) Comedy|Drama|Romance - 5 Father of the Bride Part II (1995) Comedy - ... - [9125 rows x 2 columns] - """ # noqa: E501 - - fn = self.path / "movies.csv" - movies = pd.read_csv( - fn, - dtype={ - "movieId": np.int32, - "title": object, - "genres": object, - }, - ) - movies.rename(columns={"movieId": "item"}, inplace=True) - movies.set_index("item", inplace=True) - _log.debug("loaded %s, takes %d bytes", fn, movies.memory_usage().sum()) - return movies - - @cached - def links(self): - """ - The movie link table, connecting movie IDs to external identifiers. It is indexed - by movie ID. - - >>> mlsmall = MovieLens('data/ml-latest-small') - >>> mlsmall.links - imdbId tmdbId - item - 1 114709 862 - 2 113497 8844 - 3 113228 15602 - 4 114885 31357 - 5 113041 11862 - ... 
- [9125 rows x 2 columns] - """ - - fn = self.path / "links.csv" - links = pd.read_csv( - fn, dtype={"movieId": np.int32, "imdbId": np.int64, "tmdbId": pd.Int64Dtype()} - ) - links.rename(columns={"movieId": "item"}, inplace=True) - links.set_index("item", inplace=True) - _log.debug("loaded %s, takes %d bytes", fn, links.memory_usage().sum()) - return links - - @cached - def tags(self): - """ - The tag application table, recording user-supplied tags for movies. - - - >>> mlsmall = MovieLens('data/ml-latest-small') - >>> mlsmall.tags - user ... timestamp - 0 15 ... 1138537770 - 1 15 ... 1193435061 - 2 15 ... 1170560997 - 3 15 ... 1170626366 - 4 15 ... 1141391765 - ... - [1296 rows x 4 columns] - """ - - fn = self.path / "tags.csv" - tags = pd.read_csv( - fn, - dtype={ - "movieId": np.int32, - "userId": np.int32, - "tag": object, - "timestamp": np.int32, - }, - ) - tags.rename(columns={"userId": "user", "movieId": "item"}, inplace=True) - _log.debug("loaded %s, takes %d bytes", fn, tags.memory_usage().sum()) - return tags - - @cached - def tag_genome(self): - """ - The tag genome table, recording inferred item-tag relevance scores. This gets returned - as a wide Pandas data frame, with rows indexed by item ID. - - >>> ml20m = MovieLens('data/ml-20m') - >>> ml20m.tag_genome - tag 007 007 (series) 18th century ... wwii zombie zombies - item ... - 1 0.02500 0.02500 0.05775 ... 0.03625 0.07775 0.02300 - 2 0.03975 0.04375 0.03775 ... 0.01475 0.09025 0.01875 - 3 0.04350 0.05475 0.02800 ... 0.01950 0.09700 0.01850 - 4 0.03725 0.03950 0.03675 ... 0.01525 0.06450 0.01300 - 5 0.04200 0.05275 0.05925 ... 0.01675 0.10750 0.01825 - ... 
- [10381 rows x 1128 columns] - """ - - fn = self.path / "genome-scores.csv" - tags = pd.read_csv(self.path / "genome-tags.csv") - tags = tags.set_index("tagId") - tags = tags["tag"].astype("category") - genome = pd.read_csv( - fn, - dtype={ - "movieId": np.int32, - "tagId": np.int32, - "relevance": np.float64, - }, - ) - genome.rename(columns={"userId": "user", "movieId": "item"}, inplace=True) - genome = genome.join(tags, on="tagId") - genome = genome.pivot(index="item", columns="tag", values="relevance") - _log.debug("loaded %s, takes %d bytes", fn, genome.memory_usage().sum()) - return genome - - -class ML100K: - """ - The MovieLens 100K data set. This older data set is in a different format from - the more current data sets loaded by :class:`MovieLens`. - """ - - def __init__(self, path="data/ml-100k"): - self.path = Path(path) - - @property - def available(self): - "Query whether the data set exists." - return (self.path / "u.data").exists() - - @cached - def ratings(self): - """ - Return the rating data (from ``u.data``). - - >>> ml = ML100K() - >>> ml.ratings - user item rating timestamp - 0 196 242 3.0 881250949 - 1 186 302 3.0 891717742 - 2 22 377 1.0 878887116 - 3 244 51 2.0 880606923 - 4 166 346 1.0 886397596 - ... - [100000 rows x 4 columns] - """ - fn = self.path / "u.data" - ratings = pd.read_csv( - fn, - sep="\t", - header=None, - names=["user", "item", "rating", "timestamp"], - dtype={"user": np.int32, "item": np.int32, "rating": np.float32, "timestamp": np.int32}, - ) - _log.debug("loaded %s", fn) - return ratings - - @cached - def users(self): - """ - Return the user data (from ``u.user``). - - >>> ml = ML100K() - >>> ml.users - age gender occupation zip - user - 1 24 M technician 85711 - 2 53 F other 94043 - 3 23 M writer 32067 - 4 24 M technician 43537 - 5 33 F other 15213 - ... 
- [943 rows x 4 columns] - """ - fn = self.path / "u.user" - users = pd.read_csv( - fn, - sep="|", - header=None, - names=["user", "age", "gender", "occupation", "zip"], - dtype={"user": np.int32, "age": np.int8, "occupation": "category"}, - ) - _log.debug("loaded %s", fn) - return users.set_index("user") - - @cached - def movies(self): - """ - Return the user data (from ``u.user``). - - >>> ml = ML100K() - >>> ml.movies - title release ... War Western - item ... - 1 Toy Story (1995) 01-Jan-1995 ... 0 0 - 2 GoldenEye (1995) 01-Jan-1995 ... 0 0 - 3 Four Rooms (1995) 01-Jan-1995 ... 0 0 - 4 Get Shorty (1995) 01-Jan-1995 ... 0 0 - 5 Copycat (1995) 01-Jan-1995 ... 0 0 - ... - [1682 rows x 23 columns] - """ - fn = self.path / "u.item" - genres = [ - "unknown", - "Action", - "Adventure", - "Animation", - "Children's", - "Comedy", - "Crime", - "Documentary", - "Drama", - "Fantasy", - "Film-Noir", - "Horror", - "Musical", - "Mystery", - "Romance", - "Sci-Fi", - "Thriller", - "War", - "Western", - ] - items = pd.read_csv( - fn, - sep="|", - header=None, - encoding="latin1", - names=["item", "title", "release", "vidrelease", "imdb"] + genres, - ) - _log.debug("loaded %s", fn) - return items.set_index("item") - - -class MLM: - """ - Base classes for ML1M and ML10M. - """ - - def __init__(self, path): - self.path = Path(path) - - @cached - def ratings(self): - """ - Return the rating data (from ``ratings.dat``). - - >>> ml = ML10M() - >>> ml.ratings - user item rating timestamp - 0 1 122 5.0 838985046 - 1 1 185 5.0 838983525 - 2 1 231 5.0 838983392 - 3 1 292 5.0 838983421 - 4 1 316 5.0 838983392 - ... 
- [10000054 rows x 4 columns] - """ - fn = self.path / "ratings.dat" - ratings = pd.read_csv( - fn, - sep=":", - header=None, - names=["user", "_ui", "item", "_ir", "rating", "_rt", "timestamp"], - usecols=[0, 2, 4, 6], - dtype={"user": np.int32, "item": np.int32, "rating": np.float32, "timestamp": np.int32}, - ) - _log.debug("loaded %s", fn) - return ratings - - @cached - def movies(self): - """ - Return the movie data (from ``movies.dat``). Indexed by movie ID. - - >>> ml = ML10M() - >>> ml.movies - title genres - item - 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy - 2 Jumanji (1995) Adventure|Children|Fantasy - 3 Grumpier Old Men (1995) Comedy|Romance - 4 Waiting to Exhale (1995) Comedy|Drama|Romance - 5 Father of the Bride Part II (1995) Comedy - ... - [10681 rows x 2 columns] - """ # noqa: E501 - fn = self.path / "movies.dat" - movies = pd.read_csv( - fn, - sep=":", - header=None, - names=["item", "_ir", "title", "_tg", "genres"], - usecols=[0, 2, 4], - dtype={"item": np.int32}, - ) - movies.set_index("item", inplace=True) - _log.debug("loaded %s", fn) - return movies - - -class ML10M(MLM): - """ - MovieLens 10M100K data set. - """ - - def __init__(self, path="data/ml-10M100K"): - super().__init__(path) - - -class ML1M(MLM): - """ - MovieLens 1M data set. - - .. note:: - Some documentation examples use ML-10M100K; that is because this class shares implementation - with the 10M data set. - """ - - def __init__(self, path="data/ml-1m"): - super().__init__(path) - - @cached - def users(self): - """ - Return the movie data (from ``users.dat``). Indexed by user ID. - - >>> ml = ML1M() - >>> ml.users - gender age zip - user - 1 F 1 48067 - 2 M 56 70072 - 3 M 25 55117 - 4 M 45 02460 - 5 M 25 55455 - ... 
- [6040 rows x 3 columns] - """ - fn = self.path / "users.dat" - users = pd.read_csv( - fn, - sep=":", - header=None, - names=["user", "_ug", "gender", "_ga", "age", "_ao", "occupation", "_oz", "zip"], - usecols=[0, 2, 4, 8], - dtype={"user": np.int32, "gender": "category", "age": np.int8, "timestamp": np.int32}, - ) - users.set_index("user", inplace=True) - _log.debug("loaded %s", fn) - return users diff --git a/lenskit/lenskit/util/test.py b/lenskit/lenskit/util/test.py index 3dbe6986b..d153302ce 100644 --- a/lenskit/lenskit/util/test.py +++ b/lenskit/lenskit/util/test.py @@ -26,34 +26,63 @@ from lenskit.algorithms.ranking import PlackettLuce from lenskit.batch import recommend from lenskit.crossfold import simple_test_pair -from lenskit.data.dataset import from_interactions_df +from lenskit.data.dataset import Dataset, LazyDataset, from_interactions_df from lenskit.data.matrix import torch_sparse_from_scipy -from lenskit.datasets import ML100K, MovieLens +from lenskit.data.movielens import load_movielens, load_movielens_df -ml_test = MovieLens("data/ml-latest-small") -ml100k = ML100K("data/ml-100k") +ml_test_dir = here("data/ml-latest-small") +ml_100k_zip = here("data/ml-100k.zip") +ml_test: Dataset = LazyDataset(lambda: load_movielens(ml_test_dir)) -@pytest.fixture(scope="module") + +@pytest.fixture(scope="session") def ml_ratings(): """ - Fixture to load the test MovieLens ratings. + Fixture to load the test MovieLens ratings as a data frame. To use this, + just include it as a parameter in your test:: + + def test_thing_with_data(ml_ratings: pd.DataFrame): + ... + + .. note:: + This is imported in ``conftest.py`` so it is always available in LensKit tests. 
""" - path = here("data/ml-latest-small") - yield pd.read_csv(path / "ratings.csv") + yield load_movielens_df(ml_test_dir) -@pytest.fixture +@pytest.fixture(scope="module") def ml_ds(ml_ratings: pd.DataFrame): - return from_interactions_df(ml_ratings, item_col="movieId") + """ + Fixture to load the MovieLens test dataset. To use this, just include it as + a parameter in your test:: + + def test_thing_with_data(ml_ds: Dataset): + ... + + .. note:: + This is imported in ``conftest.py`` so it is always available in LensKit tests. + """ + yield from_interactions_df(ml_ratings) + + +@pytest.fixture +def ml_100k(): + """ + Fixture to load the MovieLens 100K dataset (currently as a data frame). It skips + the test if the ML100K data is not available. + """ + if not ml_100k_zip.exists(): + pytest.skip("ML100K data not available") + yield load_movielens_df(ml_100k_zip) @pytest.fixture(scope="session") -def demo_recs(): +def demo_recs(ml_ratings: pd.DataFrame): """ A demo set of train, test, and recommendation data. 
""" - train, test = simple_test_pair(ml_test.ratings, f_rates=0.5) + train, test = simple_test_pair(ml_ratings, f_rates=0.5) users = test["user"].unique() algo = PopScore() diff --git a/lenskit/tests/test_als_explicit.py b/lenskit/tests/test_als_explicit.py index e393d3f55..0ef45a28e 100644 --- a/lenskit/tests/test_als_explicit.py +++ b/lenskit/tests/test_als_explicit.py @@ -13,7 +13,8 @@ from pytest import approx, mark -from lenskit.data.dataset import from_interactions_df +from lenskit.data.dataset import Dataset, from_interactions_df +from lenskit.data.movielens import load_movielens_df import lenskit.util.test as lktu from lenskit import batch from lenskit.algorithms import als @@ -116,14 +117,14 @@ def test_als_predict_for_new_users_with_new_ratings(): n_users = 3 n_items = 2 new_u_id = -1 - ratings = lktu.ml_test.ratings np.random.seed(45) - users = np.random.choice(ratings.user.unique(), n_users) - items = np.random.choice(ratings.item.unique(), n_items) + users = np.random.choice(lktu.ml_test.users.ids(), n_users) + items = np.random.choice(lktu.ml_test.items.ids(), n_items) + ratings = lktu.ml_test.interaction_log("pandas", original_ids=True) algo = als.BiasedMF(20, epochs=10) - algo.fit(from_interactions_df(ratings)) + algo.fit(lktu.ml_test) _log.debug("Items: " + str(items)) assert algo.bias is not None assert algo.users_ is not None @@ -133,12 +134,12 @@ def test_als_predict_for_new_users_with_new_ratings(): _log.debug(f"user: {u}") preds = algo.predict_for_user(u, items) - user_data = ratings[ratings.user == u] + user_data = ratings[ratings.user_id == u] _log.debug("user_features from fit: " + str(algo.user_features_[algo.users_.number(u), :])) new_ratings = pd.Series( - user_data.rating.to_numpy(), index=user_data.item + user_data.rating.to_numpy(), index=user_data.item_id ) # items as index and ratings as values new_preds = algo.predict_for_user(new_u_id, items, new_ratings) @@ -178,33 +179,33 @@ def test_als_predict_no_user_features_basic(): n_users 
= 1 n_items = 2 new_u_id = -1 - ratings = lktu.ml_test.ratings np.random.seed(45) - u = np.random.choice(ratings.user.unique(), n_users)[0] - items = np.random.choice(ratings.item.unique(), n_items) + u = np.random.choice(lktu.ml_test.users.ids(), n_users)[0] + items = np.random.choice(lktu.ml_test.items.ids(), n_items) algo = als.BiasedMF(5, epochs=10) - algo.fit(from_interactions_df(ratings)) + algo.fit(lktu.ml_test) _log.debug("Items: " + str(items)) assert algo.bias is not None assert algo.users_ is not None assert algo.user_features_ is not None algo_no_user_features = als.BiasedMF(5, epochs=10, save_user_features=False) - algo_no_user_features.fit(from_interactions_df(ratings)) + algo_no_user_features.fit(lktu.ml_test) assert algo_no_user_features.user_features_ is None _log.debug(f"user: {u}") preds = algo.predict_for_user(u, items) - user_data = ratings[ratings.user == u] + ratings = lktu.ml_test.interaction_log("pandas", original_ids=True) + user_data = ratings[ratings.user_id == u] _log.debug("user_features from fit: " + str(algo.user_features_[algo.users_.number(u), :])) new_ratings = pd.Series( - user_data.rating.to_numpy(), index=user_data.item + user_data.rating.to_numpy(), index=user_data.item_id ) # items as index and ratings as values new_preds = algo_no_user_features.predict_for_user(new_u_id, items, new_ratings) @@ -216,23 +217,22 @@ def test_als_predict_no_user_features_basic(): @lktu.wantjit @mark.slow -def test_als_train_large(): +def test_als_train_large(ml_ratings): algo = als.BiasedMF(20, epochs=10) - ratings = lktu.ml_test.ratings - algo.fit(from_interactions_df(ratings)) + algo.fit(lktu.ml_test) assert algo.bias is not None assert algo.users_ is not None assert algo.user_features_ is not None - assert algo.bias.mean_ == approx(ratings.rating.mean()) + assert algo.bias.mean_ == approx(ml_ratings.rating.mean()) assert algo.n_features == 20 - assert algo.n_items == ratings.item.nunique() - assert algo.n_users == ratings.user.nunique() + 
assert algo.n_items == ml_ratings.item.nunique() + assert algo.n_users == ml_ratings.user.nunique() - icounts = ratings.groupby("item").rating.count() - isums = ratings.groupby("item").rating.sum() - is2 = isums - icounts * ratings.rating.mean() + icounts = ml_ratings.groupby("item").rating.count() + isums = ml_ratings.groupby("item").rating.sum() + is2 = isums - icounts * ml_ratings.rating.mean() imeans = is2 / (icounts + 5) ibias = pd.Series(algo.bias.item_offsets_, index=algo.items_.index) imeans, ibias = imeans.align(ibias) @@ -240,13 +240,12 @@ def test_als_train_large(): # don't use wantjit, use this to do a non-JIT test -def test_als_save_load(): +def test_als_save_load(ml_ratings: pd.DataFrame): original = als.BiasedMF(5, epochs=5) - ratings = lktu.ml_test.ratings - original.fit(from_interactions_df(ratings)) + original.fit(lktu.ml_test) assert original.bias is not None - assert original.bias.mean_ == approx(ratings.rating.mean()) + assert original.bias.mean_ == approx(ml_ratings.rating.mean()) assert original.users_ is not None mod = pickle.dumps(original) @@ -268,12 +267,11 @@ def test_als_save_load(): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") -def test_als_batch_accuracy(): +def test_als_batch_accuracy(ml_100k): import lenskit.crossfold as xf import lenskit.metrics.predict as pm - ratings = lktu.ml100k.ratings + ratings = load_movielens_df(lktu.ml_100k_zip) algo = als.BiasedMF(25, epochs=20, damping=5) diff --git a/lenskit/tests/test_als_implicit.py b/lenskit/tests/test_als_implicit.py index 0b4c130d0..37453f9a6 100644 --- a/lenskit/tests/test_als_implicit.py +++ b/lenskit/tests/test_als_implicit.py @@ -11,9 +11,10 @@ import pandas as pd import torch -from pytest import mark +from pytest import approx, mark -from lenskit.data.dataset import from_interactions_df +from lenskit.data.dataset import Dataset, from_interactions_df +from lenskit.data.movielens import load_movielens_df import 
lenskit.util.test as lktu from lenskit.algorithms import Recommender, als @@ -86,7 +87,7 @@ def test_als_predict_basic_for_new_user_with_new_ratings(): assert abs(preds.loc[i] - new_preds.loc[i]) <= 0.1 -def test_als_predict_for_new_users_with_new_ratings(): +def test_als_predict_for_new_users_with_new_ratings(ml_ratings: pd.DataFrame, ml_ds: Dataset): """ Test if ImplicitMF predictions using the same ratings for a new user is the same as a user in ml-latest-small dataset. @@ -95,14 +96,13 @@ def test_als_predict_for_new_users_with_new_ratings(): n_users = 3 n_items = 2 new_u_id = -1 - ratings = lktu.ml_test.ratings np.random.seed(45) - users = np.random.choice(ratings.user.unique(), n_users) - items = np.random.choice(ratings.item.unique(), n_items) + users = np.random.choice(ml_ds.users, n_users) + items = np.random.choice(ml_ds.items, n_items) algo = als.ImplicitMF(20, epochs=10, use_ratings=False) - algo.fit(from_interactions_df(ratings)) + algo.fit(ml_ds) assert algo.users_ is not None assert algo.user_features_ is not None @@ -114,7 +114,7 @@ def test_als_predict_for_new_users_with_new_ratings(): upos = algo.users_.number(u) # get the user's rating series - user_data = ratings[ratings.user == u] + user_data = ml_ratings[ml_ratings.user == u] new_ratings = user_data.set_index("item")["rating"].copy() nr_info = new_ratings.to_frame() @@ -142,7 +142,9 @@ def test_als_predict_for_new_users_with_new_ratings(): assert all(diffs <= 0.1) -def test_als_recs_topn_for_new_users_with_new_ratings(rng): +def test_als_recs_topn_for_new_users_with_new_ratings( + rng, ml_ratings: pd.DataFrame, ml_ds: Dataset +): """ Test if ImplicitMF topn recommendations using the same ratings for a new user is the same as a user in ml-latest-small dataset. 
@@ -154,13 +156,12 @@ def test_als_recs_topn_for_new_users_with_new_ratings(rng): n_users = 10 new_u_id = -1 - ratings = lktu.ml_test.ratings - users = rng.choice(np.unique(ratings.user), n_users) + users = rng.choice(ml_ds.users, n_users) algo = als.ImplicitMF(20, epochs=10, use_ratings=True) rec_algo = basic.TopN(algo) - rec_algo.fit(from_interactions_df(ratings)) + rec_algo.fit(ml_ds) assert algo.users_ is not None assert algo.user_features_ is not None # _log.debug("Items: " + str(items)) @@ -168,7 +169,7 @@ def test_als_recs_topn_for_new_users_with_new_ratings(rng): correlations = pd.Series(np.nan, index=users) for u in users: recs = rec_algo.recommend(u, 10) - user_data = ratings[ratings.user == u] + user_data = ml_ratings[ml_ratings.user == u] upos = algo.users_.number(u) _log.info("user %s: %s ratings", u, len(user_data)) @@ -215,47 +216,45 @@ def test_als_predict_bad_user(): assert np.isnan(preds.loc[3]) -def test_als_predict_no_user_features_basic(): - ratings = lktu.ml_test.ratings +def test_als_predict_no_user_features_basic(ml_ratings: pd.DataFrame, ml_ds: Dataset): np.random.seed(45) - u = np.random.choice(ratings.user.unique(), 1)[0] - items = np.random.choice(ratings.item.unique(), 2) + u = np.random.choice(ml_ds.users, 1)[0] + items = np.random.choice(ml_ds.items, 2) - algo = als.ImplicitMF(5, epochs=10, use_ratings=True) - algo.fit(from_interactions_df(ratings)) + algo = als.ImplicitMF(5, epochs=10) + algo.fit(ml_ds) preds = algo.predict_for_user(u, items) - user_data = ratings[ratings.user == u] + user_data = ml_ratings[ml_ratings.user == u] new_ratings = user_data.set_index("item")["rating"].copy() algo_no_user_features = als.ImplicitMF(5, epochs=10, save_user_features=False) - algo_no_user_features.fit(from_interactions_df(ratings)) + algo_no_user_features.fit(ml_ds) preds_no_user_features = algo_no_user_features.predict_for_user(u, items, new_ratings) assert algo_no_user_features.user_features_ is None + assert preds_no_user_features.values == 
approx(preds, abs=0.1) diffs = np.abs(preds - preds_no_user_features) assert all(diffs <= 0.1) @lktu.wantjit -def test_als_train_large(): +def test_als_train_large(ml_ds: Dataset): algo = als.ImplicitMF(20, epochs=20, use_ratings=False) - ratings = lktu.ml_test.ratings - algo.fit(from_interactions_df(ratings)) + algo.fit(ml_ds) assert algo.users_ is not None assert algo.user_features_ is not None - assert len(algo.users_.index) == ratings.user.nunique() - assert len(algo.items_.index) == ratings.item.nunique() - assert algo.user_features_.shape == (ratings.user.nunique(), 20) - assert algo.item_features_.shape == (ratings.item.nunique(), 20) + assert len(algo.users_.index) == ml_ds.user_count + assert len(algo.items_.index) == ml_ds.item_count + assert algo.user_features_.shape == (ml_ds.user_count, 20) + assert algo.item_features_.shape == (ml_ds.item_count, 20) -def test_als_save_load(tmp_path): +def test_als_save_load(tmp_path, ml_ds: Dataset): "Test saving and loading ALS models, and regularized training." 
algo = als.ImplicitMF(5, epochs=5, reg=(2, 1), use_ratings=False) - ratings = lktu.ml_test.ratings - algo.fit(from_interactions_df(ratings)) + algo.fit(ml_ds) assert algo.users_ is not None fn = tmp_path / "model.bpk" @@ -272,42 +271,38 @@ def test_als_save_load(tmp_path): @lktu.wantjit -def test_als_train_large_noratings(): +def test_als_train_large_noratings(ml_ds: Dataset): algo = als.ImplicitMF(20, epochs=20) - ratings = lktu.ml_test.ratings - ratings = ratings.loc[:, ["user", "item"]] - algo.fit(from_interactions_df(ratings)) + algo.fit(ml_ds) assert algo.users_ is not None assert algo.user_features_ is not None - assert len(algo.users_.index) == ratings.user.nunique() - assert len(algo.items_.index) == ratings.item.nunique() - assert algo.user_features_.shape == (ratings.user.nunique(), 20) - assert algo.item_features_.shape == (ratings.item.nunique(), 20) + assert len(algo.users_.index) == ml_ds.user_count + assert len(algo.items_.index) == ml_ds.item_count + assert algo.user_features_.shape == (ml_ds.user_count, 20) + assert algo.item_features_.shape == (ml_ds.item_count, 20) @lktu.wantjit -def test_als_train_large_ratings(): +def test_als_train_large_ratings(ml_ds): algo = als.ImplicitMF(20, epochs=20, use_ratings=True) - ratings = lktu.ml_test.ratings - algo.fit(from_interactions_df(ratings)) + algo.fit(ml_ds) assert algo.users_ is not None assert algo.user_features_ is not None - assert len(algo.users_.index) == ratings.user.nunique() - assert len(algo.items_.index) == ratings.item.nunique() - assert algo.user_features_.shape == (ratings.user.nunique(), 20) - assert algo.item_features_.shape == (ratings.item.nunique(), 20) + assert len(algo.users_.index) == ml_ds.user_count + assert len(algo.items_.index) == ml_ds.item_count + assert algo.user_features_.shape == (ml_ds.user_count, 20) + assert algo.item_features_.shape == (ml_ds.item_count, 20) @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") -def 
test_als_implicit_batch_accuracy(): +def test_als_implicit_batch_accuracy(ml_100k): import lenskit.crossfold as xf from lenskit import batch, topn - ratings = lktu.ml100k.ratings + ratings = load_movielens_df(lktu.ml_100k_zip) def eval(train, test): train = train.astype({"rating": np.float_}) diff --git a/lenskit/tests/test_batch_predict.py b/lenskit/tests/test_batch_predict.py index 9619925b3..ddd7dec12 100644 --- a/lenskit/tests/test_batch_predict.py +++ b/lenskit/tests/test_batch_predict.py @@ -122,17 +122,14 @@ def test_predict_include_rating(mlb: MLB): assert all(preds.rating.values == urv.loc[preds.index, :].rating.values) -@pytest.mark.skipif(not lktu.ml100k.available, reason="ML-100K required") @pytest.mark.eval @pytest.mark.parametrize("ncpus", [None, 1, 2]) -def test_bias_batch_predict(ncpus): +def test_bias_batch_predict(ml_100k, ncpus): import lenskit.crossfold as xf import lenskit.metrics.predict as pm from lenskit import batch from lenskit.algorithms import bias - ratings = lktu.ml100k.ratings - algo = bias.Bias(damping=5) def eval(train, test): @@ -143,7 +140,7 @@ def eval(train, test): return recs preds = pd.concat( - (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) + (eval(train, test) for (train, test) in xf.partition_users(ml_100k, 5, xf.SampleFrac(0.2))) ) _log.info("analyzing predictions") diff --git a/lenskit/tests/test_batch_recommend.py b/lenskit/tests/test_batch_recommend.py index 6fd0e1928..b3a693545 100644 --- a/lenskit/tests/test_batch_recommend.py +++ b/lenskit/tests/test_batch_recommend.py @@ -19,7 +19,6 @@ from lenskit.algorithms import Recommender from lenskit.algorithms.basic import PopScore, TopN from lenskit.algorithms.bias import Bias -from lenskit.util.test import ml_ratings, ml_ds # noqa: F401 _log = logging.getLogger(__name__) @@ -73,11 +72,8 @@ def check_positive_ndcg(self, recs): @pytest.fixture -def ml_folds() -> MLFolds: - if not lktu.ml100k.available: - pytest.skip("ML-100K not 
available") - ratings = lktu.ml100k.ratings - return MLFolds(ratings) +def ml_folds(ml_100k) -> MLFolds: + return MLFolds(ml_100k) def test_recommend_single(mlb: MLB): diff --git a/lenskit/tests/test_bias.py b/lenskit/tests/test_bias.py index ef2424f62..7cabc9181 100644 --- a/lenskit/tests/test_bias.py +++ b/lenskit/tests/test_bias.py @@ -15,8 +15,7 @@ from lenskit import util as lku from lenskit.algorithms.bias import Bias -from lenskit.data.dataset import from_interactions_df -from lenskit.util.test import ml_test, ml_ds, ml_ratings # noqa: F401 +from lenskit.data.dataset import Dataset, from_interactions_df _log = logging.getLogger(__name__) @@ -149,6 +148,8 @@ def test_bias_global_predict(): def test_bias_item_predict(): algo = Bias(users=False) algo.fit(simple_ds) + assert algo.item_offsets_ is not None + p = algo.predict_for_user(10, [1, 2, 3]) assert len(p) == 3 @@ -172,6 +173,7 @@ def test_bias_user_predict(): def test_bias_new_user_predict(): algo = Bias() algo.fit(simple_ds) + assert algo.item_offsets_ is not None ratings = pd.DataFrame({"item": [1, 2, 3], "rating": [1.5, 2.5, 3.5]}) ratings = ratings.set_index("item").rating @@ -188,6 +190,7 @@ def test_bias_new_user_predict(): def test_bias_predict_unknown_item(): algo = Bias() algo.fit(simple_ds) + assert algo.item_offsets_ is not None p = algo.predict_for_user(10, [1, 3, 4]) @@ -200,6 +203,7 @@ def test_bias_predict_unknown_item(): def test_bias_predict_unknown_user(): algo = Bias() algo.fit(simple_ds) + assert algo.item_offsets_ is not None p = algo.predict_for_user(15, [1, 3]) @@ -207,47 +211,51 @@ def test_bias_predict_unknown_user(): assert p.values == approx((algo.item_offsets_.loc[[1, 3]] + algo.mean_).values) -def test_bias_train_ml_ratings(): +def test_bias_train_ml_ratings(ml_ratings: pd.DataFrame, ml_ds: Dataset): algo = Bias() - ratings = ml_test.ratings - algo.fit(from_interactions_df(ratings)) + algo.fit(ml_ds) + assert algo.item_offsets_ is not None - assert algo.mean_ == 
approx(ratings.rating.mean()) - imeans_data = ratings.groupby("item").rating.mean() + assert algo.mean_ == approx(ml_ratings.rating.mean()) + imeans_data = ml_ds.item_stats()["mean_rating"] imeans_algo = algo.item_offsets_ + algo.mean_ ares, data = imeans_algo.align(imeans_data) assert ares.values == approx(data.values) - urates = ratings.set_index("user").loc[2].set_index("item").rating + urates = ml_ratings.set_index("user").loc[2].set_index("item").rating umean = (urates - imeans_data[urates.index]).mean() p = algo.predict_for_user(2, [10, 11, -1]) assert len(p) == 3 assert p.iloc[0] == approx(imeans_data.loc[10] + umean) assert p.iloc[1] == approx(imeans_data.loc[11] + umean) - assert p.iloc[2] == approx(ratings.rating.mean() + umean) + assert p.iloc[2] == approx(ml_ratings.rating.mean() + umean) -def test_bias_transform(): +def test_bias_transform(ml_ds: Dataset): algo = Bias() - ratings = ml_test.ratings - normed = algo.fit_transform(from_interactions_df(ratings)) + normed = algo.fit_transform(ml_ds) - assert all(normed["user"] == ratings["user"]) - assert all(normed["item"] == ratings["item"]) + ratings = ml_ds.interaction_log("pandas", original_ids=True) + assert all(normed["user"] == ratings["user_id"]) + assert all(normed["item"] == ratings["item_id"]) denorm = algo.inverse_transform(normed) assert denorm["rating"].values == approx(ratings["rating"], 1.0e-6) - n2 = ratings.join(algo.item_offsets_, on="item") - n2 = n2.join(algo.user_offsets_, on="user") + assert algo.item_offsets_ is not None + assert algo.user_offsets_ is not None + n2 = ratings.join(algo.item_offsets_, on="item_id") + n2 = n2.join(algo.user_offsets_, on="user_id") nr = n2.rating - algo.mean_ - n2.i_off - n2.u_off assert normed["rating"].values == approx(nr.values) -def test_bias_transform_tensor(ml_ratings, ml_ds): +def test_bias_transform_tensor(ml_ds): algo = Bias() algo.fit(ml_ds) + assert algo.item_offsets_ is not None + assert algo.user_offsets_ is not None mat = 
ml_ds.interaction_matrix("torch", layout="coo") normed = algo.transform(mat) @@ -262,40 +270,49 @@ def test_bias_transform_tensor(ml_ratings, ml_ds): assert recon.values().numpy() == approx(mat.values().numpy()) -def test_bias_transform_indexes(): +def test_bias_transform_indexes(ml_ds: Dataset): algo = Bias() - ratings = ml_test.ratings - normed = algo.fit_transform(from_interactions_df(ratings), indexes=True) + normed = algo.fit_transform(ml_ds, indexes=True) + assert algo.item_offsets_ is not None + assert algo.user_offsets_ is not None + + ratings = ml_ds.interaction_log("pandas", original_ids=True) - assert all(normed["user"] == ratings["user"]) - assert all(normed["item"] == ratings["item"]) - assert all(normed["uidx"] == algo.user_offsets_.index.get_indexer(ratings["user"])) - assert all(normed["iidx"] == algo.item_offsets_.index.get_indexer(ratings["item"])) + assert all(normed["user"] == ratings["user_id"]) + assert all(normed["item"] == ratings["item_id"]) + assert all(normed["uidx"] == ml_ds.users.numbers(ratings["user_id"])) + assert all(normed["iidx"] == ml_ds.items.numbers(ratings["item_id"])) denorm = algo.inverse_transform(normed) assert denorm["rating"].values == approx(ratings["rating"].values, 1.0e-6) @mark.parametrize(["users", "items"], [(True, False), (False, True), (False, False)]) -def test_bias_transform_disable(users, items): +def test_bias_transform_disable(ml_ds: Dataset, users: bool, items: bool): algo = Bias(users=users, items=items) - ratings = ml_test.ratings - normed = algo.fit_transform(from_interactions_df(ratings)) + normed = algo.fit_transform(ml_ds) - assert all(normed["user"] == ratings["user"]) - assert all(normed["item"] == ratings["item"]) + ratings = ml_ds.interaction_log("pandas", original_ids=True) + assert all(normed["user"] == ratings["user_id"]) + assert all(normed["item"] == ratings["item_id"]) denorm = algo.inverse_transform(normed) assert denorm["rating"].values == approx(ratings["rating"], 1.0e-6) n2 = ratings nr 
= n2.rating - algo.mean_ if items: - n2 = n2.join(algo.item_offsets_, on="item") + assert algo.item_offsets_ is not None + n2 = n2.join(algo.item_offsets_, on="item_id") nr = nr - n2.i_off + else: + assert algo.item_offsets_ is None if users: - n2 = n2.join(algo.user_offsets_, on="user") + assert algo.user_offsets_ is not None + n2 = n2.join(algo.user_offsets_, on="user_id") nr = nr - n2.u_off + else: + assert algo.user_offsets_ is None assert normed["rating"].values == approx(nr.values) @@ -381,6 +398,8 @@ def test_transform_user_without_user_bias(): user = 12 algo = Bias() algo.fit(simple_ds) + assert algo.item_offsets_ is not None + assert algo.user_offsets_ is not None new_ratings = pd.Series([-0.5, 1.5], index=[2, 3]) # items as index and ratings as values diff --git a/lenskit/tests/test_candidate_selector.py b/lenskit/tests/test_candidate_selector.py index 52f47e891..f158e77e8 100644 --- a/lenskit/tests/test_candidate_selector.py +++ b/lenskit/tests/test_candidate_selector.py @@ -7,7 +7,7 @@ import numpy as np import pandas as pd -from lenskit.data.dataset import from_interactions_df +from lenskit.data.dataset import Dataset, from_interactions_df import lenskit.util.test as lktu from lenskit.algorithms import basic @@ -53,14 +53,13 @@ def test_unrated_override(): assert set(sel.candidates(10, [2])) == set([1, 3]) -def test_unrated_big(): - ratings = lktu.ml_test.ratings - users = ratings.user.unique() - items = ratings.item.unique() - user_items = ratings.set_index("user").item +def test_unrated_big(ml_ds: Dataset): + users = ml_ds.users.ids() + items = ml_ds.items.ids() + user_items = ml_ds.interaction_matrix("pandas", original_ids=True).set_index("user_id").item_id sel = basic.UnratedItemCandidateSelector() - s2 = sel.fit(from_interactions_df(ratings)) + s2 = sel.fit(ml_ds) assert s2 is sel # test 100 random users diff --git a/lenskit/tests/test_crossfold.py b/lenskit/tests/test_crossfold.py index 75d555c80..ff512dd66 100644 --- 
a/lenskit/tests/test_crossfold.py +++ b/lenskit/tests/test_crossfold.py @@ -10,21 +10,21 @@ import numpy as np +import pandas as pd import pytest import lenskit.crossfold as xf import lenskit.util.test as lktu -def test_partition_rows(): - ratings = lktu.ml_test.ratings - splits = xf.partition_rows(ratings, 5) +def test_partition_rows(ml_ratings: pd.DataFrame): + splits = xf.partition_rows(ml_ratings, 5) splits = list(splits) assert len(splits) == 5 for s in splits: - assert len(s.test) + len(s.train) == len(ratings) - assert all(s.test.index.union(s.train.index) == ratings.index) + assert len(s.test) + len(s.train) == len(ml_ratings) + assert all(s.test.index.union(s.train.index) == ml_ratings.index) test_idx = s.test.set_index(["user", "item"]).index train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 @@ -40,18 +40,17 @@ def test_partition_rows(): assert len(inter) == 0 union = ft.reduce(lambda i1, i2: i1.union(i2), (s.test.index for s in splits)) - assert len(union.unique()) == len(ratings) + assert len(union.unique()) == len(ml_ratings) -def test_sample_rows(): - ratings = lktu.ml_test.ratings - splits = xf.sample_rows(ratings, partitions=5, size=1000) +def test_sample_rows(ml_ratings: pd.DataFrame): + splits = xf.sample_rows(ml_ratings, partitions=5, size=1000) splits = list(splits) assert len(splits) == 5 for s in splits: assert len(s.test) == 1000 - assert len(s.test) + len(s.train) == len(ratings) + assert len(s.test) + len(s.train) == len(ml_ratings) test_idx = s.test.set_index(["user", "item"]).index train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 @@ -66,15 +65,14 @@ def test_sample_rows(): assert len(inter) == 0 -def test_sample_rows_more_smaller_parts(): - ratings = lktu.ml_test.ratings - splits = xf.sample_rows(ratings, partitions=10, size=500) +def test_sample_rows_more_smaller_parts(ml_ratings: pd.DataFrame): + splits = 
xf.sample_rows(ml_ratings, partitions=10, size=500) splits = list(splits) assert len(splits) == 10 for s in splits: assert len(s.test) == 500 - assert len(s.test) + len(s.train) == len(ratings) + assert len(s.test) + len(s.train) == len(ml_ratings) test_idx = s.test.set_index(["user", "item"]).index train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 @@ -89,15 +87,14 @@ def test_sample_rows_more_smaller_parts(): assert len(inter) == 0 -def test_sample_non_disjoint(): - ratings = lktu.ml_test.ratings - splits = xf.sample_rows(ratings, partitions=10, size=1000, disjoint=False) +def test_sample_non_disjoint(ml_ratings: pd.DataFrame): + splits = xf.sample_rows(ml_ratings, partitions=10, size=1000, disjoint=False) splits = list(splits) assert len(splits) == 10 for s in splits: assert len(s.test) == 1000 - assert len(s.test) + len(s.train) == len(ratings) + assert len(s.test) + len(s.train) == len(ml_ratings) test_idx = s.test.set_index(["user", "item"]).index train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 @@ -112,28 +109,25 @@ def test_sample_non_disjoint(): @pytest.mark.slow -def test_sample_oversize(): - ratings = lktu.ml_test.ratings - splits = xf.sample_rows(ratings, 50, 10000) +def test_sample_oversize(ml_ratings: pd.DataFrame): + splits = xf.sample_rows(ml_ratings, 50, 10000) splits = list(splits) assert len(splits) == 50 for s in splits: - assert len(s.test) + len(s.train) == len(ratings) - assert all(s.test.index.union(s.train.index) == ratings.index) + assert len(s.test) + len(s.train) == len(ml_ratings) + assert all(s.test.index.union(s.train.index) == ml_ratings.index) test_idx = s.test.set_index(["user", "item"]).index train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 -def test_sample_n(): - ratings = lktu.ml_test.ratings - - users = np.random.choice(ratings.user.unique(), 5, replace=False) +def 
test_sample_n(ml_ratings: pd.DataFrame): + users = np.random.choice(ml_ratings.user.unique(), 5, replace=False) s5 = xf.SampleN(5) for u in users: - udf = ratings[ratings.user == u] + udf = ml_ratings[ml_ratings.user == u] tst = s5(udf) trn = udf.loc[udf.index.difference(tst.index), :] assert len(tst) == 5 @@ -141,20 +135,19 @@ def test_sample_n(): s10 = xf.SampleN(10) for u in users: - udf = ratings[ratings.user == u] + udf = ml_ratings[ml_ratings.user == u] tst = s10(udf) trn = udf.loc[udf.index.difference(tst.index), :] assert len(tst) == 10 assert len(tst) + len(trn) == len(udf) -def test_sample_frac(): - ratings = lktu.ml_test.ratings - users = np.random.choice(ratings.user.unique(), 5, replace=False) +def test_sample_frac(ml_ratings: pd.DataFrame): + users = np.random.choice(ml_ratings.user.unique(), 5, replace=False) samp = xf.SampleFrac(0.2) for u in users: - udf = ratings[ratings.user == u] + udf = ml_ratings[ml_ratings.user == u] tst = samp(udf) trn = udf.loc[udf.index.difference(tst.index), :] assert len(tst) + len(trn) == len(udf) @@ -163,7 +156,7 @@ def test_sample_frac(): samp = xf.SampleFrac(0.5) for u in users: - udf = ratings[ratings.user == u] + udf = ml_ratings[ml_ratings.user == u] tst = samp(udf) trn = udf.loc[udf.index.difference(tst.index), :] assert len(tst) + len(trn) == len(udf) @@ -171,13 +164,12 @@ def test_sample_frac(): assert len(tst) <= math.ceil(len(udf) * 0.5) -def test_last_n(): - ratings = lktu.ml_test.ratings - users = np.random.choice(ratings.user.unique(), 5, replace=False) +def test_last_n(ml_ratings: pd.DataFrame): + users = np.random.choice(ml_ratings.user.unique(), 5, replace=False) samp = xf.LastN(5) for u in users: - udf = ratings[ratings.user == u] + udf = ml_ratings[ml_ratings.user == u] tst = samp(udf) trn = udf.loc[udf.index.difference(tst.index), :] assert len(tst) == 5 @@ -186,7 +178,7 @@ def test_last_n(): samp = xf.LastN(7) for u in users: - udf = ratings[ratings.user == u] + udf = ml_ratings[ml_ratings.user == 
u] tst = samp(udf) trn = udf.loc[udf.index.difference(tst.index), :] assert len(tst) == 7 @@ -194,13 +186,12 @@ def test_last_n(): assert tst.timestamp.min() >= trn.timestamp.max() -def test_last_frac(): - ratings = lktu.ml_test.ratings - users = np.random.choice(ratings.user.unique(), 5, replace=False) +def test_last_frac(ml_ratings: pd.DataFrame): + users = np.random.choice(ml_ratings.user.unique(), 5, replace=False) samp = xf.LastFrac(0.2, "timestamp") for u in users: - udf = ratings[ratings.user == u] + udf = ml_ratings[ml_ratings.user == u] tst = samp(udf) trn = udf.loc[udf.index.difference(tst.index), :] assert len(tst) + len(trn) == len(udf) @@ -210,7 +201,7 @@ def test_last_frac(): samp = xf.LastFrac(0.5, "timestamp") for u in users: - udf = ratings[ratings.user == u] + udf = ml_ratings[ml_ratings.user == u] tst = samp(udf) trn = udf.loc[udf.index.difference(tst.index), :] assert len(tst) + len(trn) == len(udf) @@ -219,40 +210,38 @@ def test_last_frac(): assert tst.timestamp.min() >= trn.timestamp.max() -def test_partition_users(): - ratings = lktu.ml_test.ratings - splits = xf.partition_users(ratings, 5, xf.SampleN(5)) +def test_partition_users(ml_ratings: pd.DataFrame): + splits = xf.partition_users(ml_ratings, 5, xf.SampleN(5)) splits = list(splits) assert len(splits) == 5 for s in splits: ucounts = s.test.groupby("user").agg("count") assert all(ucounts == 5) - assert all(s.test.index.union(s.train.index) == ratings.index) + assert all(s.test.index.union(s.train.index) == ml_ratings.index) assert all(s.train["user"].isin(s.train["user"].unique())) - assert len(s.test) + len(s.train) == len(ratings) + assert len(s.test) + len(s.train) == len(ml_ratings) users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) - assert len(users) == ratings.user.nunique() - assert users == set(ratings.user) + assert len(users) == ml_ratings.user.nunique() + assert users == set(ml_ratings.user) -def test_partition_may_skip_train(): - """Partitioning 
when users may not have enough ratings to be in the train set and test set.""" - ratings = lktu.ml_test.ratings +def test_partition_may_skip_train(ml_ratings: pd.DataFrame): + """Partitioning when users may not have enough ratings to be in the train set and test set.""" # make a data set where some users only have 1 rating - ratings = ratings.sample(frac=0.1) - users = ratings.groupby("user")["rating"].count() + ml_ratings = ml_ratings.sample(frac=0.1) + users = ml_ratings.groupby("user")["rating"].count() assert users.min() == 1.0 # we should have some small users! users.name = "ur_count" - splits = xf.partition_users(ratings, 5, xf.SampleN(1)) + splits = xf.partition_users(ml_ratings, 5, xf.SampleN(1)) splits = list(splits) assert len(splits) == 5 - # now we go make sure we're missing some users! And don't have any NaN ratings + # now we go make sure we're missing some users! And don't have any NaN ratings for train, test in splits: - # no null ratings + # no null ratings assert all(train["rating"].notna()) # see if test users with 1 rating are missing from train test = test.join(users, on="user") @@ -261,30 +250,28 @@ def test_partition_may_skip_train(): assert all(test.loc[test["ur_count"] > 1, "user"].isin(train["user"].unique())) -def test_partition_users_frac(): - ratings = lktu.ml_test.ratings - splits = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) +def test_partition_users_frac(ml_ratings: pd.DataFrame): + splits = xf.partition_users(ml_ratings, 5, xf.SampleFrac(0.2)) splits = list(splits) assert len(splits) == 5 - ucounts = ratings.groupby("user").item.count() + ucounts = ml_ratings.groupby("user").item.count() uss = ucounts * 0.2 for s in splits: tucs = s.test.groupby("user").item.count() assert all(tucs >= uss.loc[tucs.index] - 1) assert all(tucs <= uss.loc[tucs.index] + 1) - assert all(s.test.index.union(s.train.index) == 
ml_ratings.index) + assert len(s.test) + len(s.train) == len(ml_ratings) # we have all users users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) - assert len(users) == ratings.user.nunique() - assert users == set(ratings.user) + assert len(users) == ml_ratings.user.nunique() + assert users == set(ml_ratings.user) -def test_sample_users(): - ratings = lktu.ml_test.ratings - splits = xf.sample_users(ratings, 5, 100, xf.SampleN(5)) +def test_sample_users(ml_ratings: pd.DataFrame): + splits = xf.sample_users(ml_ratings, 5, 100, xf.SampleN(5)) splits = list(splits) assert len(splits) == 5 @@ -293,8 +280,8 @@ def test_sample_users(): assert len(s.test) == 5 * 100 assert len(ucounts) == 100 assert all(ucounts == 5) - assert all(s.test.index.union(s.train.index) == ratings.index) - assert len(s.test) + len(s.train) == len(ratings) + assert all(s.test.index.union(s.train.index) == ml_ratings.index) + assert len(s.test) + len(s.train) == len(ml_ratings) # no overlapping users for s1, s2 in it.product(splits, splits): @@ -305,12 +292,11 @@ def test_sample_users(): assert len(np.intersect1d(us1, us2)) == 0 -def test_sample_users_frac(): - ratings = lktu.ml_test.ratings - splits = xf.sample_users(ratings, 5, 100, xf.SampleFrac(0.2)) +def test_sample_users_frac(ml_ratings: pd.DataFrame): + splits = xf.sample_users(ml_ratings, 5, 100, xf.SampleFrac(0.2)) splits = list(splits) assert len(splits) == 5 - ucounts = ratings.groupby("user").item.count() + ucounts = ml_ratings.groupby("user").item.count() uss = ucounts * 0.2 for s in splits: @@ -318,8 +304,8 @@ def test_sample_users_frac(): assert len(tucs) == 100 assert all(tucs >= uss.loc[tucs.index] - 1) assert all(tucs <= uss.loc[tucs.index] + 1) - assert all(s.test.index.union(s.train.index) == ratings.index) - assert len(s.test) + len(s.train) == len(ratings) + assert all(s.test.index.union(s.train.index) == ml_ratings.index) + assert len(s.test) + len(s.train) == len(ml_ratings) # no overlapping 
users for s1, s2 in it.product(splits, splits): @@ -331,9 +317,8 @@ def test_sample_users_frac(): @pytest.mark.slow -def test_sample_users_frac_oversize(): - ratings = lktu.ml_test.ratings - splits = xf.sample_users(ratings, 20, 100, xf.SampleN(5)) +def test_sample_users_frac_oversize(ml_ratings: pd.DataFrame): + splits = xf.sample_users(ml_ratings, 20, 100, xf.SampleN(5)) splits = list(splits) assert len(splits) == 20 @@ -341,12 +326,12 @@ def test_sample_users_frac_oversize(): ucounts = s.test.groupby("user").agg("count") assert len(ucounts) < 100 assert all(ucounts == 5) - assert all(s.test.index.union(s.train.index) == ratings.index) - assert len(s.test) + len(s.train) == len(ratings) + assert all(s.test.index.union(s.train.index) == ml_ratings.index) + assert len(s.test) + len(s.train) == len(ml_ratings) users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) - assert len(users) == ratings.user.nunique() - assert users == set(ratings.user) + assert len(users) == ml_ratings.user.nunique() + assert users == set(ml_ratings.user) for s1, s2 in it.product(splits, splits): if s1 is s2: continue @@ -356,9 +341,8 @@ def test_sample_users_frac_oversize(): assert len(np.intersect1d(us1, us2)) == 0 -def test_sample_users_frac_oversize_ndj(): - ratings = lktu.ml_test.ratings - splits = xf.sample_users(ratings, 20, 100, xf.SampleN(5), disjoint=False) +def test_sample_users_frac_oversize_ndj(ml_ratings: pd.DataFrame): + splits = xf.sample_users(ml_ratings, 20, 100, xf.SampleN(5), disjoint=False) splits = list(splits) assert len(splits) == 20 @@ -367,41 +351,37 @@ def test_sample_users_frac_oversize_ndj(): assert len(ucounts) == 100 assert len(s.test) == 5 * 100 assert all(ucounts == 5) - assert all(s.test.index.union(s.train.index) == ratings.index) - assert len(s.test) + len(s.train) == len(ratings) + assert all(s.test.index.union(s.train.index) == ml_ratings.index) + assert len(s.test) + len(s.train) == len(ml_ratings) -def 
test_non_unique_index_partition_users(): +def test_non_unique_index_partition_users(ml_ratings: pd.DataFrame): """Partitioning users when dataframe has non-unique indices""" - ratings = lktu.ml_test.ratings - ratings = ratings.set_index("user") ##forces non-unique index + ml_ratings = ml_ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): - for split in xf.partition_users(ratings, 5, xf.SampleN(5)): + for split in xf.partition_users(ml_ratings, 5, xf.SampleN(5)): pass -def test_sample_users_dup_index(): +def test_sample_users_dup_index(ml_ratings: pd.DataFrame): """Sampling users when dataframe has non-unique indices""" - ratings = lktu.ml_test.ratings - ratings = ratings.set_index("user") ##forces non-unique index + ml_ratings = ml_ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): - for split in xf.sample_users(ratings, 5, 100, xf.SampleN(5)): + for split in xf.sample_users(ml_ratings, 5, 100, xf.SampleN(5)): pass -def test_sample_rows_dup_index(): - """Sampling ratings when dataframe has non-unique indices""" - ratings = lktu.ml_test.ratings - ratings = ratings.set_index("user") ##forces non-unique index +def test_sample_rows_dup_index(ml_ratings: pd.DataFrame): + """Sampling ratings when dataframe has non-unique indices""" + ml_ratings = ml_ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): - for split in xf.sample_rows(ratings, partitions=5, size=1000): + for split in xf.sample_rows(ml_ratings, partitions=5, size=1000): pass -def test_partition_users_dup_index(): - """Partitioning ratings when dataframe has non-unique indices""" - ratings = lktu.ml_test.ratings - ratings = ratings.set_index("user") ##forces non-unique index +def test_partition_users_dup_index(ml_ratings: pd.DataFrame): + """Partitioning ratings when dataframe has non-unique indices""" + ml_ratings = ml_ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): - for 
split in xf.partition_users(ratings, 5, xf.SampleN(5)): + for split in xf.partition_users(ml_ratings, 5, xf.SampleN(5)): pass diff --git a/lenskit/tests/test_dataset_ids.py b/lenskit/tests/test_dataset_ids.py index 205fc29b9..1605674d7 100644 --- a/lenskit/tests/test_dataset_ids.py +++ b/lenskit/tests/test_dataset_ids.py @@ -12,21 +12,21 @@ def test_from_ratings_default_names(ml_ratings: pd.DataFrame): - ratings = ml_ratings.rename(columns={"userId": "user_id", "movieId": "item_id"}) + ratings = ml_ratings.rename(columns={"user": "user_id", "item": "item_id"}) ds = from_interactions_df(ratings) assert ds.item_count == ratings["item_id"].nunique() assert ds.user_count == ratings["user_id"].nunique() def test_from_ratings_nosuffix(ml_ratings: pd.DataFrame): - ratings = ml_ratings.rename(columns={"userId": "user", "movieId": "item"}) + ratings = ml_ratings.rename(columns={"user": "user", "item": "item"}) ds = from_interactions_df(ratings) assert ds.item_count == ratings["item"].nunique() assert ds.user_count == ratings["user"].nunique() def test_from_ratings_names_upper(ml_ratings: pd.DataFrame): - ratings = ml_ratings.rename(columns={"userId": "USER", "movieId": "ITEM"}) + ratings = ml_ratings.rename(columns={"user": "USER", "item": "ITEM"}) ds = from_interactions_df(ratings) assert ds.item_count == ratings["ITEM"].nunique() assert ds.user_count == ratings["USER"].nunique() diff --git a/lenskit/tests/test_dataset_lazy.py b/lenskit/tests/test_dataset_lazy.py index c24b0f89c..fc9a83e56 100644 --- a/lenskit/tests/test_dataset_lazy.py +++ b/lenskit/tests/test_dataset_lazy.py @@ -15,24 +15,22 @@ def test_item_stats(ml_ratings: pd.DataFrame): - ml_ds = LazyDataset(lambda: from_interactions_df(ml_ratings, item_col="movieId")) + ml_ds = LazyDataset(lambda: from_interactions_df(ml_ratings, item_col="item")) stats = ml_ds.item_stats() stats.info() assert len(stats) == ml_ds.item_count assert np.all(stats.index == ml_ds.items.index) - assert np.all(stats["count"] == 
ml_ratings["movieId"].value_counts().reindex(ml_ds.items)) - assert np.all(stats["user_count"] == ml_ratings["movieId"].value_counts().reindex(ml_ds.items)) - assert np.all( - stats["rating_count"] == ml_ratings["movieId"].value_counts().reindex(ml_ds.items) - ) + assert np.all(stats["count"] == ml_ratings["item"].value_counts().reindex(ml_ds.items)) + assert np.all(stats["user_count"] == ml_ratings["item"].value_counts().reindex(ml_ds.items)) + assert np.all(stats["rating_count"] == ml_ratings["item"].value_counts().reindex(ml_ds.items)) assert stats["mean_rating"].values == approx( - ml_ratings.groupby("movieId")["rating"].mean().reindex(ml_ds.items).values + ml_ratings.groupby("item")["rating"].mean().reindex(ml_ds.items).values ) - ts = ml_ratings.groupby("movieId")["timestamp"].min().reindex(ml_ds.items) + ts = ml_ratings.groupby("item")["timestamp"].min().reindex(ml_ds.items) bad = stats["first_time"] != ts nbad = np.sum(bad) if nbad: @@ -42,23 +40,23 @@ def test_item_stats(ml_ratings: pd.DataFrame): def test_user_stats(ml_ratings: pd.DataFrame): - ml_ds = LazyDataset(lambda: from_interactions_df(ml_ratings, item_col="movieId")) + ml_ds = LazyDataset(lambda: from_interactions_df(ml_ratings, item_col="item")) stats = ml_ds.user_stats() stats.info() assert len(stats) == ml_ds.user_count assert np.all(stats.index == ml_ds.users.index) - assert np.all(stats["count"] == ml_ratings["userId"].value_counts().reindex(ml_ds.users)) - assert np.all(stats["user_count"] == ml_ratings["userId"].value_counts().reindex(ml_ds.users)) - assert np.all(stats["rating_count"] == ml_ratings["userId"].value_counts().reindex(ml_ds.users)) + assert np.all(stats["count"] == ml_ratings["user"].value_counts().reindex(ml_ds.users)) + assert np.all(stats["user_count"] == ml_ratings["user"].value_counts().reindex(ml_ds.users)) + assert np.all(stats["rating_count"] == ml_ratings["user"].value_counts().reindex(ml_ds.users)) assert stats["mean_rating"].values == approx( - 
ml_ratings.groupby("userId")["rating"].mean().reindex(ml_ds.users).values + ml_ratings.groupby("user")["rating"].mean().reindex(ml_ds.users).values ) assert np.all( - stats["first_time"] == ml_ratings.groupby("userId")["timestamp"].min().reindex(ml_ds.users) + stats["first_time"] == ml_ratings.groupby("user")["timestamp"].min().reindex(ml_ds.users) ) assert np.all( - stats["last_time"] == ml_ratings.groupby("userId")["timestamp"].max().reindex(ml_ds.users) + stats["last_time"] == ml_ratings.groupby("user")["timestamp"].max().reindex(ml_ds.users) ) diff --git a/lenskit/tests/test_dataset_log.py b/lenskit/tests/test_dataset_log.py index e56ac69ba..3ce3db7c7 100644 --- a/lenskit/tests/test_dataset_log.py +++ b/lenskit/tests/test_dataset_log.py @@ -25,9 +25,9 @@ def test_pandas_log_defaults(ml_ratings: pd.DataFrame, ml_ds: Dataset): uids = ml_ds.users.ids(int_df["user_num"]) iids = ml_ds.items.ids(int_df["item_num"]) - ml_df = ml_ratings.sort_values(["userId", "movieId"]) - assert np.all(uids == ml_df["userId"]) - assert np.all(iids == ml_df["movieId"]) + ml_df = ml_ratings.sort_values(["user", "item"]) + assert np.all(uids == ml_df["user"]) + assert np.all(iids == ml_df["item"]) assert np.all(int_df["rating"] == ml_df["rating"]) assert np.all(int_df["timestamp"] == ml_df["timestamp"]) @@ -48,9 +48,9 @@ def test_pandas_log_ids(ml_ratings: pd.DataFrame, ml_ds: Dataset): # the interact int_df = int_df.sort_values(["user_id", "item_id"]) - ml_df = ml_ratings.sort_values(["userId", "movieId"]) - assert np.all(int_df["user_id"] == ml_df["userId"]) - assert np.all(int_df["item_id"] == ml_df["movieId"]) + ml_df = ml_ratings.sort_values(["user", "item"]) + assert np.all(int_df["user_id"] == ml_df["user"]) + assert np.all(int_df["item_id"] == ml_df["item"]) assert np.all(int_df["rating"] == ml_df["rating"]) assert np.all(int_df["timestamp"] == ml_df["timestamp"]) @@ -72,9 +72,9 @@ def test_pandas_log_no_ts(ml_ratings: pd.DataFrame, ml_ds: Dataset): uids = 
ml_ds.users.ids(int_df["user_num"]) iids = ml_ds.items.ids(int_df["item_num"]) - ml_df = ml_ratings.sort_values(["userId", "movieId"]) - assert np.all(uids == ml_df["userId"]) - assert np.all(iids == ml_df["movieId"]) + ml_df = ml_ratings.sort_values(["user", "item"]) + assert np.all(uids == ml_df["user"]) + assert np.all(iids == ml_df["item"]) assert np.all(int_df["rating"] == ml_df["rating"]) # and the total length diff --git a/lenskit/tests/test_dataset_matrix.py b/lenskit/tests/test_dataset_matrix.py index de7b458c7..71b7f8df5 100644 --- a/lenskit/tests/test_dataset_matrix.py +++ b/lenskit/tests/test_dataset_matrix.py @@ -19,40 +19,40 @@ def _check_user_offset_counts(ml_ds: Dataset, ml_ratings: pd.DataFrame, offsets: ArrayLike): - user_counts = ml_ratings["userId"].value_counts().reindex(ml_ds.users.index) + user_counts = ml_ratings["user"].value_counts().reindex(ml_ds.users.index) row_lens = np.diff(offsets) assert np.all(row_lens == user_counts) def _check_user_number_counts(ml_ds: Dataset, ml_ratings: pd.DataFrame, nums: ArrayLike): users, counts = np.unique(nums, return_counts=True) - user_counts = ml_ratings["userId"].value_counts().reindex(ml_ds.users.ids(users)) + user_counts = ml_ratings["user"].value_counts().reindex(ml_ds.users.ids(users)) assert np.all(counts == user_counts) def _check_item_number_counts(ml_ds: Dataset, ml_ratings: pd.DataFrame, nums: ArrayLike): items, counts = np.unique(nums, return_counts=True) - item_counts = ml_ratings["movieId"].value_counts().reindex(ml_ds.items.ids(items)) + item_counts = ml_ratings["item"].value_counts().reindex(ml_ds.items.ids(items)) assert np.all(counts == item_counts) def _check_user_ids(ml_ds: Dataset, ml_ratings: pd.DataFrame, nums: ArrayLike): - ml_ratings = ml_ratings.sort_values(["userId", "movieId"]) - assert np.all(ml_ds.users.ids(np.asarray(nums)) == ml_ratings["userId"]) + ml_ratings = ml_ratings.sort_values(["user", "item"]) + assert np.all(ml_ds.users.ids(np.asarray(nums)) == 
ml_ratings["user"]) def _check_item_ids(ml_ds: Dataset, ml_ratings: pd.DataFrame, nums: ArrayLike): - ml_ratings = ml_ratings.sort_values(["userId", "movieId"]) - assert np.all(ml_ds.items.ids(np.asarray(nums)) == ml_ratings["movieId"]) + ml_ratings = ml_ratings.sort_values(["user", "item"]) + assert np.all(ml_ds.items.ids(np.asarray(nums)) == ml_ratings["item"]) def _check_ratings(ml_ds: Dataset, ml_ratings: pd.DataFrame, rates: ArrayLike): - ml_ratings = ml_ratings.sort_values(["userId", "movieId"]) + ml_ratings = ml_ratings.sort_values(["user", "item"]) assert np.all(rates == ml_ratings["rating"]) def _check_timestamp(ml_ds: Dataset, ml_ratings: pd.DataFrame, ts: ArrayLike): - ml_ratings = ml_ratings.sort_values(["userId", "movieId"]) + ml_ratings = ml_ratings.sort_values(["user", "item"]) assert np.all(ts == ml_ratings["timestamp"]) @@ -68,8 +68,8 @@ def test_matrix_structure(ml_ratings: pd.DataFrame, ml_ds: Dataset): assert isinstance(log, CSRStructure) assert log.nnz == len(ml_ratings) - assert log.nrows == ml_ratings["userId"].nunique() - assert log.ncols == ml_ratings["movieId"].nunique() + assert log.nrows == ml_ratings["user"].nunique() + assert log.ncols == ml_ratings["item"].nunique() _check_user_offset_counts(ml_ds, ml_ratings, log.rowptrs) _check_item_number_counts(ml_ds, ml_ratings, log.colinds) @@ -148,7 +148,7 @@ def test_matrix_pandas_indicator(ml_ratings: pd.DataFrame, ml_ds: Dataset): def test_matrix_pandas_missing_rating(ml_ratings: pd.DataFrame): - ml_ds = from_interactions_df(ml_ratings[["userId", "movieId", "timestamp"]], item_col="movieId") + ml_ds = from_interactions_df(ml_ratings[["user", "item", "timestamp"]], item_col="item") log = ml_ds.interaction_matrix(format="pandas", field="rating") assert isinstance(log, pd.DataFrame) assert len(log) == len(ml_ratings) @@ -169,8 +169,8 @@ def test_matrix_scipy_coo(ml_ratings: pd.DataFrame, ml_ds: Dataset, generation): assert log.nnz == len(ml_ratings) nrows, ncols = cast(tuple[int, int], 
log.shape) - assert nrows == ml_ratings["userId"].nunique() - assert ncols == ml_ratings["movieId"].nunique() + assert nrows == ml_ratings["user"].nunique() + assert ncols == ml_ratings["item"].nunique() assert log.row.dtype == np.int32 assert log.col.dtype == np.int32 @@ -191,8 +191,8 @@ def test_matrix_scipy_csr(ml_ratings: pd.DataFrame, ml_ds: Dataset, generation): assert log.nnz == len(ml_ratings) nrows, ncols = cast(tuple[int, int], log.shape) - assert nrows == ml_ratings["userId"].nunique() - assert ncols == ml_ratings["movieId"].nunique() + assert nrows == ml_ratings["user"].nunique() + assert ncols == ml_ratings["item"].nunique() assert log.indptr.dtype == np.int32 assert log.indices.dtype == np.int32 @@ -209,8 +209,8 @@ def test_matrix_scipy_timestamp(ml_ratings: pd.DataFrame, ml_ds: Dataset, genera assert log.nnz == len(ml_ratings) nrows, ncols = cast(tuple[int, int], log.shape) - assert nrows == ml_ratings["userId"].nunique() - assert ncols == ml_ratings["movieId"].nunique() + assert nrows == ml_ratings["user"].nunique() + assert ncols == ml_ratings["item"].nunique() _check_user_offset_counts(ml_ds, ml_ratings, log.indptr) _check_item_number_counts(ml_ds, ml_ratings, log.indices) @@ -225,8 +225,8 @@ def test_matrix_scipy_indicator(ml_ratings: pd.DataFrame, ml_ds: Dataset, genera assert log.nnz == len(ml_ratings) nrows, ncols = cast(tuple[int, int], log.shape) - assert nrows == ml_ratings["userId"].nunique() - assert ncols == ml_ratings["movieId"].nunique() + assert nrows == ml_ratings["user"].nunique() + assert ncols == ml_ratings["item"].nunique() _check_user_offset_counts(ml_ds, ml_ratings, log.indptr) _check_item_number_counts(ml_ds, ml_ratings, log.indices) @@ -238,14 +238,14 @@ def test_matrix_scipy_indicator(ml_ratings: pd.DataFrame, ml_ds: Dataset, genera @mark.parametrize("generation", ["modern", "legacy"]) def test_matrix_scipy_missing_rating(ml_ratings: pd.DataFrame, generation): - ml_ds = from_interactions_df(ml_ratings[["userId", "movieId", 
"timestamp"]], item_col="movieId") + ml_ds = from_interactions_df(ml_ratings[["user", "item", "timestamp"]], item_col="item") log = ml_ds.interaction_matrix(format="scipy", field="rating", legacy=generation == "legacy") assert isinstance(log, sps.csr_array if generation == "modern" else sps.csr_matrix) assert log.nnz == len(ml_ratings) nrows, ncols = cast(tuple[int, int], log.shape) - assert nrows == ml_ratings["userId"].nunique() - assert ncols == ml_ratings["movieId"].nunique() + assert nrows == ml_ratings["user"].nunique() + assert ncols == ml_ratings["item"].nunique() _check_user_offset_counts(ml_ds, ml_ratings, log.indptr) _check_item_number_counts(ml_ds, ml_ratings, log.indices) @@ -260,8 +260,8 @@ def test_matrix_torch_csr(ml_ratings: pd.DataFrame, ml_ds: Dataset): assert log.values().shape == torch.Size([len(ml_ratings)]) nrows, ncols = log.shape - assert nrows == ml_ratings["userId"].nunique() - assert ncols == ml_ratings["movieId"].nunique() + assert nrows == ml_ratings["user"].nunique() + assert ncols == ml_ratings["item"].nunique() _check_user_offset_counts(ml_ds, ml_ratings, log.crow_indices()) _check_item_number_counts(ml_ds, ml_ratings, log.col_indices()) @@ -279,8 +279,8 @@ def test_matrix_torch_indicator(ml_ratings: pd.DataFrame, ml_ds: Dataset): assert log.values().shape == torch.Size([len(ml_ratings)]) nrows, ncols = log.shape - assert nrows == ml_ratings["userId"].nunique() - assert ncols == ml_ratings["movieId"].nunique() + assert nrows == ml_ratings["user"].nunique() + assert ncols == ml_ratings["item"].nunique() _check_user_offset_counts(ml_ds, ml_ratings, log.crow_indices()) _check_item_number_counts(ml_ds, ml_ratings, log.col_indices()) @@ -295,8 +295,8 @@ def test_matrix_torch_coo(ml_ratings: pd.DataFrame, ml_ds: Dataset): assert log.values().shape == torch.Size([len(ml_ratings)]) nrows, ncols = cast(tuple[int, int], log.shape) - assert nrows == ml_ratings["userId"].nunique() - assert ncols == ml_ratings["movieId"].nunique() + assert nrows 
== ml_ratings["user"].nunique() + assert ncols == ml_ratings["item"].nunique() _check_user_number_counts(ml_ds, ml_ratings, log.indices()[0, :]) _check_user_ids(ml_ds, ml_ratings, log.indices()[0, :]) @@ -306,15 +306,15 @@ def test_matrix_torch_coo(ml_ratings: pd.DataFrame, ml_ds: Dataset): def test_matrix_torch_missing_rating(ml_ratings: pd.DataFrame): - ml_ds = from_interactions_df(ml_ratings[["userId", "movieId", "timestamp"]], item_col="movieId") + ml_ds = from_interactions_df(ml_ratings[["user", "item", "timestamp"]], item_col="item") log = ml_ds.interaction_matrix(format="torch", field="rating") assert isinstance(log, torch.Tensor) assert log.is_sparse_csr assert log.values().shape == torch.Size([len(ml_ratings)]) nrows, ncols = cast(tuple[int, int], log.shape) - assert nrows == ml_ratings["userId"].nunique() - assert ncols == ml_ratings["movieId"].nunique() + assert nrows == ml_ratings["user"].nunique() + assert ncols == ml_ratings["item"].nunique() _check_user_offset_counts(ml_ds, ml_ratings, log.crow_indices()) _check_item_number_counts(ml_ds, ml_ratings, log.col_indices()) @@ -329,8 +329,8 @@ def test_matrix_torch_timestamp(ml_ratings: pd.DataFrame, ml_ds: Dataset): assert log.values().shape == torch.Size([len(ml_ratings)]) nrows, ncols = log.shape - assert nrows == ml_ratings["userId"].nunique() - assert ncols == ml_ratings["movieId"].nunique() + assert nrows == ml_ratings["user"].nunique() + assert ncols == ml_ratings["item"].nunique() _check_user_offset_counts(ml_ds, ml_ratings, log.crow_indices()) _check_item_number_counts(ml_ds, ml_ratings, log.col_indices()) diff --git a/lenskit/tests/test_dataset_stats.py b/lenskit/tests/test_dataset_stats.py index fad32704d..321fec6b9 100644 --- a/lenskit/tests/test_dataset_stats.py +++ b/lenskit/tests/test_dataset_stats.py @@ -18,17 +18,15 @@ def test_item_stats(ml_ratings: pd.DataFrame, ml_ds: Dataset): assert len(stats) == ml_ds.item_count assert np.all(stats.index == ml_ds.items.index) - assert 
np.all(stats["count"] == ml_ratings["movieId"].value_counts().reindex(ml_ds.items)) - assert np.all(stats["user_count"] == ml_ratings["movieId"].value_counts().reindex(ml_ds.items)) - assert np.all( - stats["rating_count"] == ml_ratings["movieId"].value_counts().reindex(ml_ds.items) - ) + assert np.all(stats["count"] == ml_ratings["item"].value_counts().reindex(ml_ds.items)) + assert np.all(stats["user_count"] == ml_ratings["item"].value_counts().reindex(ml_ds.items)) + assert np.all(stats["rating_count"] == ml_ratings["item"].value_counts().reindex(ml_ds.items)) assert stats["mean_rating"].values == approx( - ml_ratings.groupby("movieId")["rating"].mean().reindex(ml_ds.items).values + ml_ratings.groupby("item")["rating"].mean().reindex(ml_ds.items).values ) - ts = ml_ratings.groupby("movieId")["timestamp"].min().reindex(ml_ds.items) + ts = ml_ratings.groupby("item")["timestamp"].min().reindex(ml_ds.items) bad = stats["first_time"] != ts nbad = np.sum(bad) if nbad: @@ -44,16 +42,16 @@ def test_user_stats(ml_ratings: pd.DataFrame, ml_ds: Dataset): assert len(stats) == ml_ds.user_count assert np.all(stats.index == ml_ds.users.index) - assert np.all(stats["count"] == ml_ratings["userId"].value_counts().reindex(ml_ds.users)) - assert np.all(stats["user_count"] == ml_ratings["userId"].value_counts().reindex(ml_ds.users)) - assert np.all(stats["rating_count"] == ml_ratings["userId"].value_counts().reindex(ml_ds.users)) + assert np.all(stats["count"] == ml_ratings["user"].value_counts().reindex(ml_ds.users)) + assert np.all(stats["user_count"] == ml_ratings["user"].value_counts().reindex(ml_ds.users)) + assert np.all(stats["rating_count"] == ml_ratings["user"].value_counts().reindex(ml_ds.users)) assert stats["mean_rating"].values == approx( - ml_ratings.groupby("userId")["rating"].mean().reindex(ml_ds.users).values + ml_ratings.groupby("user")["rating"].mean().reindex(ml_ds.users).values ) assert np.all( - stats["first_time"] == 
ml_ratings.groupby("userId")["timestamp"].min().reindex(ml_ds.users) + stats["first_time"] == ml_ratings.groupby("user")["timestamp"].min().reindex(ml_ds.users) ) assert np.all( - stats["last_time"] == ml_ratings.groupby("userId")["timestamp"].max().reindex(ml_ds.users) + stats["last_time"] == ml_ratings.groupby("user")["timestamp"].max().reindex(ml_ds.users) ) diff --git a/lenskit/tests/test_fallback.py b/lenskit/tests/test_fallback.py index 5de40de33..2a50edbd9 100644 --- a/lenskit/tests/test_fallback.py +++ b/lenskit/tests/test_fallback.py @@ -11,6 +11,7 @@ from pytest import approx +from lenskit.data.dataset import Dataset import lenskit.util.test as lktu from lenskit import util as lku from lenskit.algorithms import basic @@ -85,8 +86,10 @@ def test_fallback_predict(ml_ratings, ml_ds): def exp_val(user, item): v = bias.mean_ if user is not None: + assert bias.user_offsets_ is not None v += bias.user_offsets_.loc[user] if item is not None: + assert bias.item_offsets_ is not None v += bias.item_offsets_.loc[item] return v @@ -114,7 +117,7 @@ def exp_val(user, item): assert preds.loc[-23081] == approx(exp_val(10, None)) -def test_fallback_save_load(tmp_path, ml_ds): +def test_fallback_save_load(tmp_path, ml_ratings: pd.DataFrame, ml_ds: Dataset): original = basic.Fallback(basic.Memorized(simple_df), Bias()) original.fit(ml_ds) @@ -125,7 +128,7 @@ def test_fallback_save_load(tmp_path, ml_ds): algo = pickle.load(pf) bias = algo.algorithms[1] - assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean()) + assert bias.mean_ == approx(ml_ratings.rating.mean()) def exp_val(user, item): v = bias.mean_ diff --git a/lenskit/tests/test_knn_item_item.py b/lenskit/tests/test_knn_item_item.py index d12c6cff0..e69ca3d16 100644 --- a/lenskit/tests/test_knn_item_item.py +++ b/lenskit/tests/test_knn_item_item.py @@ -56,9 +56,9 @@ @fixture(scope="module") def ml_subset(ml_ratings): "Fixture that returns a subset of the MovieLens database." 
- icounts = ml_ratings.groupby("movieId").rating.count() + icounts = ml_ratings.groupby("item").rating.count() top = icounts.nlargest(500) - top_rates = ml_ratings[ml_ratings["movieId"].isin(top.index)] + top_rates = ml_ratings[ml_ratings["item"].isin(top.index)] _log.info("top 500 items yield %d of %d ratings", len(top_rates), len(ml_ratings)) return top_rates @@ -200,13 +200,11 @@ def test_ii_warns_wa_with_no_use_ratings(): @lktu.wantjit @mark.slow -@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") -def test_ii_train_ml100k(tmp_path): +def test_ii_train_ml100k(tmp_path, ml_100k): "Test an unbounded model on ML-100K" - ratings = lktu.ml100k.ratings algo = knn.ItemItem(30) _log.info("training model") - algo.fit(from_interactions_df(ratings)) + algo.fit(from_interactions_df(ml_100k)) _log.info("testing model") @@ -218,7 +216,7 @@ def test_ii_train_ml100k(tmp_path): assert algo.item_counts_.sum() == len(algo.sim_matrix_.values()) - means = ratings.groupby("item").rating.mean() + means = ml_100k.groupby("item").rating.mean() assert means[algo.items_.ids()].values == approx(algo.item_means_) # save @@ -258,18 +256,18 @@ def test_ii_large_models(rng, ml_ratings, ml_ds): # a little tolerance assert algo_lim.sim_matrix_.values().max() <= 1 - means = ml_ratings.groupby("movieId").rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo_lim.items_.ids()].values == approx(algo_lim.item_means_) assert all(np.logical_not(np.isnan(algo_ub.sim_matrix_.values()))) assert algo_ub.sim_matrix_.values().min() > 0 assert algo_ub.sim_matrix_.values().max() <= 1 - means = ml_ratings.groupby("movieId").rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo_ub.items_.ids()].values == approx(algo_ub.item_means_) mc_rates = ( - ml_ratings.set_index("movieId") + ml_ratings.set_index("item") .join(pd.DataFrame({"item_mean": means})) .assign(rating=lambda df: df.rating - df.item_mean) ) @@ -297,7 +295,7 @@ def 
test_ii_large_models(rng, ml_ratings, ml_ds): ipos = algo_ub.items_.number(i) _log.debug("checking item %d at position %d", i, ipos) assert ipos == algo_lim.items_.number(i) - irates = mc_rates.loc[[i], :].set_index("userId").rating + irates = mc_rates.loc[[i], :].set_index("user").rating ub_row = mat_ub[ipos] b_row = mat_lim[ipos] @@ -322,7 +320,7 @@ def test_ii_large_models(rng, ml_ratings, ml_ds): _log.debug("checking equal similarities") for n in rng.choice(ub_cols, min(10, len(ub_cols))): n_id = algo_ub.items_.id(n) - n_rates = mc_rates.loc[n_id, :].set_index("userId").rating + n_rates = mc_rates.loc[n_id, :].set_index("user").rating ir, nr = irates.align(n_rates, fill_value=0) cor = ir.corr(nr) assert mat_ub[ipos, n].item() == approx(cor, abs=1.0e-6) @@ -371,10 +369,10 @@ def test_ii_implicit_large(rng, ml_ratings): algo = knn.ItemItem(NBRS, feedback="implicit") _log.info("agg: %s", algo.aggregate) algo = Recommender.adapt(algo) - algo.fit(from_interactions_df(ml_ratings[["userId", "movieId"]], item_col="movieId")) + algo.fit(from_interactions_df(ml_ratings[["user", "item"]], item_col="item")) assert isinstance(algo, TopN) - users = rng.choice(ml_ratings["userId"].unique(), NUSERS) + users = rng.choice(ml_ratings["user"].unique(), NUSERS) items: Vocabulary[EntityId] = algo.predictor.items_ mat: torch.Tensor = algo.predictor.sim_matrix_.to_dense() @@ -383,9 +381,9 @@ def test_ii_implicit_large(rng, ml_ratings): recs = algo.recommend(user, NRECS) _log.info("user %s recs\n%s", user, recs) assert len(recs) == NRECS - urates = ml_ratings[ml_ratings["userId"] == user] + urates = ml_ratings[ml_ratings["user"] == user] - smat = mat[torch.from_numpy(items.numbers(urates["movieId"].values)), :] + smat = mat[torch.from_numpy(items.numbers(urates["item"].values)), :] for row in recs.itertuples(): col = smat[:, items.number(row.item)] top, _is = torch.topk(col, NBRS) @@ -407,7 +405,7 @@ def test_ii_save_load(tmp_path, ml_ratings, ml_subset): "Save and load a model" 
original = knn.ItemItem(30, save_nbrs=500) _log.info("building model") - original.fit(from_interactions_df(ml_subset, item_col="movieId")) + original.fit(from_interactions_df(ml_subset, item_col="item")) fn = tmp_path / "ii.mod" _log.info("saving model to %s", fn) @@ -435,7 +433,7 @@ def test_ii_save_load(tmp_path, ml_ratings, ml_subset): o_mat = original.sim_matrix_ assert all(r_mat.crow_indices() == o_mat.crow_indices()) - means = ml_ratings.groupby("movieId").rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo.items_.ids()].values == approx(original.item_means_) @@ -444,7 +442,7 @@ def test_ii_implicit_save_load(tmp_path, ml_subset): "Save and load a model" original = knn.ItemItem(30, save_nbrs=500, center=False, aggregate="sum") _log.info("building model") - original.fit(from_interactions_df(ml_subset.loc[:, ["userId", "movieId"]], item_col="movieId")) + original.fit(from_interactions_df(ml_subset.loc[:, ["user", "item"]], item_col="item")) fn = tmp_path / "ii.mod" _log.info("saving model to %s", fn) @@ -475,9 +473,9 @@ def test_ii_implicit_save_load(tmp_path, ml_subset): @mark.slow def test_ii_old_implicit(ml_ratings): algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") - data = ml_ratings.loc[:, ["userId", "movieId"]] + data = ml_ratings.loc[:, ["user", "item"]] - algo.fit(from_interactions_df(data, item_col="movieId")) + algo.fit(from_interactions_df(data, item_col="item")) assert algo.item_counts_.sum() == algo.sim_matrix_.values().shape[0] assert all(algo.sim_matrix_.values() > 0) assert all(algo.item_counts_ <= 100) @@ -490,7 +488,7 @@ def test_ii_old_implicit(ml_ratings): @mark.slow def test_ii_no_ratings(ml_ratings, ml_ds): a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") - a1.fit(from_interactions_df(ml_ratings.loc[:, ["userId", "movieId"]], item_col="movieId")) + a1.fit(from_interactions_df(ml_ratings.loc[:, ["user", "item"]], item_col="item")) algo = knn.ItemItem(20, save_nbrs=100, 
feedback="implicit") @@ -508,15 +506,12 @@ def test_ii_no_ratings(ml_ratings, ml_ds): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") -def test_ii_batch_accuracy(): +def test_ii_batch_accuracy(ml_100k): import lenskit.crossfold as xf import lenskit.metrics.predict as pm from lenskit import batch from lenskit.algorithms import basic, bias - ratings = lktu.ml100k.ratings - ii_algo = knn.ItemItem(30) algo = basic.Fallback(ii_algo, bias.Bias()) @@ -527,7 +522,7 @@ def eval(train, test): return batch.predict(algo, test, n_jobs=1) preds = pd.concat( - (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) + (eval(train, test) for (train, test) in xf.partition_users(ml_100k, 5, xf.SampleFrac(0.2))) ) mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.70, abs=0.025) @@ -583,14 +578,11 @@ def test_ii_known_preds(ml_ds): @lktu.wantjit @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason="ML100K not available") @mark.parametrize("ncpus", [1, 2]) -def test_ii_batch_recommend(ncpus): +def test_ii_batch_recommend(ml_100k, ncpus): import lenskit.crossfold as xf from lenskit import topn - ratings = lktu.ml100k.ratings - def eval(train, test): _log.info("running training") algo = knn.ItemItem(30) @@ -602,7 +594,7 @@ def eval(train, test): test_frames = [] recs = [] - for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)): + for train, test in xf.partition_users(ml_100k, 5, xf.SampleFrac(0.2)): test_frames.append(test) recs.append(eval(train, test)) diff --git a/lenskit/tests/test_knn_user_user.py b/lenskit/tests/test_knn_user_user.py index 5c6db519b..6ce8932de 100644 --- a/lenskit/tests/test_knn_user_user.py +++ b/lenskit/tests/test_knn_user_user.py @@ -15,7 +15,8 @@ from pytest import approx, fail, mark import lenskit.algorithms.knn.user as knn -from lenskit.data.dataset import from_interactions_df +from lenskit.algorithms.ranking import TopN +from 
lenskit.data.dataset import Dataset, from_interactions_df import lenskit.util.test as lktu from lenskit.algorithms import Recommender from lenskit.util import clone @@ -69,20 +70,20 @@ def test_uu_train(ml_ratings, ml_ds): # it should have computed correct means u_stats = ml_ds.user_stats() mlmeans = pd.Series(algo.user_means_.numpy(), index=algo.users_.ids(), name="mean") - mlmeans.index.name = "userId" + mlmeans.index.name = "user" umeans, mlmeans = u_stats["mean_rating"].align(mlmeans) assert mlmeans.values == approx(umeans.values) # we should be able to reconstruct rating values - uir = ml_ratings.set_index(["userId", "movieId"]).rating + uir = ml_ratings.set_index(["user", "item"]).rating rates = algo.user_ratings_.to_sparse_coo() ui_rbdf = pd.DataFrame( { - "userId": algo.users_.ids(rates.indices()[0]), - "movieId": algo.items_.ids(rates.indices()[1]), + "user": algo.users_.ids(rates.indices()[0]), + "item": algo.items_.ids(rates.indices()[1]), "nrating": rates.values(), } - ).set_index(["userId", "movieId"]) + ).set_index(["user", "item"]) ui_rbdf = ui_rbdf.join(mlmeans) ui_rbdf["rating"] = ui_rbdf["nrating"] + ui_rbdf["mean"] ui_rbdf["orig_rating"] = uir @@ -96,6 +97,7 @@ def test_uu_train_adapt(ml_ds): uu = knn.UserUser(30) uu = Recommender.adapt(uu) ret = uu.fit(ml_ds) + assert isinstance(uu, TopN) assert ret is uu assert isinstance(uu.predictor, knn.UserUser) @@ -132,10 +134,10 @@ def test_uu_predict_too_few_blended(ml_ds): def test_uu_predict_live_ratings(ml_ratings): algo = knn.UserUser(30, min_nbrs=2) - no4 = ml_ratings[ml_ratings.userId != 4] - algo.fit(from_interactions_df(no4, item_col="movieId")) + no4 = ml_ratings[ml_ratings.user != 4] + algo.fit(from_interactions_df(no4, item_col="item")) - ratings = ml_ratings[ml_ratings.userId == 4].set_index("movieId").rating + ratings = ml_ratings[ml_ratings.user == 4].set_index("item").rating preds = algo.predict_for_user(20381, [1016, 2091], ratings) assert len(preds) == 2 @@ -162,20 +164,20 @@ def 
test_uu_save_load(tmp_path, ml_ratings, ml_ds): # it should have computed correct means umeans = ml_ds.user_stats()["mean_rating"] mlmeans = pd.Series(algo.user_means_, index=algo.users_, name="mean") - mlmeans.index.name = "userId" + mlmeans.index.name = "user" umeans, mlmeans = umeans.align(mlmeans) assert mlmeans.values == approx(umeans.values) # we should be able to reconstruct rating values - uir = ml_ratings.set_index(["userId", "movieId"]).rating + uir = ml_ratings.set_index(["user", "item"]).rating rates = algo.user_ratings_.to_sparse_coo() ui_rbdf = pd.DataFrame( { - "userId": algo.users_.ids(rates.indices()[0]), - "movieId": algo.items_.ids(rates.indices()[1]), + "user": algo.users_.ids(rates.indices()[0]), + "item": algo.items_.ids(rates.indices()[1]), "nrating": rates.values(), } - ).set_index(["userId", "movieId"]) + ).set_index(["user", "item"]) ui_rbdf = ui_rbdf.join(mlmeans) ui_rbdf["rating"] = ui_rbdf["nrating"] + ui_rbdf["mean"] ui_rbdf["orig_rating"] = uir @@ -200,9 +202,9 @@ def test_uu_predict_unknown_empty(ml_ds): def test_uu_implicit(ml_ratings): "Train and use user-user on an implicit data set." algo = knn.UserUser(20, feedback="implicit") - data = ml_ratings.loc[:, ["userId", "movieId"]] + data = ml_ratings.loc[:, ["user", "item"]] - algo.fit(from_interactions_df(data, item_col="movieId")) + algo.fit(from_interactions_df(data, item_col="item")) assert algo.user_means_ is None mat = algo.user_vectors_ @@ -218,9 +220,9 @@ def test_uu_implicit(ml_ratings): def test_uu_save_load_implicit(tmp_path, ml_ratings): "Save and load user-user on an implicit data set." 
orig = knn.UserUser(20, feedback="implicit") - data = ml_ratings.loc[:, ["userId", "movieId"]] + data = ml_ratings.loc[:, ["user", "item"]] - orig.fit(from_interactions_df(data, item_col="movieId")) + orig.fit(from_interactions_df(data, item_col="item")) ser = pickle.dumps(orig) algo = pickle.loads(ser) @@ -231,12 +233,12 @@ def test_uu_save_load_implicit(tmp_path, ml_ratings): @mark.slow -def test_uu_known_preds(): +def test_uu_known_preds(ml_ds: Dataset): from lenskit import batch algo = knn.UserUser(30, min_sim=1.0e-6) _log.info("training %s on ml data", algo) - algo.fit(from_interactions_df(lktu.ml_test.ratings)) + algo.fit(ml_ds) dir = Path(__file__).parent pred_file = dir / "user-user-preds.csv" @@ -275,18 +277,15 @@ def __batch_eval(job): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") -def test_uu_batch_accuracy(): +def test_uu_batch_accuracy(ml_100k: pd.DataFrame): import lenskit.crossfold as xf import lenskit.metrics.predict as pm from lenskit.algorithms import basic, bias - ratings = lktu.ml100k.ratings - uu_algo = knn.UserUser(30) algo = basic.Fallback(uu_algo, bias.Bias()) - folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) + folds = xf.partition_users(ml_100k, 5, xf.SampleFrac(0.2)) preds = [__batch_eval((algo, train, test)) for (train, test) in folds] preds = pd.concat(preds) mae = pm.mae(preds.prediction, preds.rating) @@ -298,16 +297,13 @@ def test_uu_batch_accuracy(): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") -def test_uu_implicit_batch_accuracy(): +def test_uu_implicit_batch_accuracy(ml_100k: pd.DataFrame): import lenskit.crossfold as xf from lenskit import batch, topn - ratings = lktu.ml100k.ratings - algo = knn.UserUser(30, center=False, aggregate="sum") - folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) + folds = list(xf.partition_users(ml_100k, 5, xf.SampleFrac(0.2))) all_test = pd.concat(f.test for f in folds) 
rec_lists = [] diff --git a/lenskit/tests/test_ml20m.py b/lenskit/tests/test_ml20m.py index 4ed03d1cd..e0703f42a 100644 --- a/lenskit/tests/test_ml20m.py +++ b/lenskit/tests/test_ml20m.py @@ -17,21 +17,17 @@ from lenskit.algorithms import Recommender from lenskit.algorithms.basic import PopScore from lenskit.data.dataset import Dataset, from_interactions_df -from lenskit.datasets import MovieLens +from lenskit.data.movielens import load_movielens _log = logging.getLogger(__name__) _ml_path = Path("data/ml-20m") -if _ml_path.exists(): - _ml_20m = MovieLens(_ml_path) -else: - _ml_20m = None -@pytest.fixture +@pytest.fixture(scope="module") def ml20m(): - if _ml_20m: - return from_interactions_df(_ml_20m.ratings) + if _ml_path.exists(): + return load_movielens(_ml_path) else: pytest.skip("ML-20M not available") diff --git a/lenskit/tests/test_predict_metrics.py b/lenskit/tests/test_predict_metrics.py index 805e136b5..18ad019d9 100644 --- a/lenskit/tests/test_predict_metrics.py +++ b/lenskit/tests/test_predict_metrics.py @@ -166,13 +166,11 @@ def test_mae_series_two(): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") -def test_batch_rmse(): +def test_batch_rmse(ml_100k): import lenskit.algorithms.bias as bs import lenskit.batch as batch import lenskit.crossfold as xf - ratings = lktu.ml100k.ratings algo = bs.Bias(damping=5) def eval(train, test): @@ -181,13 +179,13 @@ def eval(train, test): return preds.set_index(["user", "item"]) results = pd.concat( - (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleN(5))) + (eval(train, test) for (train, test) in xf.partition_users(ml_100k, 5, xf.SampleN(5))) ) user_rmse = results.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) # we should have all users - users = ratings.user.unique() + users = ml_100k.user.unique() assert len(user_rmse) == len(users) missing = np.setdiff1d(users, user_rmse.index) assert len(missing) == 0 @@ -200,12 
+198,12 @@ def eval(train, test): @mark.slow -def test_global_metric(): +def test_global_metric(ml_ratings: pd.DataFrame): import lenskit.batch as batch import lenskit.crossfold as xf from lenskit.algorithms.bias import Bias - train, test = next(xf.sample_users(lktu.ml_test.ratings, 1, 200, xf.SampleFrac(0.5))) + train, test = next(xf.sample_users(ml_ratings, 1, 200, xf.SampleFrac(0.5))) algo = Bias() algo.fit(from_interactions_df(train)) @@ -219,12 +217,12 @@ def test_global_metric(): @mark.slow -def test_user_metric(): +def test_user_metric(ml_ratings: pd.DataFrame): import lenskit.batch as batch import lenskit.crossfold as xf from lenskit.algorithms.bias import Bias - train, test = next(xf.sample_users(lktu.ml_test.ratings, 1, 200, xf.SampleFrac(0.5))) + train, test = next(xf.sample_users(ml_ratings, 1, 200, xf.SampleFrac(0.5))) algo = Bias() algo.fit(from_interactions_df(train)) diff --git a/lenskit/tests/test_svd.py b/lenskit/tests/test_svd.py index 9e69147ba..b183565f5 100644 --- a/lenskit/tests/test_svd.py +++ b/lenskit/tests/test_svd.py @@ -12,7 +12,7 @@ from pytest import approx, mark -from lenskit.data.dataset import from_interactions_df +from lenskit.data.dataset import Dataset, from_interactions_df import lenskit.util.test as lktu from lenskit.algorithms import svd from lenskit.util import clone @@ -86,11 +86,9 @@ def test_svd_clone(): @need_skl @mark.slow -def test_svd_save_load(): - ratings = lktu.ml_test.ratings - +def test_svd_save_load(ml_ds: Dataset): original = svd.BiasedSVD(20) - original.fit(from_interactions_df(ratings)) + original.fit(ml_ds) mod = pickle.dumps(original) _log.info("serialized to %d bytes", len(mod)) @@ -105,15 +103,12 @@ def test_svd_save_load(): @need_skl @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") -def test_svd_batch_accuracy(): +def test_svd_batch_accuracy(ml_100k: pd.DataFrame): import lenskit.crossfold as xf import lenskit.metrics.predict as pm from lenskit import batch 
from lenskit.algorithms import basic, bias - ratings = lktu.ml100k.ratings - svd_algo = svd.BiasedSVD(25, damping=10) algo = basic.Fallback(svd_algo, bias.Bias(damping=10)) @@ -123,7 +118,7 @@ def eval(train, test): _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) - folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) + folds = xf.partition_users(ml_100k, 5, xf.SampleFrac(0.2)) preds = pd.concat(eval(train, test) for (train, test) in folds) mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.74, abs=0.025) diff --git a/lenskit/tests/test_topn_analysis.py b/lenskit/tests/test_topn_analysis.py index ee951a8bf..459a6320c 100644 --- a/lenskit/tests/test_topn_analysis.py +++ b/lenskit/tests/test_topn_analysis.py @@ -19,7 +19,7 @@ from lenskit.algorithms.knn.user import UserUser from lenskit.data.dataset import from_interactions_df from lenskit.metrics.topn import _dcg, precision, recall -from lenskit.util.test import demo_recs, ml_test # noqa: F401 +from lenskit.util.test import demo_recs # noqa: F401 _log = logging.getLogger(__name__) @@ -205,93 +205,6 @@ def test_java_equiv(): assert umm["err"].values == approx(0, abs=1.0e-6) -@mark.skip("disabled for user-user") -@mark.slow -def test_fill_users(): - rla = topn.RecListAnalysis() - rla.add_metric(topn.precision) - rla.add_metric(topn.recall) - - algo = UserUser(20, min_nbrs=10) - algo = Recommender.adapt(algo) - - splits = xf.sample_users(ml_test.ratings, 1, 50, xf.SampleN(5)) - train, test = next(splits) - algo.fit(from_interactions_df(train)) - - rec_users = test["user"].sample(50).unique() - assert len(rec_users) < 50 - recs = batch.recommend(algo, rec_users, 25) - - scores = rla.compute(recs, test, include_missing=True) - assert len(scores) == test["user"].nunique() - assert scores["recall"].notna().sum() == len(rec_users) - assert all(scores["ntruth"] == 5) - - mscores = rla.compute(recs, test) - assert len(mscores) < len(scores) - - recall = 
scores.loc[scores["recall"].notna(), "recall"].copy() - recall, mrecall = recall.align(mscores["recall"]) - assert all(recall == mrecall) - - -@mark.skip("disabled for user-user") -@mark.slow -def test_adv_fill_users(): - rla = topn.RecListAnalysis() - rla.add_metric(topn.precision) - rla.add_metric(topn.recall) - - a_uu = UserUser(30, min_nbrs=10) - a_uu = Recommender.adapt(a_uu) - a_ii = ItemItem(20, min_nbrs=4) - a_ii = Recommender.adapt(a_ii) - - splits = xf.sample_users(ml_test.ratings, 2, 50, xf.SampleN(5)) - all_recs = {} - all_test = {} - for i, (train, test) in enumerate(splits): - a_uu.fit(from_interactions_df(train)) - rec_users = test["user"].sample(50).unique() - all_recs[(i + 1, "UU")] = batch.recommend(a_uu, rec_users, 25) - - a_ii.fit(from_interactions_df(train)) - rec_users = test["user"].sample(50).unique() - all_recs[(i + 1, "II")] = batch.recommend(a_ii, rec_users, 25) - all_test[i + 1] = test - - recs = pd.concat(all_recs, names=["part", "algo"]) - recs.reset_index(["part", "algo"], inplace=True) - recs.reset_index(drop=True, inplace=True) - - test = pd.concat(all_test, names=["part"]) - test.reset_index(["part"], inplace=True) - test.reset_index(drop=True, inplace=True) - - scores = rla.compute(recs, test, include_missing=True) - inames = scores.index.names - scores.sort_index(inplace=True) - assert len(scores) == 50 * 4 - assert all(scores["ntruth"] == 5) - assert scores["recall"].isna().sum() > 0 - _log.info("scores:\n%s", scores) - - ucounts = scores.reset_index().groupby("algo")["user"].agg(["count", "nunique"]) - assert all(ucounts["count"] == 100) - assert all(ucounts["nunique"] == 100) - - mscores = rla.compute(recs, test) - mscores = mscores.reset_index().set_index(inames) - mscores.sort_index(inplace=True) - assert len(mscores) < len(scores) - _log.info("mscores:\n%s", mscores) - - recall = scores.loc[scores["recall"].notna(), "recall"].copy() - recall, mrecall = recall.align(mscores["recall"]) - assert all(recall == mrecall) - - 
@mark.parametrize("drop_rating", [False, True]) def test_pr_bulk_match(demo_recs, drop_rating): "bulk and normal match" diff --git a/lenskit/tests/test_topn_recs.py b/lenskit/tests/test_topn_recs.py index bbb922d9b..1e00b0d61 100644 --- a/lenskit/tests/test_topn_recs.py +++ b/lenskit/tests/test_topn_recs.py @@ -9,7 +9,7 @@ from pytest import approx -from lenskit.data.dataset import from_interactions_df +from lenskit.data.dataset import Dataset, from_interactions_df import lenskit.util.test as lktu from lenskit.algorithms import basic, bias @@ -47,14 +47,13 @@ def test_topn_config(): assert rs.startswith("TopN/") -def test_topn_big(): - ratings = lktu.ml_test.ratings - users = ratings.user.unique() - items = ratings.item.unique() - user_items = ratings.set_index("user").item +def test_topn_big(ml_ds: Dataset): + users = ml_ds.users.ids() + items = ml_ds.items.ids() + user_items = ml_ds.interaction_matrix("pandas", original_ids=True).set_index("user_id").item_id algo = basic.TopN(bias.Bias()) - a2 = algo.fit(from_interactions_df(ratings)) + a2 = algo.fit(ml_ds) assert a2 is algo # test 100 random users diff --git a/lkdev/workflows/test.py b/lkdev/workflows/test.py index 78ab152c7..bd5cc3f1b 100644 --- a/lkdev/workflows/test.py +++ b/lkdev/workflows/test.py @@ -211,7 +211,7 @@ def steps_mldata(options: JobOptions, datasets: list[str]) -> list[GHStep]: data !data/*.zip """), - "key": f"test-mldata-000-{ds_hash}", + "key": f"test-mldata-001-{ds_hash}", }, }, { diff --git a/utils/dump-iknn.py b/utils/dump-iknn.py index 3b5f22213..0435b94a5 100644 --- a/utils/dump-iknn.py +++ b/utils/dump-iknn.py @@ -26,7 +26,7 @@ from docopt import docopt from lenskit.algorithms.knn.item import ItemItem -from lenskit.datasets import MovieLens +from lenskit.data import load_movielens _log = logging.getLogger("dump-iknn") @@ -35,7 +35,7 @@ def main(args): logging.basicConfig(stream=sys.stderr, level=logging.INFO) data = args["--dataset"] _log.info("loading data %s", data) - ml = 
MovieLens(f"data/{data}") + ml = load_movielens(f"data/{data}") ii_args = {} if args["-n"]: @@ -47,11 +47,11 @@ def main(args): algo = ItemItem(20, **ii_args) _log.info("training algorithm") - algo.fit(ml.ratings) + algo.fit(ml) i_outf = args["--item-output"] _log.info("saving items to %s", i_outf) - items = algo.item_index_ + items = ml.items.ids() stats = pd.DataFrame( {"mean": algo.item_means_.numpy(), "nnbrs": algo.item_counts_.numpy()}, index=items ) diff --git a/utils/recommend.py b/utils/recommend.py index 0bdb17c4f..abec6c7e5 100755 --- a/utils/recommend.py +++ b/utils/recommend.py @@ -33,7 +33,7 @@ from docopt import docopt from lenskit import batch -from lenskit.datasets import MovieLens +from lenskit.data import load_movielens _log = logging.getLogger("test-algo") @@ -44,7 +44,7 @@ def main(args): data = args["--dataset"] _log.info("loading data %s", data) - ml = MovieLens(f"data/{data}") + ml = load_movielens(f"data/{data}") _log.info("reading model from %s", args["MODEL"]) with open(args["MODEL"], "rb") as f: @@ -55,7 +55,7 @@ def main(args): if args["--random-users"]: n = int(args["--random-users"]) _log.info("selecting %d random users", n) - users = rng.choice(ml.ratings["user"].unique(), n) + users = rng.choice(ml.users.ids(), n) else: _log.info("using %d specified users", len(args["USER"])) users = [int(u) for u in args["USER"]] diff --git a/utils/train-model.py b/utils/train-model.py index 8b7cfe06d..1b2e4d49b 100755 --- a/utils/train-model.py +++ b/utils/train-model.py @@ -26,7 +26,7 @@ from lenskit.algorithms import Recommender from lenskit.algorithms.knn.item import ItemItem -from lenskit.datasets import MovieLens +from lenskit.data import load_movielens _log = logging.getLogger("train-model") @@ -35,7 +35,7 @@ def main(args): logging.basicConfig(stream=sys.stderr, level=logging.INFO) data = args["--dataset"] _log.info("loading data %s", data) - ml = MovieLens(f"data/{data}") + ml = load_movielens(f"data/{data}") if args["--item-item"]: algo = 
ItemItem(20) @@ -45,7 +45,7 @@ def main(args): algo = Recommender.adapt(algo) _log.info("training algorithm") - algo.fit(ml.ratings) + algo.fit(ml) _log.info("training complete") file = args["FILE"]