
Commit

explain extra padding token
rasbt committed Jul 17, 2024
1 parent 070a69f commit be14f06
Showing 1 changed file with 6 additions and 3 deletions.
ch07/01_main-chapter-code/ch07.ipynb (9 changes: 6 additions & 3 deletions)
@@ -618,6 +618,8 @@
 " device=\"cpu\"\n",
 "):\n",
 " # Find the longest sequence in the batch\n",
+" # and increase the max length by +1, which will add one extra\n",
+" # padding token below\n",
 " batch_max_length = max(len(item)+1 for item in batch)\n",
 "\n",
 " # Pad and prepare inputs\n",
@@ -627,13 +629,14 @@
 " new_item = item.copy()\n",
 " # Add an <|endoftext|> token\n",
 " new_item += [pad_token_id]\n",
-" # Pad sequences to max_length\n",
-" # this always adds at least 1 additional padding token\n",
+" # Pad sequences to batch_max_length\n",
 " padded = (\n",
 " new_item + [pad_token_id] * \n",
 " (batch_max_length - len(new_item))\n",
 " )\n",
-" # We remove this extra padded token again here\n",
+" # Via padded[:-1], we remove the extra padding token\n",
+" # that was added via the +1 in batch_max_length\n",
+" # (the extra padding token will become relevant in later code)\n",
 " inputs = torch.tensor(padded[:-1])\n",
 " inputs_lst.append(inputs)\n",
 "\n",
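For context, the collate logic that this commit documents can be sketched as a minimal, runnable draft. The function name `custom_collate_draft` and the example token-ID batch are illustrative, not taken from the commit; only the inputs side shown in the diff is reproduced here:

```python
import torch


def custom_collate_draft(batch, pad_token_id=50256, device="cpu"):
    # Find the longest sequence in the batch and increase the max
    # length by +1, which will add one extra padding token below
    batch_max_length = max(len(item) + 1 for item in batch)

    inputs_lst = []
    for item in batch:
        new_item = item.copy()
        # Add an <|endoftext|> token
        new_item += [pad_token_id]
        # Pad sequences to batch_max_length
        padded = (
            new_item + [pad_token_id] *
            (batch_max_length - len(new_item))
        )
        # Via padded[:-1], remove the extra padding token that was
        # added via the +1 in batch_max_length; the extra token
        # becomes relevant when building shifted targets later
        inputs = torch.tensor(padded[:-1])
        inputs_lst.append(inputs)

    return torch.stack(inputs_lst).to(device)


# Illustrative batch of token-ID lists
batch = [[1, 2, 3], [4, 5]]
print(custom_collate_draft(batch))
# tensor([[    1,     2,     3],
#         [    4,     5, 50256]])
```

Note how every sequence first receives one `<|endoftext|>` token (ID 50256 in the GPT-2 vocabulary) and is then padded to the common length; the trailing slice `padded[:-1]` drops the one surplus padding token that the `+1` guaranteed. Presumably, the full chapter version later uses the analogous slice `padded[1:]` to construct next-token targets shifted by one position, which is why the extra token is appended in the first place.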
