From be14f0644245bacc697d12c1ce29d17edaa2bd84 Mon Sep 17 00:00:00 2001
From: rasbt
Date: Wed, 17 Jul 2024 07:38:19 -0500
Subject: [PATCH] explain extra padding token

---
 ch07/01_main-chapter-code/ch07.ipynb | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/ch07/01_main-chapter-code/ch07.ipynb b/ch07/01_main-chapter-code/ch07.ipynb
index 33ccf6b3..f57cc879 100644
--- a/ch07/01_main-chapter-code/ch07.ipynb
+++ b/ch07/01_main-chapter-code/ch07.ipynb
@@ -618,6 +618,8 @@
     "    device=\"cpu\"\n",
     "):\n",
     "    # Find the longest sequence in the batch\n",
+    "    # and increase the max length by +1, which will add one extra\n",
+    "    # padding token below\n",
     "    batch_max_length = max(len(item)+1 for item in batch)\n",
     "\n",
     "    # Pad and prepare inputs\n",
@@ -627,13 +629,14 @@
     "        new_item = item.copy()\n",
     "        # Add an <|endoftext|> token\n",
     "        new_item += [pad_token_id]\n",
-    "        # Pad sequences to max_length\n",
-    "        # this always adds at least 1 additional padding tokens\n",
+    "        # Pad sequences to batch_max_length\n",
     "        padded = (\n",
     "            new_item + [pad_token_id] *\n",
     "            (batch_max_length - len(new_item))\n",
     "        )\n",
-    "        # We remove this extra padded token again here\n",
+    "        # Via padded[:-1], we remove the extra padded token\n",
+    "        # that has been added via the +1 setting in batch_max_length\n",
+    "        # (the extra padding token will be relevant in later code)\n",
     "        inputs = torch.tensor(padded[:-1])\n",
     "        inputs_lst.append(inputs)\n",
     "\n",
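
For reference, below is a minimal, self-contained sketch of the collate function that this patch annotates, run on a tiny batch to show the effect of the +1 / padded[:-1] trick: every sequence is padded to the length of the longest sequence in the batch, and the one extra padding token introduced via batch_max_length is stripped again before the tensor is built. Only the lines visible in the hunks above are taken from the patch; the function name (custom_collate_draft_1) and the final stack-and-transfer lines are assumptions based on the surrounding chapter code.

import torch

def custom_collate_draft_1(batch, pad_token_id=50256, device="cpu"):
    # Find the longest sequence in the batch
    # and increase the max length by +1, which will add one extra
    # padding token below
    batch_max_length = max(len(item)+1 for item in batch)

    # Pad and prepare inputs
    inputs_lst = []
    for item in batch:
        new_item = item.copy()
        # Add an <|endoftext|> token
        new_item += [pad_token_id]
        # Pad sequences to batch_max_length
        padded = (
            new_item + [pad_token_id] *
            (batch_max_length - len(new_item))
        )
        # Via padded[:-1], we remove the extra padded token
        inputs = torch.tensor(padded[:-1])
        inputs_lst.append(inputs)

    # Assumed function tail: stack into a (batch_size, batch_max_length-1)
    # tensor and move it to the target device
    return torch.stack(inputs_lst).to(device)

# Example: three token-ID sequences of unequal length
batch = [
    [0, 1, 2, 3, 4],
    [5, 6],
    [7, 8, 9],
]
print(custom_collate_draft_1(batch))
# tensor([[    0,     1,     2,     3,     4],
#         [    5,     6, 50256, 50256, 50256],
#         [    7,     8,     9, 50256, 50256]])

As the example shows, the +1 looks redundant here because padded[:-1] immediately undoes it; per the new comment, it becomes relevant in later code, presumably so that a subsequent version of the collate function can reuse the extra token when building target token IDs shifted by one position from the same padded sequence.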