Skip to content

Commit

Permalink
More pythonic way to find the longest sequence (#512)
Browse files (browse the repository at this point in the history)
* More pythonic way to find the longest sequence

* pep8 fix
  • Loading branch information
rasbt authored Feb 1, 2025
1 parent 0e14c76 commit 8cfa52b
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 1 deletion.
3 changes: 3 additions & 0 deletions appendix-E/01_main-chapter-code/previous_chapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,9 @@ def _longest_encoded_length(self):
if encoded_length > max_length:
max_length = encoded_length
return max_length
# Note: A more pythonic version to implement this method
# is the following, which is also used in the next chapter:
# return max(len(encoded_text) for encoded_text in self.encoded_texts)


@torch.no_grad() # Disable gradient tracking for efficiency
Expand Down
5 changes: 4 additions & 1 deletion ch06/01_main-chapter-code/ch06.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,10 @@
" encoded_length = len(encoded_text)\n",
" if encoded_length > max_length:\n",
" max_length = encoded_length\n",
" return max_length"
" return max_length\n",
" # Note: A more pythonic version to implement this method\n",
" # is the following, which is also used in the next chapter:\n",
" # return max(len(encoded_text) for encoded_text in self.encoded_texts)"
]
},
{
Expand Down
3 changes: 3 additions & 0 deletions ch06/01_main-chapter-code/gpt_class_finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ def _longest_encoded_length(self):
if encoded_length > max_length:
max_length = encoded_length
return max_length
# Note: A more pythonic version to implement this method
# is the following, which is also used in the next chapter:
# return max(len(encoded_text) for encoded_text in self.encoded_texts)


def calc_accuracy_loader(data_loader, model, device, num_batches=None):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ def _longest_encoded_length(self, tokenizer):
if encoded_length > max_length:
max_length = encoded_length
return max_length
# Note: A more pythonic version to implement this method
# is the following, which is also used in the next chapter:
# return max(len(encoded_text) for encoded_text in self.encoded_texts)


def download_and_unzip(url, zip_path, extract_to, new_file_path):
Expand Down
3 changes: 3 additions & 0 deletions ch06/03_bonus_imdb-classification/train_bert_hf_spam.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def _longest_encoded_length(self, tokenizer):
if encoded_length > max_length:
max_length = encoded_length
return max_length
# Note: A more pythonic version to implement this method
# is the following, which is also used in the next chapter:
# return max(len(encoded_text) for encoded_text in self.encoded_texts)


def download_and_unzip(url, zip_path, extract_to, new_file_path):
Expand Down

0 comments on commit 8cfa52b

Please sign in to comment.