From 267419d39b089433a77b95668f042011d5a7e9ae Mon Sep 17 00:00:00 2001
From: rasbt
Date: Sun, 21 Apr 2024 13:49:06 -0500
Subject: [PATCH] remove requests dependency

---
 ch05/01_main-chapter-code/gpt_download.py | 38 ++++++++++++++++++++++-
 ch05/01_main-chapter-code/gpt_generate.py | 36 ++++++++++++++++++++-
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/ch05/01_main-chapter-code/gpt_download.py b/ch05/01_main-chapter-code/gpt_download.py
index 786620c4..91a49d48 100644
--- a/ch05/01_main-chapter-code/gpt_download.py
+++ b/ch05/01_main-chapter-code/gpt_download.py
@@ -1,5 +1,9 @@
+
+
 import os
-import requests
+import urllib.request
+
+# import requests
 import json
 import numpy as np
 import tensorflow as tf
@@ -36,6 +40,7 @@ def download_and_load_gpt2(model_size, models_dir):
     return settings, params
 
 
+"""
 def download_file(url, destination):
     # Send a GET request to download the file in streaming mode
     response = requests.get(url, stream=True)
@@ -62,6 +67,37 @@ def download_file(url, destination):
             for chunk in response.iter_content(block_size):
                 progress_bar.update(len(chunk))  # Update progress bar
                 file.write(chunk)  # Write the chunk to the file
+"""
+
+
+def download_file(url, destination):
+    # Send a GET request to download the file
+    with urllib.request.urlopen(url) as response:
+        # Get the total file size from headers, defaulting to 0 if not present
+        file_size = int(response.headers.get("Content-Length", 0))
+
+        # Check if file exists and has the same size
+        if os.path.exists(destination):
+            file_size_local = os.path.getsize(destination)
+            if file_size == file_size_local:
+                print(f"File already exists and is up-to-date: {destination}")
+                return
+
+        # Define the block size for reading the file
+        block_size = 1024  # 1 Kilobyte
+
+        # Initialize the progress bar with total file size
+        progress_bar_description = os.path.basename(url)  # Extract filename from URL
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
+            # Open the destination file in binary write mode
+            with open(destination, "wb") as file:
+                # Read the file in chunks and write to destination
+                while True:
+                    chunk = response.read(block_size)
+                    if not chunk:
+                        break
+                    file.write(chunk)
+                    progress_bar.update(len(chunk))  # Update progress bar
 
 
 def load_gpt2_params_from_tf_ckpt(ckpt_path, settings):
diff --git a/ch05/01_main-chapter-code/gpt_generate.py b/ch05/01_main-chapter-code/gpt_generate.py
index c8f54aa9..d7abaf06 100644
--- a/ch05/01_main-chapter-code/gpt_generate.py
+++ b/ch05/01_main-chapter-code/gpt_generate.py
@@ -6,7 +6,9 @@
 import json
 import numpy as np
 import os
-import requests
+import urllib.request
+
+# import requests
 import tensorflow as tf
 import tiktoken
 import torch
@@ -57,6 +59,7 @@ def download_and_load_gpt2(model_size, models_dir):
     return settings, params
 
 
+"""
 def download_file(url, destination):
     # Send a GET request to download the file in streaming mode
     response = requests.get(url, stream=True)
@@ -83,6 +86,37 @@ def download_file(url, destination):
             for chunk in response.iter_content(block_size):
                 progress_bar.update(len(chunk))  # Update progress bar
                 file.write(chunk)  # Write the chunk to the file
+"""
+
+
+def download_file(url, destination):
+    # Send a GET request to download the file
+    with urllib.request.urlopen(url) as response:
+        # Get the total file size from headers, defaulting to 0 if not present
+        file_size = int(response.headers.get("Content-Length", 0))
+
+        # Check if file exists and has the same size
+        if os.path.exists(destination):
+            file_size_local = os.path.getsize(destination)
+            if file_size == file_size_local:
+                print(f"File already exists and is up-to-date: {destination}")
+                return
+
+        # Define the block size for reading the file
+        block_size = 1024  # 1 Kilobyte
+
+        # Initialize the progress bar with total file size
+        progress_bar_description = os.path.basename(url)  # Extract filename from URL
+        with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar:
+            # Open the destination file in binary write mode
+            with open(destination, "wb") as file:
+                # Read the file in chunks and write to destination
+                while True:
+                    chunk = response.read(block_size)
+                    if not chunk:
+                        break
+                    file.write(chunk)
+                    progress_bar.update(len(chunk))  # Update progress bar
 
 
 def load_gpt2_params_from_tf_ckpt(ckpt_path, settings):