Skip to content

Commit

Permalink
Refactored Snowflake data generation
Browse files Browse the repository at this point in the history
  • Loading branch information
IGuy37 committed Jul 11, 2024
1 parent 19e5d50 commit aadc0a0
Showing 1 changed file with 12 additions and 12 deletions.
24 changes: 12 additions & 12 deletions snowflake/generate_data_for_snowflake.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@


import pandas as pd
import numpy as np
from datetime import datetime

# Generate random data
#
# NOTE(review): this listing appears to be a rendered diff with the +/- markers
# stripped, so each changed line is shown twice: the pre-commit form (literal
# 1000000) followed by the post-commit form (num_socks / start_id). Only one
# set belongs in the real file; as shown, the dict literal below is not valid
# Python because no comma separates the first 'Comments' entry from the
# second 'Date' entry.
#
# Row count shared by every generated column and by the Sock_ID range below.
num_socks = 1_000_000
# Maps column name -> sequence of values. Every sequence is built with the
# same length so the dict can be handed straight to pd.DataFrame.
data = {
# --- pre-commit entries: row count hard-coded as 1000000 ---
# 'Date' calls datetime.now() once per row and keeps only the YYYY-MM-DD part.
'Date': [datetime.now().strftime('%Y-%m-%d') for _ in range(1000000)],
# Categorical columns: each value is picked by np.random.choice from the
# listed options.
'Size': np.random.choice(['Small', 'Medium', 'Large'], size=1000000),
'Color': np.random.choice(['Red', 'Blue', 'Green', 'Yellow', 'Black'], size=1000000),
'Pattern': np.random.choice(['Striped', 'Dotted', 'Solid'], size=1000000),
'Material': np.random.choice(['Cotton', 'Wool', 'Polyester'], size=1000000),
'Condition': np.random.choice(['Excellent', 'Good', 'Fair'], size=1000000),
'ForFoot': np.random.choice(['Left', 'Right', 'Both'], size=1000000),
# 'Price' is drawn with np.random.uniform(5, 50) and rounded to 2 decimals.
'Price': np.random.uniform(5, 50, size=1000000).round(2),
# 'Comments' is the same constant string on every row.
'Comments': ['Sample comment' for _ in range(1000000)]
# --- post-commit entries: same columns and expressions, sized by num_socks ---
'Date': [datetime.now().strftime('%Y-%m-%d') for _ in range(num_socks)],
'Size': np.random.choice(['Small', 'Medium', 'Large'], size=num_socks),
'Color': np.random.choice(['Red', 'Blue', 'Green', 'Yellow', 'Black'], size=num_socks),
'Pattern': np.random.choice(['Striped', 'Dotted', 'Solid'], size=num_socks),
'Material': np.random.choice(['Cotton', 'Wool', 'Polyester'], size=num_socks),
'Condition': np.random.choice(['Excellent', 'Good', 'Fair'], size=num_socks),
'ForFoot': np.random.choice(['Left', 'Right', 'Both'], size=num_socks),
'Price': np.random.uniform(5, 50, size=num_socks).round(2),
'Comments': ['Sample comment' for _ in range(num_socks)]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Add Sock_ID attribute
# Pre-commit form: range bounds written as literals (IDs 2 through 1000001).
df['Sock_ID'] = np.arange(2, 1000002) # Incrementing number starting at 2
# Post-commit form: the same range derived from start_id and num_socks.
# np.arange excludes the stop value, so the last ID is
# start_id + num_socks - 1.
start_id = 2
df['Sock_ID'] = np.arange(start_id, num_socks + start_id) # Incrementing number starting at 2

# Save DataFrame to Parquet file
# Output file name only; the write call itself is not visible in this excerpt.
parquet_file = 'sock_exchange_data.parquet'
Expand Down

0 comments on commit aadc0a0

Please sign in to comment.