main.py
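"""Analyze and visualize weight distributions of pretrained causal LMs.

For each model, trainable parameters are grouped by layer index and sublayer
type (attention / ffn / other), per-group statistics (mean, std, min, max,
sparsity) are computed, and a two-panel summary figure is saved per model.
"""
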
import re
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import torch
from transformers import AutoModelForCausalLM

def load_model(model_name):
    """Load a model in half precision to save memory."""
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            trust_remote_code=True
        )
        return model
    except Exception as e:
        print(f"Error loading model {model_name}: {e}")
        return None
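
# A possible extension (not part of the original script): with the
# `accelerate` package installed, from_pretrained also accepts
# device_map="auto" and low_cpu_mem_usage=True, which place shards on
# available devices and avoid materializing a second full copy of the
# weights in RAM while loading.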

def analyze_weight_distribution(model, model_name):
    """Analyze weight distribution across model layers."""
    stats = defaultdict(dict)
    layer_weights = defaultdict(list)

    # Group flattened weights by layer index and sublayer type
    for name, param in model.named_parameters():
        if param.requires_grad:  # Only analyze trainable parameters
            # Cast to float32 before converting to numpy so statistics
            # are not computed in half precision
            weights = param.detach().cpu().float().numpy().flatten()
            # Match both common naming schemes: "model.layers.<n>." (Llama)
            # and "transformer.h.<n>." (Qwen)
            match = re.search(r'\.(?:layers|h)\.(\d+)\.', name)
            if match:
                layer_num = int(match.group(1))
                layer_type = 'attention' if 'attn' in name else 'ffn' if 'mlp' in name else 'other'
                # Zero-pad the index so lexicographic sorting matches numeric order
                key = f"layer_{layer_num:03d}_{layer_type}"
            else:
                key = 'other_params'
            # Accumulate rather than overwrite: several parameter tensors
            # (e.g. q/k/v projections) map to the same key
            layer_weights[key].append(weights)

    # Calculate statistics over the concatenated weights of each group
    for key in layer_weights:
        weights = np.concatenate(layer_weights[key])
        layer_weights[key] = weights
        stats[key]['mean'] = float(np.mean(weights))
        stats[key]['std'] = float(np.std(weights))
        stats[key]['min'] = float(np.min(weights))
        stats[key]['max'] = float(np.max(weights))
        stats[key]['sparsity'] = float(np.sum(np.abs(weights) < 1e-6) / len(weights))
    # Plotting
    plt.figure(figsize=(15, 10))

    # Plot 1: weight distribution violin plot
    plt.subplot(2, 1, 1)
    data = []
    labels = []
    for key in sorted(layer_weights.keys()):
        if 'layer_' in key:  # Only plot main layers
            data.append(layer_weights[key])
            labels.append(key)
    if data:  # violinplot raises on an empty dataset
        plt.violinplot(data, points=100, vert=False)
        plt.yticks(range(1, len(labels) + 1), labels)
    plt.xlabel('Weight Values')
    plt.title(f'Weight Distribution Across Layers - {model_name}')

    # Plot 2: statistics summary
    plt.subplot(2, 1, 2)
    layer_labels = []
    means = []
    stds = []
    sparsities = []
    for key in sorted(stats.keys()):
        if 'layer_' in key:
            layer_labels.append(key)
            means.append(stats[key]['mean'])
            stds.append(stats[key]['std'])
            sparsities.append(stats[key]['sparsity'] * 100)  # Convert to percentage
    x = range(len(layer_labels))
    plt.plot(x, means, 'b-', label='Mean')
    plt.plot(x, stds, 'r-', label='Std Dev')
    plt.plot(x, sparsities, 'g-', label='Sparsity %')
    plt.xticks(x, layer_labels, rotation=45)
    plt.legend()
    plt.title('Layer Statistics Summary')

    plt.tight_layout()
    plt.savefig(f'{model_name}_weight_analysis.png')
    plt.close()
    return stats

def main():
    # Models to analyze
    models = [
        "meta-llama/Llama-2-7b-hf",  # Llama 2 7B model
        "Qwen/Qwen-7B"               # Qwen 7B model
    ]

    for model_name in models:
        print(f"\nAnalyzing {model_name}...")
        model = load_model(model_name)
        if model is not None:
            stats = analyze_weight_distribution(model, model_name.split('/')[-1])

            # Print summary statistics
            print(f"\nSummary Statistics for {model_name}:")
            print("=" * 50)
            for layer, layer_stats in stats.items():
                if 'layer_' in layer:  # Only print main layers
                    print(f"\n{layer}:")
                    print(f"Mean: {layer_stats['mean']:.6f}")
                    print(f"Std Dev: {layer_stats['std']:.6f}")
                    print(f"Sparsity: {layer_stats['sparsity'] * 100:.2f}%")

            # Clear model from memory
            del model
            torch.cuda.empty_cache()


if __name__ == "__main__":
    main()
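
# Usage sketch (assumptions: torch, transformers, numpy, and matplotlib are
# installed, and there is enough memory for a 7B model in fp16):
#   python main.py
#
# Note: meta-llama/Llama-2-7b-hf is a gated repository on the Hugging Face
# Hub; you will likely need to accept its license and authenticate (e.g. via
# `huggingface-cli login`) before from_pretrained can download the weights.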