-
Notifications
You must be signed in to change notification settings - Fork 0
/
BLP_market_share.py
128 lines (104 loc) · 4.56 KB
/
BLP_market_share.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# -*- coding: utf-8 -*-
"""
Created on Mon May 21 09:58:58 2018
@author: Shih-Yang Lin
This script calculate the market share.
"""
import numpy as np
from progress import progress as pg
import time
def calculate_utility(estimates, sigma, data, end_var, v):
'''
Calculate mean utility of each products base on the estimates and data.
input:
estimates: list, the estimates of BLP.
sigma: list, the estimated standard errors of random coefficients.
data: n x k numpy ndarray, the independent variables include constant
term.
end_var: list, the position of endogenous variables.
v: (len(sigma), ) numpy ndarray, consumer taste.
output:
uti: numpy array, the mean utility of each products.
'''
# Examine the number of variables
Worning_Message_0 = ('The length of estimates is ' + str(len(estimates)) +
', but the number of columns of data is ' +
str(data.shape[1]) + '.')
assert len(estimates) == data.shape[1], Worning_Message_0
Worning_Message_1 = ('The length of sigma is ' + str(len(sigma)) +
', but the number of endogenous variables is ' +
str(len(end_var)) + '.')
assert len(sigma) == len(end_var), Worning_Message_1
Worning_Message_2 = ('The length of sigma is ' + str(len(sigma)) +
', but the number of consumer taste variables is ' +
str(len(v)) + '.')
assert len(v) == len(sigma), Worning_Message_2
del Worning_Message_0, Worning_Message_1, Worning_Message_2
# Data preparation
estimates = np.array(estimates).reshape(len(estimates), 1)
sigma = np.array(sigma).reshape(len(sigma), 1)
endogenous_variables = data.iloc[:, end_var]
# Calculate utilities
uti = np.dot(data, estimates) + np.dot(endogenous_variables, sigma*v)
return(uti)
def calculate_one_consumer_market_share(uti, mkt):
'''
Calculate simulated market share of each products in each markets
base on their utilities.
input:
uti: n x 1 numpy array, the utilities of each products in each markets.
mkt: n x 1 numpy array, the market id of each products in each markets.
output:
mkt_shr: n x 1 numpy array, the market share of each products in each
markets.
'''
# Generate new market id
mkt_list = np.unique(mkt)
new_mkt = np.zeros((len(mkt), ))
new_mkt_id = 0
for mkt_id in mkt_list:
new_mkt[mkt == mkt_id] = new_mkt_id
new_mkt_id += 1
# Calculate the denominators of market shares in each markets
denominator = list()
for i in range(len(mkt_list)):
denominator.append(1 + np.sum(np.exp(uti[new_mkt == i])))
assert len(denominator) == len(mkt_list)
denominator = np.array(denominator)
denominator = denominator[new_mkt.astype(int)]
# Calculate the market share
mkt_shr = np.exp(uti.reshape(len(uti), ))/denominator
return(mkt_shr)
def calculate_n_consumer_market_share(estimates, sigma, data, end_var, mkt,
n_consumers,
work = 'Calculating the market share.',
congrats = 'Finished.', seed = 0):
'''
Calculate mean utility of each products base on the estimates and data.
input:
estimates: list, the estimates of BLP.
sigma: list, the estimated standard errors of random coefficients.
data: n x k numpy ndarray, the independent variables include constant
term.
end_var: list, the position of endogenous variables.
mkt: n x 1 numpy array, the market id of each products in each markets.
n_consumers: int, number of simulated consumers in each market.
output:
mkt_shr: numpy array, the average market share of n consumers.
'''
tic = time.time()
if seed == 0:
seed = np.random.randint(1, 2**32 - 2)
# v_set: n_consumers x len(sigma) numpy ndarray, a set of consumer tastes.
v_set = np.random.RandomState(seed = seed).normal(size = (n_consumers, len(sigma)))
mkt_shr = np.zeros((len(data), ))
for i in range(n_consumers):
uti = calculate_utility(estimates, sigma, data, end_var, v_set[i])
mkt_shr += calculate_one_consumer_market_share(uti, mkt)
del uti
pg(i + 1, n_consumers, work, congrats)
mkt_shr = mkt_shr/n_consumers
toc = time.time()
print('')
print('Total time elaped is ' + str(round((toc - tic)/60, 2)) + ' mins.')
return(mkt_shr)