-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSample_Agglomeration.py
67 lines (57 loc) · 3.23 KB
/
Sample_Agglomeration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
########################################################################################################################
# This code illustrates how to use the function "agglomeration"
# Files you need:
# (1) Clustering_Agglomeration.py
# (2) sample.csv
# Outputs:
# (1) figure: average agglomeration of industries
# Reference: Lock Yue Chew, Ning Ning Chung, Wen Xuan Sia, Hoai Nguyen Huynh, Glenn Sim, Alvin Chua, and Zhongwen Huang.
# A Data Analytic Elucidation on the Spatial Agglomeration of Singapore Maritime Industry.
# International Journal on Smart and Sustainable Cities 1, 2340001 (2023).
########################################################################################################################
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from Clustering_Agglomeration import agglomeration, agglomeration_grid
############################################## parameters ##############################################################
# Percolation-distance sweep: every value in `a` is passed to agglomeration().
dmin = 200  # minimum percolation distance
dmax1 = 2000  # maximum percolation distance for the first average
dmax2 = 5000  # maximum percolation distance for the second average
delta = 200  # interval for the percolation distance
# np.arange excludes its stop value, so stop one past dmax2: the sweep then
# reaches dmax2 itself when it lies on the dmin + k*delta grid and never
# overshoots it otherwise. This matches dmax1, which is treated as inclusive.
a = np.arange(dmin, dmax2 + 1, delta)  # a range for the percolation distance
# Number of swept distances with d <= dmax1, i.e. exactly the ones that feed
# the first average; kept as a float because it is used as a divisor.
m = float(np.count_nonzero(a <= dmax1))
######################################### load data ####################################################################
# Read only the three columns the script uses, with explicit dtypes.
cols = ['x coordinate', 'y coordinate', 'category']
dtypes = dict(zip(cols, (float, float, int)))
data = pd.read_csv('sample.csv', usecols=cols, dtype=dtypes, skiprows=0)

# Plain NumPy arrays: x coordinates, y coordinates and category label per row.
xc, yc, catec = (data[column].to_numpy() for column in cols)

# Distinct category labels (sorted by np.unique) and how often each occurs.
Cate, countsCate = np.unique(catec, return_counts=True)

# One zero-initialised accumulator per distinct category:
#   Agg      - average agglomeration over the distances up to dmax1
#   Agg2     - average agglomeration over the whole distance sweep
#   Agg_grid - allocated here but not used anywhere else in this script
Agg, Agg2, Agg_grid = (np.zeros(len(Cate), dtype=float) for _ in range(3))
# Sweep the percolation distances and accumulate both averages in place.
n_all = float(len(a))  # number of distances in the full sweep
for d in a:
    # Presumably agg_d holds one agglomeration value per category, aligned
    # with Agg/Agg2 -- confirm against Clustering_Agglomeration.agglomeration.
    # `cat` from the last iteration is reused for the plot's tick labels.
    agg_d, cat, _n_cat = agglomeration(xc, yc, catec, d)
    Agg2 += agg_d / n_all
    if d > dmax1:
        # Beyond the first maximum: contributes to the second average only.
        continue
    Agg += agg_d / m
# Plot both averages per category, ordered by increasing first average, then
# save the figure to 'Agglomeration.png' and display it.
fig1 = plt.figure(num=1, figsize=(6, 4.5))
ax = fig1.add_subplot(111)

order = np.argsort(Agg)  # category indices sorted by the first average
positions = np.arange(0, len(Agg))  # one x position per category
# Marker-only series: no connecting line, hollow markers.
marker_style = dict(markersize=6, linewidth=0, fillstyle='none', markeredgewidth=1.5)

ax.set_xlabel('Category', fontsize=10)
ax.set_ylabel('Average agglomeration', fontsize=10)
ax.plot(
    positions,
    Agg[order],
    color='C1',
    marker='s',
    label='Maximum percolation distance: ' + str(dmax1) + 'm',
    **marker_style,
)
# Agg2 is drawn in the same order as Agg so each x position is one category.
ax.plot(
    np.arange(0, len(Agg2)),
    Agg2[order],
    color='C4',
    marker='1',
    label='Maximum percolation distance: ' + str(dmax2) + 'm',
    **marker_style,
)
ax.legend(fontsize=10)

# Label each position with its category, in the same sorted order.
ax.set_xticks(positions)
ax.set_xticklabels(cat[order], fontsize=9)
ax.set_ylim([0, 1])
ax.set_xlim([-1, len(Agg)])  # one unit of padding on each side
fig1.subplots_adjust(top=0.94, bottom=0.12, left=0.14, right=0.94)
ax.tick_params(axis='both', which='major', labelsize=9)

plt.savefig('Agglomeration.png', format='png', dpi=300)
plt.show()