-
Notifications
You must be signed in to change notification settings - Fork 0
/
exec_dataset_creation.sh
executable file
·118 lines (85 loc) · 2.48 KB
/
exec_dataset_creation.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/bin/bash
#####################
#SBATCH --job-name=dicoh-dataset-creation
#SBATCH --output=/ukp-storage-1/mesgar/DiCoh/_dataset_creation.output
#SBATCH --mail-user=mesgar@ukp.informatik.tu-darmstadt.de
#SBATCH --mail-type=ALL
#SBATCH --partition=ukp
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --mem=16GB
#####################
# Root of the DiCoh project; both dataset directories live under its data/ folder.
DiCoh="/ukp-storage-1/mesgar/DiCoh"
DataDailyDialog="${DiCoh}/data/daily_dialog"
DataSwitchBoard="${DiCoh}/data/switchboard"
################
# Run the Anaconda activate script with "dicoh" as the environment to enable.
source "/ukp-storage-1/mesgar/anaconda3/bin/activate" "dicoh"
################
# Download the DailyDialog archive into a freshly created $DataDailyDialog.
#
# Any previous copy of the directory is discarded, then the directory is
# (re)created unconditionally. Previously mkdir only ran when the directory
# already existed, so on a first run the cd below failed and the archive was
# downloaded, unpacked and cleaned up in whatever directory the job was
# started from.
# The ":?" guard aborts instead of running rm -rf on an empty path.
rm -rf -- "${DataDailyDialog:?DataDailyDialog is not set}"
mkdir -p -- "$DataDailyDialog" || {
  echo "cannot create $DataDailyDialog" >&2
  exit 1
}
# Never continue in the wrong directory: the rm commands below use relative paths.
cd -- "$DataDailyDialog" || {
  echo "cannot cd to $DataDailyDialog" >&2
  exit 1
}
wget http://yanran.li/files/ijcnlp_dailydialog.zip || {
  echo "download of ijcnlp_dailydialog.zip failed" >&2
  exit 1
}
unzip -qq ijcnlp_dailydialog.zip
# The unpacked folder holds nested train/validation/test archives; extract
# each of them into the current directory.
unzip -qq ijcnlp_dailydialog/train.zip
unzip -qq ijcnlp_dailydialog/validation.zip
unzip -qq ijcnlp_dailydialog/test.zip
# Remove the intermediate folder and the downloaded archive.
rm -r ijcnlp_dailydialog
rm ijcnlp_dailydialog.zip
################
# Rebuild $DataSwitchBoard from the swda.zip archive contained in the
# cgpotts/swda git repository.
#
# The target directory is removed and (re)created unconditionally. Previously
# mkdir only ran when the directory already existed, so on a first run the cp
# into it and every later step failed.
# The ":?" guards abort instead of running rm -rf / cd on an empty path.
rm -rf -- "${DataSwitchBoard:?DataSwitchBoard is not set}"
mkdir -p -- "$DataSwitchBoard" || {
  echo "cannot create $DataSwitchBoard" >&2
  exit 1
}
cd -- "${DiCoh:?DiCoh is not set}" || {
  echo "cannot cd to $DiCoh" >&2
  exit 1
}
# Remove any earlier clone so that git clone starts from a clean target.
rm -rf -- "$DiCoh/swda"
git clone https://github.com/cgpotts/swda.git || {
  echo "git clone of swda failed" >&2
  exit 1
}
cp -- "$DiCoh/swda/swda.zip" "$DataSwitchBoard/" || {
  echo "cannot copy swda.zip to $DataSwitchBoard" >&2
  exit 1
}
cd -- "$DataSwitchBoard" || {
  echo "cannot cd to $DataSwitchBoard" >&2
  exit 1
}
# Extract only the archive members located under swda/.
unzip -qq swda.zip 'swda/*'
cd -- "$DiCoh" || {
  echo "cannot cd to $DiCoh" >&2
  exit 1
}
# Flatten the layout: move the extracted entries up one level, then remove the
# now-empty swda/ folder. The glob is intentionally left outside the quotes.
mv -- "$DataSwitchBoard"/swda/* "$DataSwitchBoard/"
rm -rf -- "$DataSwitchBoard/swda"
################
# Tasks to generate data for. Task names used here vs. the paper:
#   up is UO in the paper
#   hup is EUO in the paper
#   ui is Ui in the paper
#   us is UR in the paper
TASKS=("up")
# Dataset partitions to iterate over.
DATASETS=("train" "validation" "test")
################
# Run create_coherency_dataset.py for every task on every DailyDialog
# partition; each partition is read from its own subdirectory of
# $DataDailyDialog.
CORPUS="DailyDialog"
# Array and variable expansions are quoted so that elements and paths are
# passed through verbatim, without word splitting or pathname expansion.
for TASK in "${TASKS[@]}"; do
  for DSET in "${DATASETS[@]}"; do
    echo "$CORPUS $DSET $TASK"
    python "$DiCoh/create_coherency_dataset.py" --corpus "$CORPUS" \
      --seed 135486 \
      --datadir "$DataDailyDialog/$DSET" \
      --amount 20 \
      --task "$TASK"
  done
done
################
# Print a visual separator between the two corpora. The asterisks are quoted
# so they are printed literally; unquoted, the shell treats them as a glob
# pattern and prints the names of the files in the current directory instead.
echo "****************************"
################
# Run create_coherency_dataset.py for every task on the Switchboard data.
# Unlike the DailyDialog run, the whole $DataSwitchBoard directory is passed
# as --datadir; there is no per-partition loop.
CORPUS="Switchboard"
# Array and variable expansions are quoted so that elements and paths are
# passed through verbatim, without word splitting or pathname expansion.
for TASK in "${TASKS[@]}"; do
  echo "$CORPUS $TASK"
  python "$DiCoh/create_coherency_dataset.py" --corpus "$CORPUS" \
    --seed 135486 \
    --datadir "$DataSwitchBoard" \
    --task "$TASK" \
    --amount 20
done
################