-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgen_new_feature.go
147 lines (127 loc) · 4.05 KB
/
gen_new_feature.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
package main
import (
"fmt"
"os"
"bufio"
"strings"
"strconv"
)
// split replaces each feature value with the index that get_index reports for
// it against that feature's split points (called with 1 as the third argument).
//
// Features that have no entry in fea_sp_dict are left out of the result.
// The input map is not modified; a new map is returned.
func split(fea_dict map[string]float64, fea_sp_dict map[string][]float64) map[string]float64 {
	indexed := make(map[string]float64)
	for name, value := range fea_dict {
		if points, found := fea_sp_dict[name]; found {
			indexed[name] = float64(get_index(value, points, 1))
		}
	}
	return indexed
}
// add_missing assigns missing_value to every feature listed in missing_feas,
// overwriting any value already stored under that name.
//
// fea_dict is modified in place and also returned for convenience.
func add_missing(fea_dict map[string]float64, missing_feas []string, missing_value float64) map[string]float64 {
	for i := 0; i < len(missing_feas); i++ {
		fea_dict[missing_feas[i]] = missing_value
	}
	return fea_dict
}
// extend_feature turns every (name, value) pair into an indicator feature:
// the new key is "<name>_<round(value)>" and its value is always 1.
//
// The input map is not modified; a new map is returned.
func extend_feature(fea_dict map[string]float64) map[string]float64 {
	indicators := make(map[string]float64, len(fea_dict))
	for name, value := range fea_dict {
		key := fmt.Sprintf("%s_%d", name, round(value))
		indicators[key] = 1
	}
	return indicators
}
// add_bias stores bias_value under bias_name, replacing any existing entry.
//
// fea_dict is modified in place and also returned for convenience.
func add_bias(fea_dict map[string]float64, bias_name string, bias_value float64) map[string]float64 {
	with_bias := fea_dict // same underlying map, not a copy
	with_bias[bias_name] = bias_value
	return with_bias
}
// transform_feature runs the full per-sample feature pipeline:
//
//  1. split: replace raw values with split-point indices;
//  2. add_missing: give every feature in total_fs_set that is absent after
//     step 1 the value -1;
//  3. extend_feature: expand each (name, index) pair into an indicator feature;
//  4. add_bias: add the constant feature "bias" with value 1.
//
// fea_dict itself is not modified; the returned map is newly built.
func transform_feature(fea_dict map[string]float64, fea_sp_dict map[string][]float64, total_fs_set []string) map[string]float64 {
	const (
		missing_value = -1
		bias_name     = "bias"
		bias_value    = 1
	)

	indexed := split(fea_dict, fea_sp_dict)

	// Collect the expected features that did not survive the split step.
	var absent []string
	for _, name := range total_fs_set {
		if _, found := indexed[name]; !found {
			absent = append(absent, name)
		}
	}

	indexed = add_missing(indexed, absent, missing_value)
	return add_bias(extend_feature(indexed), bias_name, bias_value)
}
// process_feature reads samples line by line from feature_file, transforms
// each sample's features with transform_feature using the split points loaded
// from split_points_file, and writes one transformed sample per line to
// out_file. The number of processed lines is printed at the end.
//
// out_file is created if needed and truncated if it already exists, so no
// stale content from a previous, longer run is left behind.
//
// Panics if either file cannot be opened or if writing to out_file fails.
func process_feature(feature_file string, split_points_file string, out_file string) {
	fea_sp_dict := read_split_points(split_points_file)

	// Every feature that has split points is expected in every sample.
	total_fs_set := make([]string, 0, len(fea_sp_dict))
	for k := range fea_sp_dict {
		total_fs_set = append(total_fs_set, k)
	}

	// read the feature data
	fp, err := os.Open(feature_file)
	if err != nil {
		panic(fmt.Sprintf("Error When Open file: %s", feature_file))
	}
	defer fp.Close()
	reader := bufio.NewReader(fp)

	// create the writing file
	fp_out, err := os.OpenFile(out_file, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		panic(fmt.Sprintf("Error When Open file: %s", out_file))
	}
	defer fp_out.Close()

	n := 0
	for {
		line, err := reader.ReadString('\n')
		// ReadString returns whatever it read before hitting an error, so a
		// final line without a trailing newline arrives together with EOF and
		// must still be processed.
		if line != "" {
			samp := new(Sample)
			samp.parse(line)

			samp2 := new(Sample)
			samp2.Label = samp.Label
			samp2.Fea_dict = transform_feature(samp.Fea_dict, fea_sp_dict, total_fs_set)

			if _, werr := fp_out.WriteString(samp2.output() + "\n"); werr != nil {
				panic(fmt.Sprintf("Error When Write file: %s: %v", out_file, werr))
			}
			n++
		}
		if err != nil {
			// Reports why reading stopped; at a normal end of file this is "EOF".
			fmt.Println(err)
			break
		}
	}
	fmt.Println("Total Line Count: ", n)
}
// read_split_points loads the split points of every feature from filename.
// Each non-blank line is handed to parse_line, and the result is stored as
// feature name -> split points. If a feature appears more than once, the last
// occurrence wins.
//
// Blank lines are skipped, and a final line without a trailing newline is
// still read. Panics if the file cannot be opened (parse_line may also panic
// on malformed lines).
func read_split_points(filename string) map[string][]float64 {
	fp, err := os.Open(filename)
	if err != nil {
		panic(fmt.Sprintf("Error When openning file: %s", filename))
	}
	defer fp.Close()

	reader := bufio.NewReader(fp)
	fea_sp_dict := map[string][]float64{}
	for {
		line, err := reader.ReadString('\n')
		// ReadString returns the data read before an error, so the last line
		// of a file lacking a trailing newline arrives together with EOF.
		if strings.TrimSpace(line) != "" {
			fea, sps := parse_line(line)
			fea_sp_dict[fea] = sps
		}
		if err != nil {
			// Reports why reading stopped; at a normal end of file this is "EOF".
			fmt.Println(err)
			break
		}
	}
	return fea_sp_dict
}
// parse_line parses one line of the split-points file, formatted as
//
//	<feature name>\t<v1>,<v2>,...
//
// and returns the feature name (surrounding spaces removed) together with the
// split points as float64 values, in the order they appear. Both "\n" and
// "\r\n" line endings are accepted, and whitespace around each value is
// ignored.
//
// Panics with a descriptive message if the line has no tab-separated value
// list or if a value is not a valid float.
func parse_line(line string) (string, []float64) {
	items := strings.Split(strings.Trim(line, "\r\n"), "\t")
	if len(items) < 2 {
		panic(fmt.Sprintf("Error when parsing line `%s`: expected `<feature>\\t<v1>,<v2>,...`", line))
	}
	fea_name := strings.Trim(items[0], " ")

	sps_str := strings.Split(items[1], ",")
	sps := make([]float64, 0, len(sps_str))
	for _, v := range sps_str {
		// bitSize must be 32 or 64; the values are stored as float64.
		sp, err := strconv.ParseFloat(strings.TrimSpace(v), 64)
		if err != nil {
			panic(fmt.Sprintf("Error when Converse value `%s` to float64", v))
		}
		sps = append(sps, sp)
	}
	return fea_name, sps
}
//func output(label int, fea_dict map[string]float64) string