-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathexperiment.py
45 lines (42 loc) · 1.68 KB
/
experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# -*- coding: utf-8 -*-
def tmp_write_json__extract_words(self):
tag_items_cache_json = self.model.cache_dir + "/tags_words_question_items.json"
tags_words_question_items = self.tags_items
result = dict()
for i1 in tags_words_question_items:
tw = i1.tags_word; tw.pop('processed', None)
result[str(i1.item_id)] = tw
json.dump(result, open(tag_items_cache_json, 'wb'))
def tmp_output(self):
tags_words_question_items = self.tags_items
random.shuffle(tags_words_question_items)
for i1 in tags_words_question_items[0:199]:
print i1; i1.tags_word.pop('processed', None); print i1.tags_word
"""
R语言分析代码
c1 <- as.numeric(read.table('/Users/mvj3/17zuoye/hg/vox-etl/tag_count.json', sep=','))
c2 <- as.numeric(read.table('/Users/mvj3/17zuoye/hg/vox-etl/features_count.json', sep=','))
x1 <- seq(0, lenselfh(c1)-1, 1)
plot(x1, c1, col='red')
lines(x1, c2, col='green')
初步结论 是 在每种知识点数量里的每一个数量里,程序抽取的特征值也是递降的,而且少的更多。
疑问: 为什么featuers的排列成规律?
"""
##########################################################################################################################################
### 测试
# 优化记录
# [default_guess_count, related_to_max_percent -> 召回, 正确]
# [ 5, 0.95 -> 0.47, 0.50]
# [ 5, 0.90 -> 0.48, 0.52]
# [ 5, 0.80 -> 0.48, 0.45]
# [20, 0.60 -> 0.60, 0.38]
# [10, 0.60 -> 0.59, 0.37]
# [ 8, 0.60 -> 0.58, 0.35]
# [ 8, 0.40 -> 0.65, 0.26]
# [ 8, 0.30 -> 0.75, 0.23]
# [ 5, 0.60 -> 0.62, 0.40]
# [ 5, 0.50 -> 0.65, 0.35]
# [ 5, 0.30 -> 0.71, 0.26]
# [ 4, 0.33 -> 0.65, 0.29]
# [ 3, 0.23 -> 0.65, 0.29]
# [ 2, 0.30 -> 0.57, 0.38]