-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathml.py
72 lines (58 loc) · 1.62 KB
/
ml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import re
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
import _pickle as Pickle
# Newsgroup names requested from the 20-newsgroups dataset; this list is
# passed as ``categories=`` to fetch_20newsgroups further down.
categories = [
    "alt.atheism",
    "comp.os.ms-windows.misc",
    "comp.sys.ibm.pc.hardware",
    "comp.sys.mac.hardware",
    "comp.windows.x",
    "misc.forsale",
    "rec.autos",
    "rec.motorcycles",
    "rec.sport.baseball",
    "rec.sport.hockey",
    "sci.crypt",
    "sci.electronics",
    "sci.med",
    "sci.space",
    "talk.politics.guns",
    "talk.politics.mideast",
    "talk.politics.misc",
    "talk.religion.misc",
]
# Maps each newsgroup name to a coarse topic id. Several newsgroups share an
# id; the ids are the keys of ``maplib``.
lib = {
    "alt.atheism": 0,
    # computing-related groups
    "comp.os.ms-windows.misc": 1,
    "comp.sys.ibm.pc.hardware": 1,
    "comp.sys.mac.hardware": 1,
    "comp.windows.x": 1,
    "misc.forsale": 2,
    # vehicles
    "rec.autos": 3,
    "rec.motorcycles": 3,
    # sports
    "rec.sport.baseball": 4,
    "rec.sport.hockey": 4,
    # science groups each get their own id
    "sci.crypt": 9,
    "sci.electronics": 5,
    "sci.med": 7,
    "sci.space": 8,
    # all talk.* groups share one id
    "talk.politics.guns": 6,
    "talk.politics.mideast": 6,
    "talk.politics.misc": 6,
    "talk.religion.misc": 6,
}
# Human-readable label for each coarse topic id used as a value in ``lib``.
# NOTE(review): "electroic science" looks like a typo for "electronic
# science"; it is kept unchanged because the string is returned to callers.
maplib = {
    7: "health and fitness",
    0: "atheism",
    1: "technology",
    2: "sales",
    3: "transport",
    4: "sports",
    5: "electroic science",
    8: "space",
    6: "social political",
    9: "cyber security",
}
# Load the train and test splits, restricted to the newsgroups in
# ``categories``. This runs at import time; per the scikit-learn docs the
# loader may download the dataset if it is not already cached locally.
news_train = fetch_20newsgroups(
    subset="train",
    categories=categories,
    shuffle=True,
)
# NOTE(review): news_test is not referenced in the visible part of this file.
news_test = fetch_20newsgroups(
    subset="test",
    categories=categories,
    shuffle=True,
)
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
# Classification pipeline: TF-IDF text features ("vect") feeding a
# multinomial naive Bayes classifier ("clf").
text_clf = Pipeline(
    steps=[
        ("vect", TfidfVectorizer()),
        ("clf", MultinomialNB()),
    ]
)
# Prediction entry point; the model is trained inside _PredictML.
def _PredictML(data):
    """Classify one piece of text into a coarse topic label.

    The module-level ``text_clf`` pipeline is fitted on the training split
    the first time this function is called. Later calls reuse the fitted
    pipeline instead of retraining it for every single prediction.

    Args:
        data: Raw text of a single document (a ``str``).

    Returns:
        The label from ``maplib`` for the predicted newsgroup, or ``None``
        when the newsgroup has no entry in ``lib`` or its id has no entry
        in ``maplib``.
    """
    # Train lazily and only once: the training data never changes between
    # calls, so refitting on every prediction only repeats the same work.
    if not getattr(_PredictML, "_is_fitted", False):
        text_clf.fit(news_train.data, news_train.target)
        _PredictML._is_fitted = True

    # predict() expects an iterable of documents, so wrap the single text.
    predicted = text_clf.predict([data])

    # Resolve the class id through the dataset's own ``target_names`` rather
    # than the hand-written ``categories`` list, so the lookup stays correct
    # even if that list is ever reordered.
    newsgroup = news_train.target_names[predicted[0]]
    return maplib.get(lib.get(newsgroup))