-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathSentiment.py
70 lines (56 loc) · 1.75 KB
/
Sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import _helper
import pandas as pd
import re
from pandas import read_csv
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import json
# Name of the DataFrame column that holds the text to analyse; the column
# must contain string data. main() returns None without publishing anything
# when this name is not among the loaded data's columns (as is normally the
# case for the empty default), so set it before running.
col=''
# Matches one HTML/XML tag: '<', anything up to the next '>', then '>'.
# (The previous pattern r'<*?>' only matched a bare '>' or runs of '<' followed
# by '>', so tags such as <br /> were never removed.)
_HTML_TAG_RE = re.compile(r'<[^>]*>')

# Punctuation characters that are turned into spaces: ? ! ' " # ( ) | /
_PUNCT_RE = re.compile(r'[?!\'"#()|/]')


def _clean_html(sent):
    """Return ``str(sent)`` with every ``<...>`` tag replaced by a space."""
    return _HTML_TAG_RE.sub(' ', str(sent))


def _clean_punc(word):
    """Return ``str(word)`` with the punctuation in ``_PUNCT_RE`` replaced by spaces."""
    return _PUNCT_RE.sub(' ', str(word))


def _normalize(sent):
    """Return the cleaned form of one cell of the text column.

    Tags and the selected punctuation are replaced by spaces, the result is
    split on whitespace, tokens that are not purely alphabetic are dropped,
    and the remaining tokens are lower-cased and joined with single spaces.
    """
    # NOTE(review): non-string cells are stringified first, so a missing
    # value stored as float NaN becomes the token 'nan' - confirm this is wanted.
    tokens = _clean_punc(_clean_html(sent)).split()
    return ' '.join(tok.lower() for tok in tokens if tok.isalpha())


def main():
    """Clean the text column named by ``col`` and attach VADER compound scores.

    The data is obtained from ``_helper.data()`` (presumably a pandas
    DataFrame; it must expose ``.empty`` and ``.columns``). The column named
    by the module-level ``col`` is overwritten with its cleaned text, a new
    ``'compound'`` column receives the VADER compound polarity score of each
    cleaned row, and the frame is handed to ``_helper.publish``.

    Returns:
        Whatever ``_helper.publish(df)`` returns, or ``None`` when ``col`` is
        not one of the frame's columns.

    Raises:
        ValueError: if the loaded data is empty.
    """
    df = _helper.data()
    if df.empty:
        raise ValueError('Data Loading failed !')

    # Nothing to analyse when the configured column is absent.
    if col not in df.columns:
        return None

    # One cleaned string per row, in row order, so it can replace the column.
    cleaned = [_normalize(sent) for sent in df[col].values]
    df[col] = cleaned

    # Extract sentiment from text: polarity_scores() returns a dict with the
    # keys 'neg', 'neu', 'pos' and 'compound'; only the compound score is kept.
    analyzer = SentimentIntensityAnalyzer()
    df['compound'] = [
        analyzer.polarity_scores(text)['compound'] for text in cleaned
    ]
    return _helper.publish(df)