Add files via upload

veeragandhi · May 5, 2017 · cd97849 · cd97849
1 parent d4abd85
commit cd97849
Show file tree

Hide file tree

Showing 5 changed files with 202 additions and 0 deletions.
diff --git a/CalculateCredibilityMapper.py b/CalculateCredibilityMapper.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+import sys,math,nltk,itertools
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    """Read a text file into a list with one stripped entry per line.
+
+    Args:
+        fileName: Path of the file to read.
+        readMode: Mode string passed straight to open().
+        isLower: When true, each line is lower-cased before it is stripped.
+
+    Returns:
+        A list of strings in file order with leading and trailing
+        whitespace removed. Blank lines are kept as empty strings.
+    """
+    myArray = []
+    # The with-block closes the handle on exit, so no explicit close() call
+    # is needed afterwards.
+    with open(fileName, readMode) as readHandle:
+        for line in readHandle:
+            if isLower:
+                line = line.lower()
+            # strip() trims both ends of the line in one call.
+            myArray.append(line.strip())
+    return myArray
+
+
+
+def _loadTopic(topicFile, topicCache):
+    """Return (topicId, phrases) for topicFile, reading the file only once.
+
+    The first line of a topic file is used as its id and the remaining
+    lines as the topic phrases (all lower-cased by the reader). Results are
+    memoised in topicCache so the file is not re-read for every tweet.
+    """
+    if topicFile not in topicCache:
+        entries = readFileandReturnAnArray(topicFile, "r", True)
+        topicCache[topicFile] = (entries[0], entries[1:])
+    return topicCache[topicFile]
+
+
+def _phraseCount(phrase, tokens, phraseWords):
+    """Count how often a topic phrase occurs among the tweet tokens.
+
+    A phrase that tokenises to exactly two words is counted as the smaller
+    of the two per-word counts; any other phrase is counted as a whole
+    token. Tokenised phrases are memoised in phraseWords across tweets.
+    """
+    if phrase not in phraseWords:
+        phraseWords[phrase] = nltk.word_tokenize(phrase)
+    words = phraseWords[phrase]
+    if len(words) == 2:
+        return min(tokens.count(words[0]), tokens.count(words[1]))
+    return tokens.count(phrase)
+
+
+# Mapper: for every tweet on stdin, emit "<topicId>\t<score>" once per pair
+# of topic phrases that both occur in the tweet.
+topicfiles = ["foodtopic1", "foodtopic2", "foodtopic3"]
+topicCache = {}
+phraseWords = {}
+
+for line in sys.stdin:
+    try:
+        # NOTE(review): topic phrases are lower-cased when loaded but the
+        # tweet is tokenised as-is, so capitalised words never match.
+        # Confirm whether the tweet should be lower-cased too.
+        Tokens = nltk.word_tokenize(line)
+        cnt = len(Tokens)
+        if cnt == 0:
+            # No tokens means no phrase can match; skipping also avoids
+            # dividing by an empty sample space.
+            continue
+        sample_space = cnt * cnt
+        individual = float((2 * cnt) - 1) / sample_space
+        intermediate = 2.00 / sample_space
+        # Phrase counts depend on this tweet only, so cache them per line
+        # instead of recounting for every pair.
+        counts = {}
+        for topicFile in topicfiles:
+            topicId, phrases = _loadTopic(topicFile, topicCache)
+            for left, right in itertools.combinations(phrases, 2):
+                if left not in counts:
+                    counts[left] = _phraseCount(left, Tokens, phraseWords)
+                if right not in counts:
+                    counts[right] = _phraseCount(right, Tokens, phraseWords)
+                first = counts[left]
+                second = counts[right]
+                if first == 0 or second == 0:
+                    continue
+                firstresult = float(first) * intermediate
+                secresult = float(second) * intermediate
+                combine = firstresult + secresult
+                final = combine / (individual * individual)
+                print '%s\t%s' % (topicId, math.log(final))
+    except ValueError:
+        # Best effort: skip the rest of a tweet that raises ValueError
+        # and carry on with the next line.
+        continue
diff --git a/CalculateEntropReducer.py b/CalculateEntropReducer.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+from operator import itemgetter
+import sys
+
+# Reducer: read "<topicId>\t<score>" lines from stdin and print the mean
+# score for each topic as "<topicId>\t<average>".
+en_score = {}
+
+for line in sys.stdin:
+    line = line.strip()
+    if not line:
+        # Blank lines carry no record.
+        continue
+    try:
+        # Unpacking fails with ValueError unless there are exactly two
+        # tab-separated fields; float() fails the same way on bad scores.
+        topicId, score = line.split('\t')
+        score = float(score)
+    except ValueError:
+        # Report and skip the bad record instead of aborting the whole job.
+        sys.stderr.write('skipping malformed line: %r\n' % line)
+        continue
+    en_score.setdefault(topicId, []).append(score)
+
+
+for topic in en_score:
+    avg_score = sum(en_score[topic]) / len(en_score[topic])
+    print '%s\t%s' % (topic, avg_score)
diff --git a/CalculateEntropyMapper.py b/CalculateEntropyMapper.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+
+import sys, math, nltk
+sys.path.append('./')
+
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    """Read a text file into a list with one stripped entry per line.
+
+    Args:
+        fileName: Path of the file to read.
+        readMode: Mode string passed straight to open().
+        isLower: When true, each line is lower-cased before it is stripped.
+
+    Returns:
+        A list of strings in file order with leading and trailing
+        whitespace removed. Blank lines are kept as empty strings.
+    """
+    myArray = []
+    # The with-block closes the handle on exit, so no explicit close() call
+    # is needed afterwards.
+    with open(fileName, readMode) as readHandle:
+        for line in readHandle:
+            if isLower:
+                line = line.lower()
+            # strip() trims both ends of the line in one call.
+            myArray.append(line.strip())
+    return myArray
+
+
+def _loadTopic(topicFile, topicCache):
+    """Return (topicId, symbols) for topicFile, reading the file only once.
+
+    The first line of a topic file is used as its id and the remaining
+    lines as the topic symbols (all lower-cased by the reader). Results are
+    memoised in topicCache so the file is not re-read for every tweet.
+    """
+    if topicFile not in topicCache:
+        entries = readFileandReturnAnArray(topicFile, "r", True)
+        topicCache[topicFile] = (entries[0], entries[1:])
+    return topicCache[topicFile]
+
+
+def _symbolCount(symbol, tokens, symbolWords):
+    """Count how often a topic symbol occurs among the tweet tokens.
+
+    A symbol that tokenises to exactly one word is counted as a whole
+    token. Otherwise every word is counted separately and the shared count
+    is returned only when all per-word counts agree; else the result is 0.
+    Tokenised symbols are memoised in symbolWords across tweets.
+    """
+    if symbol not in symbolWords:
+        symbolWords[symbol] = nltk.word_tokenize(symbol)
+    words = symbolWords[symbol]
+    if len(words) == 1:
+        return tokens.count(symbol)
+    wordCounts = [tokens.count(word) for word in words]
+    if wordCounts and all(c == wordCounts[0] for c in wordCounts):
+        return wordCounts[0]
+    return 0
+
+
+# Mapper: for every tweet on stdin, emit "<topicId>\t<entropy>" for each
+# topic whose symbols yield a non-zero Shannon entropy in the tweet.
+topicfiles = ["foodtopic1", "foodtopic2", "foodtopic3"]
+topicCache = {}
+symbolWords = {}
+
+for line in sys.stdin:
+    try:
+        # NOTE(review): topic symbols are lower-cased when loaded but the
+        # tweet is tokenised as-is, so capitalised words never match.
+        # Confirm whether the tweet should be lower-cased too.
+        Tokens = nltk.word_tokenize(line)
+        length = len(Tokens)
+        if length == 0:
+            # Nothing can match an empty tweet, so nothing would be emitted.
+            continue
+        for topicFile in topicfiles:
+            topicId, symbols = _loadTopic(topicFile, topicCache)
+            # Shannon entropy, accumulated as sum(p * log2(p)) and negated
+            # at the end.
+            ent = 0.0
+            for symbol in symbols:
+                cnt = _symbolCount(symbol, Tokens, symbolWords)
+                if cnt != 0:
+                    wordProb = float(cnt) / length
+                    ent = ent + wordProb * math.log(wordProb, 2)
+            if ent != 0.0:
+                print '%s\t%s' % (topicId, -ent)
+    except ValueError:
+        # Best effort: skip the rest of a tweet that raises ValueError
+        # and carry on with the next line.
+        continue
diff --git a/CalculateObjectivityMapper.py b/CalculateObjectivityMapper.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+
+
+from textblob import TextBlob
+
+import sys
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    """Read a text file into a list with one stripped entry per line.
+
+    Args:
+        fileName: Path of the file to read.
+        readMode: Mode string passed straight to open().
+        isLower: When true, each line is lower-cased before it is stripped.
+
+    Returns:
+        A list of strings in file order with leading and trailing
+        whitespace removed. Blank lines are kept as empty strings.
+    """
+    myArray = []
+    # The with-block closes the handle on exit, so no explicit close() call
+    # is needed afterwards.
+    with open(fileName, readMode) as readHandle:
+        for line in readHandle:
+            if isLower:
+                line = line.lower()
+            # strip() trims both ends of the line in one call.
+            myArray.append(line.strip())
+    return myArray
+
+
+
+# Mapper: for every tweet on stdin, emit "<topicId>\t<subjectivity>" once
+# per topic symbol that occurs as a substring of the tweet.
+# NOTE(review): the script is named "Objectivity" but emits TextBlob's
+# subjectivity value; confirm that this is the intended measure.
+topicfiles = ["foodtopic1", "foodtopic2", "foodtopic3"]
+# Topic files are read on first use and then reused for every tweet.
+topicCache = {}
+
+for line in sys.stdin:
+    tweets_text = line
+    # The score depends on the tweet only, so compute it at most once per
+    # line, and only if some symbol actually matches.
+    subjectivity = None
+    for topicFile in topicfiles:
+        if topicFile not in topicCache:
+            entries = readFileandReturnAnArray(topicFile, "r", True)
+            # First line of a topic file is its id, the rest are symbols.
+            topicCache[topicFile] = (entries[0], entries[1:])
+        topicId, symbols = topicCache[topicFile]
+        for symbol in symbols:
+            # NOTE(review): symbols are lower-cased but the tweet is not,
+            # and an empty symbol (blank line in a topic file) is a
+            # substring of every tweet. Confirm both are intended.
+            if symbol not in tweets_text:
+                continue
+            if subjectivity is None:
+                subjectivity = TextBlob(tweets_text).subjectivity
+            print '%s\t%s' % (topicId, subjectivity)
diff --git a/CalculateTruthfulnessMapper.py b/CalculateTruthfulnessMapper.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+
+
+from textblob import TextBlob
+
+import sys
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    """Read a text file into a list with one stripped entry per line.
+
+    Args:
+        fileName: Path of the file to read.
+        readMode: Mode string passed straight to open().
+        isLower: When true, each line is lower-cased before it is stripped.
+
+    Returns:
+        A list of strings in file order with leading and trailing
+        whitespace removed. Blank lines are kept as empty strings.
+    """
+    myArray = []
+    # The with-block closes the handle on exit, so no explicit close() call
+    # is needed afterwards.
+    with open(fileName, readMode) as readHandle:
+        for line in readHandle:
+            if isLower:
+                line = line.lower()
+            # strip() trims both ends of the line in one call.
+            myArray.append(line.strip())
+    return myArray
+
+
+
+# Mapper: for every tweet on stdin, emit "<topicId>\t<polarity>" once per
+# topic symbol that occurs as a substring of the tweet.
+topicfiles = ["foodtopic1", "foodtopic2", "foodtopic3"]
+# Topic files are read on first use and then reused for every tweet.
+topicCache = {}
+
+for line in sys.stdin:
+    tweets_text = line
+    # The score depends on the tweet only, so compute it at most once per
+    # line, and only if some symbol actually matches.
+    polarity = None
+    for topicFile in topicfiles:
+        if topicFile not in topicCache:
+            entries = readFileandReturnAnArray(topicFile, "r", True)
+            # First line of a topic file is its id, the rest are symbols.
+            topicCache[topicFile] = (entries[0], entries[1:])
+        topicId, symbols = topicCache[topicFile]
+        for symbol in symbols:
+            # NOTE(review): symbols are lower-cased but the tweet is not,
+            # and an empty symbol (blank line in a topic file) is a
+            # substring of every tweet. Confirm both are intended.
+            if symbol not in tweets_text:
+                continue
+            if polarity is None:
+                polarity = TextBlob(tweets_text).polarity
+            print '%s\t%s' % (topicId, polarity)