From d97efad12a74435da0389a735117883ba14e5fb2 Mon Sep 17 00:00:00 2001
From: jyotsnaparyani <jyotsna.paryani@okstate.edu>
Date: Mon, 8 May 2017 14:42:52 -0500
Subject: [PATCH] Add files via upload

---
 CalculateDiffusionIndexMapper.py   | 31 ++++++++++++++++++++
 CalculateDiffusionIndexReducer.py  | 42 ++++++++++++++++++++++++++++
 CalculateGeographicIndexMapper.py  | 33 ++++++++++++++++++++++
 CalculateGeographicIndexReducer.py | 45 ++++++++++++++++++++++++++++++
 CalculateSpamIndexReducer.py       | 40 ++++++++++++++++++++++++++
 CalculateTweetCountMapper.py       |  8 ++++++
 CalculateTweetCountReducer.py      | 16 +++++++++++
 7 files changed, 215 insertions(+)
 create mode 100644 CalculateDiffusionIndexMapper.py
 create mode 100644 CalculateDiffusionIndexReducer.py
 create mode 100644 CalculateGeographicIndexMapper.py
 create mode 100644 CalculateGeographicIndexReducer.py
 create mode 100644 CalculateSpamIndexReducer.py
 create mode 100644 CalculateTweetCountMapper.py
 create mode 100644 CalculateTweetCountReducer.py

diff --git a/CalculateDiffusionIndexMapper.py b/CalculateDiffusionIndexMapper.py
new file mode 100644
index 0000000..c236b3a
--- /dev/null
+++ b/CalculateDiffusionIndexMapper.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+
+import sys,json
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    myArray=[]
+    with open(fileName, readMode) as readHandle:
+        for line in readHandle.readlines():
+            lineRead = line
+            if isLower:
+                lineRead = lineRead.lower()
+            myArray.append(lineRead.strip().lstrip())
+    readHandle.close()
+    return myArray
+
+
+for line in sys.stdin:
+    parsed_json_tweet = json.loads(line)
+    tweets_text = parsed_json_tweet['text'].lstrip().strip()
+    user_handle = parsed_json_tweet['user']['screen_name'].strip()
+    user_handle = user_handle.encode('ascii', 'ignore')
+    if user_handle is not None:
+        username = user_handle.strip().lstrip()
+    topicfiles = ["foodtopic1", "foodtopic2", "foodtopic3"]
+    for i in topicfiles:
+        topics = readFileandReturnAnArray(i, "r", True)
+        topicId = topics.pop(0)
+        for keyword in topics:
+                if keyword in tweets_text  :
+                        print '%s\t%s' %(topicId,username)
diff --git a/CalculateDiffusionIndexReducer.py b/CalculateDiffusionIndexReducer.py
new file mode 100644
index 0000000..98bbe32
--- /dev/null
+++ b/CalculateDiffusionIndexReducer.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+
+from operator import itemgetter
+import sys
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+
+    with open(fileName, readMode) as readHandle:
+        for line in readHandle.readlines():
+            lineRead = line
+            if isLower:
+                lineRead = lineRead.lower()
+            count = (lineRead.strip().lstrip())
+    readHandle.close()
+    return count
+
+totalTweetCount = readFileandReturnAnArray("tweetCount", "r", True)
+totalTweetCount = float(totalTweetCount)
+
+geo_score = {}
+
+for line in sys.stdin:
+    line = line.strip()
+    if line!="" and line is not None:
+        topicId, username = line.split('\t')
+
+        if topicId in geo_score:
+                existingValues = geo_score.get(topicId)
+                if username is not "" and username != None and username not in existingValues:
+                        geo_score[topicId].append(username)
+        else:
+                geo_score[topicId] = []
+                if username is not "" and username != None:
+                        geo_score[topicId].append(username)
+
+
+for topic in geo_score.keys():
+    list_of_values = geo_score[topic]
+    length = len(list_of_values)
+    final_score = float(length)/totalTweetCount
+    print '%s\t%s'% (topic, final_score)
diff --git a/CalculateGeographicIndexMapper.py b/CalculateGeographicIndexMapper.py
new file mode 100644
index 0000000..778c288
--- /dev/null
+++ b/CalculateGeographicIndexMapper.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+
+
+import sys,json
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    myArray=[]
+    with open(fileName, readMode) as readHandle:
+        for line in readHandle.readlines():
+            lineRead = line
+            if isLower:
+                lineRead = lineRead.lower()
+            myArray.append(lineRead.strip().lstrip())
+    readHandle.close()
+    return myArray
+
+
+for line in sys.stdin:
+    parsed_json_tweet = json.loads(line)
+    tweets_text = parsed_json_tweet['text'].lstrip().strip()
+    user_location = parsed_json_tweet['user']['location']
+    if user_location is not None:
+        user_location = user_location.encode('ascii', 'ignore')
+        location = user_location.strip().lstrip()
+        topicfiles = ["foodtopic1", "foodtopic2", "foodtopic3"]
+        for i in topicfiles:
+                topics = readFileandReturnAnArray(i, "r", True)
+                topicId = topics.pop(0)
+                for keyword in topics:
+                        if keyword in tweets_text  :
+                                if location is not None and location is not '' and location is not "  " and location!="":
+                                        print '%s\t%s' %(topicId,location)
diff --git a/CalculateGeographicIndexReducer.py b/CalculateGeographicIndexReducer.py
new file mode 100644
index 0000000..a7559dd
--- /dev/null
+++ b/CalculateGeographicIndexReducer.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+
+import sys,json
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    with open(fileName, readMode) as readHandle:
+        for line in readHandle.readlines():
+            lineRead = line
+            if isLower:
+                lineRead = lineRead.lower()
+            count = (lineRead.strip().lstrip())
+    readHandle.close()
+    return count
+
+totalTweetCount = readFileandReturnAnArray("foodTweetCount1", "r", True)
+totalTweetCount = float(totalTweetCount)
+
+geo_score = {}
+
+for line in sys.stdin:
+    line = line.strip()
+    if line!="" and line is not None:
+        cols = line.split('\t')
+        topicId = line[0]
+        if line[1:] is not None:
+            location = line[1:]
+        else:
+            location = None
+
+        if topicId in geo_score:
+                existingValues = geo_score.get(topicId)
+                if location is not "" and location != None and location not in existingValues and location!="noLocation":
+                        geo_score[topicId].append(location)
+        else:
+                geo_score[topicId] = []
+                if location is not "" and location != None and location!="noLocation":
+                         geo_score[topicId].append(location)
+
+
+for topic in geo_score.keys():
+    list_of_values = geo_score[topic]
+    length = len(list_of_values)
+    final_score = float(length)/totalTweetCount
+    print '%s\t%s'% (topic, final_score)
diff --git a/CalculateSpamIndexReducer.py b/CalculateSpamIndexReducer.py
new file mode 100644
index 0000000..725fee4
--- /dev/null
+++ b/CalculateSpamIndexReducer.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+import sys
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+
+    with open(fileName, readMode) as readHandle:
+        for line in readHandle.readlines():
+            lineRead = line
+            if isLower:
+                lineRead = lineRead.lower()
+            count = (lineRead.strip().lstrip())
+    readHandle.close()
+    return count
+
+totalTweetCount = readFileandReturnAnArray("tweetCount", "r", True)
+totalTweetCount = float(totalTweetCount)
+
+geo_score = {}
+
+for line in sys.stdin:
+    line = line.strip()
+    if line!="" and line is not None:
+        topicId, username = line.split('\t')
+        if topicId in geo_score:
+                existingValues = geo_score.get(topicId)
+                if username is not "" and username != None and username not in existingValues:
+                        geo_score[topicId].append(username)
+        else:
+                geo_score[topicId] = []
+                if username is not "" and username != None:
+                        geo_score[topicId].append(username)
+
+
+for topic in geo_score.keys():
+    list_of_values = geo_score[topic]
+    length = len(list_of_values)
+    spamIndex = 1.00/length
+    final_score = float(spamIndex)/totalTweetCount
+    print '%s\t%s'% (topic, final_score)
diff --git a/CalculateTweetCountMapper.py b/CalculateTweetCountMapper.py
new file mode 100644
index 0000000..465d98e
--- /dev/null
+++ b/CalculateTweetCountMapper.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+
+import sys,json
+
+
+for line in sys.stdin:
+    if len(line.strip()) > 1:
+        print "%s\t%s" %("Tweet Count:",1)
diff --git a/CalculateTweetCountReducer.py b/CalculateTweetCountReducer.py
new file mode 100644
index 0000000..b3bf357
--- /dev/null
+++ b/CalculateTweetCountReducer.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+from operator import itemgetter
+import sys
+
+current_count = 0
+
+for line in sys.stdin:
+    line = line.strip()
+    tweetcount, count = line.split('\t', 1)
+    try:
+        count = int(count)
+    except ValueError:
+        continue
+    current_count += count
+print '%s' % (current_count)