Add files via upload

veeragandhi · May 15, 2017 · 8148e9b · 8148e9b
1 parent d97efad
commit 8148e9b
Show file tree

Hide file tree

Showing 7 changed files with 239 additions and 0 deletions.
diff --git a/CalculateDiffusionIndexMapperDate.py b/CalculateDiffusionIndexMapperDate.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+
+import sys,json
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    """Read a text file and return its lines as a list of stripped strings.
+
+    fileName -- path of the file to read.
+    readMode -- mode string handed straight to open().
+    isLower  -- when true, each line is lower-cased before it is stripped.
+
+    Every line of the file yields exactly one list entry; a blank line
+    becomes an empty string.
+    """
+    with open(fileName, readMode) as source:
+        rawLines = source.readlines()
+    if isLower:
+        rawLines = [entry.lower() for entry in rawLines]
+    # strip() removes whitespace on both ends, so a single call is enough.
+    return [entry.strip() for entry in rawLines]
+
+
+# Topic files read by this mapper. In each file the first line is used as the
+# topic id and every following line as a keyword.
+TOPIC_FILES = ["foodtopic1", "foodtopic2", "foodtopic3"]
+
+
+def _loadTopics(fileNames):
+    """Return a list of (topicId, keywords) pairs, one per topic file."""
+    loaded = []
+    for fileName in fileNames:
+        entries = readFileandReturnAnArray(fileName, "r", True)
+        if not entries:
+            # An empty topic file has neither an id nor keywords.
+            continue
+        # Blank keywords are dropped: '' is a substring of every tweet and
+        # would make the topic match everything.
+        loaded.append((entries[0], [word for word in entries[1:] if word]))
+    return loaded
+
+
+# The topic files are the same for every tweet, so they are read once here
+# instead of once per input line.
+topicKeywords = _loadTopics(TOPIC_FILES)
+
+# Mapper: for every keyword found in a tweet, emit
+#   <topicId> TAB <screen name> TAB <date>
+for line in sys.stdin:
+    if not line.strip():
+        # json.loads() raises on an empty line; skip it instead.
+        continue
+    parsed_json_tweet = json.loads(line)
+    # The keywords were lower-cased while loading, so the text has to be
+    # lower-cased too or capitalised occurrences would never match.
+    tweets_text = parsed_json_tweet['text'].strip().lower()
+    user_handle = parsed_json_tweet['user']['screen_name'].strip()
+    username = user_handle.encode('ascii', 'ignore').strip()
+    dateTime = parsed_json_tweet['created_at'].strip()
+    dateTimeList = dateTime.encode('ascii', 'ignore').split()
+    # Fields 0, 1, 2 and 5 of created_at glued together form the date key
+    # (presumably weekday, month, day and year -- confirm against the feed).
+    exactDate = dateTimeList[0] + dateTimeList[1] + dateTimeList[2] + dateTimeList[5]
+    for topicId, keywords in topicKeywords:
+        for keyword in keywords:
+            if keyword in tweets_text:
+                print '%s\t%s\t%s' % (topicId, username, exactDate)
diff --git a/CalculateDiffusionIndexReducerDate.py b/CalculateDiffusionIndexReducerDate.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+
+import sys,json
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    """Return the last line of a file with surrounding whitespace removed.
+
+    Despite its name, this variant does not build an array: only the final
+    line of the file is kept.
+
+    fileName -- path of the file to read.
+    readMode -- mode string handed straight to open().
+    isLower  -- when true, the returned line is lower-cased.
+
+    Returns an empty string when the file contains no lines (previously the
+    result variable was never bound and the call raised UnboundLocalError).
+    """
+    count = ""
+    with open(fileName, readMode) as readHandle:
+        # Iterate lazily; each pass overwrites the previous line, so only
+        # the last one survives the loop.
+        for line in readHandle:
+            count = line
+    if isLower:
+        count = count.lower()
+    return count.strip()
+
+
+# Reducer: counts the distinct values of the second column for every
+# (date, topic) pair read from stdin.
+#
+# Maps "<date> <topicId>" to the set of distinct second-column values.
+geo_score = {}
+
+for line in sys.stdin:
+    cols = line.split()
+    if len(cols) < 3:
+        # Blank or truncated record: there is nothing to count, and
+        # indexing it would raise IndexError.
+        continue
+    topicId, userHandle, date = cols[0], cols[1], cols[2]
+    dateTopicId = date + " " + topicId
+    # A set de-duplicates in constant time; the earlier list needed a
+    # linear "not in" scan for every record.  split() never yields empty
+    # strings, so no emptiness test is required.
+    geo_score.setdefault(dateTopicId, set()).add(userHandle)
+
+
+# Emit "<date> <topicId>" TAB <number of distinct values>.
+for topic in geo_score:
+    print '%s\t%s' % (topic, len(geo_score[topic]))
diff --git a/CalculateGeographicIndexMapperDate.py b/CalculateGeographicIndexMapperDate.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+
+
+import sys,json
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    """Return the lines of a text file as a list of whitespace-stripped strings.
+
+    fileName -- path of the file to read.
+    readMode -- mode string passed to open().
+    isLower  -- if true, lines are converted to lower case before stripping.
+
+    The result has one entry per line of the file, in file order.
+    """
+    cleaned = []
+    with open(fileName, readMode) as handle:
+        for text in handle.readlines():
+            # Lower-casing happens first, exactly as the flag requests.
+            normalised = text.lower() if isLower else text
+            cleaned.append(normalised.strip())
+    return cleaned
+
+
+# Topic files read by this mapper. In each file the first line is used as the
+# topic id and every following line as a keyword.
+TOPIC_FILES = ["foodtopic1", "foodtopic2", "foodtopic3"]
+
+
+def _loadTopics(fileNames):
+    """Return a list of (topicId, keywords) pairs, one per topic file."""
+    loaded = []
+    for fileName in fileNames:
+        entries = readFileandReturnAnArray(fileName, "r", True)
+        if not entries:
+            # An empty topic file has neither an id nor keywords.
+            continue
+        # Blank keywords are dropped: '' is a substring of every tweet and
+        # would make the topic match everything.
+        loaded.append((entries[0], [word for word in entries[1:] if word]))
+    return loaded
+
+
+# The topic files are the same for every tweet, so they are read once here
+# instead of once per input line.
+topicKeywords = _loadTopics(TOPIC_FILES)
+
+# Mapper: for every keyword found in a tweet whose author has a location,
+# emit  <topicId> TAB <location> TAB <date>
+for line in sys.stdin:
+    if not line.strip():
+        # json.loads() raises on an empty line; skip it instead.
+        continue
+    parsed_json_tweet = json.loads(line)
+    # The keywords were lower-cased while loading, so the text has to be
+    # lower-cased too or capitalised occurrences would never match.
+    tweets_text = parsed_json_tweet['text'].strip().lower()
+    user_location = parsed_json_tweet['user']['location']
+    dateTime = parsed_json_tweet['created_at'].strip()
+    dateTimeList = dateTime.encode('ascii', 'ignore').split()
+    # Fields 0, 1, 2 and 5 of created_at glued together form the date key
+    # (presumably weekday, month, day and year -- confirm against the feed).
+    exactDate = dateTimeList[0] + dateTimeList[1] + dateTimeList[2] + dateTimeList[5]
+    if user_location is None:
+        continue
+    # Collapse every whitespace run to one space: a tab or newline inside
+    # the free-text location would otherwise break the tab-separated record.
+    location = ' '.join(user_location.encode('ascii', 'ignore').split())
+    if not location:
+        # Nothing left once non-ASCII characters and whitespace are removed.
+        continue
+    for topicId, keywords in topicKeywords:
+        for keyword in keywords:
+            if keyword in tweets_text:
+                print '%s\t%s\t%s' % (topicId, location, exactDate)
diff --git a/CalculateGeographicIndexReducerDate.py b/CalculateGeographicIndexReducerDate.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+
+import sys,json
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    """Return the last line of a file with surrounding whitespace removed.
+
+    Despite its name, this variant does not build an array: only the final
+    line of the file is kept.
+
+    fileName -- path of the file to read.
+    readMode -- mode string handed straight to open().
+    isLower  -- when true, the returned line is lower-cased.
+
+    Returns an empty string when the file contains no lines (previously the
+    result variable was never bound and the call raised UnboundLocalError).
+    """
+    count = ""
+    with open(fileName, readMode) as readHandle:
+        # Iterate lazily; each pass overwrites the previous line, so only
+        # the last one survives the loop.
+        for line in readHandle:
+            count = line
+    if isLower:
+        count = count.lower()
+    return count.strip()
+
+
+# Reducer: counts the distinct locations seen for every (date, topic) pair.
+# Input records are whitespace-separated: the first field is the topic id,
+# the last field is the date and everything in between is the location.
+#
+# Maps "<date> <topicId>" to the set of distinct location strings.
+geo_score = {}
+
+for line in sys.stdin:
+    cols = line.split()
+    if len(cols) < 3:
+        # A usable record needs a topic id, at least one location word and
+        # a date.  Shorter lines used to register an empty location.
+        continue
+    topicId = cols[0]
+    date = cols[-1]
+    # Re-join the words into one string.  The former list slice never
+    # compared equal to "" or "noLocation", so that filter had no effect.
+    location = " ".join(cols[1:-1])
+    dateTopicId = date + " " + topicId
+    # The key is registered even when the location is filtered out, so the
+    # pair is still reported (possibly with a count of 0).
+    knownLocations = geo_score.setdefault(dateTopicId, set())
+    if location != "noLocation":
+        knownLocations.add(location)
+
+
+# Emit "<date> <topicId>" TAB <number of distinct locations>.
+for topic in geo_score:
+    print '%s\t%s' % (topic, len(geo_score[topic]))
diff --git a/CalculateSpamIndexReducerDate.py b/CalculateSpamIndexReducerDate.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+
+import sys,json
+
+def readFileandReturnAnArray(fileName, readMode, isLower):
+    """Return the last line of a file with surrounding whitespace removed.
+
+    Despite its name, this variant does not build an array: only the final
+    line of the file is kept.
+
+    fileName -- path of the file to read.
+    readMode -- mode string handed straight to open().
+    isLower  -- when true, the returned line is lower-cased.
+
+    Returns an empty string when the file contains no lines (previously the
+    result variable was never bound and the call raised UnboundLocalError).
+    """
+    count = ""
+    with open(fileName, readMode) as readHandle:
+        # Iterate lazily; each pass overwrites the previous line, so only
+        # the last one survives the loop.
+        for line in readHandle:
+            count = line
+    if isLower:
+        count = count.lower()
+    return count.strip()
+
+# Reducer: for every (date, topic) pair, prints 1 divided by the number of
+# distinct second-column values seen for that pair.
+#
+# Maps "<date> <topicId>" to the set of distinct second-column values.
+geo_score = {}
+
+for line in sys.stdin:
+    splitLine = line.split()
+    if len(splitLine) < 3:
+        # Blank or truncated record: indexing it would raise IndexError.
+        continue
+    topicId, member, date = splitLine[0], splitLine[1], splitLine[2]
+    dateTopicId = date + " " + topicId
+    # A set de-duplicates in constant time; the earlier list needed a
+    # linear "not in" scan for every record.  split() never yields empty
+    # strings, so no emptiness test is required.
+    geo_score.setdefault(dateTopicId, set()).add(member)
+
+
+# Emit "<date> <topicId>" TAB <spam index>.
+for topic in geo_score:
+    # Every key is created together with its first member, so the set is
+    # never empty and the division cannot hit zero.
+    length = len(geo_score[topic])
+    spamIndex = 1.00 / length
+    print '%s\t%s' % (topic, spamIndex)
diff --git a/CalculateTweetCountMapperDate.py b/CalculateTweetCountMapperDate.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+import sys,json
+
+
+# Mapper: emits "<date key> TAB 1" for every JSON record read from stdin.
+for rawLine in sys.stdin:
+    # Lines holding at most one non-blank character are skipped unparsed.
+    if len(rawLine.strip()) <= 1:
+        continue
+    tweet = json.loads(rawLine)
+    createdAt = tweet['created_at'].strip().encode('ascii', 'ignore')
+    parts = createdAt.split()
+    # Fields 0, 1, 2 and 5 glued together form the date key (presumably
+    # weekday, month, day and year -- confirm against the feed format).
+    dayKey = ''.join([parts[0], parts[1], parts[2], parts[5]])
+    print "%s\t%s" % (dayKey, 1)
diff --git a/CalculateTweetCountReducerDate.py b/CalculateTweetCountReducerDate.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+
+from operator import itemgetter
+import sys
+
+# Reducer: sums the counts of consecutive "<date> TAB <count>" lines that
+# share the same date and prints one "<date> TAB <total>" line per run.
+# Only adjacent lines are merged, so the input has to arrive grouped by date
+# (presumably guaranteed by the streaming framework's sort -- confirm).
+current_count = 0
+current_date = None
+
+for line in sys.stdin:
+    fields = line.strip().split('\t', 1)
+    if len(fields) != 2:
+        # Blank line or record without a tab: unpacking it used to raise
+        # ValueError outside the try block and abort the whole reducer.
+        continue
+    date = fields[0]
+    try:
+        count = int(fields[1])
+    except ValueError:
+        # Count is not a number: ignore the record.
+        continue
+
+    if current_date == date:
+        current_count += count
+    else:
+        # A new date starts: flush the finished run first.
+        if current_date is not None:
+            print '%s\t%s' % (current_date, current_count)
+        current_count = count
+        current_date = date
+
+
+# Flush the last run.  Testing current_date alone (instead of comparing it
+# with the last date read) keeps the final total even when the last input
+# line was skipped, and prints nothing at all for empty input.
+if current_date is not None:
+    print '%s\t%s' % (current_date, current_count)