Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
veeragandhi authored May 15, 2017
1 parent d97efad commit 8148e9b
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 0 deletions.
35 changes: 35 additions & 0 deletions CalculateDiffusionIndexMapperDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env python


import sys,json

def readFileandReturnAnArray(fileName, readMode, isLower):
    """Read a text file and return its lines as a list of stripped strings.

    Args:
        fileName: Path of the file to read.
        readMode: Mode string handed to ``open`` (the callers pass ``"r"``).
        isLower: When true, every line is lower-cased before it is stored.

    Returns:
        One entry per line, in file order, with leading and trailing
        whitespace removed. Blank lines are kept as empty strings.
    """
    myArray = []
    # The ``with`` statement closes the handle, so no explicit close() is
    # needed; iterating the handle avoids loading the whole file first.
    with open(fileName, readMode) as readHandle:
        for line in readHandle:
            if isLower:
                line = line.lower()
            # strip() already removes leading whitespace as well.
            myArray.append(line.strip())
    return myArray


# Topic files read by this mapper. The first line of each file is used as the
# topic id and every following line as one keyword of that topic.
TOPIC_FILES = ["foodtopic1", "foodtopic2", "foodtopic3"]


def to_ascii(text):
    """Return ``text`` as a native ``str`` with non-ASCII characters dropped."""
    # encode/decode round trip keeps the result a text string on Python 3;
    # str() turns it back into a byte string on Python 2.
    return str(text.encode('ascii', 'ignore').decode('ascii'))


def load_topics(topic_files):
    """Load each topic file once and return ``[(topic_id, keywords), ...]``.

    Keywords and topic ids are lower-cased by the file reader.
    """
    loaded = []
    for name in topic_files:
        entries = readFileandReturnAnArray(name, "r", True)
        topic_id = entries.pop(0)
        # Drop blank keyword lines: '' is a substring of every tweet and
        # would otherwise match unconditionally.
        loaded.append((topic_id, [word for word in entries if word]))
    return loaded


def map_tweet(line, topics):
    """Return the mapper records produced by one JSON-encoded tweet.

    Args:
        line: One input line holding a JSON object with ``text``,
            ``user.screen_name`` and ``created_at`` fields.
        topics: ``[(topic_id, keywords), ...]`` as built by ``load_topics``.

    Returns:
        A list of ``"topicId<TAB>username<TAB>date"`` strings, one per
        keyword found in the tweet text (so a topic may appear repeatedly).
        Blank lines and tweets with an empty screen name yield ``[]``.
    """
    if not line.strip():
        return []
    tweet = json.loads(line)
    # Keywords are stored lower-cased, so the text must be lower-cased too
    # for the substring match to be case-insensitive.
    text = tweet['text'].strip().lower()
    username = to_ascii(tweet['user']['screen_name'].strip()).strip()
    if not username:
        # An empty user column would produce a record with a missing field.
        return []
    stamp = to_ascii(tweet['created_at'].strip()).split()
    # Tokens 0, 1, 2 and 5 of created_at are concatenated; the time and
    # offset tokens (3 and 4) are left out.
    exact_date = stamp[0] + stamp[1] + stamp[2] + stamp[5]
    records = []
    for topic_id, keywords in topics:
        for keyword in keywords:
            if keyword in text:
                records.append('%s\t%s\t%s' % (topic_id, username, exact_date))
    return records


if __name__ == "__main__":
    # Read the topic files once instead of once per tweet and topic.
    topics = load_topics(TOPIC_FILES)
    for line in sys.stdin:
        for record in map_tweet(line, topics):
            print(record)
42 changes: 42 additions & 0 deletions CalculateDiffusionIndexReducerDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python


import sys,json

def readFileandReturnAnArray(fileName, readMode, isLower):
    """Return the last line of a text file with surrounding whitespace removed.

    Despite its name, this variant does not build an array: only the final
    line of the file survives the loop.

    Args:
        fileName: Path of the file to read.
        readMode: Mode string handed to ``open``.
        isLower: When true, the returned line is lower-cased.

    Returns:
        The stripped last line, or ``""`` when the file has no lines
        (previously this case raised ``UnboundLocalError``).
    """
    count = ""
    # The ``with`` statement closes the handle; no explicit close() needed.
    with open(fileName, readMode) as readHandle:
        for line in readHandle:
            count = line
    if isLower:
        count = count.lower()
    return count.strip()


def count_distinct_users(lines):
    """Count distinct second-column values per ``"date topicId"`` key.

    Args:
        lines: Iterable of whitespace-separated records whose first three
            fields are ``topicId``, ``username`` and ``date``.

    Returns:
        A dict mapping ``"<date> <topicId>"`` to the number of distinct
        usernames seen for that key. Blank lines and records with fewer
        than three fields are skipped.
    """
    geo_score = {}
    for line in lines:
        cols = line.split()
        if len(cols) < 3:
            # Blank or malformed record: nothing to count.
            continue
        topicId, username, date = cols[0], cols[1], cols[2]
        dateTopicId = date + " " + topicId
        # A set de-duplicates in O(1) instead of scanning a list each time.
        geo_score.setdefault(dateTopicId, set()).add(username)
    return dict((key, len(names)) for key, names in geo_score.items())


if __name__ == "__main__":
    geo_score = count_distinct_users(sys.stdin)
    for topic in geo_score:
        print('%s\t%s' % (topic, geo_score[topic]))
36 changes: 36 additions & 0 deletions CalculateGeographicIndexMapperDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python


import sys,json

def readFileandReturnAnArray(fileName, readMode, isLower):
    """Read a text file and return its lines as a list of stripped strings.

    Args:
        fileName: Path of the file to read.
        readMode: Mode string handed to ``open`` (the callers pass ``"r"``).
        isLower: When true, every line is lower-cased before it is stored.

    Returns:
        One entry per line, in file order, with leading and trailing
        whitespace removed. Blank lines are kept as empty strings.
    """
    myArray = []
    # The ``with`` statement closes the handle, so no explicit close() is
    # needed; iterating the handle avoids loading the whole file first.
    with open(fileName, readMode) as readHandle:
        for line in readHandle:
            if isLower:
                line = line.lower()
            # strip() already removes leading whitespace as well.
            myArray.append(line.strip())
    return myArray


# Topic files read by this mapper. The first line of each file is used as the
# topic id and every following line as one keyword of that topic.
TOPIC_FILES = ["foodtopic1", "foodtopic2", "foodtopic3"]


def to_ascii(text):
    """Return ``text`` as a native ``str`` with non-ASCII characters dropped."""
    # encode/decode round trip keeps the result a text string on Python 3;
    # str() turns it back into a byte string on Python 2.
    return str(text.encode('ascii', 'ignore').decode('ascii'))


def load_topics(topic_files):
    """Load each topic file once and return ``[(topic_id, keywords), ...]``.

    Keywords and topic ids are lower-cased by the file reader.
    """
    loaded = []
    for name in topic_files:
        entries = readFileandReturnAnArray(name, "r", True)
        topic_id = entries.pop(0)
        # Drop blank keyword lines: '' is a substring of every tweet and
        # would otherwise match unconditionally.
        loaded.append((topic_id, [word for word in entries if word]))
    return loaded


def map_tweet(line, topics):
    """Return the mapper records produced by one JSON-encoded tweet.

    Args:
        line: One input line holding a JSON object with ``text``,
            ``user.location`` and ``created_at`` fields.
        topics: ``[(topic_id, keywords), ...]`` as built by ``load_topics``.

    Returns:
        A list of ``"topicId<TAB>location<TAB>date"`` strings, one per
        keyword found in the tweet text. Blank lines and tweets whose
        location is missing or empty after ASCII filtering yield ``[]``.
    """
    if not line.strip():
        return []
    tweet = json.loads(line)
    user_location = tweet['user']['location']
    if user_location is None:
        return []
    # Collapse internal whitespace so that a newline or tab inside the
    # location cannot split the output record.
    location = " ".join(to_ascii(user_location).split())
    if not location:
        return []
    # Keywords are stored lower-cased, so the text must be lower-cased too
    # for the substring match to be case-insensitive.
    text = tweet['text'].strip().lower()
    stamp = to_ascii(tweet['created_at'].strip()).split()
    # Tokens 0, 1, 2 and 5 of created_at are concatenated; the time and
    # offset tokens (3 and 4) are left out.
    exact_date = stamp[0] + stamp[1] + stamp[2] + stamp[5]
    records = []
    for topic_id, keywords in topics:
        for keyword in keywords:
            if keyword in text:
                records.append('%s\t%s\t%s' % (topic_id, location, exact_date))
    return records


if __name__ == "__main__":
    # Read the topic files once instead of once per tweet and topic.
    topics = load_topics(TOPIC_FILES)
    for line in sys.stdin:
        for record in map_tweet(line, topics):
            print(record)
42 changes: 42 additions & 0 deletions CalculateGeographicIndexReducerDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python


import sys,json

def readFileandReturnAnArray(fileName, readMode, isLower):
    """Return the last line of a text file with surrounding whitespace removed.

    Despite its name, this variant does not build an array: only the final
    line of the file survives the loop.

    Args:
        fileName: Path of the file to read.
        readMode: Mode string handed to ``open``.
        isLower: When true, the returned line is lower-cased.

    Returns:
        The stripped last line, or ``""`` when the file has no lines
        (previously this case raised ``UnboundLocalError``).
    """
    count = ""
    # The ``with`` statement closes the handle; no explicit close() needed.
    with open(fileName, readMode) as readHandle:
        for line in readHandle:
            count = line
    if isLower:
        count = count.lower()
    return count.strip()


def count_distinct_locations(lines):
    """Count distinct locations per ``"date topicId"`` key.

    Each record is split on whitespace: the first field is the topic id,
    the last field is the date, and everything in between is the location
    (which may itself contain spaces).

    Args:
        lines: Iterable of input records.

    Returns:
        A dict mapping ``"<date> <topicId>"`` to the number of distinct
        locations seen for that key. The placeholder ``"noLocation"`` still
        registers the key but is not counted. Blank lines and records with
        fewer than three fields are skipped.
    """
    geo_score = {}
    for line in lines:
        cols = line.split()
        if len(cols) < 3:
            # Without a topic, a location and a date there is nothing to
            # count; skipping also avoids reusing the previous record's
            # location and date.
            continue
        topicId = cols[0]
        # Join the middle tokens into a string so the comparison with
        # "noLocation" is string-to-string and the value is hashable.
        location = " ".join(cols[1:-1])
        date = cols[-1]
        dateTopicId = date + " " + topicId
        seen = geo_score.setdefault(dateTopicId, set())
        if location != "noLocation":
            seen.add(location)
    return dict((key, len(places)) for key, places in geo_score.items())


if __name__ == "__main__":
    geo_score = count_distinct_locations(sys.stdin)
    for topic in geo_score:
        print('%s\t%s' % (topic, geo_score[topic]))
42 changes: 42 additions & 0 deletions CalculateSpamIndexReducerDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python


import sys,json

def readFileandReturnAnArray(fileName, readMode, isLower):
    """Return the last line of a text file with surrounding whitespace removed.

    Despite its name, this variant does not build an array: only the final
    line of the file survives the loop.

    Args:
        fileName: Path of the file to read.
        readMode: Mode string handed to ``open``.
        isLower: When true, the returned line is lower-cased.

    Returns:
        The stripped last line, or ``""`` when the file has no lines
        (previously this case raised ``UnboundLocalError``).
    """
    count = ""
    # The ``with`` statement closes the handle; no explicit close() needed.
    with open(fileName, readMode) as readHandle:
        for line in readHandle:
            count = line
    if isLower:
        count = count.lower()
    return count.strip()

def spam_index_by_key(lines):
    """Compute ``1 / (number of distinct second-column values)`` per key.

    Args:
        lines: Iterable of whitespace-separated records whose first three
            fields are ``topicId``, a second-column value, and ``date``.

    Returns:
        A dict mapping ``"<date> <topicId>"`` to ``1.0 / n`` where ``n`` is
        the number of distinct second-column values seen for that key.
        Blank lines and records with fewer than three fields are skipped,
        so every key has at least one value and the division is safe.
    """
    geo_score = {}
    for line in lines:
        splitLine = line.split()
        if len(splitLine) < 3:
            # Blank or malformed record: nothing to count.
            continue
        topicId, value, date = splitLine[0], splitLine[1], splitLine[2]
        dateTopicId = date + " " + topicId
        # A set de-duplicates in O(1) instead of scanning a list each time.
        geo_score.setdefault(dateTopicId, set()).add(value)
    return dict((key, 1.00 / len(values)) for key, values in geo_score.items())


if __name__ == "__main__":
    spam_scores = spam_index_by_key(sys.stdin)
    for topic in spam_scores:
        print('%s\t%s' % (topic, spam_scores[topic]))
13 changes: 13 additions & 0 deletions CalculateTweetCountMapperDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env python

import sys,json


def to_ascii(text):
    """Return ``text`` as a native ``str`` with non-ASCII characters dropped."""
    # encode/decode round trip keeps the result a text string on Python 3;
    # str() turns it back into a byte string on Python 2.
    return str(text.encode('ascii', 'ignore').decode('ascii'))


def map_line(line):
    """Return the ``"<date><TAB>1"`` record for one JSON tweet line.

    Args:
        line: One input line holding a JSON object with a ``created_at``
            field.

    Returns:
        The output record, or ``None`` when the stripped line has at most
        one character (such lines are skipped, as before).
    """
    if len(line.strip()) <= 1:
        return None
    tweet = json.loads(line)
    stamp = to_ascii(tweet['created_at'].strip()).split()
    # Tokens 0, 1, 2 and 5 of created_at are concatenated; the time and
    # offset tokens (3 and 4) are left out.
    exact_date = stamp[0] + stamp[1] + stamp[2] + stamp[5]
    return "%s\t%s" % (exact_date, 1)


if __name__ == "__main__":
    for line in sys.stdin:
        record = map_line(line)
        if record is not None:
            # Single-argument print(...) works on Python 2 and Python 3.
            print(record)
29 changes: 29 additions & 0 deletions CalculateTweetCountReducerDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python

from operator import itemgetter
import sys

def sum_counts_by_date(lines):
    """Yield ``(date, total)`` for each run of consecutive equal dates.

    The input is expected to arrive grouped by date (each date's records
    adjacent to one another); a date that reappears after a different one
    starts a new run.

    Args:
        lines: Iterable of ``"<date><TAB><count>"`` records.

    Yields:
        ``(date, total)`` tuples in input order. Lines without a tab or
        with a non-integer count are skipped.
    """
    current_date = None
    current_count = 0
    for line in lines:
        line = line.strip()
        try:
            # Both the unpacking (no tab in the line) and the int()
            # conversion raise ValueError on a malformed record.
            date, count = line.split('\t', 1)
            count = int(count)
        except ValueError:
            continue
        if date == current_date:
            current_count += count
        else:
            if current_date is not None:
                yield current_date, current_count
            current_date = date
            current_count = count
    # Flush the final run. Testing current_date itself (rather than comparing
    # it with the last parsed date) keeps the run when the last line was
    # malformed and emits nothing for empty input.
    if current_date is not None:
        yield current_date, current_count


if __name__ == "__main__":
    for current_date, current_count in sum_counts_by_date(sys.stdin):
        print('%s\t%s' % (current_date, current_count))

0 comments on commit 8148e9b

Please sign in to comment.