Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
veeragandhi authored May 8, 2017
1 parent cd97849 commit d97efad
Show file tree
Hide file tree
Showing 7 changed files with 215 additions and 0 deletions.
31 changes: 31 additions & 0 deletions CalculateDiffusionIndexMapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python


import sys,json

def readFileandReturnAnArray(fileName, readMode, isLower):
    """Read a file and return its lines as a list of stripped strings.

    Args:
        fileName: Path of the file to read.
        readMode: Mode string passed straight to ``open``.
        isLower: When true, every line is lower-cased before it is stored.

    Returns:
        A list with one entry per line, in file order, with leading and
        trailing whitespace removed. Blank lines are kept as empty strings;
        an empty file yields an empty list.
    """
    # The ``with`` block closes the handle, so no explicit close() is needed,
    # and iterating the handle avoids building a second list via readlines().
    with open(fileName, readMode) as readHandle:
        if isLower:
            return [line.lower().strip() for line in readHandle]
        return [line.strip() for line in readHandle]


# Topic definition files. The first line of each file is used as the topic
# id; every remaining line is a keyword belonging to that topic.
topicfiles = ["foodtopic1", "foodtopic2", "foodtopic3"]

# Load the topics once up front instead of re-reading all three files for
# every tweet on stdin.
loaded_topics = []
for topic_file in topicfiles:
    topics = readFileandReturnAnArray(topic_file, "r", True)
    topicId = topics.pop(0)
    # An empty keyword is a substring of every text, so blank lines in a
    # topic file must not be treated as keywords.
    loaded_topics.append((topicId, [keyword for keyword in topics if keyword]))

# Emit one "<topicId>\t<username>" record for every keyword found in a tweet.
for line in sys.stdin:
    if not line.strip():
        # json.loads raises on an empty string; skip blank input lines.
        continue
    parsed_json_tweet = json.loads(line)
    # Keywords are loaded lower-cased, so the text is lower-cased as well;
    # otherwise capitalised occurrences would never match.
    tweets_text = parsed_json_tweet['text'].strip().lower()
    user_handle = parsed_json_tweet['user']['screen_name'].strip()
    # Drop any non-ASCII characters from the handle before emitting it.
    username = user_handle.encode('ascii', 'ignore').strip()
    for topicId, keywords in loaded_topics:
        for keyword in keywords:
            if keyword in tweets_text:
                print('%s\t%s' % (topicId, username))
42 changes: 42 additions & 0 deletions CalculateDiffusionIndexReducer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python


from operator import itemgetter
import sys

def readFileandReturnAnArray(fileName, readMode, isLower):
    """Return the last line of a file with surrounding whitespace removed.

    Despite the name, no array is built: every line is read and only the
    final one is returned.

    Args:
        fileName: Path of the file to read.
        readMode: Mode string passed straight to ``open``.
        isLower: When true, the returned line is lower-cased.

    Returns:
        The last line of the file, stripped (an empty string if that line is
        blank).

    Raises:
        ValueError: If the file contains no lines at all. Previously this
            case failed with an unbound-local error on ``count``.
    """
    lastLine = None
    # The ``with`` block closes the handle; no explicit close() is needed.
    with open(fileName, readMode) as readHandle:
        for line in readHandle:
            lastLine = line
    if lastLine is None:
        raise ValueError("%s is empty; expected at least one line" % fileName)
    if isLower:
        lastLine = lastLine.lower()
    return lastLine.strip()

# Total number of tweets, taken from the side file "tweetCount".
totalTweetCount = readFileandReturnAnArray("tweetCount", "r", True)
totalTweetCount = float(totalTweetCount)

# Maps each topic id to the set of distinct usernames seen for it. A set
# gives constant-time de-duplication instead of scanning a list per record.
geo_score = {}

for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    # partition() never raises: a record whose username is empty loses its
    # trailing tab to strip() above and would break a two-value unpack of
    # split('\t').
    topicId, _, username = line.partition('\t')
    users = geo_score.setdefault(topicId, set())
    if username:
        users.add(username)

# Score per topic: distinct users divided by the total tweet count.
for topic in geo_score:
    length = len(geo_score[topic])
    final_score = float(length) / totalTweetCount
    print('%s\t%s' % (topic, final_score))
33 changes: 33 additions & 0 deletions CalculateGeographicIndexMapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

#!/usr/bin/env python


import sys,json

def readFileandReturnAnArray(fileName, readMode, isLower):
    """Read a file and return its lines as a list of stripped strings.

    Args:
        fileName: Path of the file to read.
        readMode: Mode string passed straight to ``open``.
        isLower: When true, every line is lower-cased before it is stored.

    Returns:
        A list with one entry per line, in file order, with leading and
        trailing whitespace removed. Blank lines are kept as empty strings;
        an empty file yields an empty list.
    """
    # The ``with`` block closes the handle, so no explicit close() is needed,
    # and iterating the handle avoids building a second list via readlines().
    with open(fileName, readMode) as readHandle:
        if isLower:
            return [line.lower().strip() for line in readHandle]
        return [line.strip() for line in readHandle]


# Topic definition files. The first line of each file is used as the topic
# id; every remaining line is a keyword belonging to that topic.
topicfiles = ["foodtopic1", "foodtopic2", "foodtopic3"]

# Load the topics once up front instead of re-reading all three files for
# every tweet on stdin.
loaded_topics = []
for topic_file in topicfiles:
    topics = readFileandReturnAnArray(topic_file, "r", True)
    topicId = topics.pop(0)
    # An empty keyword is a substring of every text, so blank lines in a
    # topic file must not be treated as keywords.
    loaded_topics.append((topicId, [keyword for keyword in topics if keyword]))

# Emit one "<topicId>\t<location>" record for every keyword found in a tweet
# whose author has a non-empty location.
for line in sys.stdin:
    if not line.strip():
        # json.loads raises on an empty string; skip blank input lines.
        continue
    parsed_json_tweet = json.loads(line)
    user_location = parsed_json_tweet['user']['location']
    if user_location is None:
        # Skip explicitly so a location left over from an earlier tweet is
        # never attributed to this one.
        continue
    # Drop any non-ASCII characters, then surrounding whitespace.
    location = user_location.encode('ascii', 'ignore').strip()
    if not location:
        continue
    # Keywords are loaded lower-cased, so the text is lower-cased as well;
    # otherwise capitalised occurrences would never match.
    tweets_text = parsed_json_tweet['text'].strip().lower()
    for topicId, keywords in loaded_topics:
        for keyword in keywords:
            if keyword in tweets_text:
                print('%s\t%s' % (topicId, location))
45 changes: 45 additions & 0 deletions CalculateGeographicIndexReducer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python


import sys,json

def readFileandReturnAnArray(fileName, readMode, isLower):
    """Return the last line of a file with surrounding whitespace removed.

    Despite the name, no array is built: every line is read and only the
    final one is returned.

    Args:
        fileName: Path of the file to read.
        readMode: Mode string passed straight to ``open``.
        isLower: When true, the returned line is lower-cased.

    Returns:
        The last line of the file, stripped (an empty string if that line is
        blank).

    Raises:
        ValueError: If the file contains no lines at all. Previously this
            case failed with an unbound-local error on ``count``.
    """
    lastLine = None
    # The ``with`` block closes the handle; no explicit close() is needed.
    with open(fileName, readMode) as readHandle:
        for line in readHandle:
            lastLine = line
    if lastLine is None:
        raise ValueError("%s is empty; expected at least one line" % fileName)
    if isLower:
        lastLine = lastLine.lower()
    return lastLine.strip()

# Total number of tweets, taken from the side file "foodTweetCount1".
totalTweetCount = readFileandReturnAnArray("foodTweetCount1", "r", True)
totalTweetCount = float(totalTweetCount)

# Maps each topic id to the set of distinct locations seen for it. A set
# gives constant-time de-duplication instead of scanning a list per record.
geo_score = {}

for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    # Split on the first tab only. The earlier code indexed the raw line
    # (line[0] / line[1:]), which used a single character as the topic id
    # and left the tab inside the location, so the "noLocation" filter
    # below could never match.
    topicId, _, location = line.partition('\t')
    locations = geo_score.setdefault(topicId, set())
    if location and location != "noLocation":
        locations.add(location)

# Score per topic: distinct locations divided by the total tweet count.
for topic in geo_score:
    length = len(geo_score[topic])
    final_score = float(length) / totalTweetCount
    print('%s\t%s' % (topic, final_score))
40 changes: 40 additions & 0 deletions CalculateSpamIndexReducer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python

import sys

def readFileandReturnAnArray(fileName, readMode, isLower):
    """Return the last line of a file with surrounding whitespace removed.

    Despite the name, no array is built: every line is read and only the
    final one is returned.

    Args:
        fileName: Path of the file to read.
        readMode: Mode string passed straight to ``open``.
        isLower: When true, the returned line is lower-cased.

    Returns:
        The last line of the file, stripped (an empty string if that line is
        blank).

    Raises:
        ValueError: If the file contains no lines at all. Previously this
            case failed with an unbound-local error on ``count``.
    """
    lastLine = None
    # The ``with`` block closes the handle; no explicit close() is needed.
    with open(fileName, readMode) as readHandle:
        for line in readHandle:
            lastLine = line
    if lastLine is None:
        raise ValueError("%s is empty; expected at least one line" % fileName)
    if isLower:
        lastLine = lastLine.lower()
    return lastLine.strip()

# Total number of tweets, taken from the side file "tweetCount".
totalTweetCount = readFileandReturnAnArray("tweetCount", "r", True)
totalTweetCount = float(totalTweetCount)

# Maps each topic id to the set of distinct usernames seen for it. A set
# gives constant-time de-duplication instead of scanning a list per record.
geo_score = {}

for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    # partition() never raises: a record whose username is empty loses its
    # trailing tab to strip() above and would break a two-value unpack of
    # split('\t').
    topicId, _, username = line.partition('\t')
    users = geo_score.setdefault(topicId, set())
    if username:
        users.add(username)

# Score per topic: (1 / distinct users) divided by the total tweet count.
for topic in geo_score:
    length = len(geo_score[topic])
    if not length:
        # A topic with no usable usernames has no defined 1/length; skip it
        # rather than abort the reducer with a ZeroDivisionError.
        continue
    spamIndex = 1.00 / length
    final_score = float(spamIndex) / totalTweetCount
    print('%s\t%s' % (topic, final_score))
8 changes: 8 additions & 0 deletions CalculateTweetCountMapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env python

import sys,json


# Emit a ("Tweet Count:", 1) record for every input line that is longer than
# one character once surrounding whitespace is removed.
for record in sys.stdin:
    if len(record.strip()) <= 1:
        continue
    print("%s\t%s" % ("Tweet Count:", 1))
16 changes: 16 additions & 0 deletions CalculateTweetCountReducer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env python

from operator import itemgetter
import sys

# Running total of the counts received on stdin.
current_count = 0

for line in sys.stdin:
    # Only the value after the first tab is used; the key is ignored.
    # partition() keeps a blank or tab-less line from raising during
    # unpacking, so such lines are skipped like any other malformed record.
    _, separator, count = line.strip().partition('\t')
    if not separator:
        continue
    try:
        current_count += int(count)
    except ValueError:
        # Non-numeric count: ignore this record.
        continue

# Write the grand total once all input has been consumed.
print('%s' % (current_count))

0 comments on commit d97efad

Please sign in to comment.