#!/usr/bin/python # # Author: Johnson Kachikaran (johnsoncharles26@gmail.com) # Date: 30th May 2016 # Purpose: SWIFT 2016, Colorado State University # # Copyright (c) 2016 Johnson Kachikaran # # Permission is hereby granted, free of charge, to any person obtaining a copy of this software # and associated documentation files (the "Software"), to deal in the Software without restriction, # including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all copies or substantial # portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT # LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# ********* Usage ************
# swift = Reader()  # uses the built-in application credentials
# swift = Reader(key=..., key_secret=..., token=..., token_secret=...)  # or supply your own
#
# ==== To get tweets having Text - Memorial Day, results returned in 60 seconds ====
# tweets = swift.get_tweets(track="MemorialDay")
#
# ==== To get tweets having Text - Memorial Day, results returned in specified timeout in seconds ====
# tweets = swift.get_tweets(track="MemorialDay", timeout=180)
#
# ==== To get a specified number of tweets having Text - Memorial Day ====
# tweets = swift.get_tweets(number=50, track="MemorialDay")
#
# ==== To get a specified number of tweets ====
# tweets = swift.get_tweets(number=50)
#
# ==== To print the list of tweets returned from any of the previous calls ====
# swift.print_tweets(tweets)
#
# ==== To print a single tweet ====
# single_tweet = tweets[0]
# swift.print_tweet(single_tweet)
#
from __future__ import print_function

import calendar
import datetime as dt
import time
from urllib.request import urlopen

from twython import TwythonStreamer


class Reader:
    """Collect live tweets through Twython's streaming client.

    On construction the reader downloads a list of words; tweets whose
    lower-cased text contains any of them are discarded by the stream.
    """

    def __init__(self, key=None, key_secret=None, token=None, token_secret=None):
        """Load the word list and open the log file.

        :param key: Twitter application key, optional. Falls back to the built-in key.
        :param key_secret: Twitter application secret, optional.
        :param token: OAuth access token, optional.
        :param token_secret: OAuth access token secret, optional.
        """
        gist = "https://goo.gl/xQ7Tdl"
        # NOTE(review): credentials are committed in source; kept only as
        # backward-compatible defaults. Prefer passing your own.
        self.app_key = key or 'YT17H8JArRtK4XPpusXHn8AW6'
        self.app_key_secret = key_secret or 'NrEazP4L1AAWQ64Pl5rx9L97BBuGKxfuvxGX4nRdnX9sr4FzVK'
        self.access_token = token or '734805952898072580-OZ4mrfyWrJDllVkwVD7K7GzrT8hFumw'
        self.access_token_secret = token_secret or 'OEejoUGnb2D8Vieoz3GcacYRumMICdYHEhS10Vhqr8KeJ'
        self.words = []
        with urlopen(gist) as data:
            for line in data:
                # strip() handles "\n", "\r\n" and a final line without a newline;
                # blank entries are skipped because an empty word matches every text.
                word = line.decode('utf-8', 'replace').strip()
                if word:
                    self.words.append(word)
        # Text mode: print(..., file=self.log) writes str, which a binary file rejects.
        self.log = open('log.txt', 'a', encoding='utf-8')

    def close(self):
        """Close the log file opened by the constructor."""
        self.log.close()

    def get_tweets(self, number=None, track=None, timeout=60, hashtags=True):  # timeout in seconds
        """
        gets the live tweets. either number or track is required in params.

        When ``number`` is given the stream stops once that many tweets were
        collected (``timeout`` is not used). When only ``track`` is given the
        stream stops after ``timeout`` seconds; the deadline is checked each
        time the stream delivers a message.

        :param number: integer >= 1, optional
        :param track: text that should be present in the tweet, optional (1 to 60 characters).
                      A comma-separated list of phrases which will be used to determine what
                      Tweets will be delivered on the stream. A phrase may be one or more terms
                      separated by spaces, and a phrase will match if all of the terms in the
                      phrase are present in the Tweet, regardless of order and ignoring case.
        :param timeout: integer specifying number of seconds. optional. default is 60 seconds
        :param hashtags: if true considers only the tweets having hashtags, optional. default True.
        :return: list of tweets, or None (after printing a message) when the arguments are invalid.
        """
        if number is None and track is None:
            print("you should specify either a number or a track phrase")
            return None
        if number is not None and number < 1:
            print("number of tweets must be greater than or equal to 1")
            return None
        if track is not None and not 1 <= len(track) <= 60:
            print("track text must be between 1 and 60 characters, inclusive.")
            return None

        if number is not None:
            mode = {"type": "number", "number": number}
        else:
            mode = {"type": "track", "timeout": timeout}
        stream = Streamer(words=self.words, key=self.app_key, key_secret=self.app_key_secret,
                          token=self.access_token, token_secret=self.access_token_secret,
                          hashtags=hashtags, **mode)
        try:
            if track is not None:
                stream.statuses.filter(track=track)
            else:
                stream.statuses.sample(language='en')
            # NOTE(review): Twython's streaming calls appear to block until
            # disconnect(); this wait is kept as a safeguard, sleeping instead
            # of spinning the CPU.
            while stream.connected:
                time.sleep(0.1)
        finally:
            # Each Streamer opens its own log handle; release it with the stream.
            stream.log.close()
        return stream.tweets

    def print_tweets(self, tweets):
        """Print every tweet in ``tweets``; failures are written to the log file.

        :param tweets: iterable of tweet dicts as returned by get_tweets.
        """
        try:
            for tweet in tweets:
                self.print_tweet(tweet)
        except Exception as e:
            print(e, file=self.log)
            print("***********Print Failed***********\n")

    def print_tweet(self, tweet):
        """Print a single tweet; failures are written to the log file.

        :param tweet: tweet dict with the keys id, created_at, text, hashtags and user.
        """
        try:
            user = tweet["user"]
            lines = [
                "{",
                " id: " + str(tweet["id"]),
                " created_at: " + str(tweet["created_at"]),
                " text: " + tweet["text"],
                " hashtags: " + (", ".join(tweet["hashtags"]) if tweet["hashtags"] else "none"),
                " user: {",
                " author: " + str(user["author"]),
                " id: " + str(user["id"]),
                " statuses: " + str(user["statuses"]),
                " favorites: " + str(user["favorites"]),
                " friends: " + str(user["friends"]),
                " followers: " + str(user["followers"]),
                " following: " + str(user["following"]),
                " }",
                "}\n",
            ]
            # Built first, printed once: a malformed tweet produces no partial output.
            print("\n".join(lines))
        except Exception as e:
            print(e, file=self.log)
            print("***********Print Failed***********\n")

    def get(self, tweet, key):
        """Return one field of a tweet as a string.

        :param tweet: tweet dict as returned by get_tweets.
        :param key: one of 'author', 'hashtags', 'time' or 'text'.
        :return: the requested value; 'time' is formatted as MM/DD/YYYY HH:MM:SS.
                 An explanatory message string is returned for any other key.
        """
        key = str(key)
        if key == 'author':
            return tweet["user"]["author"]
        if key == 'hashtags':
            return ", ".join(tweet["hashtags"]) if tweet["hashtags"] else "none"
        if key == 'time':
            # created_at is split on spaces as: weekday, month abbr, day, time, offset, year
            parts = tweet["created_at"].split(" ")
            month = list(calendar.month_abbr).index(parts[1])
            return "{:02d}/{}/{} {}".format(month, parts[2], parts[5], parts[3])
        if key == 'text':
            return tweet['text']
        return "Invalid Parameter, must provide a tweet, and one of: author, hashtags, time, text"


class Streamer(TwythonStreamer):
    """Twython stream handler that filters incoming tweets and collects them in ``self.tweets``."""

    # (key in the collected tweet's "user" dict, key in the incoming user object)
    _USER_FIELDS = (
        ("author", "name"),
        ("id", "id"),
        ("followers", "followers_count"),
        ("friends", "friends_count"),
        ("following", "following"),
        ("favorites", "favourites_count"),
        ("statuses", "statuses_count"),
    )

    def __init__(self, words, key, key_secret, token, token_secret, type, number=None, timeout=60,
                 hashtags=True):
        """
        :param words: words (str or bytes) that disqualify a tweet when found in its lower-cased text.
        :param key: Twitter application key.
        :param key_secret: Twitter application secret.
        :param token: OAuth access token.
        :param token_secret: OAuth access token secret.
        :param type: "number" to stop after ``number`` tweets, "track" to stop after ``timeout`` seconds.
        :param number: number of tweets to collect when type is "number".
        :param timeout: seconds to collect for when type is "track".
        :param hashtags: if true, keep only tweets that have at least one hashtag.
        """
        self.words = []
        for word in words:
            if isinstance(word, bytes):
                word = word.decode('utf-8', 'replace')
            if word:  # an empty word is a substring of every text
                self.words.append(word)
        self.tweets = []
        self.type = type
        self.number = number
        self.start = dt.datetime.now()
        self.hashtags = hashtags
        self.timeout = timeout
        self.log = open('log.txt', 'a', encoding='utf-8')
        super(Streamer, self).__init__(app_key=key, app_secret=key_secret, oauth_token=token,
                                       oauth_token_secret=token_secret,
                                       timeout=600)  # timeout for getting a post from Twitter

    def on_success(self, tweet):
        """Collect ``tweet`` if it passes the filters, then disconnect if the stop condition is met."""
        record = self._build_record(tweet)
        if record is not None:
            self.tweets.append(record)
        if self.type == "number" and self.number:
            if len(self.tweets) >= self.number:
                self.disconnect()
        if self.type == "track" and self.timeout:
            # total_seconds(): timedelta.seconds alone wraps around every 24 hours
            elapsed = (dt.datetime.now() - self.start).total_seconds()
            if elapsed >= self.timeout:
                self.disconnect()

    def _build_record(self, tweet):
        """Return the trimmed-down dict for ``tweet``, or None when the tweet is filtered out."""
        text = tweet.get("text")
        # considering only the tweets that are classified as in english
        if not text or tweet.get("lang") != 'en':
            return None
        lowered = text.lower()
        if any(word in lowered for word in self.words):
            return None
        entities = tweet.get("entities") or {}
        hashtags = [tag["text"] for tag in (entities.get("hashtags") or []) if tag.get("text")]
        if self.hashtags and not hashtags:
            return None
        record = {"id": str(tweet.get("id", "none")), "text": text, "hashtags": hashtags}
        if tweet.get("created_at"):
            record["created_at"] = tweet["created_at"]
        record["user"] = self._extract_user(tweet.get("user"))
        return record

    def _extract_user(self, source):
        """Return the user summary dict; fields absent from ``source`` stay "none"."""
        user = {"author": "none", "id": "none", "friends": "none", "followers": "none",
                "following": "none", "favorites": "none", "statuses": "none"}
        if source:
            for target, origin in self._USER_FIELDS:
                value = source.get(origin)
                # Only missing/empty values fall back to "none"; a count of 0 is real data.
                if value is None or value == "":
                    continue
                user[target] = value
        return user

    def on_error(self, status_code, data, headers=None):
        """Print the HTTP status of a failed stream request and record it in the log file.

        ``headers`` is optional so the handler works whether or not the
        installed Twython passes it.
        """
        print({"status_code": status_code}, file=self.log)
        self.log.flush()
        print(status_code)

    def on_timeout(self):
        """Disconnect the stream when a request to Twitter times out."""
        self.disconnect()