From 2131f7bbc9ea5216264128b038414bbd4a31877c Mon Sep 17 00:00:00 2001 From: Mylloon Date: Thu, 5 Aug 2021 18:29:32 +0200 Subject: [PATCH] rework of the cleaning tweets and more logs --- main.py | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/main.py b/main.py index b9400c6..8023471 100644 --- a/main.py +++ b/main.py @@ -1,12 +1,11 @@ from dotenv import load_dotenv from os import environ from tweepy import OAuthHandler, API, StreamListener, Stream -from re import sub +from re import sub, findall from random import choice from datetime import datetime from pytz import timezone from queue import Queue -from utils.remove_unicode import emojis def load(variables) -> dict: """Load environment variables.""" @@ -29,26 +28,42 @@ def load(variables) -> dict: exit(1) return keys +def cleanTweet(tweet: str) -> str: + """Remove all unwanted elements from the tweet.""" + tweet = tweet.lower() # convert to lower case + tweet = sub(r"(https?:\/\/\S+|www.\S+)", " ", tweet) # remove URLs + hashtagMatch = findall(r"#\S+", tweet) # check all hashtags + if len(hashtagMatch) < 3: # if less than 3 + tweet = sub(r"#\S+", " ", tweet) # remove them + tweet = sub(r"@\S+", " ", tweet) # remove usernames + tweet = sub(r" *?[^\w\s]+", " ", tweet) # remove everything who is not a letter or a number or a space + tweet = sub(r"(?<=ui)i+|(?<=na)a+(? list: """Get all friends of choosen users.""" liste = [] @@ -141,8 +166,6 @@ def main(accessToken: str, accessTokenSecret: str, consumerKey: str, consumerSec listener = Listener(api, users) stream = Stream(auth = api.auth, listener = listener) - - print(f"Scroll sur Twitter avec les abonnements de @{', @'.join(users)} comme timeline...") stream.filter(track = triggerWords, languages = ["fr"], stall_warnings = True, is_async = True) if __name__ == '__main__':