From ad8c242cc732e91b64502b00f1da0537c4ce4b56 Mon Sep 17 00:00:00 2001 From: Mylloon Date: Thu, 5 Aug 2021 19:52:54 +0200 Subject: [PATCH] Ignore tweet when too many hashtags --- main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 3f385cf..7ecfa12 100644 --- a/main.py +++ b/main.py @@ -35,6 +35,8 @@ def cleanTweet(tweet: str) -> str: hashtagMatch = findall(r"#\S+", tweet) # check all hashtags if len(hashtagMatch) < 3: # if less than 3 tweet = sub(r"#\S+", " ", tweet) # remove them + else: + return "" # too many hashtags, ignoring tweet tweet = sub(r"@\S+", " ", tweet) # remove usernames tweet = sub(r" *?[^\w\s]+", " ", tweet) # remove everything who is not a letter or a number or a space tweet = sub(r"(?<=ui)i+|(?<=na)a+(? 0 else "tweet ignoré car trop de hashtags" + print(f"Tweet trouvé de {status._json['user']['screen_name']} ({infoLastWord})...", end = " ") if lastWord in universalBase: # check if the last word found is a supported word answer = None for mot in base.items():