From 0ef9e3291f55382d354aa4fff59b02e589055cbe Mon Sep 17 00:00:00 2001 From: Mylloon Date: Fri, 6 Aug 2021 02:51:36 +0200 Subject: [PATCH] adding some word automatically --- main.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index 1160eec..44cb3f5 100644 --- a/main.py +++ b/main.py @@ -35,15 +35,16 @@ def load(variables) -> dict: def cleanTweet(tweet: str) -> str: """Remove all unwanted elements from the tweet.""" - tweet = tweet.lower() # convert to lower case - tweet = sub(r"(https?:\/\/\S+|www.\S+)", " ", tweet) # remove URLs + tweet = tweet.lower() # convert to lower case + tweet = sub(r"(https?:\/\/\S+|www.\S+)", " ", tweet) # remove URLs hashtagMatch = findall(r"#\S+", tweet) # check all hashtags if len(hashtagMatch) < 3: # if less than 3 tweet = sub(r"#\S+", " ", tweet) # remove them else: return "" # too much hashtags, ignoring tweet - tweet = sub(r"@\S+", " ", tweet) # remove usernames - tweet = sub(r" *?[^\w\s]+", " ", tweet) # remove everything who is not a letter or a number or a space + tweet = sub(r"@\S+", " ", tweet) # remove usernames + tweet = sub(r" *?[^\w\s]+", " ", tweet) # remove everything who is not a letter or a number or a space + tweet = sub(r"\S+(?=si|ci)", " ", tweet) # remove element of the word only if the last syllable can be matched (so more words will be answered without adding them manually) tweet = sub(r"(?<=ui)i+|(?<=na)a+(?