Update REGEX

This commit is contained in:
Mylloon 2021-08-05 12:22:45 +02:00
parent 4b739d8677
commit f5b7e0dafd

View file

@@ -42,13 +42,12 @@ class Listener(StreamListener):
if seniority(status._json["created_at"]): # verification of the age of the tweet if seniority(status._json["created_at"]): # verification of the age of the tweet
if not hasattr(status, "retweeted_status"): # ignore Retweet if not hasattr(status, "retweeted_status"): # ignore Retweet
try: # retrieve the entire tweet try: # retrieve the entire tweet
tweet = status.extended_tweet["full_text"] tweet = status.extended_tweet["full_text"].lower()
except AttributeError: except AttributeError:
tweet = status.text tweet = status.text.lower()
# recovery of the last "usable" word of the tweet # recovery of the last "usable" word of the tweet
regex = r"https?:\/\/\S+| +?\?|\?| +?\!| ?\!|-|~|(?<=ui)i+|@\S+|\.+|(?<=na)a+(?<!n)|(?<=quoi)i+|(?<=no)o+(?<!n)|…|\^+" tweetText = sub(r"https?:\/\/\S+| *\?+| *!+| *,+|-|~|\.+|…|\^+|@\S+" + f"|{emojis()}", " ", tweet) # deletion with space
regex += f"|{emojis()}" tweetText = sub(r"(?<=ui)i+|(?<=na)a+(?<!n)|(?<=quoi)i+|(?<=no)o+(?<!n)|(?<=hei)i+(?<!n)", "", tweet) # deletion without space
tweetText = sub(regex, " ", tweet.lower())
lastWord = tweetText.split()[-1:][0] lastWord = tweetText.split()[-1:][0]
if keys["VERBOSE"]: if keys["VERBOSE"]:
print(f"Tweet trouvé de {status._json['user']['screen_name']} (dernier mot : \"{lastWord}\")...", end = " ") print(f"Tweet trouvé de {status._json['user']['screen_name']} (dernier mot : \"{lastWord}\")...", end = " ")