adding some words automatically
This commit is contained in:
parent
ca209798f3
commit
0ef9e3291f
1 changed file with 6 additions and 5 deletions
3
main.py
3
main.py
|
@ -44,6 +44,7 @@ def cleanTweet(tweet: str) -> str:
|
|||
return "" # too many hashtags, ignoring tweet
|
||||
tweet = sub(r"@\S+", " ", tweet) # remove usernames
|
||||
tweet = sub(r" *?[^\w\s]+", " ", tweet) # remove everything that is not a letter, a number or a space
|
||||
tweet = sub(r"\S+(?=si|ci)", " ", tweet) # remove element of the word only if the last syllable can be matched (so more words will be answered without adding them manually)
|
||||
tweet = sub(r"(?<=ui)i+|(?<=na)a+(?<!n)|(?<=quoi)i+|(?<=no)o+(?<!n)|(?<=hei)i+(?<!n)|(?<=si)i+", "", tweet) # remove key smashing in certain words
|
||||
|
||||
return tweet.strip()
|
||||
|
@ -204,7 +205,7 @@ if __name__ == "__main__":
|
|||
"non": ["non", "nn"],
|
||||
"nan": ["nan"],
|
||||
"hein": ["hein", "1"],
|
||||
"ci": ["ci", "si", "aussi"],
|
||||
"ci": ["ci", "si"],
|
||||
"con": ["con"],
|
||||
"ok": ["ok", "okay", "oké", "k"],
|
||||
"ouais": ["ouais", "oué"],
|
||||
|
|
Reference in a new issue