This repository has been archived on 2022-04-07. You can view files and clone it, but cannot push or open issues or pull requests.
mobilismScrap/main.py
2021-08-23 18:19:04 +02:00

98 lines
4.1 KiB
Python

from sys import argv
from os import environ
from dotenv import load_dotenv
from cloudscraper import CloudScraper, create_scraper
from re import findall
class Scraper:
    """Log in to the Mobilism forum and search its topics for an application.

    Attributes:
        debug: when true, progress messages are printed to stdout.
        url: base URL of the forum; every request is built from it.
        requested_app: keywords sent to the forum search page.
        loginData: form fields posted to the login page; ``connect`` adds the
            ``sid`` entry once it has been read from the login cookie.
    """

    # Message returned by ``search`` when no parsed result carries a topic link.
    _NOT_FOUND = "No link for your application was found."

    # Markers that delimit one result row in the search page HTML.
    _ROW_START = "<tr>\n<td>"
    _ROW_END = "</td>\n</tr>"

    # Patterns applied to each result row (kept exactly as originally written).
    _TITLE_PATTERN = r"class=\"topictitle\">(.*)<\/a>"
    _AUTHOR_PATTERN = r"<br />\n<i class=\"icon-user\"></i> by <a href=\"\./memberlist\.php\?mode=viewprofile&amp;u=\d+\"( style=\"color: #.*;\" class=\"username-coloured\")?>(.*)</a>"
    _LINK_PATTERN = r"\./viewtopic\.php\?f=(\d*)&amp;t=(\d*)&amp"

    def __init__(self, pseudo, password, app, debug = False):
        """Store the credentials, the search keywords and the debug flag.

        Args:
            pseudo: forum user name.
            password: forum password.
            app: keywords to look for in topic titles.
            debug: truthy to print progress messages.
        """
        self.debug = debug
        self.url = "https://forum.mobilism.org"
        self.requested_app = app
        self.loginData = {
            "username": pseudo,
            "password": password,
            "login": "Login"
        }

    def _log(self, *args, **kwargs) -> None:
        """Print a progress message, but only when debug output is enabled."""
        if self.debug:
            print(*args, **kwargs)

    def errorFormat(self, code: int = None, message: str = "") -> str:
        """Build an error string of the form ``[code] message``.

        Either part may be omitted; the separating space is only emitted when
        both a truthy ``code`` and a non-empty ``message`` are present.
        """
        prefix = f"[{code}]" if code else ""
        separator = " " if message and code else ""
        return f"{prefix}{separator}{message}"

    # The return annotation is quoted so it is not evaluated at definition time.
    def connect(self) -> "CloudScraper":
        """Open a session, fetch the login SID and post the credentials.

        Returns:
            The session object returned by ``create_scraper``, after the login
            form has been posted and the index page requested.

        Raises:
            ConnectionError: the login page did not answer with status 200.
            ValueError: the SID cookie was absent from the login page response.
            ConnectionRefusedError: the login POST did not answer with status 200.
        """
        # connect with cloudflare bypasser with a chrome browser on windows
        session = create_scraper(browser={"browser": "chrome", "platform": "windows"})

        self._log("Retrieval of the login SID...", end=" ")
        # get login page to get "sid"
        reponse = session.get(f"{self.url}/ucp.php", params={"mode": "login"})
        if reponse.status_code != 200:
            raise ConnectionError(self.errorFormat(code=reponse.status_code))
        try:
            # register "sid"
            self.loginData["sid"] = reponse.cookies.get_dict()["ppcw_29d3s_sid"]
        except KeyError:
            raise ValueError(self.errorFormat(message="Cookie containing the SID not found.")) from None
        self._log("SID retrieval done,", end=" ")

        self._log("connection attempt...", end=" ")
        # connect to the forum using credentials
        reponse = session.post(f"{self.url}/ucp.php", data=self.loginData, params={"mode": "login"})
        if reponse.status_code != 200:
            raise ConnectionRefusedError(self.errorFormat(code=reponse.status_code))
        self._log("Connection done.")

        # back to index page; the response itself is not used
        session.get(f"{self.url}/index.php", cookies=reponse.cookies, params={"sid": self.loginData["sid"]})
        return session

    def search(self, session) -> str:
        """Search topic titles for ``requested_app`` and return a topic URL.

        Args:
            session: object exposing ``get(url, params=...)`` whose result has
                ``status_code`` and ``text`` (the session from ``connect``).

        Returns:
            The absolute ``viewtopic.php`` URL of the first parsed result that
            carries link parameters, or a "not found" message when none does.

        Raises:
            ConnectionError: the search page did not answer with status 200.
        """
        self._log("Going to search page...", end=" ")
        reponse = session.get(
            f"{self.url}/search.php",
            params={"keywords": self.requested_app, "sr": "topics", "sf": "titleonly"},
        )
        # Same policy as connect(): a failed request is an error, not "no result".
        if reponse.status_code != 200:
            raise ConnectionError(self.errorFormat(code=reponse.status_code))

        self._log("Results retrieval...", end=" ")
        results = self.parse(reponse.text)
        self._log(f"{len(results)} result(s) parsed.")

        for result in results:
            params = result["linkParams"]
            if params is not None:
                return f"{self.url}/viewtopic.php?f={params['f']}&t={params['t']}"
        return self._NOT_FOUND

    def parse(self, htmlPage: str) -> list:
        """Extract title, author and link parameters from a search result page.

        The page is cut on the ``<tr>\\n<td>`` marker; everything before the
        first marker is dropped and the last chunk is trimmed at its closing
        ``</td>\\n</tr>``.

        Args:
            htmlPage: HTML text of the search page.

        Returns:
            One dict per chunk with the keys ``title``, ``author`` and
            ``linkParams`` (a dict with ``f`` and ``t``); a value is ``None``
            when its pattern does not match. An empty list is returned when the
            page contains no result marker.
        """
        chunks = htmlPage.split(self._ROW_START)[1:]
        if not chunks:
            # No result row at all: nothing to trim, nothing to parse.
            return []
        chunks[-1] = chunks[-1].split(self._ROW_END)[0]

        results = []
        for chunk in chunks:
            titles = findall(self._TITLE_PATTERN, chunk)
            title = titles[0] if titles else None

            # Each match is a (style attribute, user name) tuple; keep the name.
            authors = findall(self._AUTHOR_PATTERN, chunk)
            author = authors[0][-1] if authors else None
            if author is None:
                # Show the raw chunk to help adjust the pattern (debug only).
                self._log(chunk)

            links = findall(self._LINK_PATTERN, chunk)
            link = {"f": links[0][0], "t": links[0][1]} if links else None

            results.append({"title": title, "author": author, "linkParams": link})
        return results

    def work(self) -> str:
        """Log in, run the search and return what ``search`` produced."""
        session = self.connect()
        return self.search(session)
if __name__ == "__main__":
    # Entry point: credentials and keywords come from the command line
    # (pseudo, password, app[, debug]) or, failing that, from the environment
    # after load_dotenv() has been called.

    # Values (case-insensitive) that switch debug output on, for CLI and .env alike.
    truthy = ("yes", "true", "1")

    cli_args = argv[1:]
    if 3 <= len(cli_args) <= 4:
        pseudo, password, app = cli_args[:3]
        # Interpret the optional 4th argument like DEBUG_MOBILISM; passing the
        # raw string through would make any value, even "false", enable debug.
        debug = len(cli_args) == 4 and cli_args[3].lower() in truthy
        print(Scraper(pseudo, password, app, debug).work())
    else:
        load_dotenv()
        try:
            # Only the environment lookups are guarded, so a KeyError raised
            # while scraping is not mistaken for a missing setting.
            pseudo = environ["PSEUDO_MOBILISM"]
            password = environ["PASSWORD_MOBILISM"]
            app = environ["APP_MOBILISM"]
        except KeyError:
            print('Please fill in the username and password (with ") by args or with .env file.')
        else:
            debug = environ.get("DEBUG_MOBILISM", "").lower() in truthy
            print(Scraper(pseudo, password, app, debug).work())