from sys import argv from os import environ from dotenv import load_dotenv from cloudscraper import CloudScraper, create_scraper from re import findall class Scraper: def __init__(self, pseudo, password, app, debug = False): self.debug = debug self.url = "https://forum.mobilism.org" self.requested_app = app self.loginData = { "username": pseudo, "password": password, "login": "Login" } def errorFormat(self, code: int = None, message: str = "") -> str: return f"{f'[{code}]' if code else ''}{' ' if len(message) > 0 and code else ''}{message}" def connect(self) -> CloudScraper: session = create_scraper(browser = {"browser": "chrome", "platform": "windows"}) # connect with cloudflare bypasser with a chrome browser on windows if self.debug: print("Retrieval of the login SID...", end = " ") reponse = session.get(f"{self.url}/ucp.php", params = {"mode": "login"}) # get login page to get "sid" if reponse.status_code != 200: raise ConnectionError(self.errorFormat(code = reponse.status_code)) try: self.loginData["sid"] = reponse.cookies.get_dict()["ppcw_29d3s_sid"] # register "sid" except: raise ValueError(self.errorFormat(message = "Cookie containing the SID not found.")) if self.debug: print("SID retrieval done,", end = " ") if self.debug: print("connection attempt...", end = " ") reponse = session.post(f"{self.url}/ucp.php", data = self.loginData, params = {"mode": "login"}) # connect to the forum using credentials if reponse.status_code != 200: raise ConnectionRefusedError(self.errorFormat(code = reponse.status_code)) if self.debug: print("Connection done.") reponse = session.get(f"{self.url}/index.php", cookies = reponse.cookies, params = {"sid": self.loginData["sid"]}) # back to index page return session def search(self, session) -> str: if self.debug: print("Going to search page...", end = " ") reponse = session.get(f"{self.url}/search.php", params = {"keywords": self.requested_app, "sr": "topics", "sf": "titleonly"}) if self.debug: print("Results retrieval...", end = " ") linkList = self.parse(reponse.text) # if self.debug: print(reponse.status_code, reponse.url) # with open("temp2.log", "w") as f: # debug # f.writelines(res) link = "No link for your application was found." return link def parse(self, htmlPage: str) -> list: elements = htmlPage.split("