diff --git a/main.py b/main.py index 9e0859e..d9790fe 100644 --- a/main.py +++ b/main.py @@ -16,15 +16,17 @@ class Scraper: } def errorFormat(self, code: int = None, message: str = "") -> str: - return f"{f'[{code}]' if code else ''}{' ' if len(message) > 0 and code else ''}{message}" + return f"{f'[{code}]' if code else ''}{' ' if len(message) > 0 and code else ''}{message}." def connect(self) -> CloudScraper: session = create_scraper(browser = {"browser": "chrome", "platform": "windows"}) # connect with cloudflare bypasser with a chrome browser on windows + if not session: + raise SystemError(self.errorFormat(message = "The creation of the session failed")) if self.debug: print("Retrieval of the login SID...", end = " ") reponse = session.get(f"{self.url}/ucp.php", params = {"mode": "login"}) # get login page to get "sid" if reponse.status_code != 200: - raise ConnectionError(self.errorFormat(code = reponse.status_code)) + raise ConnectionError(self.errorFormat(code = reponse.status_code, message = "Login page not available")) try: self.loginData["sid"] = reponse.cookies.get_dict()["ppcw_29d3s_sid"] # register "sid" except: @@ -34,27 +36,28 @@ class Scraper: if self.debug: print("connection attempt...", end = " ") reponse = session.post(f"{self.url}/ucp.php", data = self.loginData, params = {"mode": "login"}) # connect to the forum using credentials if reponse.status_code != 200: - raise ConnectionRefusedError(self.errorFormat(code = reponse.status_code)) + raise ConnectionRefusedError(self.errorFormat(code = reponse.status_code, message = "Unable to connect")) if self.debug: print("Connection done.") reponse = session.get(f"{self.url}/index.php", cookies = reponse.cookies, params = {"sid": self.loginData["sid"]}) # back to index page + if reponse.status_code != 200: + raise ConnectionError(self.errorFormat(code = reponse.status_code, message = "Unable to get to the index page")) return session - def search(self, session) -> str: + def search(self, session) -> list: if self.debug: print("Going to search page...", end = " ") reponse = session.get(f"{self.url}/search.php", params = {"keywords": self.requested_app, "sr": "topics", "sf": "titleonly"}) + if reponse.status_code != 200: + raise ConnectionError(self.errorFormat(code = reponse.status_code, message = "Impossible to make the search")) - if self.debug: print("Results retrieval...", end = " ") - linkList = self.parse(reponse.text) - # if self.debug: print(reponse.status_code, reponse.url) - # with open("temp2.log", "w") as f: # debug - # f.writelines(res) + if self.debug: print(f"Results retrieval for {self.requested_app}...", end = " ") - link = "No link for your application was found." - return link + return self.parse(reponse.text) def parse(self, htmlPage: str) -> list: + if "No suitable matches were found." in htmlPage: + return [] elements = htmlPage.split("\n")[1:] elements[-1] = elements[-1].split("\n")[0] for i in range(0, len(elements)): @@ -66,13 +69,12 @@ class Scraper: _author = findall(r"
\n by (.*)", elements[i])[0][-1] except: _author = None - print(elements[i]) try: _link = findall(r"\./viewtopic\.php\?f=(\d*)&t=(\d*)&", elements[i])[0] _link = {"f": _link[0], "t": _link[1]} except: _link = None - elements[i] = {"title": _title, "author": _author, "linkParams": _link} + elements[i] = {"title": _title, "author": _author, "link": f"https://forum.mobilism.org/viewtopic.php?f={_link['f']}&t={_link['t']}", "linkParams": _link} return elements @@ -82,10 +84,27 @@ class Scraper: return link +def save(elements): + taille = len(elements) + if taille == 0: + print("Aucun élément n'a été trouvé avec la recherche.") + return + filename = "results.csv" + with open(filename, "w") as f: + f.write(";".join(list(elements[0].keys())[:-1])) + f.write("\n") + for element in elements: + if element != "linkParams": + print(";".join(list(element.values())[:-1])) + f.write(";".join(list(element.values())[:-1])) + f.write("\n") + print(f"{taille} éléments ont étés enrengistés dans le fichier {filename}.") + + if __name__ == "__main__": argv = argv[1:] if len(argv) >= 3 and len(argv) <= 4: - print(Scraper(*argv).work()) + save(Scraper(*argv).work()) else: try: load_dotenv() @@ -93,6 +112,6 @@ if __name__ == "__main__": debug = environ["DEBUG_MOBILISM"].lower() in ("yes", "true", "1") except: debug = False - print(Scraper(environ["PSEUDO_MOBILISM"], environ["PASSWORD_MOBILISM"], environ["APP_MOBILISM"], debug).work()) + save(Scraper(environ["PSEUDO_MOBILISM"], environ["PASSWORD_MOBILISM"], environ["APP_MOBILISM"], debug).work()) except KeyError: print('Please fill in the username and password (with ") by args or with .env file.')