From 6ec0518ff4d841aaee8868635e85f75399e23c19 Mon Sep 17 00:00:00 2001 From: Mylloon Date: Tue, 24 Aug 2021 13:30:53 +0200 Subject: [PATCH] skip some step when nothing was found and remove potential html garbage --- main.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index ac0ad5b..2721d7c 100644 --- a/main.py +++ b/main.py @@ -82,10 +82,13 @@ class Scraper: def getInfos(self, session: CloudScraper, elements: list) -> list: """Go to the first n pages and get a lot of infos""" + size = len(elements) + if size == 0: + return [] page = 3 if self.debug: print(f"Going to the {page} first pages...", end = " ") results = [] - for i in range(0, len(elements)): + for i in range(0, size): if i < page: reponse = session.get(f"{self.url}/viewtopic.php", params = elements[i]["linkParams"]) # fetch results page results.append(reponse) @@ -122,6 +125,7 @@ class Scraper: _downloadLinks = sub(r"\n|||\">(\S*)", "", _downloadLinks) # remove html garbage _downloadLinks = sub(r"
\n?", "\n", _downloadLinks) # convert newline html to \n _downloadLinks = sub(r"Mirrors(?!:)|Mirror(?!s)(?!:)", "Mirror:", _downloadLinks) # add ":" + _downloadLinks = _downloadLinks.split('">')[0] elements[i] = {"changelogs": _changelogs, "downloadLinks": _downloadLinks} return elements @@ -129,6 +133,8 @@ class Scraper: def prettyPrint(self, topics: tuple[list[dict], list[dict]]) -> list: """Show a pretty message with all the specialized infos""" topics, topicsInfos = topics + if len(topics) == 0: + return [] print("\n") result = [] for i in range(0, len(topicsInfos)): @@ -154,8 +160,8 @@ class Scraper: def save(self, elements: list) -> None: """Save all the results parsed to a CSV file.""" - taille = len(elements) - if taille == 0: + size = len(elements) + if size == 0: print("No elements were found with the search.") return filename = "results.csv" @@ -168,7 +174,7 @@ class Scraper: if element != "linkParams": f.write(";".join(str(e) for e in list(element.values())[:-1])) f.write("\n") - print(f"{taille} elements have been registered in the {filename} file.") + print(f"{size} elements have been registered in the {filename} file.") if __name__ == "__main__": argv = argv[1:]