better comments
This commit is contained in:
parent
c5ea9e4f39
commit
5d4ac14044
1 changed file with 11 additions and 11 deletions
22
main.py
22
main.py
|
@ -5,7 +5,7 @@ from cloudscraper import CloudScraper, create_scraper
|
||||||
from re import findall, sub
|
from re import findall, sub
|
||||||
|
|
||||||
class Scraper:
|
class Scraper:
|
||||||
def __init__(self, pseudo, password, app, debug = False):
|
def __init__(self, pseudo: str, password: str, app: str, debug: bool = False):
|
||||||
self.debug = debug
|
self.debug = debug
|
||||||
self.url = "https://forum.mobilism.org"
|
self.url = "https://forum.mobilism.org"
|
||||||
self.requested_app = app
|
self.requested_app = app
|
||||||
|
@ -32,7 +32,7 @@ class Scraper:
|
||||||
|
|
||||||
return session
|
return session
|
||||||
|
|
||||||
def search(self, session) -> tuple[list[dict], list[dict]]:
|
def search(self, session: CloudScraper) -> tuple[list[dict], list[dict]]:
|
||||||
"""Do the research."""
|
"""Do the research."""
|
||||||
if self.debug: print("Going to search page and check connection...", end = " ")
|
if self.debug: print("Going to search page and check connection...", end = " ")
|
||||||
reponse = session.get(f"{self.url}/search.php", params = {"keywords": self.requested_app, "sr": "topics", "sf": "titleonly"}) # fetch results page
|
reponse = session.get(f"{self.url}/search.php", params = {"keywords": self.requested_app, "sr": "topics", "sf": "titleonly"}) # fetch results page
|
||||||
|
@ -80,7 +80,7 @@ class Scraper:
|
||||||
|
|
||||||
return elements
|
return elements
|
||||||
|
|
||||||
def getInfos(self, session, elements: list) -> list:
|
def getInfos(self, session: CloudScraper, elements: list) -> list:
|
||||||
"""Go to the first n pages and get a lot of infos"""
|
"""Go to the first n pages and get a lot of infos"""
|
||||||
page = 3
|
page = 3
|
||||||
if self.debug: print(f"Going to the {page} first pages...", end = " ")
|
if self.debug: print(f"Going to the {page} first pages...", end = " ")
|
||||||
|
@ -118,14 +118,14 @@ class Scraper:
|
||||||
_downloadLinks = findall(r"Download Instructions:</span> ?<br />(.*|[\s\S]*)</a></div>", elements[i])[0]
|
_downloadLinks = findall(r"Download Instructions:</span> ?<br />(.*|[\s\S]*)</a></div>", elements[i])[0]
|
||||||
except:
|
except:
|
||||||
_downloadLinks = None
|
_downloadLinks = None
|
||||||
_downloadLinks = sub(r"\n|<a class=\"postlink\" href=\"|\(Closed Filehost\) ?|<span style=\"font-weight: bold\">|</span>|\">(\S*)</a>", "", _downloadLinks)
|
_downloadLinks = sub(r"\n|<a class=\"postlink\" href=\"|\(Closed Filehost\) ?|<span style=\"font-weight: bold\">|</span>|\">(\S*)</a>", "", _downloadLinks) # remove html garbage
|
||||||
_downloadLinks = sub(r"<br />\n?", "\n", _downloadLinks)
|
_downloadLinks = sub(r"<br />\n?", "\n", _downloadLinks) # convert newline html to \n
|
||||||
_downloadLinks = sub(r"Mirrors(?!:)|Mirror(?!s)(?!:)", "Mirror:", _downloadLinks)
|
_downloadLinks = sub(r"Mirrors(?!:)|Mirror(?!s)(?!:)", "Mirror:", _downloadLinks) # add ":"
|
||||||
elements[i] = {"changelogs": _changelogs, "downloadLinks": _downloadLinks}
|
elements[i] = {"changelogs": _changelogs, "downloadLinks": _downloadLinks}
|
||||||
|
|
||||||
return elements
|
return elements
|
||||||
|
|
||||||
def prettyPrint(self, topics: tuple[list[dict], list[dict]]):
|
def prettyPrint(self, topics: tuple[list[dict], list[dict]]) -> list:
|
||||||
"""Show a pretty message with all the specialized infos"""
|
"""Show a pretty message with all the specialized infos"""
|
||||||
topics, topicsInfos = topics
|
topics, topicsInfos = topics
|
||||||
print("\n")
|
print("\n")
|
||||||
|
@ -147,15 +147,15 @@ class Scraper:
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def work(self) -> str:
|
def work(self) -> list:
|
||||||
"""Call all the others methods."""
|
"""Call all the others methods."""
|
||||||
return self.prettyPrint(self.search(self.connect()))
|
return self.prettyPrint(self.search(self.connect()))
|
||||||
|
|
||||||
def save(self, elements):
|
def save(self, elements: list) -> None:
|
||||||
"""Save all the results parsed to a CSV file."""
|
"""Save all the results parsed to a CSV file."""
|
||||||
taille = len(elements)
|
taille = len(elements)
|
||||||
if taille == 0:
|
if taille == 0:
|
||||||
print("Aucun élément n'a été trouvé avec la recherche.")
|
print("No elements were found with the search.")
|
||||||
return
|
return
|
||||||
filename = "results.csv"
|
filename = "results.csv"
|
||||||
with open(filename, "w") as f:
|
with open(filename, "w") as f:
|
||||||
|
@ -167,7 +167,7 @@ class Scraper:
|
||||||
if element != "linkParams":
|
if element != "linkParams":
|
||||||
f.write(";".join(str(e) for e in list(element.values())[:-1]))
|
f.write(";".join(str(e) for e in list(element.values())[:-1]))
|
||||||
f.write("\n")
|
f.write("\n")
|
||||||
print(f"{taille} éléments ont étés enrengistés dans le fichier {filename}.")
|
print(f"{taille} elements have been registered in the {filename} file.")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
argv = argv[1:]
|
argv = argv[1:]
|
||||||
|
|
Reference in a new issue