adding parser

parent 82b261104c
commit 3956b4d686

1 changed file with 30 additions and 4 deletions

main.py (34 lines changed)
@@ -2,6 +2,7 @@ from sys import argv
 from os import environ
 from dotenv import load_dotenv
 from cloudscraper import CloudScraper, create_scraper
+from re import findall
 
 class Scraper:
     def __init__(self, pseudo, password, app, debug = False):
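The new findall import powers every extraction in parse() below. One behaviour worth knowing: when a pattern contains several capture groups, findall returns a list of tuples rather than plain strings, which is why the author lookup in parse() ends in [0][-1]. A minimal standalone sketch (the HTML snippet here is invented):

from re import findall

# Invented snippet shaped like the profile links parse() matches.
snippet = '<a href="./memberlist.php?mode=viewprofile&u=42" class="username-coloured">alice</a>'

# With two or more capture groups, findall yields one tuple per match.
matches = findall(r"u=(\d+)\"( class=\"username-coloured\")?>(.*)</a>", snippet)
print(matches)         # [('42', ' class="username-coloured"', 'alice')]
print(matches[0][-1])  # 'alice' - the last group of the first match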
@@ -43,13 +44,38 @@ class Scraper:
     def search(self, session) -> str:
         if self.debug: print("Going to search page...", end = " ")
         reponse = session.get(f"{self.url}/search.php", params = {"keywords": self.requested_app, "sr": "topics", "sf": "titleonly"})
-        if self.debug: print(reponse.status_code, reponse.url)
-        with open("temp.html", "w") as f: # debug
-            f.writelines(reponse.text)
+        if self.debug: print("Results retrieval...", end = " ")
+        linkList = self.parse(reponse.text)
+        # if self.debug: print(reponse.status_code, reponse.url)
+        # with open("temp2.log", "w") as f: # debug
+        #     f.writelines(res)
+
         link = "No link for your application was found."
         return link
 
+    def parse(self, htmlPage: str) -> list:
+        elements = htmlPage.split("<tr>\n<td>")[1:]
+        elements[-1] = elements[-1].split("</td>\n</tr>")[0]
+        for i in range(0, len(elements)):
+            try:
+                _title = findall(r"class=\"topictitle\">(.*)<\/a>", elements[i])[0]
+            except:
+                _title = None
+            try:
+                _author = findall(r"<br />\n<i class=\"icon-user\"></i> by <a href=\"\./memberlist\.php\?mode=viewprofile&u=\d+\"( style=\"color: #.*;\" class=\"username-coloured\")?>(.*)</a>", elements[i])[0][-1]
+            except:
+                _author = None
+            print(elements[i])
+            try:
+                _link = findall(r"\./viewtopic\.php\?f=(\d*)&t=(\d*)&", elements[i])[0]
+                _link = {"f": _link[0], "t": _link[1]}
+            except:
+                _link = None
+            elements[i] = {"title": _title, "author": _author, "linkParams": _link}
+
+        return elements
+
     def work(self) -> str:
         session = self.connect()
         link = self.search(session)
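At this stage search() only computes linkList; the returned link is still the hard-coded placeholder string. Assuming __init__ does no more than store its arguments (it is not shown in this diff), the new parse() can be exercised on its own with made-up phpBB-style markup:

# Made-up row shaped like the search results parse() expects.
page = (
    "<tr>\n<td>"
    '<a class="topictitle">MyApp v1.2</a>\n'
    '<br />\n<i class="icon-user"></i> by '
    '<a href="./memberlist.php?mode=viewprofile&u=7">bob</a>\n'
    '<a href="./viewtopic.php?f=12&t=345&sid=x">thread</a>'
    "</td>\n</tr>"
)

result = Scraper("pseudo", "password", "MyApp").parse(page)
# Besides the leftover print(elements[i]) debug output, this returns:
# [{'title': 'MyApp v1.2', 'author': 'bob', 'linkParams': {'f': '12', 't': '345'}}]

Any field whose regex fails falls back to None, so a caller can filter incomplete rows instead of crashing on malformed markup.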
@@ -58,7 +84,7 @@ class Scraper:
 
 if __name__ == "__main__":
     argv = argv[1:]
-    if len(argv) == 3:
+    if len(argv) >= 3 and len(argv) <= 4:
         print(Scraper(*argv).work())
     else:
         try:
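The relaxed length check matches the optional debug parameter of Scraper.__init__: a fourth command-line argument, if present, is forwarded through Scraper(*argv). Note that it arrives as a string, so any non-empty value, even "0", is truthy and switches debug printing on. Invocations would look like:

python main.py pseudo password SomeApp      # debug off (defaults to False)
python main.py pseudo password SomeApp 1    # debug on: "1" is a non-empty string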