This repository has been archived on 2022-04-07. You can view files and clone it, but cannot push or open issues or pull requests.
mobilismScrap/main.py

68 lines
2.6 KiB
Python
Raw Normal View History

2021-08-23 12:03:00 +02:00
from sys import argv
from os import environ
from dotenv import load_dotenv
2021-08-23 12:27:11 +02:00
from cloudscraper import create_scraper
2021-08-23 12:03:00 +02:00
class Scraper:
    """Log in to the Mobilism forum and look up a link for an application.

    The instance stores the forum base URL, the requested application name
    and the form payload that is posted to the login page.
    """

    def __init__(self, pseudo, password, app, debug = False):
        """Store the credentials and options.

        Args:
            pseudo: Forum username, sent as the ``username`` form field.
            password: Forum password, sent as the ``password`` form field.
            app: Name of the application to look for (stored in
                ``requested_app``).
            debug: When true, progress messages are printed and the fetched
                index page is dumped to ``temp.html``.
        """
        self.debug = debug
        self.url = "https://forum.mobilism.org"
        self.requested_app = app
        # Form payload for the login POST; "sid" is added by connect().
        self.loginData = {
            "username": pseudo,
            "password": password,
            "login": "Login",
        }

    def errorFormat(self, code: int, message: str = "") -> str:
        """Return ``"Error: [code]"``, followed by a space and *message* if any."""
        prefix = f"Error: [{code}]"
        return f"{prefix} {message}" if message else prefix

    def connect(self):
        """Open a scraper session and log in to the forum.

        Returns:
            The session object returned by ``create_scraper`` after the login
            request has been sent.

        Raises:
            ConnectionError: The login page did not answer with HTTP 200.
            ConnectionRefusedError: The login POST did not answer with HTTP 200.
            KeyError: The login page response carried no ``ppcw_29d3s_sid``
                cookie.
        """
        # Cloudflare-bypassing session that presents itself as Chrome on Windows.
        session = create_scraper(browser = {"browser": "chrome", "platform": "windows"})
        try:
            login_url = f"{self.url}/ucp.php?mode=login"

            if self.debug:
                print("Retrieval of the login SID...", end = " ")
            # Fetch the login page first: its cookie holds the "sid" that must
            # be posted back together with the credentials.
            response = session.get(login_url)
            if response.status_code != 200:
                raise ConnectionError(self.errorFormat(response.status_code))
            self.loginData["sid"] = response.cookies.get_dict()["ppcw_29d3s_sid"]

            if self.debug:
                print("SID retrieval done, connection attempt...", end = " ")
            response = session.post(login_url, data = self.loginData)
            # NOTE(review): only the HTTP status is checked here; whether the
            # forum actually accepted the credentials is not verified.
            if response.status_code != 200:
                raise ConnectionRefusedError(self.errorFormat(response.status_code))
            if self.debug:
                print("Connection done.")

            response = session.get(
                f"{self.url}/index.php",
                cookies = response.cookies,
                params = {"sid": self.loginData["sid"]},
            )
            if self.debug:
                print(response.status_code, response.url)
                # Debug dump of the fetched page. Explicit UTF-8 so the write
                # does not depend on the platform's default encoding.
                with open("temp.html", "w", encoding = "utf-8") as f:
                    f.write(response.text)
        except BaseException:
            # Do not leak the underlying connections when the login fails.
            session.close()
            raise
        return session

    def search(self, session):
        """Return the link found for the requested application.

        The lookup itself is not implemented in this version: *session* is
        unused and the "not found" message is always returned.
        """
        if self.debug:
            print("Going to search page...", end = " ")

        link = "No link for your application was found."
        return link

    def work(self):
        """Log in, run the search and return its result."""
        session = self.connect()
        return self.search(session)
2021-08-23 12:03:00 +02:00
if __name__ == "__main__":
    # Credentials come either from exactly three positional arguments
    # (pseudo, password, app) or from the environment / a .env file.
    cli_args = argv[1:]
    if len(cli_args) == 3:
        print(Scraper(*cli_args).work())
    else:
        load_dotenv()
        # Optional flag: a missing variable simply means "debug off".
        debug = environ.get("DEBUG_MOBILISM", "").lower() in ("yes", "true", "1")
        try:
            pseudo = environ["PSEUDO_MOBILISM"]
            password = environ["PASSWORD_MOBILISM"]
            app = environ["APP_MOBILISM"]
        except KeyError:
            # Only a missing setting triggers this hint; network or login
            # failures raised by work() are no longer reported as a
            # configuration problem.
            print('Please fill in the username and password (with ") by args or with .env file.')
        else:
            print(Scraper(pseudo, password, app, debug).work())