finally don't use kivy
This commit is contained in:
parent
35b2db7395
commit
f703645d77
6 changed files with 199 additions and 553 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
@ -5,8 +5,3 @@ pyvenv.cfg
|
|||
|
||||
# vscode
|
||||
.vscode/
|
||||
|
||||
# app
|
||||
.env
|
||||
results.csv
|
||||
.buildozer/
|
||||
|
|
|
@ -8,11 +8,6 @@ Or create `.env` file inside the repo folder with `PSEUDO_MOBILISM`, `PASSWORD_M
|
|||
|
||||
You can also set the `DEBUG_MOBILISM` variable to enable verbose output (only via the `.env` file).
|
||||
|
||||
### [Build](https://kivy.org/doc/stable/guide/packaging-android.html)
|
||||
Install requirements: `pip install -r requirements.txt`
|
||||
Install Buildozer's [dependencies](https://buildozer.readthedocs.io/en/latest/installation.html#targeting-android) (you may need to delete your existing `.buildozer` folder)
|
||||
Plug in your android device and run `buildozer android release`
|
||||
|
||||
#### Applications tested
|
||||
```
|
||||
Plex, Spotify, Tiktok, Clash of Clans, Clash Royale, Twitch, Brawl Stars
|
||||
|
|
329
buildozer.spec
329
buildozer.spec
|
@ -1,329 +0,0 @@
|
|||
[app]
|
||||
|
||||
# (str) Title of your application
|
||||
title = MobiDL
|
||||
|
||||
# (str) Package name
|
||||
package.name = mobidownloader
|
||||
|
||||
# (str) Package domain (needed for android/ios packaging)
|
||||
package.domain = com.mylloon
|
||||
|
||||
# (str) Source code where the main.py live
|
||||
source.dir = .
|
||||
|
||||
# (list) Source files to include (let empty to include all the files)
|
||||
source.include_exts = py,png,kv,atlas
|
||||
|
||||
# (list) List of inclusions using pattern matching
|
||||
#source.include_patterns =
|
||||
|
||||
# (list) Source files to exclude (let empty to not exclude anything)
|
||||
#source.exclude_exts = spec,md,txt,csv,gitignore,env
|
||||
|
||||
# (list) List of directory to exclude (let empty to not exclude anything)
|
||||
#source.exclude_dirs = tests, bin, lib, .vscode
|
||||
|
||||
# (list) List of exclusions using pattern matching
|
||||
#source.exclude_patterns = LICENCE,images/*/*.jpg
|
||||
|
||||
# (str) Application versioning (method 1)
|
||||
version = 0.1
|
||||
|
||||
# (str) Application versioning (method 2)
|
||||
# version.regex = __version__ = ['"](.*)['"]
|
||||
# version.filename = %(source.dir)s/main.py
|
||||
|
||||
# (list) Application requirements
|
||||
# comma separated e.g. requirements = sqlite3,kivy
|
||||
requirements = python3,kivy,requests,python-dotenv,cloudscraper
|
||||
|
||||
# (str) Custom source folders for requirements
|
||||
# Sets custom source for any requirements with recipes
|
||||
# requirements.source.kivy = ../../kivy
|
||||
|
||||
# (list) Garden requirements
|
||||
#garden_requirements =
|
||||
|
||||
# (str) Presplash of the application
|
||||
#presplash.filename = %(source.dir)s/data/presplash.png
|
||||
|
||||
# (str) Icon of the application
|
||||
#icon.filename = %(source.dir)s/data/icon.png
|
||||
|
||||
# (str) Supported orientation (one of landscape, sensorLandscape, portrait or all)
|
||||
orientation = portrait
|
||||
|
||||
# (list) List of service to declare
|
||||
#services = NAME:ENTRYPOINT_TO_PY,NAME2:ENTRYPOINT2_TO_PY
|
||||
|
||||
#
|
||||
# OSX Specific
|
||||
#
|
||||
|
||||
#
|
||||
# author = © Copyright Info
|
||||
|
||||
# change the major version of python used by the app
|
||||
osx.python_version = 3
|
||||
|
||||
# Kivy version to use
|
||||
osx.kivy_version = 1.9.1
|
||||
|
||||
#
|
||||
# Android specific
|
||||
#
|
||||
|
||||
# (bool) Indicate if the application should be fullscreen or not
|
||||
fullscreen = 0
|
||||
|
||||
# (string) Presplash background color (for new android toolchain)
|
||||
# Supported formats are: #RRGGBB #AARRGGBB or one of the following names:
|
||||
# red, blue, green, black, white, gray, cyan, magenta, yellow, lightgray,
|
||||
# darkgray, grey, lightgrey, darkgrey, aqua, fuchsia, lime, maroon, navy,
|
||||
# olive, purple, silver, teal.
|
||||
#android.presplash_color = black
|
||||
|
||||
# (list) Permissions
|
||||
#android.permissions = INTERNET
|
||||
|
||||
# (int) Target Android API, should be as high as possible.
|
||||
#android.api = 27
|
||||
|
||||
# (int) Minimum API your APK will support.
|
||||
#android.minapi = 21
|
||||
|
||||
# (int) Android SDK version to use
|
||||
#android.sdk = 20
|
||||
|
||||
# (str) Android NDK version to use
|
||||
#android.ndk = 19b
|
||||
|
||||
# (int) Android NDK API to use. This is the minimum API your app will support, it should usually match android.minapi.
|
||||
#android.ndk_api = 21
|
||||
|
||||
# (bool) Use --private data storage (True) or --dir public storage (False)
|
||||
#android.private_storage = True
|
||||
|
||||
# (str) Android NDK directory (if empty, it will be automatically downloaded.)
|
||||
#android.ndk_path =
|
||||
|
||||
# (str) Android SDK directory (if empty, it will be automatically downloaded.)
|
||||
#android.sdk_path =
|
||||
|
||||
# (str) ANT directory (if empty, it will be automatically downloaded.)
|
||||
#android.ant_path =
|
||||
|
||||
# (bool) If True, then skip trying to update the Android sdk
|
||||
# This can be useful to avoid excess Internet downloads or save time
|
||||
# when an update is due and you just want to test/build your package
|
||||
# android.skip_update = False
|
||||
|
||||
# (bool) If True, then automatically accept SDK license
|
||||
# agreements. This is intended for automation only. If set to False,
|
||||
# the default, you will be shown the license when first running
|
||||
# buildozer.
|
||||
# android.accept_sdk_license = True
|
||||
|
||||
# (str) Android entry point, default is ok for Kivy-based app
|
||||
#android.entrypoint = org.renpy.android.PythonActivity
|
||||
|
||||
# (str) Android app theme, default is ok for Kivy-based app
|
||||
# android.apptheme = "@android:style/Theme.NoTitleBar"
|
||||
|
||||
# (list) Pattern to whitelist for the whole project
|
||||
#android.whitelist =
|
||||
|
||||
# (str) Path to a custom whitelist file
|
||||
#android.whitelist_src =
|
||||
|
||||
# (str) Path to a custom blacklist file
|
||||
#android.blacklist_src =
|
||||
|
||||
# (list) List of Java .jar files to add to the libs so that pyjnius can access
|
||||
# their classes. Don't add jars that you do not need, since extra jars can slow
|
||||
# down the build process. Allows wildcards matching, for example:
|
||||
# OUYA-ODK/libs/*.jar
|
||||
#android.add_jars = foo.jar,bar.jar,path/to/more/*.jar
|
||||
|
||||
# (list) List of Java files to add to the android project (can be java or a
|
||||
# directory containing the files)
|
||||
#android.add_src =
|
||||
|
||||
# (list) Android AAR archives to add (currently works only with sdl2_gradle
|
||||
# bootstrap)
|
||||
#android.add_aars =
|
||||
|
||||
# (list) Gradle dependencies to add (currently works only with sdl2_gradle
|
||||
# bootstrap)
|
||||
#android.gradle_dependencies =
|
||||
|
||||
# (list) add java compile options
|
||||
# this can for example be necessary when importing certain java libraries using the 'android.gradle_dependencies' option
|
||||
# see https://developer.android.com/studio/write/java8-support for further information
|
||||
# android.add_compile_options = "sourceCompatibility = 1.8", "targetCompatibility = 1.8"
|
||||
|
||||
# (list) Gradle repositories to add {can be necessary for some android.gradle_dependencies}
|
||||
# please enclose in double quotes
|
||||
# e.g. android.add_gradle_repositories = "maven { url 'https://kotlin.bintray.com/ktor' }"
|
||||
#android.add_gradle_repositories =
|
||||
|
||||
# (list) packaging options to add
|
||||
# see https://google.github.io/android-gradle-dsl/current/com.android.build.gradle.internal.dsl.PackagingOptions.html
|
||||
# can be necessary to solve conflicts in gradle_dependencies
|
||||
# please enclose in double quotes
|
||||
# e.g. android.add_packaging_options = "exclude 'META-INF/common.kotlin_module'", "exclude 'META-INF/*.kotlin_module'"
|
||||
#android.add_packaging_options =
|
||||
|
||||
# (list) Java classes to add as activities to the manifest.
|
||||
#android.add_activities = com.example.ExampleActivity
|
||||
|
||||
# (str) OUYA Console category. Should be one of GAME or APP
|
||||
# If you leave this blank, OUYA support will not be enabled
|
||||
#android.ouya.category = GAME
|
||||
|
||||
# (str) Filename of OUYA Console icon. It must be a 732x412 png image.
|
||||
#android.ouya.icon.filename = %(source.dir)s/data/ouya_icon.png
|
||||
|
||||
# (str) XML file to include as an intent filters in <activity> tag
|
||||
#android.manifest.intent_filters =
|
||||
|
||||
# (str) launchMode to set for the main activity
|
||||
#android.manifest.launch_mode = standard
|
||||
|
||||
# (list) Android additional libraries to copy into libs/armeabi
|
||||
#android.add_libs_armeabi = libs/android/*.so
|
||||
#android.add_libs_armeabi_v7a = libs/android-v7/*.so
|
||||
#android.add_libs_arm64_v8a = libs/android-v8/*.so
|
||||
#android.add_libs_x86 = libs/android-x86/*.so
|
||||
#android.add_libs_mips = libs/android-mips/*.so
|
||||
|
||||
# (bool) Indicate whether the screen should stay on
|
||||
# Don't forget to add the WAKE_LOCK permission if you set this to True
|
||||
#android.wakelock = False
|
||||
|
||||
# (list) Android application meta-data to set (key=value format)
|
||||
#android.meta_data =
|
||||
|
||||
# (list) Android library project to add (will be added in the
|
||||
# project.properties automatically.)
|
||||
#android.library_references =
|
||||
|
||||
# (list) Android shared libraries which will be added to AndroidManifest.xml using <uses-library> tag
|
||||
#android.uses_library =
|
||||
|
||||
# (str) Android logcat filters to use
|
||||
#android.logcat_filters = *:S python:D
|
||||
|
||||
# (bool) Copy library instead of making a libpymodules.so
|
||||
#android.copy_libs = 1
|
||||
|
||||
# (str) The Android arch to build for, choices: armeabi-v7a, arm64-v8a, x86, x86_64
|
||||
android.arch = arm64-v8a
|
||||
|
||||
# (int) overrides automatic versionCode computation (used in build.gradle)
|
||||
# this is not the same as app version and should only be edited if you know what you're doing
|
||||
# android.numeric_version = 1
|
||||
|
||||
#
|
||||
# Python for android (p4a) specific
|
||||
#
|
||||
|
||||
# (str) python-for-android fork to use, defaults to upstream (kivy)
|
||||
#p4a.fork = kivy
|
||||
|
||||
# (str) python-for-android branch to use, defaults to master
|
||||
#p4a.branch = master
|
||||
|
||||
# (str) python-for-android git clone directory (if empty, it will be automatically cloned from github)
|
||||
#p4a.source_dir =
|
||||
|
||||
# (str) The directory in which python-for-android should look for your own build recipes (if any)
|
||||
#p4a.local_recipes =
|
||||
|
||||
# (str) Filename to the hook for p4a
|
||||
#p4a.hook =
|
||||
|
||||
# (str) Bootstrap to use for android builds
|
||||
# p4a.bootstrap = sdl2
|
||||
|
||||
# (int) port number to specify an explicit --port= p4a argument (eg for bootstrap flask)
|
||||
#p4a.port =
|
||||
|
||||
|
||||
#
|
||||
# iOS specific
|
||||
#
|
||||
|
||||
# (str) Path to a custom kivy-ios folder
|
||||
#ios.kivy_ios_dir = ../kivy-ios
|
||||
# Alternately, specify the URL and branch of a git checkout:
|
||||
ios.kivy_ios_url = https://github.com/kivy/kivy-ios
|
||||
ios.kivy_ios_branch = master
|
||||
|
||||
# Another platform dependency: ios-deploy
|
||||
# Uncomment to use a custom checkout
|
||||
#ios.ios_deploy_dir = ../ios_deploy
|
||||
# Or specify URL and branch
|
||||
ios.ios_deploy_url = https://github.com/phonegap/ios-deploy
|
||||
ios.ios_deploy_branch = 1.7.0
|
||||
|
||||
# (str) Name of the certificate to use for signing the debug version
|
||||
# Get a list of available identities: buildozer ios list_identities
|
||||
#ios.codesign.debug = "iPhone Developer: <lastname> <firstname> (<hexstring>)"
|
||||
|
||||
# (str) Name of the certificate to use for signing the release version
|
||||
#ios.codesign.release = %(ios.codesign.debug)s
|
||||
|
||||
|
||||
[buildozer]
|
||||
|
||||
# (int) Log level (0 = error only, 1 = info, 2 = debug (with command output))
|
||||
log_level = 2
|
||||
|
||||
# (int) Display warning if buildozer is run as root (0 = False, 1 = True)
|
||||
warn_on_root = 1
|
||||
|
||||
# (str) Path to build artifact storage, absolute or relative to spec file
|
||||
# build_dir = ./.buildozer
|
||||
|
||||
# (str) Path to build output (i.e. .apk, .ipa) storage
|
||||
# bin_dir = ./bin
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# List as sections
|
||||
#
|
||||
# You can define all the "list" as [section:key].
|
||||
# Each line will be considered as a option to the list.
|
||||
# Let's take [app] / source.exclude_patterns.
|
||||
# Instead of doing:
|
||||
#
|
||||
#[app]
|
||||
#source.exclude_patterns = license,data/audio/*.wav,data/images/original/*
|
||||
#
|
||||
# This can be translated into:
|
||||
#
|
||||
#[app:source.exclude_patterns]
|
||||
#license
|
||||
#data/audio/*.wav
|
||||
#data/images/original/*
|
||||
#
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Profiles
|
||||
#
|
||||
# You can extend section / key with a profile
|
||||
# For example, you want to deploy a demo version of your application without
|
||||
# HD content. You could first change the title to add "(demo)" in the name
|
||||
# and extend the excluded directories to remove the HD content.
|
||||
#
|
||||
#[app@demo]
|
||||
#title = My Application (demo)
|
||||
#
|
||||
#[app:source.exclude_patterns@demo]
|
||||
#images/hd/*
|
||||
#
|
||||
# Then, invoke the command line with the "demo" profile:
|
||||
#
|
||||
#buildozer --profile demo android debug
|
206
main.py
206
main.py
|
@ -1,10 +1,202 @@
|
|||
import kivy
|
||||
from kivy.app import App
|
||||
from kivy.uix.label import Label
|
||||
from sys import argv
|
||||
from os import environ
|
||||
from dotenv import load_dotenv
|
||||
from cloudscraper import CloudScraper, create_scraper
|
||||
from re import findall, sub
|
||||
|
||||
class MobiDL(App):
    """Minimal kivy application showing a single placeholder label."""

    def build(self):
        """Create and return the root widget of the application."""
        placeholder = Label(text = "...")
        return placeholder
|
||||
class Scraper:
    """Log in to the Mobilism forum, search for an app and collect release infos.

    The usual entry point is ``work()``, which chains ``connect()``,
    ``search()`` and ``prettyPrint()``.
    """

    def __init__(self, pseudo: str, password: str, app: str, debug: bool = False):
        """Store the credentials, the requested app name and the debug flag.

        Args:
            pseudo: Forum username.
            password: Forum password.
            app: Keywords used for the topic search.
            debug: When true, progress messages are printed.
        """
        self.debug = debug
        self.url = "https://forum.mobilism.org"
        self.requested_app = app
        self.loginData = {
            "username": pseudo,
            "password": password,
            "login": "Login",
        }

    def errorFormat(self, code: int = None, message: str = "") -> str:
        """Return ``"[code] message."``; the code part is omitted when falsy."""
        prefix = f"[{code}]" if code else ""
        separator = " " if message and code else ""
        return f"{prefix}{separator}{message}."

    def connect(self) -> "CloudScraper":
        """Create a scraper session and post the login form.

        Returns:
            The session used for the login request.

        Raises:
            SystemError: If the session could not be created.
            ConnectionRefusedError: If the login request does not answer 200.
        """
        # Cloudflare bypasser impersonating a Chrome browser on Windows.
        session = create_scraper(browser={"browser": "chrome", "platform": "windows"})
        if not session:
            raise SystemError(self.errorFormat(message="The creation of the session failed"))

        if self.debug:
            print("Connection attempt...")
        # "mode=login" is the forum default; it is passed explicitly in case
        # that default changes.
        reponse = session.post(f"{self.url}/ucp.php", data=self.loginData, params={"mode": "login"})
        if reponse.status_code != 200:
            raise ConnectionRefusedError(self.errorFormat(code=reponse.status_code, message="Unable to connect"))

        return session

    def search(self, session: "CloudScraper") -> tuple[list[dict], list[dict]]:
        """Run the topic search, save the results and fetch the topic details.

        Args:
            session: Session returned by ``connect()``.

        Returns:
            A ``(topics, topicsInfos)`` pair: the parsed search results and the
            details returned by ``getInfos()`` for the first of them.

        Raises:
            ConnectionError: If the forum refuses the search (login failed) or
                the request does not answer 200.
        """
        if self.debug:
            print("Going to search page and check connection...", end=" ")
        reponse = session.get(
            f"{self.url}/search.php",
            params={"keywords": self.requested_app, "sr": "topics", "sf": "titleonly"},
        )
        # The forum answers with this sentence when the session is not logged in.
        if "Sorry but you are not permitted to use the search system. If you're not logged in please" in reponse.text:
            raise ConnectionError(self.errorFormat(message="Connection failed, check credentials"))
        if reponse.status_code != 200:
            raise ConnectionError(self.errorFormat(code=reponse.status_code, message="Impossible to make the search"))
        if self.debug:
            print(f"Connected.")

        if self.debug:
            print(f"Fetching results for {self.requested_app}...", end=" ")

        topics = self.parse(reponse.text)

        self.save(topics)

        return topics, self.getInfos(session, topics)

    def parse(self, htmlPage: str) -> list[dict]:
        """Turn the HTML of a search results page into a list of topics.

        Each topic is a dict with the keys ``title``, ``author``, ``date``,
        ``link`` and ``linkParams``. A field that cannot be found in a row is
        set to ``None`` (``link`` and ``linkParams`` are both ``None`` when the
        topic URL is missing).

        Returns:
            The parsed topics; an empty list when the page reports no match or
            contains no result row.
        """
        if "No suitable matches were found." in htmlPage:
            return []
        rows = htmlPage.split("<tr>\n<td>")[1:]
        if not rows:  # no result row at all: nothing to parse
            return []
        # Drop everything that follows the last result row.
        rows[-1] = rows[-1].split("</td>\n</tr>")[0]

        topics = []
        for row in rows:
            if self.debug:
                print("\n" + row + "\n")

            titles = findall(r"class=\"topictitle\">(.*)<\/a>", row)
            title = sub(r" ?& ?", " ", titles[0]) if titles else None

            authors = findall(r"(<br />|</strong>)\n\n?<i class=\"icon-user\"></i> by <a href=\"\./memberlist\.php\?mode=viewprofile&u=\d+\"( style=\"color: #.*;\" class=\"username-coloured\")?>(.*)</a>", row)
            # The username is the last capture group of the first match.
            author = authors[0][-1] if authors else None

            links = findall(r"\./viewtopic\.php\?f=(\d*)&t=(\d*)&", row)
            if links:
                linkParams = {"f": links[0][0], "t": links[0][1]}
                link = f"{self.url}/viewtopic.php?f={linkParams['f']}&t={linkParams['t']}"
            else:
                linkParams = None
                link = None

            dates = findall(r"</a> <i class=\"icon-time\"></i> <small>(.*)</small>", row)
            date = dates[0] if dates else None

            topics.append({"title": title, "author": author, "date": date, "link": link, "linkParams": linkParams})

        return topics

    def getInfos(self, session: "CloudScraper", elements: list) -> list:
        """Fetch the pages of the first three topics and parse their details.

        Args:
            session: Logged-in session.
            elements: Topics as returned by ``parse()``.

        Returns:
            One dict per fetched topic, as built by ``parsingInfos()``.

        Raises:
            ConnectionError: If a topic page does not answer 200.
        """
        if len(elements) == 0:
            return []
        page = 3
        if self.debug:
            print(f"Going to the {page} first pages...", end=" ")
        results = []
        for i, element in enumerate(elements[:page]):
            reponse = session.get(f"{self.url}/viewtopic.php", params=element["linkParams"])
            if reponse.status_code != 200:
                raise ConnectionError(self.errorFormat(code=reponse.status_code, message=f"Error while doing the search n°{i}"))
            results.append(reponse)
        if self.debug:
            print(f"Done.")

        if self.debug:
            print(f"Parsing results page...", end=" ")
        results = self.parsingInfos(results)
        if self.debug:
            print(f"Done.")

        return results

    def parsingInfos(self, elements: list) -> list[dict]:
        """Extract the changelog and the download links from topic pages.

        Args:
            elements: Response objects; only their ``text`` attribute is read.
                The list itself is left untouched.

        Returns:
            One ``{"changelogs": ..., "downloadLinks": ...}`` dict per response.
            Both values are ``None`` when the page has no "Download
            Instructions" section; ``downloadLinks`` is ``None`` when the
            section exists but no link block could be extracted.
        """
        infos = []
        for response in elements:
            page = response.text
            if "Download Instructions" not in page:
                infos.append({"changelogs": None, "downloadLinks": None})
                continue

            try:
                changelogs = findall(r"What's New:</span> ?<br />(.*)<br /><br /><span style=\"c|font-weight: bold\">T", page)[0]
                if len(changelogs) < 2:  # nothing captured, try the other layout
                    changelogs = findall(r"What's New:</span> ?<br />(.*)<br /><br /><span style=\"font-weight: bold\">T", page)[0]
            except IndexError:  # no pattern matched
                changelogs = "No changelog found."

            # Drop any text between the section header and the "<br /><s" that follows it.
            page = sub(r"Download Instructions:</span>(.*)?<br /><s", "Download Instructions:</span><br /><s", page)
            try:
                downloadLinks = findall(r"Download Instructions:</span> ?<br />(.*|[\s\S]*)<br /><br />Trouble downloading|</a></div>", page)[0]
                if len(downloadLinks) < 2:  # nothing captured, try the other layout
                    downloadLinks = findall(r"Download Instructions:</span> ?<br />(.*|[\s\S]*)</a></div>", page)[0]
            except IndexError:  # no pattern matched
                downloadLinks = None

            if downloadLinks is not None:
                downloadLinks = sub(r"\n|<a class=\"postlink\" href=\"|\(Closed Filehost\) ?|<span style=\"font-weight: bold\">|</span>|\">(\S*)</a>", "", downloadLinks)  # remove html garbage
                downloadLinks = sub(r"<br />\n?", "\n", downloadLinks)  # convert html line breaks to \n
                downloadLinks = sub(r"Mirrors(?!:)|Mirror(?!s)(?!:)", "Mirror:", downloadLinks)  # add ":"
                downloadLinks = downloadLinks.split('">')[0]

            infos.append({"changelogs": changelogs, "downloadLinks": downloadLinks})

        return infos

    def prettyPrint(self, topics: tuple[list[dict], list[dict]]) -> list:
        """Print every detailed topic and return the merged records.

        Args:
            topics: The ``(topics, topicsInfos)`` pair returned by ``search()``.

        Returns:
            One dict per entry of ``topicsInfos`` with the keys ``title``,
            ``author``, ``date``, ``changelogs`` and ``downloadLinks``.
        """
        topics, topicsInfos = topics
        if len(topics) == 0:
            return []
        print("\n")
        result = []
        # topicsInfos only covers the first topics, so zip() stops there.
        for topic, infos in zip(topics, topicsInfos):
            entry = {
                "title": topic["title"],
                "author": topic["author"],
                "date": topic["date"],
                "changelogs": str(infos["changelogs"]).replace("<br />", "\n"),
                "downloadLinks": infos["downloadLinks"],
            }
            result.append(entry)
            print(f"Title: {entry['title']}\n")
            print(f"Author: {entry['author']}\n")
            print(f"Date of release: {entry['date']}\n")
            print(f"Changelogs: \n{entry['changelogs']}\n")
            print(f"Download links: \n{entry['downloadLinks']}")
            print("\n\n---\n")

        return result

    def work(self) -> list:
        """Connect, search and print the results; return the printed records."""
        return self.prettyPrint(self.search(self.connect()))

    def save(self, elements: list) -> None:
        """Save the parsed topics to ``results.csv`` (``;`` separated).

        Every key of the first topic except ``linkParams`` becomes a column.
        Values are written through the ``csv`` module so that a ``;``, a quote
        or a line break inside a value cannot corrupt the row.
        """
        import csv  # local import: only needed here

        size = len(elements)
        if size == 0:
            print("No elements were found with the search.")
            return
        filename = "results.csv"
        columns = [key for key in elements[0] if key != "linkParams"]
        # newline="" lets the csv writer control line endings itself.
        with open(filename, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f, delimiter=";", lineterminator="\n")
            writer.writerow(columns)
            for element in elements:
                writer.writerow([str(element.get(key)) for key in columns])
        print(f"{size} elements have been registered in the {filename} file.")
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point. Usage: `[pseudo password] app name...`
    # (credentials are only read from the cli when the environment / .env
    # file does not provide them).
    MobiDL().run()
    argv = argv[1:]  # drop the script name
    if len(argv) < 1:  # no args
        print("No App to retrieve.")
        raise SystemExit(1)
    load_dotenv()  # load .env file

    # Verbose logs are opt-in through the environment / .env file.
    debug = environ.get("DEBUG_MOBILISM", "").lower() in ("yes", "true", "1")

    credentialsFound = True
    try:  # try to fetch credentials from the environment / .env file first
        pseudoMobilism = environ["PSEUDO_MOBILISM"]
        passwordMobilism = environ["PASSWORD_MOBILISM"]
    except KeyError:  # otherwise expect `pseudo password app...` on the cli
        if len(argv) >= 3:
            pseudoMobilism, passwordMobilism = argv[0], argv[1]
            # Everything after the two credentials is the app name.
            argv = argv[2:]
        else:  # credentials are nowhere to be found
            credentialsFound = False

    if credentialsFound:
        # Run outside of any KeyError handler so that an unrelated KeyError
        # raised while scraping is not reported as missing credentials.
        Scraper(pseudoMobilism, passwordMobilism, " ".join(argv), debug).work()
    else:
        print('Please fill in the username and password (with quotes) by args or with .env file and give an app to retrieve.')
|
||||
|
|
|
@ -2,8 +2,3 @@
|
|||
requests==2.26.0
|
||||
python-dotenv==0.19.0
|
||||
cloudscraper==1.2.58
|
||||
|
||||
# gui
|
||||
kivy[base]==2.0.0 # app core
|
||||
buildozer==1.2.0 # app builder
|
||||
Cython==0.29.19 # buildozer dependency
|
||||
|
|
202
scrapper.py
202
scrapper.py
|
@ -1,202 +0,0 @@
|
|||
from sys import argv
|
||||
from os import environ
|
||||
from dotenv import load_dotenv
|
||||
from cloudscraper import CloudScraper, create_scraper
|
||||
from re import findall, sub
|
||||
|
||||
class Scraper:
    """Log in to the Mobilism forum, search for an app and collect release infos.

    The usual entry point is ``work()``, which chains ``connect()``,
    ``search()`` and ``prettyPrint()``.
    """

    def __init__(self, pseudo: str, password: str, app: str, debug: bool = False):
        """Store the credentials, the requested app name and the debug flag.

        Args:
            pseudo: Forum username.
            password: Forum password.
            app: Keywords used for the topic search.
            debug: When true, progress messages are printed.
        """
        self.debug = debug
        self.url = "https://forum.mobilism.org"
        self.requested_app = app
        self.loginData = {
            "username": pseudo,
            "password": password,
            "login": "Login",
        }

    def errorFormat(self, code: int = None, message: str = "") -> str:
        """Return ``"[code] message."``; the code part is omitted when falsy."""
        prefix = f"[{code}]" if code else ""
        separator = " " if message and code else ""
        return f"{prefix}{separator}{message}."

    def connect(self) -> "CloudScraper":
        """Create a scraper session and post the login form.

        Returns:
            The session used for the login request.

        Raises:
            SystemError: If the session could not be created.
            ConnectionRefusedError: If the login request does not answer 200.
        """
        # Cloudflare bypasser impersonating a Chrome browser on Windows.
        session = create_scraper(browser={"browser": "chrome", "platform": "windows"})
        if not session:
            raise SystemError(self.errorFormat(message="The creation of the session failed"))

        if self.debug:
            print("Connection attempt...")
        # "mode=login" is the forum default; it is passed explicitly in case
        # that default changes.
        reponse = session.post(f"{self.url}/ucp.php", data=self.loginData, params={"mode": "login"})
        if reponse.status_code != 200:
            raise ConnectionRefusedError(self.errorFormat(code=reponse.status_code, message="Unable to connect"))

        return session

    def search(self, session: "CloudScraper") -> tuple[list[dict], list[dict]]:
        """Run the topic search, save the results and fetch the topic details.

        Args:
            session: Session returned by ``connect()``.

        Returns:
            A ``(topics, topicsInfos)`` pair: the parsed search results and the
            details returned by ``getInfos()`` for the first of them.

        Raises:
            ConnectionError: If the forum refuses the search (login failed) or
                the request does not answer 200.
        """
        if self.debug:
            print("Going to search page and check connection...", end=" ")
        reponse = session.get(
            f"{self.url}/search.php",
            params={"keywords": self.requested_app, "sr": "topics", "sf": "titleonly"},
        )
        # The forum answers with this sentence when the session is not logged in.
        if "Sorry but you are not permitted to use the search system. If you're not logged in please" in reponse.text:
            raise ConnectionError(self.errorFormat(message="Connection failed, check credentials"))
        if reponse.status_code != 200:
            raise ConnectionError(self.errorFormat(code=reponse.status_code, message="Impossible to make the search"))
        if self.debug:
            print(f"Connected.")

        if self.debug:
            print(f"Fetching results for {self.requested_app}...", end=" ")

        topics = self.parse(reponse.text)

        self.save(topics)

        return topics, self.getInfos(session, topics)

    def parse(self, htmlPage: str) -> list[dict]:
        """Turn the HTML of a search results page into a list of topics.

        Each topic is a dict with the keys ``title``, ``author``, ``date``,
        ``link`` and ``linkParams``. A field that cannot be found in a row is
        set to ``None`` (``link`` and ``linkParams`` are both ``None`` when the
        topic URL is missing).

        Returns:
            The parsed topics; an empty list when the page reports no match or
            contains no result row.
        """
        if "No suitable matches were found." in htmlPage:
            return []
        rows = htmlPage.split("<tr>\n<td>")[1:]
        if not rows:  # no result row at all: nothing to parse
            return []
        # Drop everything that follows the last result row.
        rows[-1] = rows[-1].split("</td>\n</tr>")[0]

        topics = []
        for row in rows:
            if self.debug:
                print("\n" + row + "\n")

            titles = findall(r"class=\"topictitle\">(.*)<\/a>", row)
            title = sub(r" ?& ?", " ", titles[0]) if titles else None

            authors = findall(r"(<br />|</strong>)\n\n?<i class=\"icon-user\"></i> by <a href=\"\./memberlist\.php\?mode=viewprofile&u=\d+\"( style=\"color: #.*;\" class=\"username-coloured\")?>(.*)</a>", row)
            # The username is the last capture group of the first match.
            author = authors[0][-1] if authors else None

            links = findall(r"\./viewtopic\.php\?f=(\d*)&t=(\d*)&", row)
            if links:
                linkParams = {"f": links[0][0], "t": links[0][1]}
                link = f"{self.url}/viewtopic.php?f={linkParams['f']}&t={linkParams['t']}"
            else:
                linkParams = None
                link = None

            dates = findall(r"</a> <i class=\"icon-time\"></i> <small>(.*)</small>", row)
            date = dates[0] if dates else None

            topics.append({"title": title, "author": author, "date": date, "link": link, "linkParams": linkParams})

        return topics

    def getInfos(self, session: "CloudScraper", elements: list) -> list:
        """Fetch the pages of the first three topics and parse their details.

        Args:
            session: Logged-in session.
            elements: Topics as returned by ``parse()``.

        Returns:
            One dict per fetched topic, as built by ``parsingInfos()``.

        Raises:
            ConnectionError: If a topic page does not answer 200.
        """
        if len(elements) == 0:
            return []
        page = 3
        if self.debug:
            print(f"Going to the {page} first pages...", end=" ")
        results = []
        for i, element in enumerate(elements[:page]):
            reponse = session.get(f"{self.url}/viewtopic.php", params=element["linkParams"])
            if reponse.status_code != 200:
                raise ConnectionError(self.errorFormat(code=reponse.status_code, message=f"Error while doing the search n°{i}"))
            results.append(reponse)
        if self.debug:
            print(f"Done.")

        if self.debug:
            print(f"Parsing results page...", end=" ")
        results = self.parsingInfos(results)
        if self.debug:
            print(f"Done.")

        return results

    def parsingInfos(self, elements: list) -> list[dict]:
        """Extract the changelog and the download links from topic pages.

        Args:
            elements: Response objects; only their ``text`` attribute is read.
                The list itself is left untouched.

        Returns:
            One ``{"changelogs": ..., "downloadLinks": ...}`` dict per response.
            Both values are ``None`` when the page has no "Download
            Instructions" section; ``downloadLinks`` is ``None`` when the
            section exists but no link block could be extracted.
        """
        infos = []
        for response in elements:
            page = response.text
            if "Download Instructions" not in page:
                infos.append({"changelogs": None, "downloadLinks": None})
                continue

            try:
                changelogs = findall(r"What's New:</span> ?<br />(.*)<br /><br /><span style=\"c|font-weight: bold\">T", page)[0]
                if len(changelogs) < 2:  # nothing captured, try the other layout
                    changelogs = findall(r"What's New:</span> ?<br />(.*)<br /><br /><span style=\"font-weight: bold\">T", page)[0]
            except IndexError:  # no pattern matched
                changelogs = "No changelog found."

            # Drop any text between the section header and the "<br /><s" that follows it.
            page = sub(r"Download Instructions:</span>(.*)?<br /><s", "Download Instructions:</span><br /><s", page)
            try:
                downloadLinks = findall(r"Download Instructions:</span> ?<br />(.*|[\s\S]*)<br /><br />Trouble downloading|</a></div>", page)[0]
                if len(downloadLinks) < 2:  # nothing captured, try the other layout
                    downloadLinks = findall(r"Download Instructions:</span> ?<br />(.*|[\s\S]*)</a></div>", page)[0]
            except IndexError:  # no pattern matched
                downloadLinks = None

            if downloadLinks is not None:
                downloadLinks = sub(r"\n|<a class=\"postlink\" href=\"|\(Closed Filehost\) ?|<span style=\"font-weight: bold\">|</span>|\">(\S*)</a>", "", downloadLinks)  # remove html garbage
                downloadLinks = sub(r"<br />\n?", "\n", downloadLinks)  # convert html line breaks to \n
                downloadLinks = sub(r"Mirrors(?!:)|Mirror(?!s)(?!:)", "Mirror:", downloadLinks)  # add ":"
                downloadLinks = downloadLinks.split('">')[0]

            infos.append({"changelogs": changelogs, "downloadLinks": downloadLinks})

        return infos

    def prettyPrint(self, topics: tuple[list[dict], list[dict]]) -> list:
        """Print every detailed topic and return the merged records.

        Args:
            topics: The ``(topics, topicsInfos)`` pair returned by ``search()``.

        Returns:
            One dict per entry of ``topicsInfos`` with the keys ``title``,
            ``author``, ``date``, ``changelogs`` and ``downloadLinks``.
        """
        topics, topicsInfos = topics
        if len(topics) == 0:
            return []
        print("\n")
        result = []
        # topicsInfos only covers the first topics, so zip() stops there.
        for topic, infos in zip(topics, topicsInfos):
            entry = {
                "title": topic["title"],
                "author": topic["author"],
                "date": topic["date"],
                "changelogs": str(infos["changelogs"]).replace("<br />", "\n"),
                "downloadLinks": infos["downloadLinks"],
            }
            result.append(entry)
            print(f"Title: {entry['title']}\n")
            print(f"Author: {entry['author']}\n")
            print(f"Date of release: {entry['date']}\n")
            print(f"Changelogs: \n{entry['changelogs']}\n")
            print(f"Download links: \n{entry['downloadLinks']}")
            print("\n\n---\n")

        return result

    def work(self) -> list:
        """Connect, search and print the results; return the printed records."""
        return self.prettyPrint(self.search(self.connect()))

    def save(self, elements: list) -> None:
        """Save the parsed topics to ``results.csv`` (``;`` separated).

        Every key of the first topic except ``linkParams`` becomes a column.
        Values are written through the ``csv`` module so that a ``;``, a quote
        or a line break inside a value cannot corrupt the row.
        """
        import csv  # local import: only needed here

        size = len(elements)
        if size == 0:
            print("No elements were found with the search.")
            return
        filename = "results.csv"
        columns = [key for key in elements[0] if key != "linkParams"]
        # newline="" lets the csv writer control line endings itself.
        with open(filename, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f, delimiter=";", lineterminator="\n")
            writer.writerow(columns)
            for element in elements:
                writer.writerow([str(element.get(key)) for key in columns])
        print(f"{size} elements have been registered in the {filename} file.")
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point. Usage: `[pseudo password] app name...`
    # (credentials are only read from the cli when the environment / .env
    # file does not provide them).
    argv = argv[1:]  # drop the script name
    if len(argv) < 1:  # no args
        print("No App to retrieve.")
        raise SystemExit(1)
    load_dotenv()  # load .env file

    # Verbose logs are opt-in through the environment / .env file.
    debug = environ.get("DEBUG_MOBILISM", "").lower() in ("yes", "true", "1")

    credentialsFound = True
    try:  # try to fetch credentials from the environment / .env file first
        pseudoMobilism = environ["PSEUDO_MOBILISM"]
        passwordMobilism = environ["PASSWORD_MOBILISM"]
    except KeyError:  # otherwise expect `pseudo password app...` on the cli
        if len(argv) >= 3:
            pseudoMobilism, passwordMobilism = argv[0], argv[1]
            # Everything after the two credentials is the app name.
            argv = argv[2:]
        else:  # credentials are nowhere to be found
            credentialsFound = False

    if credentialsFound:
        # Run outside of any KeyError handler so that an unrelated KeyError
        # raised while scraping is not reported as missing credentials.
        Scraper(pseudoMobilism, passwordMobilism, " ".join(argv), debug).work()
    else:
        print('Please fill in the username and password (with quotes) by args or with .env file and give an app to retrieve.')
|
Reference in a new issue