From 2b2e61fbe98e53f96b26defba57ebaa977e92715 Mon Sep 17 00:00:00 2001 From: Mylloon Date: Wed, 1 Sep 2021 02:02:31 +0200 Subject: [PATCH] First attempt to add data extraction from the search results --- .../main/java/com/mylloon/mobidl/Scraper.kt | 37 ++++++++++++++++++- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/app/src/main/java/com/mylloon/mobidl/Scraper.kt b/app/src/main/java/com/mylloon/mobidl/Scraper.kt index 5b97720..5d867f9 100644 --- a/app/src/main/java/com/mylloon/mobidl/Scraper.kt +++ b/app/src/main/java/com/mylloon/mobidl/Scraper.kt @@ -5,6 +5,7 @@ import com.github.kittinunf.fuel.core.FuelManager import com.github.kittinunf.fuel.httpGet import com.github.kittinunf.fuel.httpPost import com.mylloon.mobidl.MainActivity.Companion.applicationContext +import java.lang.Exception import java.lang.System.currentTimeMillis import com.github.kittinunf.result.Result.Failure as FuelFailure import com.github.kittinunf.result.Result.Success as FuelSuccess @@ -126,10 +127,42 @@ class Scraper( private fun parse(htmlPage: String): MutableList? { // Parse HTML response to a clean list. if (debug) println("Fetching results for $app...") - Toast.makeText(applicationContext(), "Fetching results for $app...", Toast.LENGTH_SHORT) - .show() // println(htmlPage) if ("No suitable matches were found." in htmlPage) return mutableListOf("noResults") + val elements: MutableList = htmlPage.split("\n").toMutableList() + val finalElements = mutableListOf>() + elements.removeFirst() + val lastIndex = elements.toList().lastIndex + elements[lastIndex] = elements[lastIndex].split("\n")[0] + for (i in elements.indices) { + var title: String? + var author: String? + var link: String? + var data: String? + title = try { + Regex(" ?& ?").replace("", Regex("""class="topictitle">(.*)""").find(elements[i])?.value!!) + } catch (e: Exception) { + null + } + author = try { + val regex = """(
|)\n\n? by (.*)""" + Regex(regex).find(elements[i])?.value!! + } catch (e: Exception) { + null + } + link = try { + Regex("""\./viewtopic\.php\?f=(\d*)&t=(\d*)&""").find(elements[i])?.value!! + } catch (e: Exception) { + null + } + data = try { + Regex(""" (.*)""").find(elements[i])?.value!! + } catch (e: Exception) { + null + } + finalElements.add(i, mapOf("title" to title, "author" to author, "link" to link, "data" to data)) + } + println(finalElements) return null }