First attempt to add data extraction from the search results
This commit is contained in:
parent
cec312c93c
commit
2b2e61fbe9
1 changed files with 35 additions and 2 deletions
|
@ -5,6 +5,7 @@ import com.github.kittinunf.fuel.core.FuelManager
|
|||
import com.github.kittinunf.fuel.httpGet
|
||||
import com.github.kittinunf.fuel.httpPost
|
||||
import com.mylloon.mobidl.MainActivity.Companion.applicationContext
|
||||
import java.lang.Exception
|
||||
import java.lang.System.currentTimeMillis
|
||||
import com.github.kittinunf.result.Result.Failure as FuelFailure
|
||||
import com.github.kittinunf.result.Result.Success as FuelSuccess
|
||||
|
@ -126,10 +127,42 @@ class Scraper(
|
|||
|
||||
/**
 * Parses the HTML of a search-results page into one record per result row.
 *
 * Each record is a map with the keys "title", "author", "link" and "data";
 * a value is `null` when the corresponding pattern is not found in the row.
 *
 * @param htmlPage raw HTML of the search-results page.
 * @return `mutableListOf("noResults")` when the page contains the
 *   "No suitable matches were found." notice. Otherwise the extracted records
 *   are only printed and `null` is returned: the records do not fit the
 *   declared return type yet.
 */
private fun parse(htmlPage: String): MutableList<String>? {
    if (debug) println("Fetching results for $app...")
    Toast.makeText(applicationContext(), "Fetching results for $app...", Toast.LENGTH_SHORT)
        .show()
    if ("No suitable matches were found." in htmlPage) return mutableListOf("noResults")

    // Whatever precedes the first row separator is not a result row, so drop it.
    // drop(1) is also safe when the separator never occurs (yields an empty list).
    val rows = htmlPage.split("<tr>\n<td>").drop(1).toMutableList()
    if (rows.isNotEmpty()) {
        // Cut off whatever follows the closing tags of the last row.
        rows[rows.lastIndex] = rows.last().split("</td>\n</tr>")[0]
    }

    // Compile the patterns once instead of once per row.
    val ampersandRegex = Regex(" ?& ?")
    val titleRegex = Regex("""class="topictitle">(.*)</a>""")
    val authorRegex = Regex(
        """(<br />|</strong>)\n\n?<i class="icon-user"></i> by <a href="\./memberlist\.php\?mode=viewprofile&u=\d+"( style="color: #.*;" class="username-coloured")?>(.*)</a>"""
    )
    val linkRegex = Regex("""\./viewtopic\.php\?f=(\d*)&t=(\d*)&""")
    val dataRegex = Regex("""</a> <i class="icon-time"></i> <small>(.*)</small>""")

    val finalElements: List<Map<String, String?>> = rows.map { row ->
        // Regex.replace takes (input, replacement): the captured title is the input
        // and the ampersand pattern is replaced by nothing.
        val title = titleRegex.find(row)?.groupValues?.get(1)?.let { ampersandRegex.replace(it, "") }
        // Group 3 is the author name; groups 1 and 2 only match surrounding markup.
        val author = authorRegex.find(row)?.groupValues?.get(3)
        // The link keeps the whole matched relative URL fragment.
        val link = linkRegex.find(row)?.value
        val data = dataRegex.find(row)?.groupValues?.get(1)
        mapOf("title" to title, "author" to author, "link" to link, "data" to data)
    }
    println(finalElements)
    return null
}
|
||||
|
||||
|
|
Reference in a new issue