Fixed title parsing.

This commit is contained in:
Erik C. Thauvin 2020-04-16 15:51:43 -07:00
parent fc35b6bfc8
commit c9366d856f

View file

@@ -131,7 +131,7 @@ class UrlMgr(defaultTags: String, keywords: String) : AbstractCommand() {
val link = cmds[0].trim() val link = cmds[0].trim()
if (!isDupEntry(bot, sender, link, isPrivate)) { if (!isDupEntry(bot, sender, link, isPrivate)) {
val isBackup = saveDayBackup(bot) val isBackup = saveDayBackup(bot)
var title = Constants.NO_TITLE var title = ""
val tags = ArrayList<String>(defaultTags) val tags = ArrayList<String>(defaultTags)
if (cmds.size == 2) { if (cmds.size == 2) {
val data = cmds[1].trim().split("${Tags.COMMAND}:", limit = 2) val data = cmds[1].trim().split("${Tags.COMMAND}:", limit = 2)
@@ -140,8 +140,14 @@ class UrlMgr(defaultTags: String, keywords: String) : AbstractCommand() {
tags.addAll(data[1].split(TAG_MATCH.toRegex())) tags.addAll(data[1].split(TAG_MATCH.toRegex()))
} }
} }
title = fetchTitle(link, title)
matchTagKeywords(title, tags) if (title.isBlank()) {
title = fetchTitle(link)
}
if (title != Constants.NO_TITLE) {
matchTagKeywords(title, tags)
}
entries.add(EntryLink(link, title, sender, login, bot.channel, tags)) entries.add(EntryLink(link, title, sender, login, bot.channel, tags))
val index: Int = entries.size - 1 val index: Int = entries.size - 1
@@ -180,26 +186,19 @@ class UrlMgr(defaultTags: String, keywords: String) : AbstractCommand() {
return message.matches(LINK_MATCH.toRegex()) return message.matches(LINK_MATCH.toRegex())
} }
private fun fetchTitle(link: String, title: String): String { private fun fetchTitle(link: String): String {
if (Constants.NO_TITLE == title) { try {
try { val html = Jsoup.connect(link)
val html = Jsoup.connect(link) .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0")
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") .get()
.get() val title = html.title()
val htmlTitle = html.title() if (title.isNotBlank()) {
val split = htmlTitle.split("( \\| )".toRegex(), 2) return title
return if (split.size == 2 && split[0].isNotBlank()) {
split[0]
} else if (htmlTitle.isNotBlank()) {
htmlTitle
} else {
title
}
} catch (ignore: IOException) {
// Do nothing
} }
} catch (ignore: IOException) {
// Do nothing
} }
return title return Constants.NO_TITLE
} }
private fun isDupEntry(bot: Mobibot, sender: String, link: String, isPrivate: Boolean): Boolean { private fun isDupEntry(bot: Mobibot, sender: String, link: String, isPrivate: Boolean): Boolean {
@@ -217,9 +216,9 @@ class UrlMgr(defaultTags: String, keywords: String) : AbstractCommand() {
private fun matchTagKeywords(title: String, tags: ArrayList<String>) { private fun matchTagKeywords(title: String, tags: ArrayList<String>) {
for (match in keywords) { for (match in keywords) {
val m = Regex.escape(match.trim()) val m = Regex.escape(match)
if (title.matches("(?i).*\\b$m\\b.*".toRegex())) { if (title.matches("(?i).*\\b$m\\b.*".toRegex())) {
tags.add(m) tags.add(match)
} }
} }
} }