Initial commit.

This commit is contained in:
Erik C. Thauvin 2020-07-23 01:47:35 -07:00
commit 6c22c168a7
19 changed files with 695 additions and 0 deletions

View file

@ -0,0 +1,68 @@
package net.thauvin.erik.readingtime
import org.jsoup.Jsoup
import java.math.BigDecimal
import java.math.RoundingMode
/**
 * Estimates the reading time of the given [text].
 *
 * The calculation is based on
 * [Medium's post](https://blog.medium.com/read-time-and-you-bc2048ab620c): the word count is
 * converted to time using [wpm], then extra seconds are added for each image (12 for the first,
 * one second less for each following image, and 3 seconds for every image past the tenth).
 *
 * @property text The text to evaluate; it may contain HTML markup.
 * @property wpm The reading speed, in words per minute.
 * @property postfix The suffix appended when the estimate is one minute or less.
 * @property plural The suffix appended when the estimate is more than one minute.
 * @property excludeImages When `true`, images do not add any time to the estimate.
 */
class ReadingTime(
    var text: String,
    var wpm: Int = 275,
    var postfix: String = "min read",
    var plural: String = "min read",
    var excludeImages: Boolean = false
) {
    /**
     * Calculates the reading time of [text], in seconds.
     *
     * @return The time for the words plus, unless [excludeImages] is set, the time for the images;
     * `0.0` when [wpm] is zero or negative, since no meaningful estimate exists for such a speed.
     */
    fun calcReadingTimeInSec(): Double {
        // Dividing by a zero speed yields Infinity/NaN (which BigDecimal rejects in calcReadingTime),
        // and a negative speed yields a negative duration, so bail out early.
        if (wpm <= 0) return 0.0

        val imageSeconds = if (excludeImages) 0 else calcImgReadingTime()
        val wordSeconds = wordCount(text) / (wpm / SECONDS_PER_MINUTE)
        return imageSeconds + wordSeconds
    }

    /**
     * Calculates the reading time of [text] as a displayable string.
     *
     * @return The number of minutes, rounded up to the next whole minute, followed by [plural] when
     * it is greater than one or by [postfix] otherwise. The result is trimmed, so an empty suffix
     * yields just the number.
     */
    fun calcReadingTime(): String {
        val minutes = BigDecimal(calcReadingTimeInSec() / SECONDS_PER_MINUTE)
            .setScale(0, RoundingMode.CEILING)
        val suffix = if (minutes > BigDecimal.ONE) plural else postfix
        return "$minutes $suffix".trim()
    }

    /**
     * Calculates the additional time, in seconds, contributed by the images found in [text].
     */
    private fun calcImgReadingTime(): Int {
        var seconds = 0
        for (position in 1..imgCount(text)) {
            // 12s for the 1st image, 11s for the 2nd, ... never less than 3s.
            seconds += maxOf(FIRST_IMAGE_SECONDS + 1 - position, MIN_IMAGE_SECONDS)
        }
        return seconds
    }

    companion object {
        private const val SECONDS_PER_MINUTE = 60.0
        private const val FIRST_IMAGE_SECONDS = 12
        private const val MIN_IMAGE_SECONDS = 3

        private val WHITESPACE = Regex("\\s+")

        // The lookahead requires the tag name to be followed by whitespace, '/' or '>', so that
        // "<img>", "<img/>" and tags wrapped right after the name are counted, while unrelated
        // names that merely start with "img" are not.
        private val IMG_TAG = Regex("<img(?=[\\s/>])", RegexOption.IGNORE_CASE)

        /**
         * Counts the words in [words], after extracting its text content with Jsoup.
         *
         * @return The number of whitespace-separated words, or `0` if there is no text.
         */
        @JvmStatic
        fun wordCount(words: String): Int {
            val plainText = Jsoup.parse(words).text().trim()
            return if (plainText.isEmpty()) 0 else plainText.split(WHITESPACE).size
        }

        /**
         * Counts the `<img>` tags in [html], ignoring case.
         */
        @JvmStatic
        fun imgCount(html: String): Int = IMG_TAG.findAll(html).count()
    }
}

View file

@ -0,0 +1,52 @@
package net.thauvin.erik.readingtime
import net.thauvin.erik.readingtime.ReadingTime.Companion.imgCount
import net.thauvin.erik.readingtime.ReadingTime.Companion.wordCount
import java.io.File
import kotlin.test.Test
import kotlin.test.assertEquals
/**
 * Tests for [ReadingTime], using a short inline snippet and two HTML fixtures read from
 * `src/test/resources`.
 */
class ReadingTimeTest {
    private val subject = ReadingTime("This is a <b>test</b>.\nWith an image: <img src=\"#\">")
    private val postHtml = File("src/test/resources/post.html").readText()
    private val mediumHtml = File("src/test/resources/medium.html").readText()

    /** Seconds needed to read the words of the current text at the current speed. */
    private fun wordSeconds(): Double = wordCount(subject.text) / (subject.wpm / 60.0)

    @Test
    fun testWordCount() {
        val expectations = listOf(
            " " to 0,
            "one two three" to 3,
            " one two " to 2,
            subject.text to 7,
            postHtml to 210
        )
        for ((input, expected) in expectations) {
            assertEquals(expected, wordCount(input))
        }
    }

    @Test
    fun testImgCount() {
        val expectations = listOf(
            subject.text to 1,
            postHtml to 4,
            mediumHtml to 3
        )
        for ((input, expected) in expectations) {
            assertEquals(expected, imgCount(input))
        }
    }

    @Test
    fun testReadingTimeInSec() {
        // One image in the default snippet.
        assertEquals(wordSeconds() + 12.0, subject.calcReadingTimeInSec())

        // Images only; the tag name is matched regardless of case.
        subject.text = "<img src=\"#\"> <IMG src=\"#\">"
        assertEquals(12.0 + 11.0, subject.calcReadingTimeInSec())

        // Four images in the blog post.
        subject.text = postHtml
        assertEquals(wordSeconds() + 12.0 + 11.0 + 10.0 + 9.0, subject.calcReadingTimeInSec())

        // Three images in the Medium post, read at a custom speed.
        subject.text = mediumHtml
        subject.wpm = 300
        assertEquals(wordSeconds() + 12.0 + 11.0 + 10.0, subject.calcReadingTimeInSec())
    }

    @Test
    fun testReadingTime() {
        with(subject) {
            text = postHtml
            assertEquals("2 min read", calcReadingTime())

            plural = "mins read"
            assertEquals("2 mins read", calcReadingTime())

            // An empty suffix leaves only the number of minutes.
            text = mediumHtml
            plural = ""
            assertEquals("2", calcReadingTime())
        }
    }
}

View file

@ -0,0 +1,46 @@
Eons ago, a couple of Medium engineers got fed up. They were sick of having to scroll all the way down the page to see how long a story was. It was wearing out their trackpad, it was making their fingers sore, and they figured there must be a better way. So they sat down and devised a simple formula, and the Medium read time was born.</p>
<figure class="ij ik il im in io da db paragraph-image">
<div class="ip iq ir is ai">
<div class="da db ii">
<div class="iy r ir iz">
<div class="ja jb r">
<div class="it iu s t u iv ai br iw ix"><img alt="Image for post" class="s t u iv ai jc jd je"
src="https://miro.medium.com/max/60/1*wju0JzgNZVpPzZDgfchajg.png?q=20"
width="180" height="58"/></div>
<img alt="Image for post" class="it iu s t u iv ai jf" width="180" height="58"/>
<noscript><img alt="Image for post" class="s t u iv ai"
src="https://miro.medium.com/max/360/1*wju0JzgNZVpPzZDgfchajg.png" width="180"
height="58"/></noscript>
</div>
</div>
</div>
</div>
</figure>
<p id="8a5b" class="ht hu as hv b fa hw hx fd hy hz ia ib fi ic id fl ie if fo ig di ih">With the widespread
adoption of this feature across the internet, we decided to shed some light on exactly what goes in to our read time
calculation.</p>
<blockquote class="jg"><p id="13d1" class="jh ji as eh b jj jk jl jm jn jo ig aw">Read time is based on the average
reading speed of an adult (roughly 275 WPM). We take the total word count of a post and translate it into minutes.
Then, we add 12 seconds for each inline image. <strong class="be">Boom, read time.</strong></p></blockquote>
<p id="17fc" class="ht hu as hv b fa jp hx fd jq hz ia jr fi ic js fl ie jt fo ig di ih">Lately, we have seen more and
more long form stories containing a ton of images. With our release of <a
href="https://medium.com/the-story/introducing-image-grids-c592e5bc16d8" class="co he ju jv jw jx"
target="_blank" rel="noopener">image grids</a>, we expect even more of these types of essays.</p>
<p id="c072" class="ht hu as hv b fa hw hx fd hy hz ia ib fi ic id fl ie if fo ig di ih">Our original read time
calculation was geared toward “slow” images, like comics, where you would really want to sit down and invest in the
image. This resulted in articles with crazy big read times. For instance, this <a
href="https://medium.com/@dahul/inside-medium-94931f66eebd" class="co he ju jv jw jx" target="_blank"
rel="noopener">article</a> containing 140 images was clocking in at a whopping 87 minute read. So we amended
our read time calculation to count 12 seconds for the first image, 11 for the second, and minus an additional second
for each subsequent image. Any images after the tenth image are counted at three seconds.</p>
<p id="fbc9" class="ht hu as hv b fa hw hx fd hy hz ia ib fi ic id fl ie if fo ig di ih">You might see this change
reflected across the site. Keep in mind that our estimated read time is just that: <em class="jy">an estimation</em>.
You might finish a story faster or slower depending on various factors such as how many children or cats you have,
your caffeine/alcohol intake, or if you’re a time-traveler from the future and already read that story. We just want
to give you a ballpark figure so you can decide whether you have time to read one more story before the bus comes,
or if you should bookmark it for later.</p>
<p id="369b" class="ht hu as hv b fa hw hx fd hy hz ia ib fi ic id fl ie if fo ig di ih">We aren’t done with read time
yet. In the future, we’d like to tailor it to your reading speed, account for the <a
href="https://medium.com/@fchimero/this-should-only-take-a-minute-or-four-probably-e38bb7bf2adf"
class="co he ju jv jw jx" target="_blank" rel="noopener">complexity of an article</a>, and add support for
other languages. We’ll be sure to let you know about these changes as they happen.</p>

View file

@ -0,0 +1,45 @@
<h1>HTML Ipsum Presents</h1>
<p><strong>Pellentesque habitant morbi tristique</strong> senectus et netus et malesuada fames ac turpis egestas. Vestibulum tortor quam, feugiat vitae, ultricies eget, tempor sit amet, ante. Donec eu libero sit amet quam egestas semper. <em>Aenean ultricies mi vitae est.</em> Mauris placerat eleifend leo. Quisque sit amet est et sapien ullamcorper pharetra. Vestibulum erat wisi, condimentum sed, <code>commodo vitae</code>, ornare sit amet, wisi. Aenean fermentum, elit eget tincidunt condimentum, eros ipsum rutrum orci, sagittis tempus lacus enim ac dui. <a href="#">Donec non enim</a> in turpis pulvinar facilisis. Ut felis.</p>
<h2>Header Level 2</h2>
<ol>
<li>Lorem ipsum dolor sit amet, consectetuer adipiscing elit.</li>
<li>Aliquam tincidunt mauris eu risus.</li>
<li><img src="#"></li>
</ol>
<blockquote><p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus magna. Cras in mi at felis aliquet congue. Ut a est eget ligula molestie gravida. Curabitur massa. Donec eleifend, libero at sagittis mollis, tellus est malesuada tellus, at luctus turpis elit sit amet quam. Vivamus pretium ornare est.</p></blockquote>
<h3>Header Level 3</h3>
<ul>
<li>Lorem ipsum dolor sit amet, consectetuer adipiscing elit.</li>
<li>Aliquam tincidunt mauris eu risus.</li>
<li><img src="#"></li>
</ul>
<h3>Header Level 4</h3>
<ul>
<li>Lorem ipsum dolor sit amet, consectetuer adipiscing elit.</li>
<li>Aliquam tincidunt mauris eu risus.</li>
<li><img src="#"></li>
</ul>
<h3>Header Level 5</h3>
<ul>
<li>Lorem ipsum dolor sit amet, consectetuer adipiscing elit.</li>
<li>Aliquam tincidunt mauris eu risus.</li>
<li><img src="#"></li>
</ul>
<pre><code>
#header h1 a {
display: block;
width: 300px;
height: 80px;
}
</code></pre>