Made the encoding even more defensive

This commit is contained in:
Erik C. Thauvin 2023-01-04 19:29:04 -08:00
parent e20c096cfe
commit a7b67c39af
6 changed files with 57 additions and 34 deletions

View file

@ -21,7 +21,7 @@ plugins {
id("signing")
}
description = "A simple library to encode/decode URL parameters"
description = "A simple defensive library to encode/decode URL components"
group = "net.thauvin.erik"
version = "1.0.1-SNAPSHOT"
@ -193,7 +193,7 @@ publishing {
artifactId = rootProject.name
artifact(javadocJar)
pom {
name.set(mavenName)
name.set("$mavenName for Kotlin")
description.set(project.description)
url.set(mavenUrl)
licenses {

View file

@ -1,13 +1,15 @@
<?xml version='1.0' encoding='UTF-8'?>
<SmellBaseline>
<ManuallySuppressedIssues/>
<CurrentIssues>
<ID>ComplexCondition:UrlEncoder.kt$UrlEncoder$hasOption &amp;&amp; args.size == 2 || !hasOption &amp;&amp; args.size == 1</ID>
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$0x80</ID>
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$0xFF</ID>
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$16</ID>
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$3</ID>
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$4</ID>
<ID>NestedBlockDepth:UrlEncoder.kt$UrlEncoder$@JvmStatic fun encode(source: String, allow: String): String</ID>
</CurrentIssues>
<ManuallySuppressedIssues/>
<CurrentIssues>
<ID>ComplexCondition:UrlEncoder.kt$UrlEncoder$hasOption &amp;&amp; args.size == 2 || !hasOption &amp;&amp;
args.size == 1
</ID>
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$0x80</ID>
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$0xFF</ID>
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$16</ID>
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$3</ID>
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$4</ID>
<ID>NestedBlockDepth:UrlEncoder.kt$UrlEncoder$@JvmStatic fun encode(source: String, allow: String): String</ID>
</CurrentIssues>
</SmellBaseline>

View file

@ -9,8 +9,8 @@
<groupId>net.thauvin.erik</groupId>
<artifactId>urlencoder</artifactId>
<version>1.0.1-SNAPSHOT</version>
<name>UrlEncoder</name>
<description>A simple library to encode/decode URL parameters</description>
<name>UrlEncoder for Kotlin</name>
<description>A simple defensive library to encode/decode URL components</description>
<url>https://github.com/ethauvin/urlencoder</url>
<licenses>
<license>

View file

@ -22,22 +22,31 @@ import java.util.BitSet
import kotlin.system.exitProcess
/**
* URL parameters encoding and decoding.
* Most defensive approach to URL encoding and decoding.
*
* - Rules determined by [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13),
* - Rules determined by combining the unreserved character set from
* [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the percent-encode set from
* [application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set).
*
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
* - Both specs above support percent decoding of two hexadecimal digits to a binary octet; however, their unreserved
* sets of characters differ, and `application/x-www-form-urlencoded` adds conversion of space to `+`, which has the
* potential to be misunderstood.
*
* - This library encodes with rules that will be decoded correctly in either case.
*
* @author Geert Bevin (gbevin(remove) at uwyn dot com)
* @author Erik C. Thauvin (erik@thauvin.net)
*/
**/
object UrlEncoder {
private val hexDigits = "0123456789ABCDEF".toCharArray()
internal val usage =
"Usage : java -jar urlencoder-*all.jar [-ed] text" + System.lineSeparator() +
"Encode and decode URL parameters." + System.lineSeparator() + " -e encode (default) " +
"Encode and decode URL components defensively." + System.lineSeparator() + " -e encode (default) " +
System.lineSeparator() + " -d decode"
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
private val unreservedChars = BitSet('~'.code + 1).apply {
// and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
private val unreservedChars = BitSet('z'.code + 1).apply {
set('-')
set('.')
for (c in '0'..'9') {
@ -50,14 +59,14 @@ object UrlEncoder {
for (c in 'a'.code..'z'.code) {
set(c)
}
set('~')
}
private fun BitSet.set(c: Char) = this.set(c.code)
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
// and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
private fun Char.isUnreserved(): Boolean {
return this <= '~' && unreservedChars.get(code)
return this <= 'z' && unreservedChars.get(code)
}
private fun StringBuilder.appendEncodedDigit(digit: Int) {
@ -130,7 +139,7 @@ object UrlEncoder {
* Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
* encoding.
*
* - Letters, numbers, unreserved (`_-!.~'()*`) and allowed characters are left intact.
* - Letters, numbers, unreserved (`-`, `.`, `_`) and allowed characters are left intact.
*/
@JvmStatic
fun encode(source: String, allow: String): String {
@ -177,7 +186,7 @@ object UrlEncoder {
* Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
* encoding.
*
* - Letters, numbers, unreserved (`_-!.~'()*`) and allowed characters are left intact.
* - Letters, numbers, unreserved (`-`, `.`, `_`) and allowed characters are left intact.
*/
@JvmStatic
fun encode(source: String, vararg allow: Char): String {
@ -187,7 +196,7 @@ object UrlEncoder {
/**
* Encodes and decodes URLs from the command line.
*
* - `kotlin -cp urlencoder-*.jar net.thauvin.erik.urlencoder.UrlEncoder`
* - `java -jar urlencoder-*all.jar <text>`
*/
@JvmStatic
fun main(args: Array<String>) {
@ -200,7 +209,7 @@ object UrlEncoder {
}
exitProcess(result.status)
} catch (e: IllegalArgumentException) {
System.err.println("${UrlEncoder::class.java.simpleName}: ${e.message}");
System.err.println("${UrlEncoder::class.java.simpleName}: ${e.message}")
exitProcess(1)
}
}

View file

@ -34,7 +34,7 @@ import org.junit.jupiter.params.provider.ValueSource
import java.util.stream.Stream
class UrlEncoderTest {
private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~"
private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_."
companion object {
@JvmStatic
@ -45,7 +45,7 @@ class UrlEncoderTest {
arguments("a test &", "a%20test%20%26"),
arguments(
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D"
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D"
),
arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
arguments(