Made the encoding even more defensive
This commit is contained in:
parent
e20c096cfe
commit
a7b67c39af
6 changed files with 57 additions and 34 deletions
24
README.md
24
README.md
|
@ -9,18 +9,27 @@
|
||||||
|
|
||||||
# URL Encoder for Kotlin
|
# URL Encoder for Kotlin
|
||||||
|
|
||||||
A simple library to encode/decode URL parameters.
|
A simple defensive library to encode/decode URL components.
|
||||||
|
|
||||||
This library was adapted from the [RIFE2 Web Application Framework](https://rife2.com).
|
This library was adapted from the [RIFE2 Web Application Framework](https://rife2.com).
|
||||||
A pure Java version can also be found at [https://github.com/gbevin/urlencoder](https://github.com/gbevin/urlencoder).
|
A pure Java version can also be found at [https://github.com/gbevin/urlencoder](https://github.com/gbevin/urlencoder).
|
||||||
|
|
||||||
|
The rules are determined by combining the unreserved character set from
|
||||||
|
[RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the
|
||||||
|
percent-encode set from
|
||||||
|
[application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set).
|
||||||
|
|
||||||
For decades we've been using [java.net.URLEncoder](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLEncoder.html) because of its improper naming. It is actually intended to encode HTML form parameters, not URLs, causing the wrong escape sequences to be used.
|
Both specs above support percent decoding of two hexadecimal digits to a
|
||||||
|
binary octet; however, their unreserved sets of characters differ, and
|
||||||
|
`application/x-www-form-urlencoded` adds conversion of space to `+`,
|
||||||
|
which has the potential to be misunderstood.
|
||||||
|
|
||||||
Additionally, `java.net.URLEncoder` allocates memory even when no encoding is necessary, significantly impacting performance. This library has a negligible performance impact when the specified string doesn't need to be encoded.
|
This library encodes with rules that will be decoded correctly in either case.
|
||||||
|
|
||||||
|
Additionally, this library allocates no memory when encoding isn't needed and
|
||||||
Android's [Uri.encode](https://developer.android.com/reference/android/net/Uri#encode(java.lang.String,%20java.lang.String)) also addresses the same issues.
|
does the work in a single pass without multiple loops. Both of these
|
||||||
|
optimizations have a significantly beneficial impact on performance of encoding
|
||||||
|
compared to other solutions like the standard `URLEncoder` in the JDK.
|
||||||
|
|
||||||
## Examples (TL;DR)
|
## Examples (TL;DR)
|
||||||
|
|
||||||
|
@ -34,6 +43,7 @@ UrlEncoder.decode("%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81") // -> %#
|
||||||
```
|
```
|
||||||
|
|
||||||
## Gradle, Maven, etc.
|
## Gradle, Maven, etc.
|
||||||
|
|
||||||
To use with [Gradle](https://gradle.org/), include the following dependency in your build file:
|
To use with [Gradle](https://gradle.org/), include the following dependency in your build file:
|
||||||
|
|
||||||
```gradle
|
```gradle
|
||||||
|
@ -47,13 +57,15 @@ dependencies {
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Instructions for using with Maven, Ivy, etc. can be found on [Maven Central](https://maven-badges.herokuapp.com/maven-central/net.thauvin.erik/urlencoder).
|
Instructions for using with Maven, Ivy, etc. can be found
|
||||||
|
on [Maven Central](https://maven-badges.herokuapp.com/maven-central/net.thauvin.erik/urlencoder).
|
||||||
|
|
||||||
## Standalone usage
|
## Standalone usage
|
||||||
|
|
||||||
UrlEncoder can be used on the command line also, both for encoding and decoding.
|
UrlEncoder can be used on the command line also, both for encoding and decoding.
|
||||||
|
|
||||||
You have two options:
|
You have two options:
|
||||||
|
|
||||||
* run it with Gradle
|
* run it with Gradle
|
||||||
* build the jar and launch it with Java
|
* build the jar and launch it with Java
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,7 @@ plugins {
|
||||||
id("signing")
|
id("signing")
|
||||||
}
|
}
|
||||||
|
|
||||||
description = "A simple library to encode/decode URL parameters"
|
description = "A simple defensive library to encode/decode URL components"
|
||||||
group = "net.thauvin.erik"
|
group = "net.thauvin.erik"
|
||||||
version = "1.0.1-SNAPSHOT"
|
version = "1.0.1-SNAPSHOT"
|
||||||
|
|
||||||
|
@ -193,7 +193,7 @@ publishing {
|
||||||
artifactId = rootProject.name
|
artifactId = rootProject.name
|
||||||
artifact(javadocJar)
|
artifact(javadocJar)
|
||||||
pom {
|
pom {
|
||||||
name.set(mavenName)
|
name.set("$mavenName for Kotlin")
|
||||||
description.set(project.description)
|
description.set(project.description)
|
||||||
url.set(mavenUrl)
|
url.set(mavenUrl)
|
||||||
licenses {
|
licenses {
|
||||||
|
|
|
@ -1,13 +1,15 @@
|
||||||
<?xml version='1.0' encoding='UTF-8'?>
|
<?xml version='1.0' encoding='UTF-8'?>
|
||||||
<SmellBaseline>
|
<SmellBaseline>
|
||||||
<ManuallySuppressedIssues/>
|
<ManuallySuppressedIssues/>
|
||||||
<CurrentIssues>
|
<CurrentIssues>
|
||||||
<ID>ComplexCondition:UrlEncoder.kt$UrlEncoder$hasOption && args.size == 2 || !hasOption && args.size == 1</ID>
|
<ID>ComplexCondition:UrlEncoder.kt$UrlEncoder$hasOption && args.size == 2 || !hasOption &&
|
||||||
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$0x80</ID>
|
args.size == 1
|
||||||
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$0xFF</ID>
|
</ID>
|
||||||
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$16</ID>
|
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$0x80</ID>
|
||||||
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$3</ID>
|
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$0xFF</ID>
|
||||||
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$4</ID>
|
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$16</ID>
|
||||||
<ID>NestedBlockDepth:UrlEncoder.kt$UrlEncoder$@JvmStatic fun encode(source: String, allow: String): String</ID>
|
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$3</ID>
|
||||||
</CurrentIssues>
|
<ID>MagicNumber:UrlEncoder.kt$UrlEncoder$4</ID>
|
||||||
|
<ID>NestedBlockDepth:UrlEncoder.kt$UrlEncoder$@JvmStatic fun encode(source: String, allow: String): String</ID>
|
||||||
|
</CurrentIssues>
|
||||||
</SmellBaseline>
|
</SmellBaseline>
|
||||||
|
|
|
@ -9,8 +9,8 @@
|
||||||
<groupId>net.thauvin.erik</groupId>
|
<groupId>net.thauvin.erik</groupId>
|
||||||
<artifactId>urlencoder</artifactId>
|
<artifactId>urlencoder</artifactId>
|
||||||
<version>1.0.1-SNAPSHOT</version>
|
<version>1.0.1-SNAPSHOT</version>
|
||||||
<name>UrlEncoder</name>
|
<name>UrlEncoder for Kotlin</name>
|
||||||
<description>A simple library to encode/decode URL parameters</description>
|
<description>A simple defensive library to encode/decode URL components</description>
|
||||||
<url>https://github.com/ethauvin/urlencoder</url>
|
<url>https://github.com/ethauvin/urlencoder</url>
|
||||||
<licenses>
|
<licenses>
|
||||||
<license>
|
<license>
|
||||||
|
|
|
@ -22,22 +22,31 @@ import java.util.BitSet
|
||||||
import kotlin.system.exitProcess
|
import kotlin.system.exitProcess
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* URL parameters encoding and decoding.
|
* Most defensive approach to URL encoding and decoding.
|
||||||
*
|
*
|
||||||
* - Rules determined by [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13),
|
* - Rules determined by combining the unreserved character set from
|
||||||
|
* [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the percent-encode set from
|
||||||
|
* [application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set).
|
||||||
*
|
*
|
||||||
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
|
* - Both specs above support percent decoding of two hexadecimal digits to a binary octet, however their unreserved
|
||||||
|
* set of characters differs and `application/x-www-form-urlencoded` adds conversion of space to `+`, which has the
|
||||||
|
* potential to be misunderstood.
|
||||||
|
*
|
||||||
|
* - This library encodes with rules that will be decoded correctly in either case.
|
||||||
|
*
|
||||||
|
* @author Geert Bevin (gbevin(remove) at uwyn dot com)
|
||||||
* @author Erik C. Thauvin (erik@thauvin.net)
|
* @author Erik C. Thauvin (erik@thauvin.net)
|
||||||
*/
|
**/
|
||||||
object UrlEncoder {
|
object UrlEncoder {
|
||||||
private val hexDigits = "0123456789ABCDEF".toCharArray()
|
private val hexDigits = "0123456789ABCDEF".toCharArray()
|
||||||
internal val usage =
|
internal val usage =
|
||||||
"Usage : java -jar urlencoder-*all.jar [-ed] text" + System.lineSeparator() +
|
"Usage : java -jar urlencoder-*all.jar [-ed] text" + System.lineSeparator() +
|
||||||
"Encode and decode URL parameters." + System.lineSeparator() + " -e encode (default) " +
|
"Encode and decode URL components defensively." + System.lineSeparator() + " -e encode (default) " +
|
||||||
System.lineSeparator() + " -d decode"
|
System.lineSeparator() + " -d decode"
|
||||||
|
|
||||||
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
||||||
private val unreservedChars = BitSet('~'.code + 1).apply {
|
// and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
|
||||||
|
private val unreservedChars = BitSet('z'.code + 1).apply {
|
||||||
set('-')
|
set('-')
|
||||||
set('.')
|
set('.')
|
||||||
for (c in '0'..'9') {
|
for (c in '0'..'9') {
|
||||||
|
@ -50,14 +59,14 @@ object UrlEncoder {
|
||||||
for (c in 'a'.code..'z'.code) {
|
for (c in 'a'.code..'z'.code) {
|
||||||
set(c)
|
set(c)
|
||||||
}
|
}
|
||||||
set('~')
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun BitSet.set(c: Char) = this.set(c.code)
|
private fun BitSet.set(c: Char) = this.set(c.code)
|
||||||
|
|
||||||
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
||||||
|
// and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
|
||||||
private fun Char.isUnreserved(): Boolean {
|
private fun Char.isUnreserved(): Boolean {
|
||||||
return this <= '~' && unreservedChars.get(code)
|
return this <= 'z' && unreservedChars.get(code)
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun StringBuilder.appendEncodedDigit(digit: Int) {
|
private fun StringBuilder.appendEncodedDigit(digit: Int) {
|
||||||
|
@ -130,7 +139,7 @@ object UrlEncoder {
|
||||||
* Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
|
* Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
|
||||||
* encoding.
|
* encoding.
|
||||||
*
|
*
|
||||||
* - Letters, numbers, unreserved (`_-!.~'()*`) and allowed characters are left intact.
|
* - Letters, numbers, unreserved (`-._`) and allowed characters are left intact.
|
||||||
*/
|
*/
|
||||||
@JvmStatic
|
@JvmStatic
|
||||||
fun encode(source: String, allow: String): String {
|
fun encode(source: String, allow: String): String {
|
||||||
|
@ -177,7 +186,7 @@ object UrlEncoder {
|
||||||
* Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
|
* Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
|
||||||
* encoding.
|
* encoding.
|
||||||
*
|
*
|
||||||
* - Letters, numbers, unreserved (`_-!.~'()*`) and allowed characters are left intact.
|
* - Letters, numbers, unreserved (`-._`) and allowed characters are left intact.
|
||||||
*/
|
*/
|
||||||
@JvmStatic
|
@JvmStatic
|
||||||
fun encode(source: String, vararg allow: Char): String {
|
fun encode(source: String, vararg allow: Char): String {
|
||||||
|
@ -187,7 +196,7 @@ object UrlEncoder {
|
||||||
/**
|
/**
|
||||||
* Encodes and decodes URLs from the command line.
|
* Encodes and decodes URLs from the command line.
|
||||||
*
|
*
|
||||||
* - `kotlin -cp urlencoder-*.jar net.thauvin.erik.urlencoder.UrlEncoder`
|
* - `java -jar urlencoder-*all.jar <text>`
|
||||||
*/
|
*/
|
||||||
@JvmStatic
|
@JvmStatic
|
||||||
fun main(args: Array<String>) {
|
fun main(args: Array<String>) {
|
||||||
|
@ -200,7 +209,7 @@ object UrlEncoder {
|
||||||
}
|
}
|
||||||
exitProcess(result.status)
|
exitProcess(result.status)
|
||||||
} catch (e: IllegalArgumentException) {
|
} catch (e: IllegalArgumentException) {
|
||||||
System.err.println("${UrlEncoder::class.java.simpleName}: ${e.message}");
|
System.err.println("${UrlEncoder::class.java.simpleName}: ${e.message}")
|
||||||
exitProcess(1)
|
exitProcess(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,7 @@ import org.junit.jupiter.params.provider.ValueSource
|
||||||
import java.util.stream.Stream
|
import java.util.stream.Stream
|
||||||
|
|
||||||
class UrlEncoderTest {
|
class UrlEncoderTest {
|
||||||
private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~"
|
private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_."
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
@JvmStatic
|
@JvmStatic
|
||||||
|
@ -45,7 +45,7 @@ class UrlEncoderTest {
|
||||||
arguments("a test &", "a%20test%20%26"),
|
arguments("a test &", "a%20test%20%26"),
|
||||||
arguments(
|
arguments(
|
||||||
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
|
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
|
||||||
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D"
|
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D"
|
||||||
),
|
),
|
||||||
arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
|
arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
|
||||||
arguments(
|
arguments(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue