diff --git a/README.md b/README.md
index 58d3bd0..990bfc0 100644
--- a/README.md
+++ b/README.md
@@ -9,18 +9,27 @@
# URL Encoder for Kotlin
-A simple library to encode/decode URL parameters.
+A simple defensive library to encode/decode URL components.
This library was adapted from the [RIFE2 Web Application Framework](https://rife2.com).
A pure Java version can also be found at [https://github.com/gbevin/urlencoder](https://github.com/gbevin/urlencoder).
+The rules are determined by combining the unreserved character set from
+[RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the
+percent-encode set from
+[application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set).
-For decades we've been using [java.net.URLEncoder](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLEncoder.html) because of its improper naming. It is actually intended to encode HTML form parameters, not URLs, causing the wrong escape sequences to be used.
+Both specs above support percent decoding of two hexadecimal digits to a
+binary octet; however, their unreserved character sets differ, and
+`application/x-www-form-urlencoded` adds conversion of space to `+`,
+which has the potential to be misunderstood.
-Additionally, `java.net.URLEncoder` allocates memory even when no encoding is necessary, significantly impacting performance. This library has a negligible performance impact when the specified string doesn't need to be encoded.
+This library encodes with rules that will be decoded correctly in either case.
-
-Android's [Uri.encode](https://developer.android.com/reference/android/net/Uri#encode(java.lang.String,%20java.lang.String)) also addresses the same issues.
+Additionally, this library allocates no memory when encoding isn't needed and
+does the work in a single pass without multiple loops. Both of these
+optimizations significantly improve encoding performance
+compared to other solutions like the standard `URLEncoder` in the JDK.
## Examples (TL;DR)
@@ -34,6 +43,7 @@ UrlEncoder.decode("%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81") // -> %#
```
## Gradle, Maven, etc.
+
To use with [Gradle](https://gradle.org/), include the following dependency in your build file:
```gradle
@@ -47,13 +57,15 @@ dependencies {
}
```
-Instructions for using with Maven, Ivy, etc. can be found on [Maven Central](https://maven-badges.herokuapp.com/maven-central/net.thauvin.erik/urlencoder).
+Instructions for using with Maven, Ivy, etc. can be found
+on [Maven Central](https://maven-badges.herokuapp.com/maven-central/net.thauvin.erik/urlencoder).
## Standalone usage
UrlEncoder can be used on the command line also, both for encoding and decoding.
You have two options:
+
* run it with Gradle
* build the jar and launch it with Java
diff --git a/lib/build.gradle.kts b/lib/build.gradle.kts
index e783c63..701c707 100644
--- a/lib/build.gradle.kts
+++ b/lib/build.gradle.kts
@@ -21,7 +21,7 @@ plugins {
id("signing")
}
-description = "A simple library to encode/decode URL parameters"
+description = "A simple defensive library to encode/decode URL components"
group = "net.thauvin.erik"
version = "1.0.1-SNAPSHOT"
@@ -193,7 +193,7 @@ publishing {
artifactId = rootProject.name
artifact(javadocJar)
pom {
- name.set(mavenName)
+ name.set("$mavenName for Kotlin")
description.set(project.description)
url.set(mavenUrl)
licenses {
diff --git a/lib/detekt-baseline.xml b/lib/detekt-baseline.xml
index 6817e55..e876ec6 100644
--- a/lib/detekt-baseline.xml
+++ b/lib/detekt-baseline.xml
@@ -1,13 +1,15 @@
-
-
- ComplexCondition:UrlEncoder.kt$UrlEncoder$hasOption && args.size == 2 || !hasOption && args.size == 1
- MagicNumber:UrlEncoder.kt$UrlEncoder$0x80
- MagicNumber:UrlEncoder.kt$UrlEncoder$0xFF
- MagicNumber:UrlEncoder.kt$UrlEncoder$16
- MagicNumber:UrlEncoder.kt$UrlEncoder$3
- MagicNumber:UrlEncoder.kt$UrlEncoder$4
- NestedBlockDepth:UrlEncoder.kt$UrlEncoder$@JvmStatic fun encode(source: String, allow: String): String
-
+
+
+ ComplexCondition:UrlEncoder.kt$UrlEncoder$hasOption && args.size == 2 || !hasOption &&
+ args.size == 1
+
+ MagicNumber:UrlEncoder.kt$UrlEncoder$0x80
+ MagicNumber:UrlEncoder.kt$UrlEncoder$0xFF
+ MagicNumber:UrlEncoder.kt$UrlEncoder$16
+ MagicNumber:UrlEncoder.kt$UrlEncoder$3
+ MagicNumber:UrlEncoder.kt$UrlEncoder$4
+ NestedBlockDepth:UrlEncoder.kt$UrlEncoder$@JvmStatic fun encode(source: String, allow: String): String
+
diff --git a/lib/pom.xml b/lib/pom.xml
index 7e4dcc3..7ef0538 100644
--- a/lib/pom.xml
+++ b/lib/pom.xml
@@ -9,8 +9,8 @@
net.thauvin.erik
urlencoder
1.0.1-SNAPSHOT
- UrlEncoder
- A simple library to encode/decode URL parameters
+ UrlEncoder for Kotlin
+ A simple defensive library to encode/decode URL components
https://github.com/ethauvin/urlencoder
diff --git a/lib/src/main/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt b/lib/src/main/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt
index 4a28780..d39b354 100644
--- a/lib/src/main/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt
+++ b/lib/src/main/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt
@@ -22,22 +22,31 @@ import java.util.BitSet
import kotlin.system.exitProcess
/**
- * URL parameters encoding and decoding.
+ * Most defensive approach to URL encoding and decoding.
*
- * - Rules determined by [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13),
+ * - Rules determined by combining the unreserved character set from
+ * [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the percent-encode set from
+ * [application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set).
*
- * @author Geert Bevin (gbevin[remove] at uwyn dot com)
+ * - Both specs above support percent decoding of two hexadecimal digits to a binary octet; however, their
+ * unreserved character sets differ, and `application/x-www-form-urlencoded` adds conversion of space to `+`, which
+ * has the potential to be misunderstood.
+ *
+ * - This library encodes with rules that will be decoded correctly in either case.
+ *
+ * @author Geert Bevin (gbevin(remove) at uwyn dot com)
* @author Erik C. Thauvin (erik@thauvin.net)
- */
+ **/
object UrlEncoder {
private val hexDigits = "0123456789ABCDEF".toCharArray()
internal val usage =
"Usage : java -jar urlencoder-*all.jar [-ed] text" + System.lineSeparator() +
- "Encode and decode URL parameters." + System.lineSeparator() + " -e encode (default) " +
+ "Encode and decode URL components defensively." + System.lineSeparator() + " -e encode (default) " +
System.lineSeparator() + " -d decode"
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
- private val unreservedChars = BitSet('~'.code + 1).apply {
+ // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
+ private val unreservedChars = BitSet('z'.code + 1).apply {
set('-')
set('.')
for (c in '0'..'9') {
@@ -50,14 +59,14 @@ object UrlEncoder {
for (c in 'a'.code..'z'.code) {
set(c)
}
- set('~')
}
private fun BitSet.set(c: Char) = this.set(c.code)
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
+ // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
private fun Char.isUnreserved(): Boolean {
- return this <= '~' && unreservedChars.get(code)
+ return this <= 'z' && unreservedChars.get(code)
}
private fun StringBuilder.appendEncodedDigit(digit: Int) {
@@ -130,7 +139,7 @@ object UrlEncoder {
* Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
* encoding.
*
- * - Letters, numbers, unreserved (`_-!.~'()*`) and allowed characters are left intact.
+ * - Letters, numbers, unreserved (`-._`) and allowed characters are left intact.
*/
@JvmStatic
fun encode(source: String, allow: String): String {
@@ -177,7 +186,7 @@ object UrlEncoder {
* Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
* encoding.
*
- * - Letters, numbers, unreserved (`_-!.~'()*`) and allowed characters are left intact.
+ * - Letters, numbers, unreserved (`-._`) and allowed characters are left intact.
*/
@JvmStatic
fun encode(source: String, vararg allow: Char): String {
@@ -187,7 +196,7 @@ object UrlEncoder {
/**
* Encodes and decodes URLs from the command line.
*
- * - `kotlin -cp urlencoder-*.jar net.thauvin.erik.urlencoder.UrlEncoder`
+ * - `java -jar urlencoder-*all.jar [-ed] text`
*/
@JvmStatic
fun main(args: Array) {
@@ -200,7 +209,7 @@ object UrlEncoder {
}
exitProcess(result.status)
} catch (e: IllegalArgumentException) {
- System.err.println("${UrlEncoder::class.java.simpleName}: ${e.message}");
+ System.err.println("${UrlEncoder::class.java.simpleName}: ${e.message}")
exitProcess(1)
}
}
diff --git a/lib/src/test/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt b/lib/src/test/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt
index 366d6fc..79658e0 100644
--- a/lib/src/test/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt
+++ b/lib/src/test/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt
@@ -34,7 +34,7 @@ import org.junit.jupiter.params.provider.ValueSource
import java.util.stream.Stream
class UrlEncoderTest {
- private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~"
+ private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_."
companion object {
@JvmStatic
@@ -45,7 +45,7 @@ class UrlEncoderTest {
arguments("a test &", "a%20test%20%26"),
arguments(
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
- "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D"
+ "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D"
),
arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
arguments(