diff --git a/README.md b/README.md index 58d3bd0..990bfc0 100644 --- a/README.md +++ b/README.md @@ -9,18 +9,27 @@ # URL Encoder for Kotlin -A simple library to encode/decode URL parameters. +A simple defensive library to encode/decode URL components. This library was adapted from the [RIFE2 Web Application Framework](https://rife2.com). A pure Java version can also be found at [https://github.com/gbevin/urlencoder](https://github.com/gbevin/urlencoder). +The rules are determined by combining the unreserved character set from +[RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the +percent-encode set from +[application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set). -For decades we've been using [java.net.URLEncoder](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLEncoder.html) because of its improper naming. It is actually intended to encode HTML form parameters, not URLs, causing the wrong escape sequences to be used. +Both specs above support percent decoding of two hexadecimal digits to a +binary octet; however, their unreserved set of characters differs and +`application/x-www-form-urlencoded` adds conversion of space to `+`, +which has the potential to be misunderstood. -Additionally, `java.net.URLEncoder` allocates memory even when no encoding is necessary, significantly impacting performance. This library has a negligible performance impact when the specified string doesn't need to be encoded. +This library encodes with rules that will be decoded correctly in either case. - -Android's [Uri.encode](https://developer.android.com/reference/android/net/Uri#encode(java.lang.String,%20java.lang.String)) also addresses the same issues. +Additionally, this library allocates no memory when encoding isn't needed and +does the work in a single pass without multiple loops. 
Both of these +optimizations have a significantly beneficial impact on performance of encoding +compared to other solutions like the standard `URLEncoder` in the JDK. ## Examples (TL;DR) @@ -34,6 +43,7 @@ UrlEncoder.decode("%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81") // -> %# ``` ## Gradle, Maven, etc. + To use with [Gradle](https://gradle.org/), include the following dependency in your build file: ```gradle @@ -47,13 +57,15 @@ dependencies { } ``` -Instructions for using with Maven, Ivy, etc. can be found on [Maven Central](https://maven-badges.herokuapp.com/maven-central/net.thauvin.erik/urlencoder). +Instructions for using with Maven, Ivy, etc. can be found +on [Maven Central](https://maven-badges.herokuapp.com/maven-central/net.thauvin.erik/urlencoder). ## Standalone usage UrlEncoder can be used on the command line also, both for encoding and decoding. You have two options: + * run it with Gradle * build the jar and launch it with Java diff --git a/lib/build.gradle.kts b/lib/build.gradle.kts index e783c63..701c707 100644 --- a/lib/build.gradle.kts +++ b/lib/build.gradle.kts @@ -21,7 +21,7 @@ plugins { id("signing") } -description = "A simple library to encode/decode URL parameters" +description = "A simple defensive library to encode/decode URL components" group = "net.thauvin.erik" version = "1.0.1-SNAPSHOT" @@ -193,7 +193,7 @@ publishing { artifactId = rootProject.name artifact(javadocJar) pom { - name.set(mavenName) + name.set("$mavenName for Kotlin") description.set(project.description) url.set(mavenUrl) licenses { diff --git a/lib/detekt-baseline.xml b/lib/detekt-baseline.xml index 6817e55..e876ec6 100644 --- a/lib/detekt-baseline.xml +++ b/lib/detekt-baseline.xml @@ -1,13 +1,15 @@ - - - ComplexCondition:UrlEncoder.kt$UrlEncoder$hasOption && args.size == 2 || !hasOption && args.size == 1 - MagicNumber:UrlEncoder.kt$UrlEncoder$0x80 - MagicNumber:UrlEncoder.kt$UrlEncoder$0xFF - MagicNumber:UrlEncoder.kt$UrlEncoder$16 - 
MagicNumber:UrlEncoder.kt$UrlEncoder$3 - MagicNumber:UrlEncoder.kt$UrlEncoder$4 - NestedBlockDepth:UrlEncoder.kt$UrlEncoder$@JvmStatic fun encode(source: String, allow: String): String - + + + ComplexCondition:UrlEncoder.kt$UrlEncoder$hasOption && args.size == 2 || !hasOption && + args.size == 1 + + MagicNumber:UrlEncoder.kt$UrlEncoder$0x80 + MagicNumber:UrlEncoder.kt$UrlEncoder$0xFF + MagicNumber:UrlEncoder.kt$UrlEncoder$16 + MagicNumber:UrlEncoder.kt$UrlEncoder$3 + MagicNumber:UrlEncoder.kt$UrlEncoder$4 + NestedBlockDepth:UrlEncoder.kt$UrlEncoder$@JvmStatic fun encode(source: String, allow: String): String + diff --git a/lib/pom.xml b/lib/pom.xml index 7e4dcc3..7ef0538 100644 --- a/lib/pom.xml +++ b/lib/pom.xml @@ -9,8 +9,8 @@ net.thauvin.erik urlencoder 1.0.1-SNAPSHOT - UrlEncoder - A simple library to encode/decode URL parameters + UrlEncoder for Kotlin + A simple defensive library to encode/decode URL components https://github.com/ethauvin/urlencoder diff --git a/lib/src/main/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt b/lib/src/main/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt index 4a28780..d39b354 100644 --- a/lib/src/main/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt +++ b/lib/src/main/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt @@ -22,22 +22,31 @@ import java.util.BitSet import kotlin.system.exitProcess /** - * URL parameters encoding and decoding. + * Most defensive approach to URL encoding and decoding. * - * - Rules determined by [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13), + * - Rules determined by combining the unreserved character set from + * [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the percent-encode set from + * [application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set). 
* - * @author Geert Bevin (gbevin[remove] at uwyn dot com) + * - Both specs above support percent decoding of two hexadecimal digits to a binary octet, however their unreserved + * set of characters differs and `application/x-www-form-urlencoded` adds conversion of space to `+`, which has the + * potential to be misunderstood. + * + * - This library encodes with rules that will be decoded correctly in either case. + * + * @author Geert Bevin (gbevin(remove) at uwyn dot com) * @author Erik C. Thauvin (erik@thauvin.net) - */ + **/ object UrlEncoder { private val hexDigits = "0123456789ABCDEF".toCharArray() internal val usage = "Usage : java -jar urlencoder-*all.jar [-ed] text" + System.lineSeparator() + - "Encode and decode URL parameters." + System.lineSeparator() + " -e encode (default) " + + "Encode and decode URL components defensively." + System.lineSeparator() + " -e encode (default) " + System.lineSeparator() + " -d decode" // see https://www.rfc-editor.org/rfc/rfc3986#page-13 - private val unreservedChars = BitSet('~'.code + 1).apply { + // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set + private val unreservedChars = BitSet('z'.code + 1).apply { set('-') set('.') for (c in '0'..'9') { @@ -50,14 +59,14 @@ object UrlEncoder { for (c in 'a'.code..'z'.code) { set(c) } - set('~') } private fun BitSet.set(c: Char) = this.set(c.code) // see https://www.rfc-editor.org/rfc/rfc3986#page-13 + // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set private fun Char.isUnreserved(): Boolean { - return this <= '~' && unreservedChars.get(code) + return this <= 'z' && unreservedChars.get(code) } private fun StringBuilder.appendEncodedDigit(digit: Int) { @@ -130,7 +139,7 @@ object UrlEncoder { * Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8 * encoding. * - * - Letters, numbers, unreserved (`_-!.~'()*`) and allowed characters are left intact. 
+ * - Letters, numbers, unreserved (`-._`) and allowed characters are left intact. */ @JvmStatic fun encode(source: String, allow: String): String { @@ -177,7 +186,7 @@ object UrlEncoder { * Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8 * encoding. * - * - Letters, numbers, unreserved (`_-!.~'()*`) and allowed characters are left intact. + * - Letters, numbers, unreserved (`-._`) and allowed characters are left intact. */ @JvmStatic fun encode(source: String, vararg allow: Char): String { @@ -187,7 +196,7 @@ object UrlEncoder { /** * Encodes and decodes URLs from the command line. * - * - `kotlin -cp urlencoder-*.jar net.thauvin.erik.urlencoder.UrlEncoder` + * - `java -jar urlencoder-*all.jar ` */ @JvmStatic fun main(args: Array) { @@ -200,7 +209,7 @@ object UrlEncoder { } exitProcess(result.status) } catch (e: IllegalArgumentException) { - System.err.println("${UrlEncoder::class.java.simpleName}: ${e.message}"); + System.err.println("${UrlEncoder::class.java.simpleName}: ${e.message}") exitProcess(1) } } diff --git a/lib/src/test/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt b/lib/src/test/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt index 366d6fc..79658e0 100644 --- a/lib/src/test/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt +++ b/lib/src/test/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt @@ -34,7 +34,7 @@ import org.junit.jupiter.params.provider.ValueSource import java.util.stream.Stream class UrlEncoderTest { - private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~" + private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_." 
companion object { @JvmStatic @@ -45,7 +45,7 @@ class UrlEncoderTest { arguments("a test &", "a%20test%20%26"), arguments( "!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=", - "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D" + "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D" ), arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"), arguments(