restructure project to Kotlin Multiplatform

- custom POM location removed (there are now two POMs, one for the 'Kotlin Multiplatform' publication and another for Kotlin/JVM, with more on the way, which would lead to a cluttered build dir) - renamed the directories (the directory name is how Kotlin Multiplatform chooses the published artifact ID, and there's no easier way to change it) - updated README examples, and added a link to the `-jvm` variant guide
2023-06-06 00:04:26 +02:00 · 2023-06-06 00:04:26 +02:00 · dce203845e
commit dce203845e
parent 4df6d3f599
16 changed files with 59 additions and 243 deletions
--- a/urlencoder-lib/src/jvmMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtil.kt
+++ b/urlencoder-lib/src/jvmMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtil.kt
@ -0,0 +1,197 @@
+/*
+ * Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
+ * Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package net.thauvin.erik.urlencoder
+
+import java.nio.charset.StandardCharsets
+import java.util.BitSet
+
+/**
+ * Most defensive approach to URL encoding and decoding.
+ *
+ * - Rules determined by combining the unreserved character set from
+ * [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the percent-encode set from
+ * [application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set).
+ *
+ * - Both specs above support percent decoding of two hexadecimal digits to a binary octet, however their unreserved
+ * set of characters differs and `application/x-www-form-urlencoded` adds conversion of space to `+`, which has the
+ * potential to be misunderstood.
+ *
+ * - This library encodes with rules that will be decoded correctly in either case.
+ *
+ * @author Geert Bevin (gbevin(remove) at uwyn dot com)
+ * @author Erik C. Thauvin (erik@thauvin.net)
+ */
+object UrlEncoderUtil {
+    /** Upper-case hexadecimal digits, indexed by nibble value. */
+    private val hexDigits = "0123456789ABCDEF".toCharArray()
+
+    // see https://www.rfc-editor.org/rfc/rfc3986#page-13
+    // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
+    /** Characters that are never percent-encoded: ASCII letters, ASCII digits, `-`, `.` and `_`. */
+    private val unreservedChars = BitSet('z'.code + 1).apply {
+        set('-'.code)
+        set('.'.code)
+        for (c in '0'.code..'9'.code) {
+            set(c)
+        }
+        for (c in 'A'.code..'Z'.code) {
+            set(c)
+        }
+        set('_'.code)
+        for (c in 'a'.code..'z'.code) {
+            set(c)
+        }
+    }
+
+    /** Returns `true` when this character is a member of [unreservedChars]. */
+    private fun Char.isUnreserved(): Boolean {
+        // 'z' is the highest member of the set, so anything above it can be rejected without a lookup.
+        return this <= 'z' && unreservedChars[code]
+    }
+
+    /**
+     * Returns the value (0..15) of this character when it is an ASCII hexadecimal digit, or `-1` otherwise.
+     *
+     * Deliberately stricter than `String.toInt(16)`: sign characters and non-ASCII digits are rejected.
+     */
+    private fun Char.hexValue(): Int = when (this) {
+        in '0'..'9' -> this - '0'
+        in 'A'..'F' -> this - 'A' + 10
+        in 'a'..'f' -> this - 'a' + 10
+        else -> -1
+    }
+
+    /** Appends the hexadecimal digit for the low four bits of [digit]. */
+    private fun StringBuilder.appendEncodedDigit(digit: Int) {
+        this.append(hexDigits[digit and 0x0F])
+    }
+
+    /** Appends `%XX` for the low eight bits of [ch]; higher bits (e.g. from sign extension) are ignored. */
+    private fun StringBuilder.appendEncodedByte(ch: Int) {
+        this.append("%")
+        this.appendEncodedDigit(ch shr 4)
+        this.appendEncodedDigit(ch)
+    }
+
+    /**
+     * Transforms a provided [String] into a new string, containing decoded URL characters in the UTF-8
+     * encoding.
+     *
+     * Each run of consecutive `%XX` escapes is collected into bytes and decoded as one UTF-8 sequence.
+     * When nothing needs decoding, the [source] instance itself is returned.
+     *
+     * @param source the string to decode
+     * @param plusToSpace when `true`, every literal `+` is decoded to a space
+     * @return the decoded string, or [source] itself when it contains nothing to decode
+     * @throws IllegalArgumentException if a `%` is not followed by exactly two ASCII hexadecimal digits
+     */
+    @JvmStatic
+    @JvmOverloads
+    fun decode(source: String, plusToSpace: Boolean = false): String {
+        if (source.isEmpty()) {
+            return source
+        }
+
+        val length = source.length
+        // Created only once something actually needs decoding; a local value needs no synchronization.
+        val out: StringBuilder by lazy(LazyThreadSafetyMode.NONE) { StringBuilder(length) }
+        var bytesBuffer: ByteArray? = null
+        var bytesPos = 0
+        var i = 0
+        var started = false
+        while (i < length) {
+            val ch = source[i]
+            if (ch == '%') {
+                if (!started) {
+                    // first change: copy the untouched prefix
+                    out.append(source, 0, i)
+                    started = true
+                }
+                if (bytesBuffer == null) {
+                    // the remaining characters divided by the length of the encoding format %xx, is the maximum number
+                    // of bytes that can be extracted
+                    bytesBuffer = ByteArray((length - i) / 3)
+                }
+                i++
+                require(length >= i + 2) { "Illegal escape sequence" }
+                val high = source[i].hexValue()
+                val low = source[i + 1].hexValue()
+                require(high >= 0 && low >= 0) {
+                    "Illegal characters in escape sequence: %${source.substring(i, i + 2)}"
+                }
+                bytesBuffer[bytesPos++] = ((high shl 4) or low).toByte()
+                i += 2
+            } else {
+                if (bytesBuffer != null) {
+                    // a run of escapes just ended: flush it as a single UTF-8 sequence
+                    out.append(String(bytesBuffer, 0, bytesPos, StandardCharsets.UTF_8))
+                    bytesBuffer = null
+                    bytesPos = 0
+                }
+                if (plusToSpace && ch == '+') {
+                    if (!started) {
+                        out.append(source, 0, i)
+                        started = true
+                    }
+                    out.append(" ")
+                } else if (started) {
+                    out.append(ch)
+                }
+                i++
+            }
+        }
+
+        if (bytesBuffer != null) {
+            // the source ended with an escape run
+            out.append(String(bytesBuffer, 0, bytesPos, StandardCharsets.UTF_8))
+        }
+
+        return if (!started) source else out.toString()
+    }
+
+    /**
+     * Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
+     * encoding.
+     *
+     * - ASCII letters, digits, `-`, `.`, `_` and any character contained in [allow] are left intact.
+     * - Every other character is replaced by the `%XX` escapes of its UTF-8 bytes.
+     *
+     * When nothing needs encoding, the [source] instance itself is returned.
+     *
+     * @param source the string to encode
+     * @param allow additional characters that must be left intact
+     * @param spaceToPlus when `true`, a space is encoded as `+` instead of `%20` (unless [allow] contains a space)
+     * @return the encoded string, or [source] itself when it contains nothing to encode
+     */
+    @JvmStatic
+    @JvmOverloads
+    fun encode(source: String, allow: String = "", spaceToPlus: Boolean = false): String {
+        if (source.isEmpty()) {
+            return source
+        }
+        var out: StringBuilder? = null
+        var i = 0
+        while (i < source.length) {
+            val ch = source[i]
+            if (ch.isUnreserved() || ch in allow) {
+                out?.append(ch)
+                i++
+                continue
+            }
+            if (out == null) {
+                // first change: copy the untouched prefix
+                out = StringBuilder(source.length)
+                out.append(source, 0, i)
+            }
+            val cp = source.codePointAt(i)
+            if (cp < 0x80) {
+                if (spaceToPlus && ch == ' ') {
+                    out.append('+')
+                } else {
+                    out.appendEncodedByte(cp)
+                }
+                i++
+            } else {
+                // one char for a BMP code point, two (a surrogate pair) for a supplementary one
+                val width = Character.charCount(cp)
+                for (b in source.substring(i, i + width).toByteArray(StandardCharsets.UTF_8)) {
+                    out.appendEncodedByte(b.toInt())
+                }
+                i += width
+            }
+        }
+
+        return out?.toString() ?: source
+    }
+}
--- a/urlencoder-lib/src/jvmTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtilTest.kt
+++ b/urlencoder-lib/src/jvmTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtilTest.kt
@ -0,0 +1,113 @@
+/*
+ * Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
+ * Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package net.thauvin.erik.urlencoder
+
+import net.thauvin.erik.urlencoder.UrlEncoderUtil.decode
+import net.thauvin.erik.urlencoder.UrlEncoderUtil.encode
+import kotlin.test.*
+import kotlin.test.DefaultAsserter.assertEquals
+import kotlin.test.DefaultAsserter.assertSame
+
+/**
+ * Tests for [UrlEncoderUtil.encode] and [UrlEncoderUtil.decode].
+ *
+ * Three-argument `assertEquals`/`assertSame` calls whose first argument is a [String] take the form
+ * `(message, expected, actual)`; two-argument calls take the form `(expected, actual)`.
+ */
+class UrlEncoderUtilTest {
+    /** Contains only characters that are never encoded, so encoding and decoding must return the same instance. */
+    private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_."
+
+    @Test
+    fun `Decode URL`() {
+        for (m in validMap) {
+            assertEquals(m.first, decode(m.second))
+        }
+    }
+
+    @Test
+    fun `Decode with Exception`() {
+        for (source in invalid) {
+            assertFailsWith<IllegalArgumentException>(
+                message = "decode($source)",
+                block = { decode(source) }
+            )
+        }
+    }
+
+    @Test
+    fun `Decode when None needed`() {
+        assertSame(same, decode(same))
+        assertEquals("decode('')", "", decode(""))
+        assertEquals("decode(' ')", " ", decode(" "))
+    }
+
+    @Test
+    fun `Decode with Plus to Space`() {
+        assertEquals("foo bar", decode("foo+bar", true))
+        assertEquals("foo bar  foo", decode("foo+bar++foo", true))
+        assertEquals("foo  bar  foo", decode("foo+%20bar%20+foo", true))
+        assertEquals("foo + bar", decode("foo+%2B+bar", plusToSpace = true))
+        assertEquals("foo+bar", decode("foo%2Bbar", plusToSpace = true))
+    }
+
+    @Test
+    fun `Encode URL`() {
+        for (m in validMap) {
+            assertEquals(m.second, encode(m.first))
+        }
+    }
+
+    @Test
+    fun `Encode Empty or Blank`() {
+        assertTrue(encode("", allow = "").isEmpty(), "encode('','')")
+        assertEquals("encode('')", "", encode(""))
+        assertEquals("encode(' ')", "%20", encode(" "))
+    }
+
+    @Test
+    fun `Encode when None needed`() {
+        assertSame(same, encode(same))
+        assertSame("with empty allow", same, encode(same, allow = ""))
+    }
+
+    @Test
+    fun `Encode with Allow`() {
+        assertEquals("encode(x, =?)", "?test=a%20test", encode("?test=a test", allow = "=?"))
+        assertEquals("encode(aaa, a)", "aaa", encode("aaa", "a"))
+        assertEquals("encode(' ')", " ", encode(" ", " "))
+    }
+
+    @Test
+    fun `Encode with Space to Plus`() {
+        assertEquals("foo+bar", encode("foo bar", spaceToPlus = true))
+        assertEquals("foo+bar++foo", encode("foo bar  foo", spaceToPlus = true))
+        assertEquals("foo bar", encode("foo bar", " ", true))
+    }
+
+    companion object {
+        /** Inputs that must make `decode` throw: truncated escapes, non-hex characters and a signed value. */
+        @JvmStatic
+        var invalid = arrayOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1")
+
+        /** Pairs of (decoded, encoded) strings that must round-trip in both directions. */
+        @JvmStatic
+        var validMap = arrayOf(
+            Pair("a test &", "a%20test%20%26"),
+            Pair(
+                "!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~=",
+                "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.%7E%3D"
+            ),
+            Pair("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
+            Pair(
+                "\uD808\uDC00\uD809\uDD00\uD808\uDF00\uD808\uDD00", "%F0%92%80%80%F0%92%94%80%F0%92%8C%80%F0%92%84%80"
+            )
+        )
+    }
+}