restructure project to Kotlin Multiplatform

- custom POM location removed (there are now two POMs, one for the 'Kotlin Multiplatform' publication, and another for Kotlin/JVM, and more are on the way, which would lead to a cluttered build dir)
- renamed the directories (the directory name is how Kotlin Multiplatform chooses the published artifact ID, and there's no an easier way to change it.)
- updated README examples, and link to `-jvm` variant guide
This commit is contained in:
Adam 2023-06-06 00:04:26 +02:00
parent 4df6d3f599
commit dce203845e
16 changed files with 59 additions and 243 deletions

View file

@ -0,0 +1,197 @@
/*
* Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.thauvin.erik.urlencoder
import java.nio.charset.StandardCharsets
import java.util.BitSet
/**
* Most defensive approach to URL encoding and decoding.
*
* - Rules determined by combining the unreserved character set from
* [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the percent-encode set from
* [application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set).
*
* - Both specs above support percent decoding of two hexadecimal digits to a binary octet, however their unreserved
* set of characters differs and `application/x-www-form-urlencoded` adds conversion of space to `+`, which has the
* potential to be misunderstood.
*
* - This library encodes with rules that will be decoded correctly in either case.
*
* @author Geert Bevin (gbevin(remove) at uwyn dot com)
* @author Erik C. Thauvin (erik@thauvin.net)
**/
object UrlEncoderUtil {
private val hexDigits = "0123456789ABCDEF".toCharArray()
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
// and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
private val unreservedChars = BitSet('z'.code + 1).apply {
set('-'.code)
set('.'.code)
for (c in '0'.code..'9'.code) {
set(c)
}
for (c in 'A'.code..'Z'.code) {
set(c)
}
set('_'.code)
for (c in 'a'.code..'z'.code) {
set(c)
}
}
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
// and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
private fun Char.isUnreserved(): Boolean {
return this <= 'z' && unreservedChars[code]
}
private fun StringBuilder.appendEncodedDigit(digit: Int) {
this.append(hexDigits[digit and 0x0F])
}
private fun StringBuilder.appendEncodedByte(ch: Int) {
this.append("%")
this.appendEncodedDigit(ch shr 4)
this.appendEncodedDigit(ch)
}
/**
* Transforms a provided [String] into a new string, containing decoded URL characters in the UTF-8
* encoding.
*/
@JvmStatic
@JvmOverloads
fun decode(source: String, plusToSpace: Boolean = false): String {
if (source.isEmpty()) {
return source
}
val length = source.length
val out: StringBuilder by lazy { StringBuilder(length) }
var ch: Char
var bytesBuffer: ByteArray? = null
var bytesPos = 0
var i = 0
var started = false
while (i < length) {
ch = source[i]
if (ch == '%') {
if (!started) {
out.append(source, 0, i)
started = true
}
if (bytesBuffer == null) {
// the remaining characters divided by the length of the encoding format %xx, is the maximum number
// of bytes that can be extracted
bytesBuffer = ByteArray((length - i) / 3)
}
i++
require(length >= i + 2) { "Illegal escape sequence" }
try {
val v = source.substring(i, i + 2).toInt(16)
require(v in 0..0xFF) { "Illegal escape value" }
bytesBuffer[bytesPos++] = v.toByte()
i += 2
} catch (e: NumberFormatException) {
throw IllegalArgumentException("Illegal characters in escape sequence: $e.message", e)
}
} else {
if (bytesBuffer != null) {
out.append(String(bytesBuffer, 0, bytesPos, StandardCharsets.UTF_8))
started = true
bytesBuffer = null
bytesPos = 0
}
if (plusToSpace && ch == '+') {
if (!started) {
out.append(source, 0, i)
started = true
}
out.append(" ")
} else if (started) {
out.append(ch)
}
i++
}
}
if (bytesBuffer != null) {
out.append(String(bytesBuffer, 0, bytesPos, StandardCharsets.UTF_8))
}
return if (!started) source else out.toString()
}
/**
* Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
* encoding.
*
* - Letters, numbers, unreserved (`_-!.'()*`) and allowed characters are left intact.
*/
@JvmStatic
@JvmOverloads
fun encode(source: String, allow: String = "", spaceToPlus: Boolean = false): String {
if (source.isEmpty()) {
return source
}
var out: StringBuilder? = null
var ch: Char
var i = 0
while (i < source.length) {
ch = source[i]
if (ch.isUnreserved() || allow.indexOf(ch) != -1) {
out?.append(ch)
i++
} else {
if (out == null) {
out = StringBuilder(source.length)
out.append(source, 0, i)
}
val cp = source.codePointAt(i)
when {
cp < 0x80 -> {
if (spaceToPlus && ch == ' ') {
out.append('+')
} else {
out.appendEncodedByte(cp)
}
i++
}
Character.isBmpCodePoint(cp) -> {
for (b in ch.toString().toByteArray(StandardCharsets.UTF_8)) {
out.appendEncodedByte(b.toInt())
}
i++
}
Character.isSupplementaryCodePoint(cp) -> {
val high = Character.highSurrogate(cp)
val low = Character.lowSurrogate(cp)
for (b in charArrayOf(high, low).concatToString().toByteArray(StandardCharsets.UTF_8)) {
out.appendEncodedByte(b.toInt())
}
i += 2
}
}
}
}
return out?.toString() ?: source
}
}

View file

@ -0,0 +1,113 @@
/*
* Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.thauvin.erik.urlencoder
import net.thauvin.erik.urlencoder.UrlEncoderUtil.decode
import net.thauvin.erik.urlencoder.UrlEncoderUtil.encode
import kotlin.test.*
import kotlin.test.DefaultAsserter.assertEquals
import kotlin.test.DefaultAsserter.assertSame
class UrlEncoderUtilTest {
private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_."
companion object {
@JvmStatic
var invalid = arrayOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1")
@JvmStatic
var validMap = arrayOf(
Pair("a test &", "a%20test%20%26"),
Pair(
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D"
),
Pair("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
Pair(
"\uD808\uDC00\uD809\uDD00\uD808\uDF00\uD808\uDD00", "%F0%92%80%80%F0%92%94%80%F0%92%8C%80%F0%92%84%80"
)
)
}
@Test
fun `Decode URL`() {
for (m in validMap) {
assertEquals(m.first, decode(m.second))
}
}
@Test
fun `Decode with Exception`() {
for (source in invalid) {
assertFailsWith<IllegalArgumentException>(
message = "decode($source)",
block = { decode(source) }
)
}
}
@Test
fun `Decode when None needed`() {
assertSame(same, decode(same))
assertEquals("decode('')", decode(""), "")
assertEquals("decode(' ')", decode(" "), " ")
}
@Test
fun `Decode with Plus to Space`() {
assertEquals("foo bar", decode("foo+bar", true))
assertEquals("foo bar foo", decode("foo+bar++foo", true))
assertEquals("foo bar foo", decode("foo+%20bar%20+foo", true))
assertEquals("foo + bar", decode("foo+%2B+bar", plusToSpace = true))
assertEquals("foo+bar", decode("foo%2Bbar", plusToSpace = true))
}
@Test
fun `Encode URL`() {
for (m in validMap) {
assertEquals(m.second, encode(m.first))
}
}
@Test
fun `Encode Empty or Blank`() {
assertTrue(encode("", allow = "").isEmpty(), "encode('','')")
assertEquals("encode('')", encode(""), "")
assertEquals("encode(' ')", encode(" "), "%20")
}
@Test
fun `Encode when None needed`() {
assertSame(encode(same), same)
assertSame("with empty allow", encode(same, allow = ""), same)
}
@Test
fun `Encode with Allow`() {
assertEquals("encode(x, =?)","?test=a%20test", encode("?test=a test", allow = "=?"))
assertEquals("encode(aaa, a)", "aaa", encode("aaa", "a"))
assertEquals("encode(' ')", " ", encode(" ", " ") )
}
@Test
fun `Encode with Space to Plus`() {
assertEquals("foo+bar", encode("foo bar", spaceToPlus = true))
assertEquals("foo+bar++foo", encode("foo bar foo", spaceToPlus = true))
assertEquals("foo bar", encode("foo bar", " ", true))
}
}