From 8fcd629bce4e3812422a7da879ce71a214e72473 Mon Sep 17 00:00:00 2001 From: Adam <897017+aSemy@users.noreply.github.com> Date: Tue, 5 Sep 2023 22:55:29 +0200 Subject: [PATCH] Convert urlencoder lib to Kotlin Multiplatform (#10) * convert UrlEncoderUtil to be multiplatform compatible * convert lib tests to KMP * convert UrlEncoderTest to commonTest (but since there's only a JVM target there's no changes), and also convert mutable test data with read-only types. * Update copyright --------- Co-authored-by: Erik C. Thauvin --- .idea/copyright/Apache_License.xml | 2 +- README.md | 4 +- build.gradle.kts | 17 --- buildSrc/gradle.lockfile | 66 ---------- urlencoder-app/build.gradle.kts | 21 +--- .../net/thauvin/erik/urlencoder/UrlEncoder.kt | 12 +- .../thauvin/erik/urlencoder/UrlEncoderTest.kt | 25 ++-- urlencoder-lib/build.gradle.kts | 21 +--- .../net/thauvin/erik/urlencoder/Character.kt | 73 +++++++++++ .../thauvin/erik/urlencoder/UrlEncoderUtil.kt | 114 +++++++++++++----- .../erik/urlencoder/UrlEncoderUtilTest.kt | 47 +++----- 11 files changed, 196 insertions(+), 206 deletions(-) delete mode 100644 buildSrc/gradle.lockfile rename urlencoder-app/src/{jvmMain => commonMain}/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt (91%) rename urlencoder-app/src/{jvmTest => commonTest}/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt (80%) create mode 100644 urlencoder-lib/src/commonMain/kotlin/net/thauvin/erik/urlencoder/Character.kt rename urlencoder-lib/src/{jvmMain => commonMain}/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtil.kt (62%) rename urlencoder-lib/src/{jvmTest => commonTest}/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtilTest.kt (66%) diff --git a/.idea/copyright/Apache_License.xml b/.idea/copyright/Apache_License.xml index 68484aa..a5730ff 100644 --- a/.idea/copyright/Apache_License.xml +++ b/.idea/copyright/Apache_License.xml @@ -1,6 +1,6 @@ - \ No newline at end of file diff --git a/README.md b/README.md index 64911dc..77e9974 100644 --- a/README.md +++ 
b/README.md @@ -62,7 +62,7 @@ repositories { } dependencies { - implementation("net.thauvin.erik:urlencoder-lib:1.3.0") + implementation("net.thauvin.erik:urlencoder-lib-jvm:1.4.0") } ``` @@ -73,7 +73,7 @@ to the artifact URL. net.thauvin.erik urlencoder-lib-jvm - 1.3.0 + 1.4.0 ``` diff --git a/build.gradle.kts b/build.gradle.kts index c710ce6..ab1a701 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,20 +1,3 @@ -/* - * Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com) - * Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - plugins { buildsrc.conventions.base id("org.jetbrains.kotlinx.kover") diff --git a/buildSrc/gradle.lockfile b/buildSrc/gradle.lockfile deleted file mode 100644 index ecf6cb4..0000000 --- a/buildSrc/gradle.lockfile +++ /dev/null @@ -1,66 +0,0 @@ -# This is a Gradle generated file for dependency locking. -# Manual edits can break the build and are not advised. -# This file is expected to be part of source control. 
-com.fasterxml.jackson.core:jackson-annotations:2.12.7=runtimeClasspath -com.fasterxml.jackson.core:jackson-core:2.12.7=runtimeClasspath -com.fasterxml.jackson.core:jackson-databind:2.12.7.1=runtimeClasspath -com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.12.7=runtimeClasspath -com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.12.7=runtimeClasspath -com.fasterxml.jackson.module:jackson-module-kotlin:2.12.7=runtimeClasspath -com.fasterxml.jackson:jackson-bom:2.12.7=runtimeClasspath -com.fasterxml.woodstox:woodstox-core:6.2.4=runtimeClasspath -com.github.ben-manes:gradle-versions-plugin:0.47.0=compileClasspath,runtimeClasspath -com.squareup.moshi:moshi-kotlin:1.12.0=runtimeClasspath -com.squareup.moshi:moshi:1.12.0=runtimeClasspath -com.squareup.okhttp3:okhttp:4.11.0=runtimeClasspath -com.squareup.okio:okio-jvm:3.2.0=runtimeClasspath -com.squareup.okio:okio:3.2.0=runtimeClasspath -com.thoughtworks.xstream:xstream:1.4.20=runtimeClasspath -io.github.x-stream:mxparser:1.2.2=runtimeClasspath -io.gitlab.arturbosch.detekt:detekt-gradle-plugin:1.23.1=compileClasspath,runtimeClasspath -jakarta.activation:jakarta.activation-api:1.2.1=runtimeClasspath -jakarta.xml.bind:jakarta.xml.bind-api:2.3.2=runtimeClasspath -org.codehaus.woodstox:stax2-api:4.2.1=runtimeClasspath -org.jetbrains.dokka:dokka-core:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.dokka:dokka-gradle-plugin:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.intellij.deps:trove4j:1.0.20200330=runtimeClasspath -org.jetbrains.kotlin:kotlin-android-extensions:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-build-tools-api:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-compiler-embeddable:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-compiler-runner:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-daemon-client:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-daemon-embeddable:1.9.0=runtimeClasspath 
-org.jetbrains.kotlin:kotlin-gradle-plugin-annotations:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.kotlin:kotlin-gradle-plugin-api:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.kotlin:kotlin-gradle-plugin-idea-proto:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-gradle-plugin-idea:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-gradle-plugin-model:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.kotlin:kotlin-gradle-plugin:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.kotlin:kotlin-gradle-plugins-bom:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.kotlin:kotlin-klib-commonizer-api:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-native-utils:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.kotlin:kotlin-project-model:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.kotlin:kotlin-reflect:1.8.20=runtimeClasspath -org.jetbrains.kotlin:kotlin-reflect:1.9.0=compileClasspath -org.jetbrains.kotlin:kotlin-scripting-common:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-scripting-compiler-embeddable:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-scripting-compiler-impl-embeddable:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-scripting-jvm:1.9.0=runtimeClasspath -org.jetbrains.kotlin:kotlin-stdlib-common:1.8.21=runtimeClasspath -org.jetbrains.kotlin:kotlin-stdlib-common:1.9.0=compileClasspath -org.jetbrains.kotlin:kotlin-stdlib-jdk7:1.8.21=runtimeClasspath -org.jetbrains.kotlin:kotlin-stdlib-jdk7:1.9.0=compileClasspath -org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.8.21=runtimeClasspath -org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.9.0=compileClasspath -org.jetbrains.kotlin:kotlin-stdlib:1.8.21=runtimeClasspath -org.jetbrains.kotlin:kotlin-stdlib:1.9.0=compileClasspath -org.jetbrains.kotlin:kotlin-tooling-core:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.kotlin:kotlin-util-io:1.9.0=compileClasspath,runtimeClasspath -org.jetbrains.kotlin:kotlin-util-klib:1.9.0=compileClasspath,runtimeClasspath 
-org.jetbrains.kotlinx:kotlinx-coroutines-bom:1.6.3=runtimeClasspath -org.jetbrains.kotlinx:kotlinx-coroutines-core-jvm:1.6.3=runtimeClasspath -org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.3=runtimeClasspath -org.jetbrains.kotlinx:kover-gradle-plugin:0.7.3=compileClasspath,runtimeClasspath -org.jetbrains:annotations:13.0=compileClasspath,runtimeClasspath -xmlpull:xmlpull:1.1.3.1=runtimeClasspath -empty= diff --git a/urlencoder-app/build.gradle.kts b/urlencoder-app/build.gradle.kts index ae96126..c5fe1df 100644 --- a/urlencoder-app/build.gradle.kts +++ b/urlencoder-app/build.gradle.kts @@ -1,26 +1,9 @@ -/* - * Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com) - * Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - import org.jetbrains.dokka.gradle.DokkaTask plugins { buildsrc.conventions.lang.`kotlin-multiplatform-jvm` - buildsrc.conventions.lang.`kotlin-multiplatform-js` - buildsrc.conventions.lang.`kotlin-multiplatform-native` +// buildsrc.conventions.lang.`kotlin-multiplatform-js` +// buildsrc.conventions.lang.`kotlin-multiplatform-native` buildsrc.conventions.publishing id("application") id("com.github.ben-manes.versions") diff --git a/urlencoder-app/src/jvmMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt b/urlencoder-app/src/commonMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt similarity index 91% rename from urlencoder-app/src/jvmMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt rename to urlencoder-app/src/commonMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt index 902bc45..df89c28 100644 --- a/urlencoder-app/src/jvmMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt +++ b/urlencoder-app/src/commonMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoder.kt @@ -1,12 +1,11 @@ /* - * Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com) - * Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net) + * Copyright 2001-2023 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -14,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package net.thauvin.erik.urlencoder import kotlin.system.exitProcess @@ -38,9 +38,9 @@ object UrlEncoder { internal val usage = "Usage : java -jar urlencoder-*all.jar [-ed] text" + System.lineSeparator() + - "Encode and decode URL components defensively." 
+ System.lineSeparator() + - " -e encode (default) " + System.lineSeparator() + - " -d decode" + "Encode and decode URL components defensively." + System.lineSeparator() + + " -e encode (default) " + System.lineSeparator() + + " -d decode" /** * Encodes and decodes URLs from the command line. diff --git a/urlencoder-app/src/jvmTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt b/urlencoder-app/src/commonTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt similarity index 80% rename from urlencoder-app/src/jvmTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt rename to urlencoder-app/src/commonTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt index 17cc7b2..1b159ed 100644 --- a/urlencoder-app/src/jvmTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt +++ b/urlencoder-app/src/commonTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderTest.kt @@ -1,12 +1,11 @@ /* - * Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com) - * Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net) + * Copyright 2001-2023 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -25,20 +24,14 @@ import kotlin.test.assertFailsWith class UrlEncoderTest { companion object { - @JvmStatic - var invalid = arrayOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1") + val invalid = listOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1") - @JvmStatic - var validMap = arrayOf( - Pair("a test &", "a%20test%20%26"), - Pair( - "!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=", - "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D" - ), - Pair("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"), - Pair( - "\uD808\uDC00\uD809\uDD00\uD808\uDF00\uD808\uDD00", "%F0%92%80%80%F0%92%94%80%F0%92%8C%80%F0%92%84%80" - ) + val validMap = listOf( + "a test &" to "a%20test%20%26", + "!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=" to + "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D", + "%#okékÉȢ smile!😁" to "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81", + "\uD808\uDC00\uD809\uDD00\uD808\uDF00\uD808\uDD00" to "%F0%92%80%80%F0%92%94%80%F0%92%8C%80%F0%92%84%80", ) } diff --git a/urlencoder-lib/build.gradle.kts b/urlencoder-lib/build.gradle.kts index 5bbec3d..f922e29 100644 --- a/urlencoder-lib/build.gradle.kts +++ b/urlencoder-lib/build.gradle.kts @@ -1,20 +1,3 @@ -/* - * Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com) - * Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - import org.jetbrains.dokka.gradle.DokkaTask plugins { @@ -31,10 +14,8 @@ val deployDir = project.layout.projectDirectory.dir("deploy") kotlin { sourceSets { - jvmTest { + commonTest { dependencies { - //implementation("com.willowtreeapps.assertk:assertk-jvm:0.25") - //implementation("org.junit.jupiter:junit-jupiter:5.9.1") implementation(kotlin("test")) } } diff --git a/urlencoder-lib/src/commonMain/kotlin/net/thauvin/erik/urlencoder/Character.kt b/urlencoder-lib/src/commonMain/kotlin/net/thauvin/erik/urlencoder/Character.kt new file mode 100644 index 0000000..1ea0db9 --- /dev/null +++ b/urlencoder-lib/src/commonMain/kotlin/net/thauvin/erik/urlencoder/Character.kt @@ -0,0 +1,73 @@ +/* + * Copyright 2001-2023 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package net.thauvin.erik.urlencoder + +import kotlin.Char.Companion.MIN_HIGH_SURROGATE +import kotlin.Char.Companion.MIN_LOW_SURROGATE + +/** + * Kotlin Multiplatform equivalent for `java.lang.Character` + * + * @author aSemy + */ + +internal object Character { + + /** + * See https://www.tutorialspoint.com/java/lang/character_issupplementarycodepoint.htm + * + * Determines whether the specified character (Unicode code point) is in the supplementary character range. + * The supplementary character range in the Unicode system falls in `U+10000` to `U+10FFFF`. + * + * The Unicode code points are divided into two categories: + * Basic Multilingual Plane (BMP) code points and Supplementary code points. + * BMP code points are present in the range U+0000 to U+FFFF. + * + * Whereas, supplementary characters are rare characters that are not represented using the original 16-bit Unicode. + * For example, these types of characters are used in Chinese or Japanese scripts and hence, are required by the + * applications used in these countries. + * + * @return `true` if the specified code point falls in the range of supplementary code points + * ([MIN_SUPPLEMENTARY_CODE_POINT] to [MAX_CODE_POINT], inclusive), `false` otherwise. 
+ */ + internal fun isSupplementaryCodePoint(codePoint: Int): Boolean = + codePoint in MIN_SUPPLEMENTARY_CODE_POINT..MAX_CODE_POINT + + internal fun toCodePoint(highSurrogate: Char, lowSurrogate: Char): Int = + (highSurrogate.code shl 10) + lowSurrogate.code + SURROGATE_DECODE_OFFSET + + /** Basic Multilingual Plane (BMP) */ + internal fun isBmpCodePoint(codePoint: Int): Boolean = codePoint ushr 16 == 0 + + internal fun highSurrogateOf(codePoint: Int): Char = + ((codePoint ushr 10) + HIGH_SURROGATE_ENCODE_OFFSET.code).toChar() + + internal fun lowSurrogateOf(codePoint: Int): Char = + ((codePoint and 0x3FF) + MIN_LOW_SURROGATE.code).toChar() + +// private const val MIN_CODE_POINT: Int = 0x000000 + private const val MAX_CODE_POINT: Int = 0x10FFFF + + private const val MIN_SUPPLEMENTARY_CODE_POINT: Int = 0x10000 + + private const val SURROGATE_DECODE_OFFSET: Int = + MIN_SUPPLEMENTARY_CODE_POINT - + (MIN_HIGH_SURROGATE.code shl 10) - + MIN_LOW_SURROGATE.code + + private const val HIGH_SURROGATE_ENCODE_OFFSET: Char = MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT ushr 10) +} diff --git a/urlencoder-lib/src/jvmMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtil.kt b/urlencoder-lib/src/commonMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtil.kt similarity index 62% rename from urlencoder-lib/src/jvmMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtil.kt rename to urlencoder-lib/src/commonMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtil.kt index 7994cb1..0f45bc6 100644 --- a/urlencoder-lib/src/jvmMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtil.kt +++ b/urlencoder-lib/src/commonMain/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtil.kt @@ -1,12 +1,11 @@ /* - * Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com) - * Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net) + * Copyright 2001-2023 the original author or authors. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,8 +16,8 @@ package net.thauvin.erik.urlencoder -import java.nio.charset.StandardCharsets -import java.util.BitSet +import kotlin.jvm.JvmOverloads +import kotlin.jvm.JvmStatic /** * Most defensive approach to URL encoding and decoding. @@ -39,20 +38,27 @@ import java.util.BitSet object UrlEncoderUtil { private val hexDigits = "0123456789ABCDEF".toCharArray() - // see https://www.rfc-editor.org/rfc/rfc3986#page-13 - // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set - private val unreservedChars = BitSet('z'.code + 1).apply { - set('-'.code) - set('.'.code) - for (c in '0'.code..'9'.code) { - set(c) + /** + * A [BooleanArray] with entries for the [character codes][Char.code] of + * + * * `0-9`, + * * `A-Z`, + * * `a-z`, and the marks `-`, `.`, `_` + * + * set to `true`. + */ + private val unreservedChars = BooleanArray('z'.code + 1).apply { + set('-'.code, true) + set('.'.code, true) + set('_'.code, true) + for (c in '0'..'9') { + set(c.code, true) } - for (c in 'A'.code..'Z'.code) { - set(c) + for (c in 'A'..'Z') { + set(c.code, true) } - set('_'.code) - for (c in 'a'.code..'z'.code) { - set(c) + for (c in 'a'..'z') { + set(c.code, true) } } @@ -84,14 +90,13 @@ object UrlEncoderUtil { } val length = source.length - val out: StringBuilder by lazy { StringBuilder(length) } - var ch: Char + val out = StringBuilder(length) var bytesBuffer: ByteArray? 
= null var bytesPos = 0 var i = 0 var started = false while (i < length) { - ch = source[i] + val ch = source[i] if (ch == '%') { if (!started) { out.append(source, 0, i) @@ -103,7 +108,7 @@ object UrlEncoderUtil { bytesBuffer = ByteArray((length - i) / 3) } i++ - require(length >= i + 2) { "Illegal escape sequence" } + require(length >= i + 2) { "Incomplete trailing escape ($ch) pattern" } try { val v = source.substring(i, i + 2).toInt(16) require(v in 0..0xFF) { "Illegal escape value" } @@ -114,7 +119,7 @@ object UrlEncoderUtil { } } else { if (bytesBuffer != null) { - out.append(String(bytesBuffer, 0, bytesPos, StandardCharsets.UTF_8)) + out.append(bytesBuffer.decodeToString(0, bytesPos)) started = true bytesBuffer = null bytesPos = 0 @@ -133,15 +138,15 @@ object UrlEncoderUtil { } if (bytesBuffer != null) { - out.append(String(bytesBuffer, 0, bytesPos, StandardCharsets.UTF_8)) + out.append(bytesBuffer.decodeToString(0, bytesPos)) } return if (!started) source else out.toString() } /** - * Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8 - * encoding. + * Transforms a provided [String] object into a new string, containing only valid URL + * characters in the UTF-8 encoding. * * - Letters, numbers, unreserved (`_-!.'()*`) and allowed characters are left intact. */ @@ -152,11 +157,10 @@ object UrlEncoderUtil { return source } var out: StringBuilder? 
= null - var ch: Char var i = 0 while (i < source.length) { - ch = source[i] - if (ch.isUnreserved() || allow.indexOf(ch) != -1) { + val ch = source[i] + if (ch.isUnreserved() || ch in allow) { out?.append(ch) i++ } else { @@ -174,16 +178,18 @@ object UrlEncoderUtil { } i++ } + Character.isBmpCodePoint(cp) -> { - for (b in ch.toString().toByteArray(StandardCharsets.UTF_8)) { + for (b in ch.toString().encodeToByteArray()) { out.appendEncodedByte(b.toInt()) } i++ } + Character.isSupplementaryCodePoint(cp) -> { - val high = Character.highSurrogate(cp) - val low = Character.lowSurrogate(cp) - for (b in charArrayOf(high, low).concatToString().toByteArray(StandardCharsets.UTF_8)) { + val high = Character.highSurrogateOf(cp) + val low = Character.lowSurrogateOf(cp) + for (b in charArrayOf(high, low).concatToString().encodeToByteArray()) { out.appendEncodedByte(b.toInt()) } i += 2 @@ -194,4 +200,48 @@ object UrlEncoderUtil { return out?.toString() ?: source } + + /** + * Returns the Unicode code point at the specified index. + * + * The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character + * sequence. + * + * If the code point at the specified index is part of the Basic Multilingual Plane (BMP), its value can be represented + * using a single `Char` and this method will behave exactly like [CharSequence.get]. + * Code points outside the BMP are encoded using a surrogate pair – a `Char` containing a value in the high surrogate + * range followed by a `Char` containing a value in the low surrogate range. Together these two `Char`s encode a single + * code point in one of the supplementary planes. This method will do the necessary decoding and return the value of + * that single code point. + * + * In situations where surrogate characters are encountered that don't form a valid surrogate pair starting at `index`, + * this method will return the surrogate code point itself, behaving like [CharSequence.get]. 
+ * + * If the `index` is out of bounds of this character sequence, this method throws an [IndexOutOfBoundsException]. + * + * ```kotlin + * // Text containing code points outside the BMP (encoded as surrogate pairs) + * val text = "\uD83E\uDD95\uD83E\uDD96" + * + * var index = 0 + * while (index < text.length) { + * val codePoint = text.codePointAt(index) + * // (Do something with codePoint...) + * index += CodePoints.charCount(codePoint) + * } + * ``` + */ + private fun CharSequence.codePointAt(index: Int): Int { + if (index !in indices) throw IndexOutOfBoundsException("index $index was not in range $indices") + + val firstChar = this[index] + if (firstChar.isHighSurrogate()) { + val nextChar = getOrNull(index + 1) + if (nextChar?.isLowSurrogate() == true) { + return Character.toCodePoint(firstChar, nextChar) + } + } + + return firstChar.code + } } diff --git a/urlencoder-lib/src/jvmTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtilTest.kt b/urlencoder-lib/src/commonTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtilTest.kt similarity index 66% rename from urlencoder-lib/src/jvmTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtilTest.kt rename to urlencoder-lib/src/commonTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtilTest.kt index 7591549..f6b67de 100644 --- a/urlencoder-lib/src/jvmTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtilTest.kt +++ b/urlencoder-lib/src/commonTest/kotlin/net/thauvin/erik/urlencoder/UrlEncoderUtilTest.kt @@ -1,12 +1,11 @@ /* - * Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com) - * Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net) + * Copyright 2001-2023 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -27,32 +26,26 @@ class UrlEncoderUtilTest { private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_." companion object { - @JvmStatic - var invalid = arrayOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1") + val invalid = listOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1") - @JvmStatic - var validMap = arrayOf( - Pair("a test &", "a%20test%20%26"), - Pair( - "!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=", - "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D" - ), - Pair("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"), - Pair( - "\uD808\uDC00\uD809\uDD00\uD808\uDF00\uD808\uDD00", "%F0%92%80%80%F0%92%94%80%F0%92%8C%80%F0%92%84%80" - ) + val validMap = listOf( + "a test &" to "a%20test%20%26", + "!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=" to + "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D", + "%#okékÉȢ smile!😁" to "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81", + "\uD808\uDC00\uD809\uDD00\uD808\uDF00\uD808\uDD00" to "%F0%92%80%80%F0%92%94%80%F0%92%8C%80%F0%92%84%80", ) } @Test - fun `Decode URL`() { + fun decodeURL() { for (m in validMap) { assertEquals(m.first, decode(m.second)) } } @Test - fun `Decode with Exception`() { + fun decodeWithException() { for (source in invalid) { assertFailsWith( message = "decode($source)", @@ -62,14 +55,14 @@ class UrlEncoderUtilTest { } @Test - fun `Decode when None needed`() { + fun decodeWhenNoneNeeded() { assertSame(same, decode(same)) assertEquals("decode('')", decode(""), "") assertEquals("decode(' ')", decode(" "), " ") } @Test - fun `Decode with Plus to Space`() { + 
fun decodeWithPlusToSpace() { assertEquals("foo bar", decode("foo+bar", true)) assertEquals("foo bar foo", decode("foo+bar++foo", true)) assertEquals("foo bar foo", decode("foo+%20bar%20+foo", true)) @@ -78,34 +71,34 @@ class UrlEncoderUtilTest { } @Test - fun `Encode URL`() { + fun encodeURL() { for (m in validMap) { assertEquals(m.second, encode(m.first)) } } @Test - fun `Encode Empty or Blank`() { + fun encodeEmptyOrBlank() { assertTrue(encode("", allow = "").isEmpty(), "encode('','')") assertEquals("encode('')", encode(""), "") assertEquals("encode(' ')", encode(" "), "%20") } @Test - fun `Encode when None needed`() { + fun encodeWhenNoneNeeded() { assertSame(encode(same), same) assertSame("with empty allow", encode(same, allow = ""), same) } @Test - fun `Encode with Allow`() { - assertEquals("encode(x, =?)","?test=a%20test", encode("?test=a test", allow = "=?")) + fun encodeWithAllow() { + assertEquals("encode(x, =?)", "?test=a%20test", encode("?test=a test", allow = "=?")) assertEquals("encode(aaa, a)", "aaa", encode("aaa", "a")) - assertEquals("encode(' ')", " ", encode(" ", " ") ) + assertEquals("encode(' ')", " ", encode(" ", " ")) } @Test - fun `Encode with Space to Plus`() { + fun encodeWithSpaceToPlus() { assertEquals("foo+bar", encode("foo bar", spaceToPlus = true)) assertEquals("foo+bar++foo", encode("foo bar foo", spaceToPlus = true)) assertEquals("foo bar", encode("foo bar", " ", true))