Convert urlencoder lib to Kotlin Multiplatform (#10)

* convert UrlEncoderUtil to be multiplatform compatible

* convert lib tests to KMP

* convert UrlEncoderTest to commonTest (but since there's only a JVM target there are no changes), and also replace mutable test data with read-only types.

* Update copyright

---------

Co-authored-by: Erik C. Thauvin <erik@thauvin.net>
This commit is contained in:
Adam 2023-09-05 22:55:29 +02:00 committed by GitHub
parent ae060f5bd2
commit 8fcd629bce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 196 additions and 206 deletions

View file

@ -1,6 +1,6 @@
<component name="CopyrightManager">
<copyright>
<option name="notice" value="Copyright 2001-&amp;#36;today.year Geert Bevin (gbevin[remove] at uwyn dot com)&#10;Copyright 2022-&amp;#36;today.year Erik C. Thauvin (erik@thauvin.net)&#10;&#10;Licensed under the Apache License, Version 2.0 (the &quot;License&quot;);&#10;you may not use this file except in compliance with the License.&#10;You may obtain a copy of the License at&#10;&#10; http://www.apache.org/licenses/LICENSE-2.0&#10;&#10;Unless required by applicable law or agreed to in writing, software&#10;distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10;WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10;See the License for the specific language governing permissions and&#10;limitations under the License." />
<option name="notice" value="Copyright 2001-&amp;#36;today.year the original author or authors.&#10; &#10;Licensed under the Apache License, Version 2.0 (the &quot;License&quot;);&#10;you may not use this file except in compliance with the License.&#10;You may obtain a copy of the License at&#10;&#10; https://www.apache.org/licenses/LICENSE-2.0&#10;&#10;Unless required by applicable law or agreed to in writing, software&#10;distributed under the License is distributed on an &quot;AS IS&quot; BASIS,&#10;WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.&#10;See the License for the specific language governing permissions and&#10;limitations under the License." />
<option name="myName" value="Apache License" />
</copyright>
</component>

View file

@ -62,7 +62,7 @@ repositories {
}
dependencies {
implementation("net.thauvin.erik:urlencoder-lib:1.3.0")
implementation("net.thauvin.erik:urlencoder-lib-jvm:1.4.0")
}
```
@ -73,7 +73,7 @@ to the artifact URL.
<dependency>
<groupId>net.thauvin.erik</groupId>
<artifactId>urlencoder-lib-jvm</artifactId>
<version>1.3.0</version>
<version>1.4.0</version>
</dependency>
```

View file

@ -1,20 +1,3 @@
/*
* Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
plugins {
buildsrc.conventions.base
id("org.jetbrains.kotlinx.kover")

View file

@ -1,66 +0,0 @@
# This is a Gradle generated file for dependency locking.
# Manual edits can break the build and are not advised.
# This file is expected to be part of source control.
com.fasterxml.jackson.core:jackson-annotations:2.12.7=runtimeClasspath
com.fasterxml.jackson.core:jackson-core:2.12.7=runtimeClasspath
com.fasterxml.jackson.core:jackson-databind:2.12.7.1=runtimeClasspath
com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.12.7=runtimeClasspath
com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.12.7=runtimeClasspath
com.fasterxml.jackson.module:jackson-module-kotlin:2.12.7=runtimeClasspath
com.fasterxml.jackson:jackson-bom:2.12.7=runtimeClasspath
com.fasterxml.woodstox:woodstox-core:6.2.4=runtimeClasspath
com.github.ben-manes:gradle-versions-plugin:0.47.0=compileClasspath,runtimeClasspath
com.squareup.moshi:moshi-kotlin:1.12.0=runtimeClasspath
com.squareup.moshi:moshi:1.12.0=runtimeClasspath
com.squareup.okhttp3:okhttp:4.11.0=runtimeClasspath
com.squareup.okio:okio-jvm:3.2.0=runtimeClasspath
com.squareup.okio:okio:3.2.0=runtimeClasspath
com.thoughtworks.xstream:xstream:1.4.20=runtimeClasspath
io.github.x-stream:mxparser:1.2.2=runtimeClasspath
io.gitlab.arturbosch.detekt:detekt-gradle-plugin:1.23.1=compileClasspath,runtimeClasspath
jakarta.activation:jakarta.activation-api:1.2.1=runtimeClasspath
jakarta.xml.bind:jakarta.xml.bind-api:2.3.2=runtimeClasspath
org.codehaus.woodstox:stax2-api:4.2.1=runtimeClasspath
org.jetbrains.dokka:dokka-core:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.dokka:dokka-gradle-plugin:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.intellij.deps:trove4j:1.0.20200330=runtimeClasspath
org.jetbrains.kotlin:kotlin-android-extensions:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-build-tools-api:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-compiler-embeddable:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-compiler-runner:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-daemon-client:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-daemon-embeddable:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-gradle-plugin-annotations:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.kotlin:kotlin-gradle-plugin-api:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.kotlin:kotlin-gradle-plugin-idea-proto:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-gradle-plugin-idea:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-gradle-plugin-model:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.kotlin:kotlin-gradle-plugin:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.kotlin:kotlin-gradle-plugins-bom:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.kotlin:kotlin-klib-commonizer-api:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-native-utils:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.kotlin:kotlin-project-model:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.kotlin:kotlin-reflect:1.8.20=runtimeClasspath
org.jetbrains.kotlin:kotlin-reflect:1.9.0=compileClasspath
org.jetbrains.kotlin:kotlin-scripting-common:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-scripting-compiler-embeddable:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-scripting-compiler-impl-embeddable:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-scripting-jvm:1.9.0=runtimeClasspath
org.jetbrains.kotlin:kotlin-stdlib-common:1.8.21=runtimeClasspath
org.jetbrains.kotlin:kotlin-stdlib-common:1.9.0=compileClasspath
org.jetbrains.kotlin:kotlin-stdlib-jdk7:1.8.21=runtimeClasspath
org.jetbrains.kotlin:kotlin-stdlib-jdk7:1.9.0=compileClasspath
org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.8.21=runtimeClasspath
org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.9.0=compileClasspath
org.jetbrains.kotlin:kotlin-stdlib:1.8.21=runtimeClasspath
org.jetbrains.kotlin:kotlin-stdlib:1.9.0=compileClasspath
org.jetbrains.kotlin:kotlin-tooling-core:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.kotlin:kotlin-util-io:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.kotlin:kotlin-util-klib:1.9.0=compileClasspath,runtimeClasspath
org.jetbrains.kotlinx:kotlinx-coroutines-bom:1.6.3=runtimeClasspath
org.jetbrains.kotlinx:kotlinx-coroutines-core-jvm:1.6.3=runtimeClasspath
org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.3=runtimeClasspath
org.jetbrains.kotlinx:kover-gradle-plugin:0.7.3=compileClasspath,runtimeClasspath
org.jetbrains:annotations:13.0=compileClasspath,runtimeClasspath
xmlpull:xmlpull:1.1.3.1=runtimeClasspath
empty=

View file

@ -1,26 +1,9 @@
/*
* Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.jetbrains.dokka.gradle.DokkaTask
plugins {
buildsrc.conventions.lang.`kotlin-multiplatform-jvm`
buildsrc.conventions.lang.`kotlin-multiplatform-js`
buildsrc.conventions.lang.`kotlin-multiplatform-native`
// buildsrc.conventions.lang.`kotlin-multiplatform-js`
// buildsrc.conventions.lang.`kotlin-multiplatform-native`
buildsrc.conventions.publishing
id("application")
id("com.github.ben-manes.versions")

View file

@ -1,12 +1,11 @@
/*
* Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
* Copyright 2001-2023 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@ -14,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.thauvin.erik.urlencoder
import kotlin.system.exitProcess
@ -38,9 +38,9 @@ object UrlEncoder {
internal val usage =
"Usage : java -jar urlencoder-*all.jar [-ed] text" + System.lineSeparator() +
"Encode and decode URL components defensively." + System.lineSeparator() +
" -e encode (default) " + System.lineSeparator() +
" -d decode"
"Encode and decode URL components defensively." + System.lineSeparator() +
" -e encode (default) " + System.lineSeparator() +
" -d decode"
/**
* Encodes and decodes URLs from the command line.

View file

@ -1,12 +1,11 @@
/*
* Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
* Copyright 2001-2023 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@ -25,20 +24,14 @@ import kotlin.test.assertFailsWith
class UrlEncoderTest {
companion object {
@JvmStatic
var invalid = arrayOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1")
val invalid = listOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1")
@JvmStatic
var validMap = arrayOf(
Pair("a test &", "a%20test%20%26"),
Pair(
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D"
),
Pair("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
Pair(
"\uD808\uDC00\uD809\uDD00\uD808\uDF00\uD808\uDD00", "%F0%92%80%80%F0%92%94%80%F0%92%8C%80%F0%92%84%80"
)
val validMap = listOf(
"a test &" to "a%20test%20%26",
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=" to
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D",
"%#okékÉȢ smile!😁" to "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81",
"\uD808\uDC00\uD809\uDD00\uD808\uDF00\uD808\uDD00" to "%F0%92%80%80%F0%92%94%80%F0%92%8C%80%F0%92%84%80",
)
}

View file

@ -1,20 +1,3 @@
/*
* Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.jetbrains.dokka.gradle.DokkaTask
plugins {
@ -31,10 +14,8 @@ val deployDir = project.layout.projectDirectory.dir("deploy")
kotlin {
sourceSets {
jvmTest {
commonTest {
dependencies {
//implementation("com.willowtreeapps.assertk:assertk-jvm:0.25")
//implementation("org.junit.jupiter:junit-jupiter:5.9.1")
implementation(kotlin("test"))
}
}

View file

@ -0,0 +1,73 @@
/*
* Copyright 2001-2023 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.thauvin.erik.urlencoder
import kotlin.Char.Companion.MIN_HIGH_SURROGATE
import kotlin.Char.Companion.MIN_LOW_SURROGATE
/**
 * Kotlin Multiplatform stand-in for the code point helpers of `java.lang.Character`.
 *
 * All functions work on raw UTF-16 values and perform no validation of their arguments.
 *
 * @author <a href="https://github.com/aSemy">aSemy</a>
 */
internal object Character {
    // private const val MIN_CODE_POINT: Int = 0x000000

    /** Largest code point accepted by [isSupplementaryCodePoint] (`U+10FFFF`). */
    private const val MAX_CODE_POINT: Int = 0x10FFFF

    /** First code point outside the Basic Multilingual Plane (`U+10000`). */
    private const val MIN_SUPPLEMENTARY_CODE_POINT: Int = 0x10000

    /** Constant term added by [toCodePoint] after shifting and summing the two surrogates. */
    private const val SURROGATE_DECODE_OFFSET: Int =
        MIN_SUPPLEMENTARY_CODE_POINT -
            (MIN_HIGH_SURROGATE.code shl 10) -
            MIN_LOW_SURROGATE.code

    /** Base value to which [highSurrogateOf] adds the upper bits of the code point. */
    private const val HIGH_SURROGATE_ENCODE_OFFSET: Char = MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT ushr 10)

    /**
     * Tells whether [codePoint] lies in the supplementary range, i.e. `U+10000` to `U+10FFFF`.
     *
     * Code points in `U+0000`..`U+FFFF` belong to the Basic Multilingual Plane (BMP) and fit in a
     * single 16-bit `Char`; supplementary code points do not.
     *
     * See https://www.tutorialspoint.com/java/lang/character_issupplementarycodepoint.htm
     *
     * @return `true` if [codePoint] is between `0x10000` and `0x10FFFF` (both inclusive),
     * `false` otherwise.
     */
    internal fun isSupplementaryCodePoint(codePoint: Int): Boolean =
        codePoint >= MIN_SUPPLEMENTARY_CODE_POINT && codePoint <= MAX_CODE_POINT

    /**
     * Combines [highSurrogate] and [lowSurrogate] into the code point they encode.
     *
     * The arguments are not checked for actually being surrogates.
     */
    internal fun toCodePoint(highSurrogate: Char, lowSurrogate: Char): Int {
        val shiftedHigh = highSurrogate.code shl 10
        return shiftedHigh + lowSurrogate.code + SURROGATE_DECODE_OFFSET
    }

    /** Tells whether [codePoint] is in the Basic Multilingual Plane (BMP), i.e. `0x0000`..`0xFFFF`. */
    internal fun isBmpCodePoint(codePoint: Int): Boolean = codePoint in 0..0xFFFF

    /** Computes the leading (high) surrogate of the UTF-16 pair for [codePoint]. */
    internal fun highSurrogateOf(codePoint: Int): Char =
        HIGH_SURROGATE_ENCODE_OFFSET + (codePoint ushr 10)

    /** Computes the trailing (low) surrogate of the UTF-16 pair for [codePoint]. */
    internal fun lowSurrogateOf(codePoint: Int): Char =
        MIN_LOW_SURROGATE + (codePoint and 0x3FF)
}

View file

@ -1,12 +1,11 @@
/*
* Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
* Copyright 2001-2023 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@ -17,8 +16,8 @@
package net.thauvin.erik.urlencoder
import java.nio.charset.StandardCharsets
import java.util.BitSet
import kotlin.jvm.JvmOverloads
import kotlin.jvm.JvmStatic
/**
* Most defensive approach to URL encoding and decoding.
@ -39,20 +38,27 @@ import java.util.BitSet
object UrlEncoderUtil {
private val hexDigits = "0123456789ABCDEF".toCharArray()
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
// and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
private val unreservedChars = BitSet('z'.code + 1).apply {
set('-'.code)
set('.'.code)
for (c in '0'.code..'9'.code) {
set(c)
/**
* A [BooleanArray] with entries for the [character codes][Char.code] of
*
* * `-`, `.`, `_`,
* * `0-9`,
* * `A-Z`,
* * `a-z`
*
* set to `true`.
*/
private val unreservedChars = BooleanArray('z'.code + 1).apply {
set('-'.code, true)
set('.'.code, true)
set('_'.code, true)
for (c in '0'..'9') {
set(c.code, true)
}
for (c in 'A'.code..'Z'.code) {
set(c)
for (c in 'A'..'Z') {
set(c.code, true)
}
set('_'.code)
for (c in 'a'.code..'z'.code) {
set(c)
for (c in 'a'..'z') {
set(c.code, true)
}
}
@ -84,14 +90,13 @@ object UrlEncoderUtil {
}
val length = source.length
val out: StringBuilder by lazy { StringBuilder(length) }
var ch: Char
val out = StringBuilder(length)
var bytesBuffer: ByteArray? = null
var bytesPos = 0
var i = 0
var started = false
while (i < length) {
ch = source[i]
val ch = source[i]
if (ch == '%') {
if (!started) {
out.append(source, 0, i)
@ -103,7 +108,7 @@ object UrlEncoderUtil {
bytesBuffer = ByteArray((length - i) / 3)
}
i++
require(length >= i + 2) { "Illegal escape sequence" }
require(length >= i + 2) { "Incomplete trailing escape ($ch) pattern" }
try {
val v = source.substring(i, i + 2).toInt(16)
require(v in 0..0xFF) { "Illegal escape value" }
@ -114,7 +119,7 @@ object UrlEncoderUtil {
}
} else {
if (bytesBuffer != null) {
out.append(String(bytesBuffer, 0, bytesPos, StandardCharsets.UTF_8))
out.append(bytesBuffer.decodeToString(0, bytesPos))
started = true
bytesBuffer = null
bytesPos = 0
@ -133,15 +138,15 @@ object UrlEncoderUtil {
}
if (bytesBuffer != null) {
out.append(String(bytesBuffer, 0, bytesPos, StandardCharsets.UTF_8))
out.append(bytesBuffer.decodeToString(0, bytesPos))
}
return if (!started) source else out.toString()
}
/**
* Transforms a provided [String] object into a new string, containing only valid URL characters in the UTF-8
* encoding.
* Transforms a provided [String] object into a new string, containing only valid URL
* characters in the UTF-8 encoding.
*
* - Letters, numbers, unreserved (`-._`) and allowed characters are left intact.
*/
@ -152,11 +157,10 @@ object UrlEncoderUtil {
return source
}
var out: StringBuilder? = null
var ch: Char
var i = 0
while (i < source.length) {
ch = source[i]
if (ch.isUnreserved() || allow.indexOf(ch) != -1) {
val ch = source[i]
if (ch.isUnreserved() || ch in allow) {
out?.append(ch)
i++
} else {
@ -174,16 +178,18 @@ object UrlEncoderUtil {
}
i++
}
Character.isBmpCodePoint(cp) -> {
for (b in ch.toString().toByteArray(StandardCharsets.UTF_8)) {
for (b in ch.toString().encodeToByteArray()) {
out.appendEncodedByte(b.toInt())
}
i++
}
Character.isSupplementaryCodePoint(cp) -> {
val high = Character.highSurrogate(cp)
val low = Character.lowSurrogate(cp)
for (b in charArrayOf(high, low).concatToString().toByteArray(StandardCharsets.UTF_8)) {
val high = Character.highSurrogateOf(cp)
val low = Character.lowSurrogateOf(cp)
for (b in charArrayOf(high, low).concatToString().encodeToByteArray()) {
out.appendEncodedByte(b.toInt())
}
i += 2
@ -194,4 +200,48 @@ object UrlEncoderUtil {
return out?.toString() ?: source
}
/**
 * Returns the Unicode code point that starts at [index].
 *
 * [index] is an ordinary `CharSequence` index, counted in `Char`s from the start of the sequence.
 *
 * - If the `Char` at [index] is a high surrogate and the next `Char` exists and is a low surrogate,
 *   the two are combined through [Character.toCodePoint] and the resulting code point is returned.
 * - In every other case (a non-surrogate `Char`, a lone high surrogate, a low surrogate, or a high
 *   surrogate at the very end) the [code][Char.code] of the `Char` at [index] is returned, which is
 *   the same value [CharSequence.get] would yield.
 *
 * ```kotlin
 * // Text containing code points outside the BMP (encoded as surrogate pairs)
 * val text = "\uD83E\uDD95\uD83E\uDD96"
 *
 * var index = 0
 * while (index < text.length) {
 *     val codePoint = text.codePointAt(index)
 *     // (Do something with codePoint...)
 *     index += if (Character.isSupplementaryCodePoint(codePoint)) 2 else 1
 * }
 * ```
 *
 * @throws IndexOutOfBoundsException if [index] is outside the bounds of this character sequence.
 */
private fun CharSequence.codePointAt(index: Int): Int {
    if (index < 0 || index >= length) {
        throw IndexOutOfBoundsException("index $index was not in range $indices")
    }

    val lead = this[index]
    // Only look ahead when the current Char can actually start a surrogate pair.
    val trail = if (lead.isHighSurrogate()) getOrNull(index + 1) else null

    return if (trail != null && trail.isLowSurrogate()) {
        Character.toCodePoint(lead, trail)
    } else {
        lead.code
    }
}
}

View file

@ -1,12 +1,11 @@
/*
* Copyright 2001-2023 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022-2023 Erik C. Thauvin (erik@thauvin.net)
* Copyright 2001-2023 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@ -27,32 +26,26 @@ class UrlEncoderUtilTest {
private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_."
companion object {
@JvmStatic
var invalid = arrayOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1")
val invalid = listOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1")
@JvmStatic
var validMap = arrayOf(
Pair("a test &", "a%20test%20%26"),
Pair(
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D"
),
Pair("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
Pair(
"\uD808\uDC00\uD809\uDD00\uD808\uDF00\uD808\uDD00", "%F0%92%80%80%F0%92%94%80%F0%92%8C%80%F0%92%84%80"
)
val validMap = listOf(
"a test &" to "a%20test%20%26",
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=" to
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D",
"%#okékÉȢ smile!😁" to "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81",
"\uD808\uDC00\uD809\uDD00\uD808\uDF00\uD808\uDD00" to "%F0%92%80%80%F0%92%94%80%F0%92%8C%80%F0%92%84%80",
)
}
@Test
fun `Decode URL`() {
fun decodeURL() {
for (m in validMap) {
assertEquals(m.first, decode(m.second))
}
}
@Test
fun `Decode with Exception`() {
fun decodeWithException() {
for (source in invalid) {
assertFailsWith<IllegalArgumentException>(
message = "decode($source)",
@ -62,14 +55,14 @@ class UrlEncoderUtilTest {
}
@Test
fun `Decode when None needed`() {
fun decodeWhenNoneNeeded() {
assertSame(same, decode(same))
assertEquals("decode('')", decode(""), "")
assertEquals("decode(' ')", decode(" "), " ")
}
@Test
fun `Decode with Plus to Space`() {
fun decodeWithPlusToSpace() {
assertEquals("foo bar", decode("foo+bar", true))
assertEquals("foo bar foo", decode("foo+bar++foo", true))
assertEquals("foo bar foo", decode("foo+%20bar%20+foo", true))
@ -78,34 +71,34 @@ class UrlEncoderUtilTest {
}
@Test
fun `Encode URL`() {
fun encodeURL() {
for (m in validMap) {
assertEquals(m.second, encode(m.first))
}
}
@Test
fun `Encode Empty or Blank`() {
fun encodeEmptyOrBlank() {
assertTrue(encode("", allow = "").isEmpty(), "encode('','')")
assertEquals("encode('')", encode(""), "")
assertEquals("encode(' ')", encode(" "), "%20")
}
@Test
fun `Encode when None needed`() {
fun encodeWhenNoneNeeded() {
assertSame(encode(same), same)
assertSame("with empty allow", encode(same, allow = ""), same)
}
@Test
fun `Encode with Allow`() {
assertEquals("encode(x, =?)","?test=a%20test", encode("?test=a test", allow = "=?"))
fun encodeWithAllow() {
assertEquals("encode(x, =?)", "?test=a%20test", encode("?test=a test", allow = "=?"))
assertEquals("encode(aaa, a)", "aaa", encode("aaa", "a"))
assertEquals("encode(' ')", " ", encode(" ", " ") )
assertEquals("encode(' ')", " ", encode(" ", " "))
}
@Test
fun `Encode with Space to Plus`() {
fun encodeWithSpaceToPlus() {
assertEquals("foo+bar", encode("foo bar", spaceToPlus = true))
assertEquals("foo+bar++foo", encode("foo bar foo", spaceToPlus = true))
assertEquals("foo bar", encode("foo bar", " ", true))