Initial commit.

This commit is contained in:
Erik C. Thauvin 2022-12-30 17:45:42 -08:00
commit a3645937ca
23 changed files with 1199 additions and 0 deletions

View file

@ -0,0 +1,168 @@
/*
* Copyright 2001-2022 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022 Erik C. Thauvin (erik@thauvin.net)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.thauvin.erik.urlencoder
import java.nio.charset.StandardCharsets
import java.util.BitSet
/**
* URL parameters encoding and decoding.
*
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
* @author Erik C. Thauvin (erik@thauvin.net)
*/
object UrlEncoder {
    /** Upper-case hexadecimal digits, indexed by nibble value (0-15). */
    private val hexDigits = "0123456789ABCDEF".toCharArray()

    /**
     * Lookup table of the RFC 3986 "unreserved" characters: `ALPHA / DIGIT / "-" / "." / "_" / "~"`.
     *
     * See https://www.rfc-editor.org/rfc/rfc3986#page-13
     */
    private val unreservedChars = BitSet('~'.code + 1).apply {
        set('-'.code)
        set('.'.code)
        set('_'.code)
        set('~'.code)
        // BitSet.set(from, to) treats the upper bound as exclusive, hence the "+ 1".
        set('0'.code, '9'.code + 1)
        set('A'.code, 'Z'.code + 1)
        set('a'.code, 'z'.code + 1)
    }

    /** Returns `true` when the character is an RFC 3986 unreserved character and never needs escaping. */
    private fun Char.isUnreserved(): Boolean = this <= '~' && unreservedChars.get(code)

    /**
     * Returns the numeric value (0-15) of an ASCII hexadecimal digit, or `-1` when the character is not one.
     *
     * Only `0-9`, `A-F` and `a-f` are accepted; signs and non-ASCII digits are rejected.
     */
    private fun Char.hexValue(): Int = when (this) {
        in '0'..'9' -> this - '0'
        in 'A'..'F' -> this - 'A' + 10
        in 'a'..'f' -> this - 'a' + 10
        else -> -1
    }

    /**
     * Appends `%XX` for the low 8 bits of [value].
     *
     * Masking each nibble makes this safe for negative values produced by `Byte.toInt()`.
     */
    private fun StringBuilder.appendEncodedByte(value: Int) {
        append('%')
        append(hexDigits[(value shr 4) and 0x0F])
        append(hexDigits[value and 0x0F])
    }

    /**
     * Transforms a provided [String] into a new string, replacing every `%XX` escape sequence with the
     * character(s) it represents in the UTF-8 encoding.
     *
     * Consecutive escape sequences are decoded together, so multi-byte UTF-8 characters are restored
     * correctly. All other characters (including `+`) are copied unchanged.
     *
     * @param source the string to decode
     * @return the decoded string, or [source] itself when it contains no escape sequence
     * @throws IllegalArgumentException if an escape sequence is truncated or contains characters that are
     * not hexadecimal digits
     */
    @JvmStatic
    fun decode(source: String): String {
        val first = source.indexOf('%')
        if (first < 0) {
            // Nothing to decode (this also covers the empty string): hand back the same instance.
            return source
        }

        val length = source.length
        val out = StringBuilder(length).append(source, 0, first)
        var i = first
        while (i < length) {
            val ch = source[i]
            if (ch != '%') {
                out.append(ch)
                i++
                continue
            }

            // Decode a whole run of consecutive %XX sequences at once. The remaining characters divided
            // by the length of the %XX format is the maximum number of bytes that can be extracted.
            val bytes = ByteArray((length - i) / 3)
            var count = 0
            while (i < length && source[i] == '%') {
                require(length - i >= 3) { "Illegal escape sequence" }
                val high = source[i + 1].hexValue()
                val low = source[i + 2].hexValue()
                require(high >= 0 && low >= 0) {
                    "Illegal characters in escape sequence: ${source.substring(i, i + 3)}"
                }
                bytes[count++] = ((high shl 4) or low).toByte()
                i += 3
            }
            out.append(String(bytes, 0, count, StandardCharsets.UTF_8))
        }
        return out.toString()
    }

    /**
     * Transforms a provided [String] into a new string, containing only valid URL characters in the UTF-8
     * encoding.
     *
     * Letters, digits and the unreserved characters `-`, `.`, `_` and `~` are left intact, as are the
     * characters listed in [allow]. Every other character is converted to its UTF-8 bytes, each written
     * as `%XX` with upper-case hexadecimal digits.
     *
     * @param source the string to encode
     * @param allow additional characters that must be left unencoded
     * @return the encoded string, or [source] itself when no character needs encoding
     */
    @JvmStatic
    fun encode(source: String, vararg allow: Char): String {
        val first = source.indexOfFirst { !it.isUnreserved() && it !in allow }
        if (first < 0) {
            // Nothing to encode (this also covers the empty string): hand back the same instance.
            return source
        }

        val out = StringBuilder(source.length).append(source, 0, first)
        var i = first
        while (i < source.length) {
            val ch = source[i]
            if (ch.isUnreserved() || ch in allow) {
                out.append(ch)
                i++
            } else {
                // Work on whole code points so that surrogate pairs are encoded as one UTF-8 sequence.
                val cp = source.codePointAt(i)
                val charCount = Character.charCount(cp)
                if (cp < 0x80) {
                    out.appendEncodedByte(cp)
                } else {
                    for (b in source.substring(i, i + charCount).toByteArray(StandardCharsets.UTF_8)) {
                        out.appendEncodedByte(b.toInt())
                    }
                }
                i += charCount
            }
        }
        return out.toString()
    }
}

View file

@ -0,0 +1,62 @@
/*
* Copyright 2001-2022 Geert Bevin (gbevin[remove] at uwyn dot com)
* Copyright 2022 Erik C. Thauvin (erik@thauvin.net)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.thauvin.erik.urlencoder
import net.thauvin.erik.urlencoder.UrlEncoder.decode
import net.thauvin.erik.urlencoder.UrlEncoder.encode
import org.junit.Test
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith
import kotlin.test.assertSame
/**
 * Unit tests for [UrlEncoder.encode] and [UrlEncoder.decode].
 */
class UrlEncoderTest {
    /** Inputs with truncated or non-hexadecimal escape sequences; decoding each one must fail. */
    private val invalid = arrayOf("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1")

    /** Every unreserved character; encoding and decoding must return this very instance. */
    private val same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~"

    /** Plain text mapped to its expected percent-encoded form. */
    private val validMap = mapOf(
        "a test &" to "a%20test%20%26",
        "!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~=" to
            "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~%3D",
        "%#okékÉȢ smile!😁" to "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81",
    )

    @Test
    fun testDecode() {
        assertEquals("", decode(""))
        // Nothing to decode: the original instance is expected back, not a copy.
        assertSame(same, decode(same))
        for ((plain, encoded) in validMap) {
            assertEquals(plain, decode(encoded))
        }
        for (source in invalid) {
            assertFailsWith<IllegalArgumentException>("decode($source) should have failed") {
                decode(source)
            }
        }
    }

    @Test
    fun testEncode() {
        assertEquals("", encode(""))
        // Nothing to encode: the original instance is expected back, not a copy.
        assertSame(same, encode(same))
        for ((plain, encoded) in validMap) {
            assertEquals(encoded, encode(plain))
        }
        // Characters passed in the allow list must be left unencoded.
        assertEquals("?test=a%20test", encode("?test=a test", '=', '?'))
        assertEquals("aaa", encode("aaa", 'a'))
    }
}