First commit

This commit is contained in:
Geert Bevin 2022-12-31 01:27:51 -05:00
commit 2d7f91cbd7
12 changed files with 913 additions and 0 deletions

76
lib/build.gradle.kts Normal file
View file

@ -0,0 +1,76 @@
// Build script for the "urlencoder" library: compiles it as a Java library,
// runs the JUnit 5 tests, and publishes a signed Maven publication
// (with sources and Javadoc jars) to the Sonatype OSSRH repositories.

plugins {
    `java-library`
    `maven-publish`
    signing
}

base {
    archivesName.set("urlencoder")
    // The version is a String on purpose: a floating point literal would
    // silently turn a future "0.10" into "0.1".
    version = "0.9"
}

java {
    withJavadocJar()
    withSourcesJar()
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(17))
    }
}

repositories {
    mavenCentral()
}

dependencies {
    testImplementation("org.junit.jupiter:junit-jupiter:5.9.0")
}

tasks.named<Test>("test") {
    useJUnitPlatform()
}

publishing {
    publications {
        create<MavenPublication>("mavenJava") {
            artifactId = "urlencoder"
            from(components["java"])
            pom {
                name.set("URL Encoder")
                description.set("A simple library to encode/decode URL parameters.")
                url.set("https://github.com/gbevin/urlencoder")
                licenses {
                    license {
                        name.set("The Apache License, Version 2.0")
                        url.set("http://www.apache.org/licenses/LICENSE-2.0.txt")
                    }
                }
                developers {
                    developer {
                        id.set("gbevin")
                        name.set("Geert Bevin")
                        email.set("gbevin@uwyn.com")
                    }
                }
                scm {
                    connection.set("scm:git:https://github.com/gbevin/urlencoder.git")
                    developerConnection.set("scm:git:git@github.com:gbevin/urlencoder.git")
                    url.set("https://github.com/gbevin/urlencoder")
                }
            }
        }
    }

    // Target repositories belong to the publishing extension itself, not to
    // an individual publication, so they are declared next to `publications`.
    repositories {
        maven {
            credentials {
                // findProperty returns null for an undefined property, which
                // avoids sending the literal string "null" as a credential.
                username = project.findProperty("ossrhUsername")?.toString()
                password = project.findProperty("ossrhPassword")?.toString()
            }
            val releasesRepoUrl = uri("https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/")
            val snapshotsRepoUrl = uri("https://s01.oss.sonatype.org/content/repositories/snapshots/")
            // Snapshot versions go to the snapshots repository, everything
            // else is deployed to release staging.
            url = if (project.version.toString().endsWith("SNAPSHOT")) snapshotsRepoUrl else releasesRepoUrl
        }
    }
}

signing {
    sign(publishing.publications["mavenJava"])
}

View file

@ -0,0 +1,187 @@
/*
* Copyright 2001-2022 Geert Bevin (gbevin[remove] at uwyn dot com)
* Licensed under the Apache License, Version 2.0 (the "License")
*/
package com.uwyn.urlencoder;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
/**
 * URL encoding and decoding.
 * <p>
 * Rules determined by <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a>.
 *
 * @author Geert Bevin (gbevin[remove] at uwyn dot com)
 * @since 1.0
 */
public class UrlEncoder {
    /**
     * Set of the characters that RFC 3986 calls "unreserved"
     * (ASCII letters, ASCII digits, {@code -}, {@code .}, {@code _} and
     * {@code ~}); these are the only characters that {@link #encode(String)}
     * leaves untouched.
     */
    static final BitSet UNRESERVED_URI_CHARS;

    static {
        // see https://www.rfc-editor.org/rfc/rfc3986#page-13
        var unreserved = new BitSet('~' + 1);
        unreserved.set('-');
        unreserved.set('.');
        for (int c = '0'; c <= '9'; ++c) unreserved.set(c);
        for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c);
        unreserved.set('_');
        for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c);
        unreserved.set('~');
        UNRESERVED_URI_CHARS = unreserved;
    }

    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Transforms a provided <code>String</code> object into a new string,
     * containing only valid URL characters in the UTF-8 encoding.
     * <p>
     * Every character that is not an RFC 3986 unreserved character is
     * replaced by the <code>%XX</code> escapes of its UTF-8 bytes. When
     * nothing needs to be escaped, the very same <code>String</code> instance
     * is returned.
     *
     * @param source The string that has to be transformed into a valid URL
     *               string.
     * @return The encoded <code>String</code> object, or <code>null</code>
     * when <code>source</code> is <code>null</code>.
     * @see #decode(String)
     * @since 1.0
     */
    public static String encode(String source) {
        if (source == null) {
            return null;
        }

        // Allocated lazily so that strings without any reserved character
        // are returned as-is, without copying.
        StringBuilder out = null;
        for (var i = 0; i < source.length(); ) {
            var ch = source.charAt(i);
            if (isUnreservedUriChar(ch)) {
                if (out != null) {
                    out.append(ch);
                }
                i += 1;
                continue;
            }

            if (out == null) {
                out = new StringBuilder(source.length());
                out.append(source, 0, i);
            }

            // A valid surrogate pair spans two chars and has to be converted
            // to UTF-8 as one unit; everything else is a single char.
            var charCount = Character.charCount(source.codePointAt(i));
            for (var b : source.substring(i, i + charCount).getBytes(StandardCharsets.UTF_8)) {
                appendUrlEncodedByte(out, b);
            }
            i += charCount;
        }

        return out == null ? source : out.toString();
    }

    /**
     * Transforms a provided <code>String</code> URL into a new string,
     * containing decoded URL characters in the UTF-8 encoding.
     * <p>
     * Consecutive <code>%XX</code> escapes are collected into a byte sequence
     * that is decoded as UTF-8 in one go. When the source contains no escape
     * at all, the very same <code>String</code> instance is returned.
     *
     * @param source The string URL that has to be decoded
     * @return The decoded <code>String</code> object, or <code>null</code>
     * when <code>source</code> is <code>null</code>.
     * @throws IllegalArgumentException when a <code>%</code> is not followed
     *                                  by exactly two ASCII hexadecimal digits
     * @see #encode(String)
     * @since 1.0
     */
    public static String decode(String source) {
        if (source == null) {
            return null;
        }

        var length = source.length();
        StringBuilder out = null;
        byte[] bytesBuffer = null;
        var bytesPos = 0;
        for (var i = 0; i < length; ) {
            var ch = source.charAt(i);
            if (ch == '%') {
                if (out == null) {
                    out = new StringBuilder(length);
                    out.append(source, 0, i);
                }
                if (bytesBuffer == null) {
                    // the remaining characters divided by the length
                    // of the encoding format %xx, is the maximum number of
                    // bytes that can be extracted
                    bytesBuffer = new byte[(length - i) / 3];
                    bytesPos = 0;
                }
                if (length - i < 3) {
                    throw new IllegalArgumentException("Illegal escape sequence");
                }

                // Only ASCII hex digits are accepted: a generic integer
                // parser would also let signs ("%+1") and non-ASCII digits
                // through, which are not valid percent-encoding.
                var high = hexValue(source.charAt(i + 1));
                var low = hexValue(source.charAt(i + 2));
                if (high < 0 || low < 0) {
                    throw new IllegalArgumentException(
                            "Illegal characters in escape sequence: " + source.substring(i, i + 3));
                }
                bytesBuffer[bytesPos++] = (byte) ((high << 4) | low);
                i += 3;
            } else {
                if (bytesBuffer != null) {
                    // a run of escapes just ended, flush it as UTF-8 text
                    out.append(new String(bytesBuffer, 0, bytesPos, StandardCharsets.UTF_8));
                    bytesBuffer = null;
                    bytesPos = 0;
                }
                if (out != null) {
                    out.append(ch);
                }
                i += 1;
            }
        }

        if (out == null) {
            return source;
        }
        if (bytesBuffer != null) {
            out.append(new String(bytesBuffer, 0, bytesPos, StandardCharsets.UTF_8));
        }
        return out.toString();
    }

    // see https://www.rfc-editor.org/rfc/rfc3986#page-13
    private static boolean isUnreservedUriChar(char ch) {
        if (ch > '~') return false;
        return UNRESERVED_URI_CHARS.get(ch);
    }

    // Appends the upper-case hex digit for the low 4 bits of the value.
    private static void appendUrlEncodedDigit(StringBuilder out, int digit) {
        out.append(HEX_DIGITS[digit & 0x0F]);
    }

    // Appends "%XX" for the low 8 bits of the value; masking in the digit
    // helper makes this safe for negative (sign-extended) byte values.
    private static void appendUrlEncodedByte(StringBuilder out, int ch) {
        out.append("%");
        appendUrlEncodedDigit(out, ch >> 4);
        appendUrlEncodedDigit(out, ch);
    }

    // Returns the value of an ASCII hexadecimal digit, or -1 for any other char.
    private static int hexValue(char ch) {
        if (ch >= '0' && ch <= '9') return ch - '0';
        if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
        if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
        return -1;
    }
}

View file

@ -0,0 +1,53 @@
/*
* Copyright 2001-2022 Geert Bevin (gbevin[remove] at uwyn dot com)
* Licensed under the Apache License, Version 2.0 (the "License")
*/
package com.uwyn.urlencoder;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Unit tests for {@link UrlEncoder}, covering the null pass-through, the
 * untouched unreserved character set, multi-byte UTF-8 characters and
 * malformed escape sequences.
 */
class UrlEncoderTest {
    // All characters that RFC 3986 lists as unreserved; neither encoding nor
    // decoding may alter them.
    private static final String UNRESERVED =
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~";

    @Test
    public void testEncodeURL() {
        assertNull(UrlEncoder.encode(null));
        assertEquals("a%20test%20%26", UrlEncoder.encode("a test &"));
        // nothing to escape: the same instance has to be returned
        assertSame(UNRESERVED, UrlEncoder.encode(UNRESERVED));
        assertEquals("%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~%3D", UrlEncoder.encode("!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~="));
        assertEquals("%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81", UrlEncoder.encode("%#okékÉȢ smile!😁"));
    }

    @Test
    public void testDecodeURL() {
        assertNull(UrlEncoder.decode(null));
        assertEquals("a test &", UrlEncoder.decode("a%20test%20%26"));
        // nothing to unescape: the same instance has to be returned
        assertSame(UNRESERVED, UrlEncoder.decode(UNRESERVED));
        assertEquals("!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~=", UrlEncoder.decode("%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~%3D"));
        assertEquals("%#okékÉȢ smile!😁", UrlEncoder.decode("%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"));

        // truncated or non-hexadecimal escape sequences are rejected
        assertThrows(IllegalArgumentException.class, () -> UrlEncoder.decode("sdkjfh%"));
        assertThrows(IllegalArgumentException.class, () -> UrlEncoder.decode("sdkjfh%6"));
        assertThrows(IllegalArgumentException.class, () -> UrlEncoder.decode("sdkjfh%xx"));
    }
}