From 957bb694c6561a61f3126a606d4b52b8c2bb8e87 Mon Sep 17 00:00:00 2001 From: "Erik C. Thauvin" Date: Wed, 31 Jul 2024 17:10:00 -0700 Subject: [PATCH] Improved normalizing with support for common separators --- src/main/java/rife/render/RenderUtils.java | 27 +++++++++---------- .../java/rife/render/TestRenderUtils.java | 10 +++++-- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/main/java/rife/render/RenderUtils.java b/src/main/java/rife/render/RenderUtils.java index 43c92c1..f2f0859 100644 --- a/src/main/java/rife/render/RenderUtils.java +++ b/src/main/java/rife/render/RenderUtils.java @@ -334,21 +334,20 @@ public final class RenderUtils { return src; } - var normalized = Normalizer.normalize(src.trim(), Normalizer.Form.NFD); - var sb = new StringBuilder(normalized.length()); - boolean space = false; - for (var c : normalized.toCharArray()) { - if (c <= '\u007F') { // ascii only - if (!space && c == ' ') { - space = true; - sb.append('-'); - } else { - space = false; - if (c >= '0' && c <= '9' || c >= 'a' && c <= 'z') { - sb.append(c); - } else if (c >= 'A' && c <= 'Z') { - sb.append((char) (c + 32)); // lowercase + var normalized = Normalizer.normalize(src.trim(), Normalizer.Form.NFD).toCharArray(); + + var sb = new StringBuilder(normalized.length); + for (var i = 0; i < normalized.length; i++) { + var c = normalized[i]; + if (c <= '\u007F') { // ASCII only + if (" &()-_=[{]}\\|;:,<.>/".indexOf(c) != -1) { // common separators + if (!sb.isEmpty() && i != normalized.length - 1 && sb.charAt(sb.length() - 1) != '-') { + sb.append('-'); } + } else if (c >= '0' && c <= '9' || c >= 'a' && c <= 'z') { // letters & digits + sb.append(c); + } else if (c >= 'A' && c <= 'Z') { // uppercase letters + sb.append((char) (c + 32)); // make lowercase } } } diff --git a/src/test/java/rife/render/TestRenderUtils.java b/src/test/java/rife/render/TestRenderUtils.java index 6ab1a22..fcee416 100644 --- a/src/test/java/rife/render/TestRenderUtils.java +++ 
b/src/test/java/rife/render/TestRenderUtils.java @@ -96,8 +96,14 @@ class TestRenderUtils { @Test void testNormalize() { - assertThat(RenderUtils.normalize("")).isEmpty(); - assertThat(RenderUtils.normalize(SAMPLE_GERMAN)).isEqualTo("mochten-sie-ein-paar-apfel"); + assertThat(RenderUtils.normalize("")).as("empty").isEmpty(); + assertThat(RenderUtils.normalize(" &()-_=[{]}\\|;:,<.>/")).as("blank").isEmpty(); + assertThat(RenderUtils.normalize(SAMPLE_GERMAN)).as("german").isEqualTo("mochten-sie-ein-paar-apfel"); + assertThat(RenderUtils.normalize("foo bar, ,foo:bar,foo;(bar), {foo} & bar=foo.bar[foo|bar]")) + .as("foo-bar") + .isEqualTo("foo-bar-foo-bar-foo-bar-foo-bar-foo-bar-foo-bar-foo-bar"); + assertThat(RenderUtils.normalize("News for January 6, 2023 (Paris)")).as("docs example") + .isEqualTo("news-for-january-6-2023-paris"); } @Test