Improved normalizing with support for common separators
This commit is contained in:
parent
c04fa16307
commit
957bb694c6
2 changed files with 21 additions and 16 deletions
|
@ -334,21 +334,20 @@ public final class RenderUtils {
|
|||
return src;
|
||||
}
|
||||
|
||||
var normalized = Normalizer.normalize(src.trim(), Normalizer.Form.NFD);
|
||||
var sb = new StringBuilder(normalized.length());
|
||||
boolean space = false;
|
||||
for (var c : normalized.toCharArray()) {
|
||||
if (c <= '\u007F') { // ascii only
|
||||
if (!space && c == ' ') {
|
||||
space = true;
|
||||
var normalized = Normalizer.normalize(src.trim(), Normalizer.Form.NFD).toCharArray();
|
||||
|
||||
var sb = new StringBuilder(normalized.length);
|
||||
for (var i = 0; i < normalized.length; i++) {
|
||||
var c = normalized[i];
|
||||
if (c <= '\u007F') { // ASCII only
|
||||
if (" &()-_=[{]}\\|;:,<.>/".indexOf(c) != -1) { // common separators
|
||||
if (!sb.isEmpty() && i != normalized.length - 1 && sb.charAt(sb.length() - 1) != '-') {
|
||||
sb.append('-');
|
||||
} else {
|
||||
space = false;
|
||||
if (c >= '0' && c <= '9' || c >= 'a' && c <= 'z') {
|
||||
sb.append(c);
|
||||
} else if (c >= 'A' && c <= 'Z') {
|
||||
sb.append((char) (c + 32)); // lowercase
|
||||
}
|
||||
} else if (c >= '0' && c <= '9' || c >= 'a' && c <= 'z') { // letters & digits
|
||||
sb.append(c);
|
||||
} else if (c >= 'A' && c <= 'Z') { // uppercase letters
|
||||
sb.append((char) (c + 32)); // make lowercase
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -96,8 +96,14 @@ class TestRenderUtils {
|
|||
|
||||
@Test
|
||||
void testNormalize() {
|
||||
assertThat(RenderUtils.normalize("")).isEmpty();
|
||||
assertThat(RenderUtils.normalize(SAMPLE_GERMAN)).isEqualTo("mochten-sie-ein-paar-apfel");
|
||||
assertThat(RenderUtils.normalize("")).as("empty").isEmpty();
|
||||
assertThat(RenderUtils.normalize(" &()-_=[{]}\\|;:,<.>/")).as("blank").isEmpty();
|
||||
assertThat(RenderUtils.normalize(SAMPLE_GERMAN)).as("greman").isEqualTo("mochten-sie-ein-paar-apfel");
|
||||
assertThat(RenderUtils.normalize("foo bar, <foo-bar>,foo:bar,foo;(bar), {foo} & bar=foo.bar[foo|bar]"))
|
||||
.as("foo-bar")
|
||||
.isEqualTo("foo-bar-foo-bar-foo-bar-foo-bar-foo-bar-foo-bar-foo-bar");
|
||||
assertThat(RenderUtils.normalize("News for January 6, 2023 (Paris)")).as("docs example")
|
||||
.isEqualTo("news-for-january-6-2023-paris");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue