// lf/diacritics.go

package main
import (
"strconv"
"strings"
)
// normMap maps a rune carrying a diacritic to the plain letter that replaces
// it. It is filled by appendTransliterate.
//
// The map is allocated at declaration rather than inside init: package-level
// variables are initialized before any init function runs, so
// appendTransliterate can never hit a nil map (writing to one panics).
var normMap = make(map[rune]rune)

// init registers the built-in transliteration tables in normMap.
func init() {
	// (not only) european
	appendTransliterate(
		"ěřůøĉĝĥĵŝŭèùÿėįųāēīūļķņģőűëïąćęłńśźżõșțčďĺľňŕšťýžéíñóúüåäöçîşûğăâđêôơưáàãảạ",
		"eruocghjsueuyeiuaeiulkngoueiacelnszzostcdllnrstyzeinouuaaocisugaadeoouaaaaa",
		true,
	)
	// Vietnamese
	appendTransliterate(
		"áạàảãăắặằẳẵâấậầẩẫéẹèẻẽêếệềểễiíịìỉĩoóọòỏõôốộồổỗơớợờởỡúụùủũưứựừửữyýỵỳỷỹđ",
		"aaaaaaaaaaaaaaaaaeeeeeeeeeeeiiiiiioooooooooooooooooouuuuuuuuuuuyyyyyyd",
		true,
	)
}

// appendTransliterate adds a rune-for-rune transliteration table to normMap.
//
// base and norm are read in parallel: the i-th rune of base is mapped to the
// i-th rune of norm. When uppercase is true, the same pairing is registered a
// second time for strings.ToUpper(base) and strings.ToUpper(norm), so the
// uppercase form of each letter maps to the uppercase replacement.
//
// It panics if base and norm do not contain the same number of runes; the
// check runs before normMap is touched, so a bad table adds no entries.
func appendTransliterate(base, norm string, uppercase bool) {
	baseRunes := []rune(base)
	normRunes := []rune(norm)
	if len(baseRunes) != len(normRunes) {
		// Programmer error: the two table constants must line up rune by rune.
		panic("Base and normalized strings have different length: base=" + strconv.Itoa(len(baseRunes)) + ", norm=" + strconv.Itoa(len(normRunes)))
	}

	for i, r := range baseRunes {
		normMap[r] = normRunes[i]
	}

	if !uppercase {
		return
	}

	// strings.ToUpper converts rune by rune, so both upper-cased strings keep
	// the rune counts verified above and indexing stays in range.
	upperBase := []rune(strings.ToUpper(base))
	upperNorm := []rune(strings.ToUpper(norm))
	for i, r := range upperBase {
		normMap[r] = upperNorm[i]
	}
}
// removeDiacritics returns baseString with every rune that has an entry in
// normMap replaced by the mapped rune; runes without an entry are kept as
// they are. No case folding is done here: the case of each replacement is
// whatever normMap holds for that rune.
func removeDiacritics(baseString string) string {
	// Decode once and substitute in place; each rune maps to exactly one
	// rune, so the slice length never changes.
	runes := []rune(baseString)
	for i, r := range runes {
		if plain, found := normMap[r]; found {
			runes[i] = plain
		}
	}
	return string(runes)
}