2018-12-03 12:41:53 +00:00
package main
import (
"strconv"
"strings"
)
// normMap maps a rune to the rune that replaces it during transliteration.
// It is allocated in init, filled by appendTransliterate, and read by
// removeDiacritics.
var normMap map [ rune ] rune
// init builds the package-level normMap lookup table. It allocates the map
// and then registers every transliteration table through appendTransliterate,
// passing uppercase=true so the upper-case forms are registered as well.
func init() {
	normMap = make(map[rune]rune)

	// Each table pairs a string of source runes with a string of the same
	// rune length holding the replacement for the rune at that position.
	tables := []struct {
		base string
		norm string
	}{
		{ // (not only) european
			base: "ěřůøĉĝĥĵŝŭèùÿėįųāēīūļķņģőűëïąćęłńśźżõșțčďĺľňŕšťýžéíñóúüåäöçîşûğăâđêôơưáàãảạ",
			norm: "eruocghjsueuyeiuaeiulkngoueiacelnszzostcdllnrstyzeinouuaaocisugaadeoouaaaaa",
		},
		{ // Vietnamese
			base: "áạàảãăắặằẳẵâấậầẩẫéẹèẻẽêếệềểễiíịìỉĩoóọòỏõôốộồổỗơớợờởỡúụùủũưứựừửữyýỵỳỷỹđ",
			norm: "aaaaaaaaaaaaaaaaaeeeeeeeeeeeiiiiiioooooooooooooooooouuuuuuuuuuuyyyyyyd",
		},
	}

	// Tables are registered in declaration order; later tables overwrite
	// earlier entries for the same rune.
	for _, table := range tables {
		appendTransliterate(table.base, table.norm, true)
	}
}
func appendTransliterate ( base , norm string , uppercase bool ) {
normRunes := [ ] rune ( norm )
baseRunes := [ ] rune ( base )
lenNorm := len ( normRunes )
lenBase := len ( baseRunes )
if lenNorm != lenBase {
2019-03-03 19:29:34 +00:00
panic ( "Base and normalized strings have differend length: base=" + strconv . Itoa ( lenBase ) + ", norm=" + strconv . Itoa ( lenNorm ) ) // programmer error in constant length
2018-12-03 12:41:53 +00:00
}
for i , baseRune := range baseRunes {
normMap [ baseRune ] = normRunes [ i ]
}
if uppercase {
upperBase := strings . ToUpper ( base )
upperNorm := strings . ToUpper ( norm )
normRune := [ ] rune ( upperNorm )
for i , baseRune := range [ ] rune ( upperBase ) {
normMap [ baseRune ] = normRune [ i ]
}
}
}
// removeDiacritics returns baseString with every rune that has an entry in
// normMap replaced by the mapped rune; runes without an entry are kept as
// they are.
//
// The function does not fold case itself: unmapped runes (including ASCII
// capitals) pass through unchanged, and mapped runes get whatever case the
// table holds for them. Invalid UTF-8 bytes come out as U+FFFD.
func removeDiacritics(baseString string) string {
	// strings.Map walks the string rune by rune and returns the input
	// unchanged, without allocating, when no rune is replaced.
	return strings.Map(func(r rune) rune {
		if norm, ok := normMap[r]; ok {
			return norm
		}
		return r
	}, baseString)
}