2018-12-03 12:41:53 +00:00
package main
import (
"strconv"
2019-05-27 16:39:38 +00:00
"unicode"
2018-12-03 12:41:53 +00:00
)
// normMap maps a rune to the rune that replaces it. It is allocated in init,
// filled by appendTransliterate and read by removeDiacritics.
var normMap map[rune]rune
// init allocates normMap and then feeds every hard-coded transliteration
// table to appendTransliterate, in the order the tables are listed.
func init() {
	normMap = make(map[rune]rune)

	tables := [...]struct{ base, norm string }{
		{ // (not only) European
			base: "ěřůøĉĝĥĵŝŭèùÿėįųāēīūļķņģőűëïąćęłńśźżõșțčďĺľňŕšťýžéíñóúüåäöçîşûğăâđêôơưáàãảạ",
			norm: "eruocghjsueuyeiuaeiulkngoueiacelnszzostcdllnrstyzeinouuaaocisugaadeoouaaaaa",
		},
		{ // Vietnamese
			base: "áạàảãăắặằẳẵâấậầẩẫéẹèẻẽêếệềểễiíịìỉĩoóọòỏõôốộồổỗơớợờởỡúụùủũưứựừửữyýỵỳỷỹđ",
			norm: "aaaaaaaaaaaaaaaaaeeeeeeeeeeeiiiiiioooooooooooooooooouuuuuuuuuuuyyyyyyd",
		},
	}
	for _, tbl := range tables {
		appendTransliterate(tbl.base, tbl.norm)
	}
}
2019-05-27 16:39:38 +00:00
func appendTransliterate ( base , norm string ) {
2018-12-03 12:41:53 +00:00
normRunes := [ ] rune ( norm )
baseRunes := [ ] rune ( base )
lenNorm := len ( normRunes )
lenBase := len ( baseRunes )
if lenNorm != lenBase {
2019-03-03 19:29:34 +00:00
panic ( "Base and normalized strings have differend length: base=" + strconv . Itoa ( lenBase ) + ", norm=" + strconv . Itoa ( lenNorm ) ) // programmer error in constant length
2018-12-03 12:41:53 +00:00
}
2019-05-27 16:39:38 +00:00
for i := 0 ; i < lenBase ; i ++ {
normMap [ baseRunes [ i ] ] = normRunes [ i ]
2018-12-03 12:41:53 +00:00
2019-05-27 16:39:38 +00:00
baseUpper := unicode . ToUpper ( baseRunes [ i ] )
normUpper := unicode . ToUpper ( normRunes [ i ] )
normMap [ baseUpper ] = normUpper
2018-12-03 12:41:53 +00:00
}
}
// Remove diacritics and make lowercase.
func removeDiacritics ( baseString string ) string {
var normalizedRunes [ ] rune
2019-05-27 16:39:38 +00:00
for _ , baseRune := range baseString {
2018-12-03 12:41:53 +00:00
if normRune , ok := normMap [ baseRune ] ; ok {
normalizedRunes = append ( normalizedRunes , normRune )
} else {
normalizedRunes = append ( normalizedRunes , baseRune )
}
}
return string ( normalizedRunes )
}