// Script entry point: shows how Unicode normalization changes the UTF-16
// length of a sample word, prints its characters without combining marks,
// and then runs a series of accented sample strings through X.Test.
void Main()
{
    var sample = "Theȳ";

    // Length as written in the literal.
    sample.Length.Dump();
    sample.ToCharArray().Length.Dump();

    // Length after canonical decomposition (base letters plus combining marks).
    sample = sample.Normalize(NormalizationForm.FormD);
    sample.Length.Dump();
    sample.ToCharArray().Length.Dump();

    // Dump every UTF-16 unit of the decomposed text that is not a non-spacing mark.
    foreach (char unit in sample)
    {
        var category = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(unit);
        if (category == System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            continue;
        }

        unit.Dump();
    }

    // NOTE(review): this value is assigned but never read afterwards.
    sample = "ŠĐĆŽ-šđčćž";

    // Each sample is dumped before and after accent removal.
    "ÂâĈĉÊêĜĝĤĥÎîĴĵÔôŜŝÛûŴŵXxŶŷẐẑ".Test();
    "ÀàÈèÌìÒòÙùǸǹẀẁỲỳ".Test();
    "ÁáÉéÍíÓóÚúÝýǼǽǾǿĆćǴǵḰḱĹĺḾḿŃńṔṕŔশẂẃŹź".Test();
    "Ç窺".Test();
    "ĀāĀ́ā́Ā̀ā̀Ā̂ā̂Ā͂ā͂ǞǟǠǡĒēḖḗḔḕĒ̂ē̂Ē͂ē͂ĪīĪ́ī́Ī̀ī̀Ī̂ī̂Ī͂ī͂ŌōṒṓṐṑŌ̂ō̂Ō͂ō͂ȪȫǬǭȬȭȰȱŪūŪ́ū́Ū̀ū̀Ū̂ū̂Ū͂ū͂ǕǖṺṻȲȳȲ́ȳ́Ȳ̀ȳ̀Ȳ̂ȳ̂Ȳ͂ȳ͂ÆæØø".Test();
    "ĀāĒēĪīŌōŪūȲȳÆæØø".Test();
    "ÿÿÿÿÿÿÿÿÿÿÿÂâĈĉÊêĜĝĤĥÎîĴĵÔôŜŝÛûŴŵXxŶŷẐẑĀāĒēĪīŌōŪūȲȳÆæØø".Test();
}
// Define other methods and classes here
/// <summary>
/// String extensions for stripping diacritics, optionally keeping macron
/// vowels recognisable by rewriting them as the matching umlaut vowel.
/// </summary>
public static class X
{
    // Macron vowel (single precomposed character) -> umlaut vowel.
    // Filled by the static constructor.
    private static readonly Dictionary<string, string> _dictionaryMacronsToUmlauts = new Dictionary<string, string>();

    // Macron vowel -> bracketed escape token. Not read by any method in this class.
    private static readonly Dictionary<string, string> _dictionaryMacronsToEscaped = new Dictionary<string, string>();

    // Bracketed escape token -> umlaut vowel. Not read by any method in this class.
    private static readonly Dictionary<string, string> _dictionaryEscapedToUmlaut = new Dictionary<string, string>();

    // Umlaut vowel -> bracketed escape token. RemoveMostDiacritics only consults
    // the keys, to decide which characters must keep their diacritic.
    private static readonly Dictionary<string, string> _dictionaryUmlautsToEscaped = new Dictionary<string, string>();

    // ASCII encoding whose fallbacks replace anything unencodable with an empty
    // string, i.e. silently drop it. Built once instead of on every call.
    private static readonly Encoding _asciiDroppingEncoding = Encoding.GetEncoding(
        Encoding.ASCII.CodePage,
        new EncoderReplacementFallback(""),
        new DecoderReplacementFallback(""));

    static X()
    {
        // Only single precomposed letters are registered. A macron letter that
        // is followed by a further combining accent needs no entry of its own:
        // the lookup in RemoveMostDiacritics is per character, so the letter is
        // mapped here and the trailing combining mark is stripped afterwards.
        // Letters without an entry (macron consonants, Ǣ/ǣ, Æ/æ, Ø/ø, ...) go
        // through the generic mark-stripping path.
        MapMacronLetters("ĀǞǠ", "Ä");
        MapMacronLetters("āǟǡ", "ä");
        MapMacronLetters("ĒḖḔ", "Ë");
        MapMacronLetters("ēḗḕ", "ë");
        MapMacronLetters("Ī", "Ï");
        // Lowercase ī maps to "ï" so that it follows the same rule as every
        // other vowel (it previously mapped to a plain "i").
        MapMacronLetters("ī", "ï");
        MapMacronLetters("ŌṒṐȪǬȬȰ", "Ö");
        MapMacronLetters("ōṓṑȫǭȭȱ", "ö");
        MapMacronLetters("ŪǕṺ", "Ü");
        MapMacronLetters("ūǖṻ", "ü");
        MapMacronLetters("Ȳ", "Ÿ");
        MapMacronLetters("ȳ", "ÿ");

        _dictionaryMacronsToEscaped["Ā"] = "[{A}]";
        _dictionaryMacronsToEscaped["ā"] = "[{a}]";
        _dictionaryMacronsToEscaped["Ē"] = "[{E}]";
        _dictionaryMacronsToEscaped["ē"] = "[{e}]";
        _dictionaryMacronsToEscaped["Ī"] = "[{I}]";
        _dictionaryMacronsToEscaped["ī"] = "[{i}]";
        _dictionaryMacronsToEscaped["Ō"] = "[{O}]";
        _dictionaryMacronsToEscaped["ō"] = "[{o}]";
        _dictionaryMacronsToEscaped["Ū"] = "[{U}]";
        _dictionaryMacronsToEscaped["ū"] = "[{u}]";
        _dictionaryMacronsToEscaped["Ȳ"] = "[{Y}]";
        _dictionaryMacronsToEscaped["ȳ"] = "[{y}]";

        _dictionaryUmlautsToEscaped["Ä"] = "[{A}]";
        _dictionaryUmlautsToEscaped["ä"] = "[{a}]";
        _dictionaryUmlautsToEscaped["Ë"] = "[{E}]";
        _dictionaryUmlautsToEscaped["ë"] = "[{e}]";
        _dictionaryUmlautsToEscaped["Ï"] = "[{I}]";
        _dictionaryUmlautsToEscaped["ï"] = "[{i}]"; // was keyed on plain "i"
        _dictionaryUmlautsToEscaped["Ö"] = "[{O}]";
        _dictionaryUmlautsToEscaped["ö"] = "[{o}]";
        _dictionaryUmlautsToEscaped["Ü"] = "[{U}]";
        _dictionaryUmlautsToEscaped["ü"] = "[{u}]";
        _dictionaryUmlautsToEscaped["Ÿ"] = "[{Y}]";
        _dictionaryUmlautsToEscaped["ÿ"] = "[{y}]";

        _dictionaryEscapedToUmlaut["[{A}]"] = "Ä";
        _dictionaryEscapedToUmlaut["[{a}]"] = "ä";
        _dictionaryEscapedToUmlaut["[{E}]"] = "Ë";
        _dictionaryEscapedToUmlaut["[{e}]"] = "ë";
        _dictionaryEscapedToUmlaut["[{I}]"] = "Ï";
        _dictionaryEscapedToUmlaut["[{i}]"] = "ï"; // was a plain "i"
        _dictionaryEscapedToUmlaut["[{O}]"] = "Ö";
        _dictionaryEscapedToUmlaut["[{o}]"] = "ö";
        _dictionaryEscapedToUmlaut["[{U}]"] = "Ü";
        _dictionaryEscapedToUmlaut["[{u}]"] = "ü";
        _dictionaryEscapedToUmlaut["[{Y}]"] = "Ÿ";
        _dictionaryEscapedToUmlaut["[{y}]"] = "ÿ";
    }

    // Registers every character of 'letters' as a macron letter that is
    // rewritten to 'umlaut'.
    private static void MapMacronLetters(string letters, string umlaut)
    {
        // Compose first so the keys are single characters even if this source
        // file was saved in a decomposed Unicode form.
        foreach (char letter in letters.Normalize(NormalizationForm.FormC))
        {
            _dictionaryMacronsToUmlauts[letter.ToString()] = umlaut;
        }
    }

    /// <summary>
    /// Dumps the string, then dumps it again after <c>RemoveAccents(true)</c>.
    /// </summary>
    /// <param name="input">Text to display before and after accent removal.</param>
    /// <remarks>
    /// Relies on a <c>Dump</c> extension that is not defined in this class;
    /// presumably supplied by the hosting environment.
    /// </remarks>
    public static void Test(this string input)
    {
        input.Dump();
        input.RemoveAccents(true).Dump();
    }

    /// <summary>
    /// Removes accents from <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Text to process.</param>
    /// <param name="saveMacrons">
    /// When true, delegates to <see cref="RemoveMostDiacritics"/>, which keeps
    /// umlaut vowels and rewrites macron vowels as umlaut vowels. When false,
    /// the result is pure ASCII: Ǽ, ǽ, Ǿ and ǿ become "_", the rest is
    /// compatibility-decomposed and every non-ASCII character is dropped.
    /// </param>
    /// <returns>The processed text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public static string RemoveAccents(this string input, bool saveMacrons)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        if (saveMacrons)
        {
            return input.RemoveMostDiacritics();
        }

        // These four are replaced with "_" up front; without this step their
        // non-ASCII base letter would be dropped by the ASCII round-trip below.
        string prepared = input
            .Replace("Ǽ", "_")
            .Replace("ǽ", "_")
            .Replace("Ǿ", "_")
            .Replace("ǿ", "_");

        string decomposed = prepared.Normalize(NormalizationForm.FormKD);

        // Encoding to ASCII with an empty replacement fallback discards the
        // combining marks and any other non-ASCII character.
        byte[] asciiBytes = _asciiDroppingEncoding.GetBytes(decomposed);
        return Encoding.ASCII.GetString(asciiBytes);
    }

    /// <summary>
    /// Strips diacritics from <paramref name="text"/>, except that macron
    /// vowels are rewritten as the matching umlaut vowel and existing umlaut
    /// vowels (Ä ä Ë ë Ï ï Ö ö Ü ü Ÿ ÿ) are kept unchanged.
    /// </summary>
    /// <param name="text">Text to process; composed and decomposed forms are treated alike.</param>
    /// <returns>The text with all other non-spacing marks removed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// Thrown by <c>String.Normalize</c> when <paramref name="text"/> contains
    /// invalid Unicode such as an unpaired surrogate.
    /// </exception>
    public static string RemoveMostDiacritics(this string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException("text");
        }

        // Compose first so that "a" + combining macron is seen as the single
        // letter "ā" (and "u" + combining diaeresis as "ü") by the lookups.
        string composed = text.Normalize(NormalizationForm.FormC);
        StringBuilder result = new StringBuilder(composed.Length);

        foreach (char current in composed)
        {
            // Halves of a surrogate pair are copied through untouched:
            // normalising half a pair on its own is invalid Unicode and throws.
            if (char.IsSurrogate(current))
            {
                result.Append(current);
                continue;
            }

            string letter = current.ToString();

            string umlaut;
            if (_dictionaryMacronsToUmlauts.TryGetValue(letter, out umlaut))
            {
                result.Append(umlaut);
                continue;
            }

            // Existing umlaut vowels must survive the stripping below.
            if (_dictionaryUmlautsToEscaped.ContainsKey(letter))
            {
                result.Append(current);
                continue;
            }

            // Everything else: decompose and keep all but the non-spacing marks.
            foreach (char part in letter.Normalize(NormalizationForm.FormD))
            {
                if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(part) != System.Globalization.UnicodeCategory.NonSpacingMark)
                {
                    result.Append(part);
                }
            }
        }

        return result.ToString();
    }
}