it:ad:diacritics:home

IT:AD:Diacritics

void Main()
{
	// Dump the string's length (directly and via its char array) before and
	// after canonical decomposition (FormD), so the two can be compared.
	string input = "Theȳ";
	input.Length.Dump();
	input.ToCharArray().Length.Dump();

	input = input.Normalize(NormalizationForm.FormD);
	input.Length.Dump();
	input.ToCharArray().Length.Dump();

	// Dump each character of the decomposed string, skipping non-spacing marks.
	foreach (char letter in input)
	{
		bool isNonSpacingMark =
			System.Globalization.CharUnicodeInfo.GetUnicodeCategory(letter)
				== System.Globalization.UnicodeCategory.NonSpacingMark;
		if (isNonSpacingMark)
		{
			continue;
		}

		letter.Dump();
	}

	// Reassigned here but not read again below.
	input = "ŠĐĆŽ-šđčćž";

	// Sample strings pushed through X.Test(), in this order.
	string[] samples =
	{
		"ÂâĈĉÊêĜĝĤĥÎîĴĵÔôŜŝÛûŴŵXxŶŷẐẑ",
		"ÀàÈèÌìÒòÙùǸǹẀẁỲỳ",
		"ÁáÉéÍíÓóÚúÝýǼǽǾǿĆćǴǵḰḱĹĺḾḿŃńṔṕŔশẂẃŹź",
		"Ç窺",
		"ĀāĀ́ā́Ā̀ā̀Ā̂ā̂Ā͂ā͂ǞǟǠǡĒēḖḗḔḕĒ̂ē̂Ē͂ē͂ĪīĪ́ī́Ī̀ī̀Ī̂ī̂Ī͂ī͂ŌōṒṓṐṑŌ̂ō̂Ō͂ō͂ȪȫǬǭȬȭȰȱŪūŪ́ū́Ū̀ū̀Ū̂ū̂Ū͂ū͂ǕǖṺṻȲȳȲ́ȳ́Ȳ̀ȳ̀Ȳ̂ȳ̂Ȳ͂ȳ͂ÆæØø",
		"ĀāĒēĪīŌōŪūȲȳÆæØø",
		"ÿÿÿÿÿÿÿÿÿÿÿÂâĈĉÊêĜĝĤĥÎîĴĵÔôŜŝÛûŴŵXxŶŷẐẑĀāĒēĪīŌōŪūȲȳÆæØø",
	};

	foreach (string sample in samples)
	{
		sample.Test();
	}
}
	
	// Define other methods and classes here
	
	/// <summary>
	/// String extensions that strip diacritics while converting macron vowels
	/// to the umlaut form of the same letter and leaving existing umlauts intact.
	/// </summary>
	public static class X
	{
		// Macron vowel -> umlaut of the same base letter and case.
		//
		// Every key is a single UTF-16 code unit because RemoveMostDiacritics
		// looks letters up one char at a time. A macron letter followed by a
		// further combining accent needs no entry of its own: the macron letter
		// is mapped here and the trailing combining mark is removed by the
		// generic stripping path. Consonants carrying a macron / line below are
		// deliberately absent; the generic path reduces them to the bare letter.
		static readonly Dictionary<string, string> _dictionaryMacronsToUmlauts = new Dictionary<string, string>
		{
			{ "Ā", "Ä" },
			{ "ā", "ä" },
			{ "Ǟ", "Ä" },
			{ "ǟ", "ä" },
			{ "Ǡ", "Ä" },
			{ "ǡ", "ä" },

			{ "Ē", "Ë" },
			{ "ē", "ë" },
			{ "Ḗ", "Ë" },
			{ "ḗ", "ë" },
			{ "Ḕ", "Ë" },
			{ "ḕ", "ë" },

			{ "Ī", "Ï" },
			{ "ī", "ï" },

			{ "Ō", "Ö" },
			{ "ō", "ö" },
			{ "Ṓ", "Ö" },
			{ "ṓ", "ö" },
			{ "Ṑ", "Ö" },
			{ "ṑ", "ö" },
			{ "Ȫ", "Ö" },
			{ "ȫ", "ö" },
			{ "Ǭ", "Ö" },
			{ "ǭ", "ö" },
			{ "Ȭ", "Ö" },
			{ "ȭ", "ö" },
			{ "Ȱ", "Ö" },
			{ "ȱ", "ö" },

			{ "Ū", "Ü" },
			{ "ū", "ü" },
			{ "Ǖ", "Ü" },
			{ "ǖ", "ü" },
			{ "Ṻ", "Ü" },
			{ "ṻ", "ü" },

			{ "Ȳ", "Ÿ" },
			{ "ȳ", "ÿ" },
		};

		// Macron vowel -> bracketed escape token.
		// Not referenced by any method in this class.
		static readonly Dictionary<string, string> _dictionaryMacronsToEscaped = new Dictionary<string, string>
		{
			{ "Ā", "[{A}]" },
			{ "ā", "[{a}]" },
			{ "Ē", "[{E}]" },
			{ "ē", "[{e}]" },
			{ "Ī", "[{I}]" },
			{ "ī", "[{i}]" },
			{ "Ō", "[{O}]" },
			{ "ō", "[{o}]" },
			{ "Ū", "[{U}]" },
			{ "ū", "[{u}]" },
			{ "Ȳ", "[{Y}]" },
			{ "ȳ", "[{y}]" },
		};

		// Bracketed escape token -> umlaut vowel.
		// Not referenced by any method in this class.
		static readonly Dictionary<string, string> _dictionaryEscapedToUmlaut = new Dictionary<string, string>
		{
			{ "[{A}]", "Ä" },
			{ "[{a}]", "ä" },
			{ "[{E}]", "Ë" },
			{ "[{e}]", "ë" },
			{ "[{I}]", "Ï" },
			{ "[{i}]", "ï" },
			{ "[{O}]", "Ö" },
			{ "[{o}]", "ö" },
			{ "[{U}]", "Ü" },
			{ "[{u}]", "ü" },
			{ "[{Y}]", "Ÿ" },
			{ "[{y}]", "ÿ" },
		};

		// Umlaut vowel -> bracketed escape token.
		// RemoveMostDiacritics uses only the keys, as the set of letters whose
		// diaeresis must survive. The key for lowercase i is "ï": a plain "i"
		// here would classify every ordinary i as an umlaut and leave "ï" as the
		// only umlaut that gets stripped.
		static readonly Dictionary<string, string> _dictionaryUmlautsToEscaped = new Dictionary<string, string>
		{
			{ "Ä", "[{A}]" },
			{ "ä", "[{a}]" },
			{ "Ë", "[{E}]" },
			{ "ë", "[{e}]" },
			{ "Ï", "[{I}]" },
			{ "ï", "[{i}]" },
			{ "Ö", "[{O}]" },
			{ "ö", "[{o}]" },
			{ "Ü", "[{U}]" },
			{ "ü", "[{u}]" },
			{ "Ÿ", "[{Y}]" },
			{ "ÿ", "[{y}]" },
		};

		/// <summary>
		/// Dumps <paramref name="input"/> followed by the result of
		/// <c>RemoveAccents(true)</c> on it (uses the host's <c>Dump()</c> extension).
		/// </summary>
		/// <param name="input">The sample text to show before and after conversion.</param>
		public static void Test(this string input)
		{
			input.Dump();
			input.RemoveAccents(true).Dump();
		}

		/// <summary>
		/// Removes accents from <paramref name="input"/>.
		/// </summary>
		/// <param name="input">The text to process. Null or empty input is returned unchanged.</param>
		/// <param name="saveMacrons">
		/// When true, delegates to <see cref="RemoveMostDiacritics"/> (macron vowels
		/// become umlauts, umlauts are kept, other marks are stripped).
		/// When false, reduces the text to plain ASCII.
		/// </param>
		/// <returns>The converted text.</returns>
		public static string RemoveAccents(this string input, bool saveMacrons)
		{
			if (string.IsNullOrEmpty(input))
			{
				return input;
			}

			if (saveMacrons)
			{
				return input.RemoveMostDiacritics();
			}

			// Swap these four letters for "_" before the ASCII pass below gets to them.
			input = input.Replace("Ǽ", "_").Replace("ǽ", "_").Replace("Ǿ", "_").Replace("ǿ", "_");

			// Compatibility decomposition splits accented letters into an ASCII
			// base letter plus combining marks.
			string normalized = input.Normalize(NormalizationForm.FormKD);

			// ASCII encoder whose replacement fallback is the empty string:
			// whatever is still outside ASCII after decomposition is dropped.
			Encoding removal = Encoding.GetEncoding(Encoding.ASCII.CodePage,
													new EncoderReplacementFallback(""),
													new DecoderReplacementFallback(""));
			byte[] bytes = removal.GetBytes(normalized);

			return Encoding.ASCII.GetString(bytes);
		}

		/// <summary>
		/// Converts macron vowels to the matching umlaut vowel, keeps existing
		/// umlaut vowels, and strips the non-spacing marks from everything else.
		/// </summary>
		/// <param name="text">The text to process. Null or empty input is returned unchanged.</param>
		/// <returns>The converted text.</returns>
		public static string RemoveMostDiacritics(this string text)
		{
			if (string.IsNullOrEmpty(text))
			{
				return text;
			}

			// Compose first so that decomposed input ("a" + U+0304) reaches the
			// same table entry as the precomposed letter ("ā"), and a decomposed
			// umlaut is recognised as an umlaut.
			string composed = text.Normalize(NormalizationForm.FormC);
			StringBuilder stringBuilder = new StringBuilder(composed.Length);

			foreach (char c in composed)
			{
				// Half of a surrogate pair is not valid Unicode on its own and
				// would be rejected by Normalize below; copy it through untouched.
				if (char.IsSurrogate(c))
				{
					stringBuilder.Append(c);
					continue;
				}

				string singleLetter = c.ToString();

				string replacement;
				if (_dictionaryMacronsToUmlauts.TryGetValue(singleLetter, out replacement))
				{
					stringBuilder.Append(replacement);
					continue;
				}

				// Existing umlaut vowels are kept exactly as they are.
				if (_dictionaryUmlautsToEscaped.ContainsKey(singleLetter))
				{
					stringBuilder.Append(c);
					continue;
				}

				// Everything else: decompose and keep all but the non-spacing marks.
				foreach (char normalizedChar in singleLetter.Normalize(NormalizationForm.FormD))
				{
					if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(normalizedChar) != System.Globalization.UnicodeCategory.NonSpacingMark)
					{
						stringBuilder.Append(normalizedChar);
					}
				}
			}

			return stringBuilder.ToString();
		}
	}

  • /home/skysigal/public_html/data/pages/it/ad/diacritics/home.txt
  • Last modified: 2023/11/04 22:23
  • by 127.0.0.1