Nástroje používateľa

Nástroje správy stránok


blog:odborny:2019-06-28-unicode_normalization_tool

Rozdiely

Tu môžete vidieť rozdiely medzi vybranou verziou a aktuálnou verziou danej stránky.

Odkaz na tento prehľad zmien

Obojstranná predošlá revízia Predchádzajúca revízia
Nasledujúca revízia
Predchádzajúca revízia
blog:odborny:2019-06-28-unicode_normalization_tool [2020/03/05 13:59]
Róbert Toth [Other sources] case
blog:odborny:2019-06-28-unicode_normalization_tool [2022/10/24 11:12] (aktuálne)
Róbert Toth added possibility to select source of text analysis
Riadok 4: Riadok 4:
  
  
-===== Source string =====+===== Source Text =====
  
 <html> <html>
Riadok 46: Riadok 46:
   var sourceForm = '<b>Source text encoding:</b> ';   var sourceForm = '<b>Source text encoding:</b> ';
   if (allForms == 0) {   if (allForms == 0) {
-    sourceForm += 'source text was not in Unicode.';+    sourceForm += 'not in single Unicode normalisation form (or not encoded in Unicode at all).';
   }   }
   else if (isNFD && isNFKD && allForms == 2) {   else if (isNFD && isNFKD && allForms == 2) {
Riadok 68: Riadok 68:
   var resultText = sourceText.normalize(requiredForm);   var resultText = sourceText.normalize(requiredForm);
   resultEl.value = resultText;   resultEl.value = resultText;
 +  
 +  // continue by doing UTF analysis
 +  utfAnalysis();
 } }
  
Riadok 82: Riadok 85:
 function utfAnalysis() { function utfAnalysis() {
   var unicode = unicode12_1_0;   var unicode = unicode12_1_0;
-  var sourceText = document.getElementById('utfResult').value;+  var sourceSelect = document.querySelector('input[name="utfAnalysisSource"]:checked').value; 
 +  var sourceText = (sourceSelect === 'original' 
 +                    ? document.getElementById('utfSource').value 
 +                    : document.getElementById('utfResult').value);
   var resultTextEl = document.getElementById('utfAnalysisText');   var resultTextEl = document.getElementById('utfAnalysisText');
   var resultCharEl = document.getElementById('utfAnalysisCharacters');   var resultCharEl = document.getElementById('utfAnalysisCharacters');
Riadok 121: Riadok 127:
     var char = String.fromCodePoint(code);     var char = String.fromCodePoint(code);
     var name = (unicode[codeHex] || '[unknown]');     var name = (unicode[codeHex] || '[unknown]');
-    analysisChars += '<tr><td>'+char+'</td><td>0x'+codeHex+'</td><td>&amp;#'+code+';</td><td>'+name.escapeHtml()+'</td><td>'+positions.length+'</td><td>'+positions.join(" ")+'</td></tr>';+    analysisChars += '<tr><td>'+char+'</td><td>0x'+codeHex+'</td><td>&amp;#'+code+';</td><td>'+name.escapeHtml()+'</td><td>'+positions.length+'</td><td><div class="limitHeight">'+positions.join(" ")+'</div></td></tr>';
   }   }
      
Riadok 130: Riadok 136:
 </script> </script>
  
-<textarea id='utfSource' oninput='utfRecalculate();utfAnalysis();' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='15' cols='120'></textarea>+<textarea id='utfSource' oninput='utfRecalculate()' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='15' cols='120'></textarea>
  
 <div id='utfSourceForm'><b>Source text encoding:</b> undetermined</div> <div id='utfSourceForm'><b>Source text encoding:</b> undetermined</div>
Riadok 137: Riadok 143:
  
  
-===== Normalized text =====+===== Normalized Text =====
  
 <html> <html>
Riadok 158: Riadok 164:
 </div> </div>
  
-<textarea id='utfResult' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='10' cols='120'></textarea>+<textarea id='utfResult' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='10' cols='120' readonly="readonly"></textarea>
  
 </html> </html>
  
-===== Text rundown =====+===== Text Analysis =====
  
 <html> <html>
 +
 +<div style="float:left; width:49%;">
 +  <dl>
 +    <dt><input type="radio" name="utfAnalysisSource" onchange="utfAnalysis()" value="original" checked="checked"> Source text</dt>
 +    <dd>Analyse original string as entered</dd>
 +    <dt><input type="radio" name="utfAnalysisSource" onchange="utfAnalysis()" value="normalised"> Result text</dt>
 +    <dd>Analyse the resulting string after normalisation</dd>
 +  </dl>
 +</div>
  
 <style type="text/css"> <style type="text/css">
Riadok 207: Riadok 222:
     text-align: left;     text-align: left;
     text-transform: capitalize;     text-transform: capitalize;
 +  }
 +  
 +  div.limitHeight {
 +    overflow: auto;
 +  }
 +  div.limitHeight:hover {
 +    max-height: fit-content !important;
 +  }
 +  #utfAnalysisCharacters div.limitHeight {
 +    max-height: 3.5em;
   }   }
 </style> </style>
  
-<div id='utfAnalysisText' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;'></div>+</html>
  
 +==== Text Rundown ====
 +
 +<html>
 +<div id='utfAnalysisText' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;'></div>
 </html> </html>
  
  
-===== Analysis of Characters =====+==== Statistics of Characters ====
  
 <html> <html>
blog/odborny/2019-06-28-unicode_normalization_tool.1583413199.txt.gz · Posledná úprava: 2020/03/05 13:59 od Róbert Toth