Nástroje používateľa

Nástroje správy stránok


blog:odborny:2019-06-28-unicode_normalization_tool

Rozdiely

Tu môžete vidieť rozdiely medzi vybranou verziou a aktuálnou verziou danej stránky.

Odkaz na tento prehľad zmien

Obojstranná predošlá revízia Predchádzajúca revízia
Nasledujúca revízia
Predchádzajúca revízia
blog:odborny:2019-06-28-unicode_normalization_tool [2020/02/24 00:53]
Róbert Toth spacing
blog:odborny:2019-06-28-unicode_normalization_tool [2022/10/24 11:12] (aktuálne)
Róbert Toth added possibility to select source of text analysis
Riadok 4: Riadok 4:
  
  
-===== Source string =====+===== Source Text =====
  
-<php>?>+<html>
  
 <script type="text/javascript"> <script type="text/javascript">
Riadok 46: Riadok 46:
   var sourceForm = '<b>Source text encoding:</b> ';   var sourceForm = '<b>Source text encoding:</b> ';
   if (allForms == 0) {   if (allForms == 0) {
-    sourceForm += 'source text was not in Unicode.';+    sourceForm += 'not in single Unicode normalisation form (or not encoded in Unicode at all).';
   }   }
   else if (isNFD && isNFKD && allForms == 2) {   else if (isNFD && isNFKD && allForms == 2) {
Riadok 68: Riadok 68:
   var resultText = sourceText.normalize(requiredForm);   var resultText = sourceText.normalize(requiredForm);
   resultEl.value = resultText;   resultEl.value = resultText;
 +  
 +  // continue by doing UTF analysis
 +  utfAnalysis();
 } }
  
Riadok 82: Riadok 85:
 function utfAnalysis() { function utfAnalysis() {
   var unicode = unicode12_1_0;   var unicode = unicode12_1_0;
-  var sourceText = document.getElementById('utfResult').value;+  var sourceSelect = document.querySelector('input[name="utfAnalysisSource"]:checked').value; 
 +  var sourceText = (sourceSelect === 'original' 
 +                    ? document.getElementById('utfSource').value 
 +                    : document.getElementById('utfResult').value);
   var resultTextEl = document.getElementById('utfAnalysisText');   var resultTextEl = document.getElementById('utfAnalysisText');
   var resultCharEl = document.getElementById('utfAnalysisCharacters');   var resultCharEl = document.getElementById('utfAnalysisCharacters');
Riadok 121: Riadok 127:
     var char = String.fromCodePoint(code);     var char = String.fromCodePoint(code);
     var name = (unicode[codeHex] || '[unknown]');     var name = (unicode[codeHex] || '[unknown]');
-    analysisChars += '<tr><td>'+char+'</td><td>0x'+codeHex+'</td><td>&amp;#'+code+';</td><td>'+name.escapeHtml()+'</td><td>'+positions.length+'</td><td>'+positions.join(" ")+'</td></tr>';+    analysisChars += '<tr><td>'+char+'</td><td>0x'+codeHex+'</td><td>&amp;#'+code+';</td><td>'+name.escapeHtml()+'</td><td>'+positions.length+'</td><td><div class="limitHeight">'+positions.join(" ")+'</div></td></tr>';
   }   }
      
Riadok 130: Riadok 136:
 </script> </script>
  
-<textarea id='utfSource' oninput='utfRecalculate();utfAnalysis();' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='15' cols='120'></textarea>+<textarea id='utfSource' oninput='utfRecalculate()' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='15' cols='120'></textarea>
  
 <div id='utfSourceForm'><b>Source text encoding:</b> undetermined</div> <div id='utfSourceForm'><b>Source text encoding:</b> undetermined</div>
  
-<?php</php>+</html>
  
  
-===== Normalized text =====+===== Normalized Text =====
  
-<php>?>+<html>
  
-<dl>+<div style="float:left; width:49%;">
-  <dt><input type='radio' name='utfForm' onchange='utfRecalculate()' value='NFD'> NFD</dt> +  <dl> 
-  <dd>Canonical Decomposition</dd> +    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFD"> NFD</dt> 
-  <dt><input type='radio' name='utfForm' onchange='utfRecalculate()' value='NFC' checked='checked'> NFC</dt> +    <dd>Canonical Decomposition</dd> 
-  <dd>Canonical Decomposition, followed by Canonical Composition</br></dd> +    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFC" checked="checked"> NFC</dt> 
-  <dt><input type='radio' name='utfForm' onchange='utfRecalculate()' value='NFKD'> NFKD</dt> +    <dd>Canonical Decomposition, followed by Canonical Composition</dd> 
-  <dd>Compatibility Decomposition</dd> +  </dl> 
-  <dt><input type='radio' name='utfForm' onchange='utfRecalculate()' value='NFKC'> NFKC</dt> +</div> 
-  <dd>Compatibility Decomposition, followed by Canonical Composition</dd> +<div style="float:right; width:49%;">
-</dl>+  <dl> 
 +    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFKD"> NFKD</dt> 
 +    <dd>Compatibility Decomposition</dd> 
 +    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFKC"> NFKC</dt> 
 +    <dd>Compatibility Decomposition, followed by Canonical Composition</dd> 
 +  </dl> 
 +</div>
  
-<textarea id='utfResult' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='10' cols='120'></textarea>+<textarea id='utfResult' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='10' cols='120' readonly="readonly"></textarea>
  
-<?php</php>+</html>
  
-===== Text rundown =====+===== Text Analysis =====
  
-<php>?>+<html> 
 + 
 +<div style="float:left; width:49%;"> 
 +  <dl> 
 +    <dt><input type="radio" name="utfAnalysisSource" onchange="utfAnalysis()" value="original" checked="checked"> Source text</dt> 
 +    <dd>Analyse original string as entered</dd> 
 +    <dt><input type="radio" name="utfAnalysisSource" onchange="utfAnalysis()" value="normalised"> Result text</dt> 
 +    <dd>Analyse the resulting string after normalisation</dd> 
 +  </dl> 
 +</div>
  
 <style type="text/css"> <style type="text/css">
Riadok 201: Riadok 222:
     text-align: left;     text-align: left;
     text-transform: capitalize;     text-transform: capitalize;
 +  }
 +  
 +  div.limitHeight {
 +    overflow: auto;
 +  }
 +  div.limitHeight:hover {
 +    max-height: fit-content !important;
 +  }
 +  #utfAnalysisCharacters div.limitHeight {
 +    max-height: 3.5em;
   }   }
 </style> </style>
  
-<div id='utfAnalysisText' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;'></div>+</html>
  
-<?php</php>+==== Text Rundown ==== 
 + 
 +<html> 
 +<div id='utfAnalysisText' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;'></div> 
 +</html>
  
  
-===== Analysis of Characters =====+==== Statistics of Characters ====
  
-<php>?>+<html>
  
 <table id='utfAnalysisCharacters' class="inline" style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:10px;'></table> <table id='utfAnalysisCharacters' class="inline" style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:10px;'></table>
  
-<?php</php>+</html>
  
  
Riadok 223: Riadok 258:
  
 ~~socialite~~ ~~socialite~~
-{{tag>tools}}+{{tag>tools Unicode UTF-8}}
  
  
Riadok 230: Riadok 265:
 ~~DISQUS~~ ~~DISQUS~~
  
-<php>?>+<html>
 <script type="text/javascript"> <script type="text/javascript">
  
Riadok 33081: Riadok 33116:
  
 </script> </script>
-<?php</php>+</html>
  
blog/odborny/2019-06-28-unicode_normalization_tool.1582502013.txt.gz · Posledná úprava: 2020/02/24 00:53 od Róbert Toth