Nástroje používateľa

Nástroje správy stránok


blog:odborny:2019-06-28-unicode_normalization_tool

Rozdiely

Tu môžete vidieť rozdiely medzi vybranou verziou a aktuálnou verziou danej stránky.

Odkaz na tento prehľad zmien

Obojstranná predošlá revízia | Predchádzajúca revízia
Nasledujúca revízia
Predchádzajúca revízia
blog:odborny:2019-06-28-unicode_normalization_tool [2022/03/03 12:22] – stará verzia bola obnovená (2020/09/10 12:07) Róbert Toth | blog:odborny:2019-06-28-unicode_normalization_tool [2025/01/20 20:02] (aktuálne) – [Source Text] Róbert Toth
Riadok 4: Riadok 4:
  
  
-===== Source string ===== +===== Source Text =====
- +
-<html>+
  
 +<HTML>
 <script type="text/javascript"> <script type="text/javascript">
-(function(){+ 
 +function sleep(ms) { 
 +  return new Promise(resolve => setTimeout(resolve, ms)); 
 +}
 +window.onload = function() { 
 +  sleep(1000).then(() => { 
 +    document.getElementById('utfSource').focus(); 
 +  }); 
 +  sleep(2000).then(() => { 
 +    document.getElementById('utfSource').focus(); 
 +  }); 
 +  sleep(3000).then(() => { 
 +    document.getElementById('utfSource').focus(); 
 +  }); 
   "use strict";   "use strict";
  
Riadok 30: Riadok 43:
     String.prototype.escapeHtml = escapeHtml;     String.prototype.escapeHtml = escapeHtml;
   }   }
-})();+};
  
 function utfRecalculate() { function utfRecalculate() {
Riadok 46: Riadok 59:
   var sourceForm = '<b>Source text encoding:</b> ';   var sourceForm = '<b>Source text encoding:</b> ';
   if (allForms == 0) {   if (allForms == 0) {
-    sourceForm += 'source text was not in Unicode.';+    sourceForm += 'not in single Unicode normalisation form (or not encoded in Unicode at all).';
   }   }
   else if (isNFD && isNFKD && allForms == 2) {   else if (isNFD && isNFKD && allForms == 2) {
Riadok 85: Riadok 98:
 function utfAnalysis() { function utfAnalysis() {
   var unicode = unicode12_1_0;   var unicode = unicode12_1_0;
-  var sourceText = document.getElementById('utfResult').value;+  var sourceSelect = document.querySelector('input[name="utfAnalysisSource"]:checked').value; 
 +  var sourceText = (sourceSelect === 'original' 
 +                    ? document.getElementById('utfSource').value 
 +                    : document.getElementById('utfResult').value);
   var resultTextEl = document.getElementById('utfAnalysisText');   var resultTextEl = document.getElementById('utfAnalysisText');
   var resultCharEl = document.getElementById('utfAnalysisCharacters');   var resultCharEl = document.getElementById('utfAnalysisCharacters');
Riadok 94: Riadok 110:
      
   // count chars and words   // count chars and words
-  analysisText += '<p><div>Characters: ' + sourceText.length + '</div>'; +  analysisText += '<div>Characters: ' + sourceText.length + '</div>'; 
-  analysisText += '<div>Words: ' + (sourceText.match(/[^\s]+/g) || new Array()).length + '</div></p>';+  analysisText += '<div>Words: ' + (sourceText.match(/[^\s]+/g) || new Array()).length + '</div>'; 
 +   
 +  analysisText += '<div id="utfAnalysisText_content">';
      
   let iterator = sourceText[Symbol.iterator]();   let iterator = sourceText[Symbol.iterator]();
Riadok 113: Riadok 131:
     index++;     index++;
   }   }
 +  
 +  analysisText += '</div>';
      
   // display text rundown   // display text rundown
Riadok 132: Riadok 152:
  
 </script> </script>
- 
-<textarea id='utfSource' oninput='utfRecalculate()' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='15' cols='120'></textarea> 
- 
-<div id='utfSourceForm'><b>Source text encoding:</b> undetermined</div> 
- 
-</html> 
  
- 
-===== Normalized text ===== 
- 
-<html> 
- 
-<div style="float:left; width:49%;"> 
-  <dl> 
-    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFD"> NFD</dt> 
-    <dd>Canonical Decomposition</dd> 
-    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFC" checked="checked"> NFC</dt> 
-    <dd>Canonical Decomposition, followed by Canonical Composition</dd> 
-  </dl> 
-</div> 
-<div style="float:right; width:49%;"> 
-  <dl> 
-    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFKD"> NFKD</dt> 
-    <dd>Compatibility Decomposition</dd> 
-    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFKC"> NFKC</dt> 
-    <dd>Compatibility Decomposition, followed by Canonical Composition</dd> 
-  </dl> 
-</div> 
- 
-<textarea id='utfResult' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='10' cols='120' readonly="readonly"></textarea> 
- 
-</html> 
- 
-===== Text rundown ===== 
- 
-<html> 
  
 <style type="text/css"> <style type="text/css">
-  #utfAnalysisText > span {+  #utfAnalysisText {
 +    clear: both; 
 +  } 
 +  #utfAnalysisText_content { 
 +    margin: 1.5em 0.5em; 
 +    float: left; 
 +  } 
 +  #utfAnalysisText_content > span {
     position: relative;     position: relative;
     display: inline-block;     display: inline-block;
Riadok 181: Riadok 173:
     padding-bottom:4px;     padding-bottom:4px;
     vertical-align: bottom;     vertical-align: bottom;
 +    float: left;
     /*cursor: nw-resize; /* nw-resize or text or crosshair */     /*cursor: nw-resize; /* nw-resize or text or crosshair */
   }   }
-  #utfAnalysisText > span:first-of-type {+  #utfAnalysisText_content > span:first-of-type {
     box-shadow: -2px -2px 2px rgb(127, 43, 1);     box-shadow: -2px -2px 2px rgb(127, 43, 1);
   }   }
-  #utfAnalysisText > span:last-of-type {+  #utfAnalysisText_content > span:last-of-type {
     box-shadow: 2px 2px 2px rgb(127, 43, 1);     box-shadow: 2px 2px 2px rgb(127, 43, 1);
   }   }
-  #utfAnalysisText > span:hover {+  #utfAnalysisText_content > span[data-title$="0x000A"] { 
 +    border-right: 2px solid rgb(127, 43, 1); 
 +    border-top-right-radius: 50%; 
 +  } 
 +  #utfAnalysisText_content > span[data-title$="0x000A"] + span { 
 +    clear: left; 
 +  } 
 +  #utfAnalysisText_content > span:hover {
     background-color: rgba(255, 0, 0, 0.5);     background-color: rgba(255, 0, 0, 0.5);
     padding-bottom:0px;     padding-bottom:0px;
Riadok 195: Riadok 195:
     overflow-x: visible;     overflow-x: visible;
   }   }
-  #utfAnalysisText > span:hover::after {+  #utfAnalysisText_content > span:hover::after {
     content: attr(data-title);     content: attr(data-title);
     position: absolute;     position: absolute;
Riadok 223: Riadok 223:
 </style> </style>
  
-<div id='utfAnalysisText' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;'></div>+<textarea id='utfSource' oninput='utfRecalculate()' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='15' cols='120' autofocus></textarea>
  
-</html>+<div id='utfSourceForm'><b>Source text encoding:</b> undetermined</div> 
 + 
 +</HTML> 
 + 
 + 
 +===== Normalized Text ===== 
 + 
 +<HTML> 
 + 
 +<div style="float:left; width:49%;"> 
 +  <dl> 
 +    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFD"> NFD</dt> 
 +    <dd>Canonical Decomposition</dd> 
 +    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFC" checked="checked"> NFC</dt> 
 +    <dd>Canonical Decomposition, followed by Canonical Composition</dd> 
 +  </dl> 
 +</div> 
 +<div style="float:right; width:49%;"> 
 +  <dl> 
 +    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFKD"> NFKD</dt> 
 +    <dd>Compatibility Decomposition</dd> 
 +    <dt><input type="radio" name="utfForm" onchange="utfRecalculate()" value="NFKC"> NFKC</dt> 
 +    <dd>Compatibility Decomposition, followed by Canonical Composition</dd> 
 +  </dl> 
 +</div> 
 + 
 +<textarea id='utfResult' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;' rows='10' cols='120' readonly="readonly"></textarea> 
 + 
 +</HTML> 
 + 
 +===== Text Analysis ===== 
 + 
 +<HTML> 
 + 
 +<div style="float:left; width:49%;"> 
 +  <dl> 
 +    <dt><input type="radio" name="utfAnalysisSource" onchange="utfAnalysis()" value="original" checked="checked"> Source text</dt> 
 +    <dd>Analyse original string as entered</dd> 
 +    <dt><input type="radio" name="utfAnalysisSource" onchange="utfAnalysis()" value="normalised"> Result text</dt> 
 +    <dd>Analyse the resulting string after normalisation</dd> 
 +  </dl> 
 +</div> 
 + 
 +</HTML> 
 + 
 +==== Text Rundown ==== 
 + 
 +<HTML> 
 +<div id='utfAnalysisText' style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:11px;'></div> 
 +</HTML>
  
  
-===== Analysis of Characters =====+==== Statistics of Characters ====
  
-<html>+<HTML>
  
 <table id='utfAnalysisCharacters' class="inline" style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:10px;'></table> <table id='utfAnalysisCharacters' class="inline" style='width:100%; font-family:Consolas, "Andale Mono WT", "Andale Mono", "Bitstream Vera Sans Mono", "Nimbus Mono L", Monaco, "Courier New", monospace; font-size:10px;'></table>
  
-</html>+</HTML>
  
  
Riadok 249: Riadok 298:
 ~~DISQUS~~ ~~DISQUS~~
  
-<html>+<HTML>
 <script type="text/javascript"> <script type="text/javascript">
  
Riadok 33100: Riadok 33149:
  
 </script> </script>
-</html>+</HTML>
  
blog/odborny/2019-06-28-unicode_normalization_tool.1646306567.txt.gz · Posledná úprava: 2022/03/03 12:22 od Róbert Toth