blog:odborny:2019-06-28-unicode_normalization_tool
Rozdiely
Tu môžete vidieť rozdiely medzi vybranou verziou a aktuálnou verziou danej stránky.
| Obojstranná predošlá revízia · Predchádzajúca revízia · Nasledujúca revízia | Predchádzajúca revízia | ||
| blog:odborny:2019-06-28-unicode_normalization_tool [2019/11/10 03:27] – escaping, table cols reordered Róbert Toth | blog:odborny:2019-06-28-unicode_normalization_tool [2025/01/20 20:02] (aktuálne) – [Source Text] Róbert Toth | ||
|---|---|---|---|
| Riadok 1: | Riadok 1: | ||
| - | ====== Unicode Normalization Tool ====== | + | ====== Unicode Normalization |
| Tool to detect and convert between different [[https:// | Tool to detect and convert between different [[https:// | ||
| - | ===== Source | + | ===== Source |
| - | + | ||
| - | < | + | |
| + | < | ||
| <script type=" | <script type=" | ||
| - | (function(){ | + | |
| + | function sleep(ms) { | ||
| + | return new Promise(resolve => setTimeout(resolve, | ||
| + | } | ||
| + | window.onload = function() { | ||
| + | sleep(1000).then(() => { | ||
| + | document.getElementById(' | ||
| + | }); | ||
| + | sleep(2000).then(() => { | ||
| + | document.getElementById(' | ||
| + | }); | ||
| + | sleep(3000).then(() => { | ||
| + | document.getElementById(' | ||
| + | }); | ||
| "use strict"; | "use strict"; | ||
| Riadok 30: | Riadok 43: | ||
| String.prototype.escapeHtml = escapeHtml; | String.prototype.escapeHtml = escapeHtml; | ||
| } | } | ||
| - | })(); | + | }; |
| function utfRecalculate() { | function utfRecalculate() { | ||
| Riadok 46: | Riadok 59: | ||
| var sourceForm = '< | var sourceForm = '< | ||
| if (allForms == 0) { | if (allForms == 0) { | ||
| - | sourceForm += 'source text was not in Unicode.'; | + | sourceForm += 'not in single |
| } | } | ||
| else if (isNFD && isNFKD && allForms == 2) { | else if (isNFD && isNFKD && allForms == 2) { | ||
| Riadok 68: | Riadok 81: | ||
| var resultText = sourceText.normalize(requiredForm); | var resultText = sourceText.normalize(requiredForm); | ||
| resultEl.value = resultText; | resultEl.value = resultText; | ||
| + | | ||
| + | // continue by doing UTF analysis | ||
| + | utfAnalysis(); | ||
| } | } | ||
| Riadok 82: | Riadok 98: | ||
| function utfAnalysis() { | function utfAnalysis() { | ||
| var unicode = unicode12_1_0; | var unicode = unicode12_1_0; | ||
| - | var sourceText = document.getElementById(' | + | |
| + | | ||
| + | ? document.getElementById(' | ||
| + | : document.getElementById(' | ||
| var resultTextEl = document.getElementById(' | var resultTextEl = document.getElementById(' | ||
| var resultCharEl = document.getElementById(' | var resultCharEl = document.getElementById(' | ||
| Riadok 89: | Riadok 108: | ||
| var analysisChars = ''; | var analysisChars = ''; | ||
| var spottedChars = {}; | var spottedChars = {}; | ||
| + | | ||
| + | // count chars and words | ||
| + | analysisText += '< | ||
| + | analysisText += '< | ||
| + | | ||
| + | analysisText += '< | ||
| | | ||
| let iterator = sourceText[Symbol.iterator](); | let iterator = sourceText[Symbol.iterator](); | ||
| Riadok 96: | Riadok 121: | ||
| var codeHex = code.toString(16).padStart(4, | var codeHex = code.toString(16).padStart(4, | ||
| var name = (unicode[codeHex] || ' | var name = (unicode[codeHex] || ' | ||
| - | analysisText += '< | + | analysisText += '< |
| | | ||
| if (code in spottedChars) { | if (code in spottedChars) { | ||
| Riadok 106: | Riadok 131: | ||
| index++; | index++; | ||
| } | } | ||
| + | | ||
| + | analysisText += '</ | ||
| | | ||
| // display text rundown | // display text rundown | ||
| Riadok 111: | Riadok 138: | ||
| | | ||
| // create character analysis | // create character analysis | ||
| - | analysisChars += '< | + | analysisChars += '< |
| for (var code in spottedChars) { | for (var code in spottedChars) { | ||
| var codeHex = parseInt(code, | var codeHex = parseInt(code, | ||
| Riadok 117: | Riadok 144: | ||
| var char = String.fromCodePoint(code); | var char = String.fromCodePoint(code); | ||
| var name = (unicode[codeHex] || ' | var name = (unicode[codeHex] || ' | ||
| - | | + | analysisChars += '< |
| - | | + | |
| } | } | ||
| | | ||
| Riadok 127: | Riadok 153: | ||
| </ | </ | ||
| - | < | ||
| - | |||
| - | <div id=' | ||
| - | |||
| - | <dl> | ||
| - | < | ||
| - | < | ||
| - | < | ||
| - | < | ||
| - | < | ||
| - | < | ||
| - | < | ||
| - | < | ||
| - | </dl> | ||
| - | |||
| - | <? | ||
| - | |||
| - | |||
| - | ===== Result ===== | ||
| - | |||
| - | < | ||
| - | |||
| - | < | ||
| - | |||
| - | <? | ||
| - | |||
| - | ==== Text rundown ==== | ||
| - | |||
| - | < | ||
| <style type=" | <style type=" | ||
| - | # | + | # |
| + | clear: both; | ||
| + | } | ||
| + | # | ||
| + | margin: 1.5em 0.5em; | ||
| + | float: left; | ||
| + | } | ||
| + | # | ||
| position: relative; | position: relative; | ||
| display: inline-block; | display: inline-block; | ||
| Riadok 169: | Riadok 173: | ||
| padding-bottom: | padding-bottom: | ||
| vertical-align: | vertical-align: | ||
| + | float: left; | ||
| /*cursor: nw-resize; /* nw-resize or text or crosshair */ | /*cursor: nw-resize; /* nw-resize or text or crosshair */ | ||
| } | } | ||
| - | #utfAnalysisText | + | #utfAnalysisText_content |
| box-shadow: -2px -2px 2px rgb(127, 43, 1); | box-shadow: -2px -2px 2px rgb(127, 43, 1); | ||
| } | } | ||
| - | #utfAnalysisText | + | #utfAnalysisText_content |
| box-shadow: 2px 2px 2px rgb(127, 43, 1); | box-shadow: 2px 2px 2px rgb(127, 43, 1); | ||
| } | } | ||
| - | #utfAnalysisText | + | #utfAnalysisText_content > span[data-title$=" |
| + | border-right: | ||
| + | border-top-right-radius: | ||
| + | } | ||
| + | # | ||
| + | clear: left; | ||
| + | } | ||
| + | # | ||
| background-color: | background-color: | ||
| padding-bottom: | padding-bottom: | ||
| Riadok 183: | Riadok 195: | ||
| overflow-x: visible; | overflow-x: visible; | ||
| } | } | ||
| - | #utfAnalysisText | + | #utfAnalysisText_content |
| content: attr(data-title); | content: attr(data-title); | ||
| position: absolute; | position: absolute; | ||
| Riadok 189: | Riadok 201: | ||
| top: 30px; | top: 30px; | ||
| z-index: 99; | z-index: 99; | ||
| - | padding: 5px; | + | padding: |
| min-width: | min-width: | ||
| + | width: max-content; | ||
| + | max-width: 250px; | ||
| background-color: | background-color: | ||
| color: rgb(206, 199, 140); | color: rgb(206, 199, 140); | ||
| Riadok 196: | Riadok 210: | ||
| text-align: left; | text-align: left; | ||
| text-transform: | text-transform: | ||
| + | } | ||
| + | | ||
| + | div.limitHeight { | ||
| + | overflow: auto; | ||
| + | } | ||
| + | div.limitHeight: | ||
| + | max-height: fit-content !important; | ||
| + | } | ||
| + | # | ||
| + | max-height: 3.5em; | ||
| } | } | ||
| </ | </ | ||
| - | <div id='utfAnalysisText' style=' | + | <textarea |
| - | <?php</php> | + | <div id=' |
| + | |||
| + | </ | ||
| + | |||
| + | |||
| + | ===== Normalized Text ===== | ||
| + | |||
| + | < | ||
| + | |||
| + | <div style=" | ||
| + | < | ||
| + | < | ||
| + | < | ||
| + | < | ||
| + | < | ||
| + | </ | ||
| + | </ | ||
| + | <div style=" | ||
| + | < | ||
| + | < | ||
| + | < | ||
| + | < | ||
| + | < | ||
| + | </ | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | |||
| + | </ | ||
| + | |||
| + | ===== Text Analysis ===== | ||
| + | |||
| + | < | ||
| + | |||
| + | <div style=" | ||
| + | < | ||
| + | < | ||
| + | < | ||
| + | < | ||
| + | < | ||
| + | </ | ||
| + | </ | ||
| + | |||
| + | </ | ||
| + | |||
| + | ==== Text Rundown ==== | ||
| + | |||
| + | < | ||
| + | <div id=' | ||
| + | </HTML> | ||
| - | ==== Analysis | + | ==== Statistics |
| - | <php>?> | + | <HTML> |
| <table id=' | <table id=' | ||
| - | <?php</php> | + | </HTML> |
| Riadok 218: | Riadok 291: | ||
| ~~socialite~~ | ~~socialite~~ | ||
| - | {{tag> | + | {{tag> |
| Riadok 225: | Riadok 298: | ||
| ~~DISQUS~~ | ~~DISQUS~~ | ||
| - | <php>?> | + | <HTML> |
| <script type=" | <script type=" | ||
| Riadok 33076: | Riadok 33149: | ||
| </ | </ | ||
| - | <?php</php> | + | </HTML> |
blog/odborny/2019-06-28-unicode_normalization_tool.1573352859.txt.gz · Posledná úprava: 2019/11/10 03:27 od Róbert Toth
