Tu môžete vidieť rozdiely medzi vybranou verziou a aktuálnou verziou danej stránky.
Obojstranná predošlá revízia Predchádzajúca revízia Nasledujúca revízia | Predchádzajúca revízia | ||
blog:odborny:2019-06-28-unicode_normalization_tool [2019/11/10 03:42] Róbert Toth renamed |
blog:odborny:2019-06-28-unicode_normalization_tool [2022/10/24 11:12] Róbert Toth added possibility to select source of text analysis |
||
---|---|---|---|
Riadok 4: | Riadok 4: | ||
- | ===== Source | + | ===== Source |
- | <php>?> | + | <html> |
<script type=" | <script type=" | ||
Riadok 46: | Riadok 46: | ||
var sourceForm = '< | var sourceForm = '< | ||
if (allForms == 0) { | if (allForms == 0) { | ||
- | sourceForm += 'source text was not in Unicode.'; | + | sourceForm += 'not in single |
} | } | ||
else if (isNFD && isNFKD && allForms == 2) { | else if (isNFD && isNFKD && allForms == 2) { | ||
Riadok 68: | Riadok 68: | ||
var resultText = sourceText.normalize(requiredForm); | var resultText = sourceText.normalize(requiredForm); | ||
resultEl.value = resultText; | resultEl.value = resultText; | ||
+ | | ||
+ | // continue by doing UTF analysis | ||
+ | utfAnalysis(); | ||
} | } | ||
Riadok 82: | Riadok 85: | ||
function utfAnalysis() { | function utfAnalysis() { | ||
var unicode = unicode12_1_0; | var unicode = unicode12_1_0; | ||
- | var sourceText = document.getElementById(' | + | |
+ | | ||
+ | ? document.getElementById(' | ||
+ | : document.getElementById(' | ||
var resultTextEl = document.getElementById(' | var resultTextEl = document.getElementById(' | ||
var resultCharEl = document.getElementById(' | var resultCharEl = document.getElementById(' | ||
Riadok 89: | Riadok 95: | ||
var analysisChars = ''; | var analysisChars = ''; | ||
var spottedChars = {}; | var spottedChars = {}; | ||
+ | | ||
+ | // count chars and words | ||
+ | analysisText += '< | ||
+ | analysisText += '< | ||
| | ||
let iterator = sourceText[Symbol.iterator](); | let iterator = sourceText[Symbol.iterator](); | ||
Riadok 96: | Riadok 106: | ||
var codeHex = code.toString(16).padStart(4, | var codeHex = code.toString(16).padStart(4, | ||
var name = (unicode[codeHex] || ' | var name = (unicode[codeHex] || ' | ||
- | analysisText += '< | + | analysisText += '< |
| | ||
if (code in spottedChars) { | if (code in spottedChars) { | ||
Riadok 111: | Riadok 121: | ||
| | ||
// create character analysis | // create character analysis | ||
- | analysisChars += '< | + | analysisChars += '< |
for (var code in spottedChars) { | for (var code in spottedChars) { | ||
var codeHex = parseInt(code, | var codeHex = parseInt(code, | ||
Riadok 117: | Riadok 127: | ||
var char = String.fromCodePoint(code); | var char = String.fromCodePoint(code); | ||
var name = (unicode[codeHex] || ' | var name = (unicode[codeHex] || ' | ||
- | analysisChars += '< | + | analysisChars += '< |
} | } | ||
| | ||
Riadok 126: | Riadok 136: | ||
</ | </ | ||
- | < | + | < |
- | <div id=' | + | <div id=' |
- | < | + | </html> |
- | < | + | |
- | < | + | |
- | < | + | |
- | < | + | |
- | < | + | |
- | < | + | |
- | < | + | |
- | < | + | |
- | </dl> | + | |
- | <? | ||
+ | ===== Normalized Text ===== | ||
- | ===== Result ===== | + | < |
- | <php>?> | + | <div style=" |
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | <div style=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </div> | ||
- | < | + | < |
- | <?php</php> | + | </html> |
- | ==== Text rundown | + | ===== Text Analysis ===== |
- | <php>?> | + | <html> |
+ | |||
+ | <div style=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </div> | ||
<style type=" | <style type=" | ||
Riadok 188: | Riadok 213: | ||
top: 30px; | top: 30px; | ||
z-index: 99; | z-index: 99; | ||
- | padding: 5px; | + | padding: |
min-width: | min-width: | ||
+ | width: max-content; | ||
+ | max-width: 250px; | ||
background-color: | background-color: | ||
color: rgb(206, 199, 140); | color: rgb(206, 199, 140); | ||
Riadok 195: | Riadok 222: | ||
text-align: left; | text-align: left; | ||
text-transform: | text-transform: | ||
+ | } | ||
+ | | ||
+ | div.limitHeight { | ||
+ | overflow: auto; | ||
+ | } | ||
+ | div.limitHeight: | ||
+ | max-height: fit-content !important; | ||
+ | } | ||
+ | # | ||
+ | max-height: 3.5em; | ||
} | } | ||
</ | </ | ||
- | <div id=' | + | </html> |
- | <?php</php> | + | ==== Text Rundown ==== |
+ | |||
+ | <html> | ||
+ | <div id=' | ||
+ | </html> | ||
- | ==== Analysis | + | ==== Statistics |
- | <php>?> | + | <html> |
<table id=' | <table id=' | ||
- | <?php</php> | + | </html> |
Riadok 217: | Riadok 258: | ||
~~socialite~~ | ~~socialite~~ | ||
- | {{tag> | + | {{tag> |
Riadok 224: | Riadok 265: | ||
~~DISQUS~~ | ~~DISQUS~~ | ||
- | <php>?> | + | <html> |
<script type=" | <script type=" | ||
Riadok 33075: | Riadok 33116: | ||
</ | </ | ||
- | <?php</php> | + | </html> |