fix: fix more html entities in card titles (#1628)

This commit is contained in:
Nolan Lawson 2019-11-09 17:25:39 -05:00 committed by GitHub
parent ea382acf1d
commit c5a005186c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 84 additions and 34 deletions

View file

@ -1,43 +1,50 @@
// Originally via https://github.com/jonschlinkert/unescape/blob/98d1e52/index.js
//
import { thunk } from '../../_utils/thunk'
// via https://www.htmlhelp.com/reference/html40/entities/special.html
// plus some more known entities like pound, nbsp, etc
// Lookup table from the literal text of an HTML entity to the single character
// it stands for. Entity names are case-sensitive ('&dagger;' vs '&Dagger;').
// Every non-ASCII or invisible value is written as a \u escape so it cannot be
// silently lost or altered by editors, copy/paste or re-encoding.
const chars = {
  // markup-significant characters, by name and by legacy numeric reference
  '&quot;': '"',
  '&#34;': '"',
  '&apos;': '\'',
  '&#39;': '\'',
  '&amp;': '&',
  '&#38;': '&',
  '&gt;': '>',
  '&#62;': '>',
  '&lt;': '<',
  '&#60;': '<',
  // punctuation, symbols and letters
  '&bdquo;': '\u201E', // double low-9 quotation mark
  '&cent;': '\u00A2', // cent sign
  '&#162;': '\u00A2',
  '&circ;': '\u02C6', // modifier letter circumflex accent
  '&copy;': '\u00A9', // copyright sign
  '&#169;': '\u00A9',
  '&dagger;': '\u2020', // dagger
  '&Dagger;': '\u2021', // double dagger
  '&emsp;': '\u2003', // em space
  '&ensp;': '\u2002', // en space
  '&euro;': '\u20AC', // euro sign
  '&#8364;': '\u20AC',
  '&ldquo;': '\u201C', // left double quotation mark
  '&lrm;': '\u200E', // left-to-right mark (invisible)
  '&lsaquo;': '\u2039', // single left-pointing angle quotation mark
  '&lsquo;': '\u2018', // left single quotation mark
  '&mdash;': '\u2014', // em dash
  '&nbsp;': '\u00A0', // no-break space
  '&ndash;': '\u2013', // en dash
  '&oelig;': '\u0153', // latin small ligature oe
  '&OElig;': '\u0152', // latin capital ligature OE
  '&permil;': '\u2030', // per mille sign
  '&pound;': '\u00A3', // pound sign
  '&#163;': '\u00A3',
  '&rdquo;': '\u201D', // right double quotation mark
  '&reg;': '\u00AE', // registered sign
  '&#174;': '\u00AE',
  '&rsaquo;': '\u203A', // single right-pointing angle quotation mark
  '&rsquo;': '\u2019', // right single quotation mark
  '&sbquo;': '\u201A', // single low-9 quotation mark
  '&scaron;': '\u0161', // latin small letter s with caron
  '&Scaron;': '\u0160', // latin capital letter S with caron
  '&thinsp;': '\u2009', // thin space
  '&tilde;': '\u02DC', // small tilde
  '&yen;': '\u00A5', // yen sign
  '&#165;': '\u00A5',
  '&Yuml;': '\u0178' // latin capital letter Y with diaeresis
}
// Module-level slot for a cached RegExp.
// NOTE(review): this looks like the pre-change caching mechanism that getRegex
// below supersedes — confirm whether it is still needed.
let regex
// Defers building the matcher: the toRegex(chars) call is wrapped in a function
// and handed to thunk (imported from ../../_utils/thunk; presumably it runs the
// function once on first call and caches the result — verify against that module).
const getRegex = thunk(() => toRegex(chars))
/**
 * Convert HTML entities to HTML characters.
 *
 * Decodes every entity listed in `chars`, plus decimal (`&#8217;`) and
 * hexadecimal (`&#x2019;`) numeric character references. Anything else,
 * including references above U+10FFFF, is left in the output untouched.
 *
 * @param {String} `str` String with HTML entities to un-escape.
 * @return {String}
 */
function unescape (str) {
  return str.replace(getRegex(), replace)
}

// Largest valid Unicode code point; String.fromCodePoint throws a RangeError
// for anything above it.
const MAX_CODE_POINT = 0x10FFFF

/**
 * Replacement callback used by `unescape` for each matched entity.
 *
 * @param {String} `match` The full entity text, e.g. `&amp;` or `&#x1F61C;`.
 * @return {String} The decoded character, or `match` itself if it cannot be decoded.
 */
function replace (match) {
  const knownValue = chars[match]
  // type check rather than truthiness, so a table hit is never mistaken for a miss
  if (typeof knownValue === 'string') {
    return knownValue
  }
  const isHex = match.startsWith('&#x')
  // drop the leading "&#" / "&#x" and the trailing ";"
  const digits = match.slice(isHex ? 3 : 2, -1)
  const codePoint = parseInt(digits, isHex ? 16 : 10)
  if (Number.isNaN(codePoint) || codePoint > MAX_CODE_POINT) {
    return match // bad code point, bail out
  }
  return String.fromCodePoint(codePoint)
}

/**
 * Build the global matcher for every entity `unescape` understands.
 *
 * @param {Object} `chars` Map whose keys are literal entity strings.
 * @return {RegExp} Alternation of all keys plus the numeric reference patterns.
 */
function toRegex (chars) {
  const patterns = Object.keys(chars).concat([
    // up to 7 digits: the highest code point, U+10FFFF, is 1114111 in decimal
    '&#[0-9]{1,7};', // decimal code points
    '&#x[a-fA-F0-9]{1,6};' // hex code points
  ])
  return new RegExp('(' + patterns.join('|') + ')', 'g')
}
/**

View file

@ -0,0 +1,23 @@
/* global describe, it */
import assert from 'assert'
import { unescape } from '../../src/routes/_thirdparty/unescape/unescape'
// Specs for unescape(): each assertion pairs an escaped input with the exact
// text expected back. Non-ASCII punctuation and whitespace in the expected
// values are written as \u escapes so they cannot be silently lost.
describe('test-unescape.js', () => {
  it('unescapes html correctly', () => {
    // &#8217; is U+2019, the right single quotation mark
    assert.deepStrictEqual(unescape('What I&#8217;ve learned'), 'What I\u2019ve learned')
    assert.deepStrictEqual(unescape('Hello &#34;world&#34;'), 'Hello "world"')
    assert.deepStrictEqual(unescape('That costs 3&pound; or 4&euro;'), 'That costs 3£ or 4€')
    assert.deepStrictEqual(unescape('That costs 3&POUND; or 4&EURO;'), 'That costs 3&POUND; or 4&EURO;') // must be lc
    assert.deepStrictEqual(unescape('Foo &amp; bar &amp; baz'), 'Foo & bar & baz')
    assert.deepStrictEqual(unescape('Winking tongue: &#128540;'), 'Winking tongue: 😜')
    assert.deepStrictEqual(unescape('Winking tongue as hex: &#x1F61C;'), 'Winking tongue as hex: 😜')
    assert.deepStrictEqual(unescape('Winking tongue as hex: &#x1f61c;'), 'Winking tongue as hex: 😜')
    assert.deepStrictEqual(unescape('All&#039;s fair'), 'All\'s fair')
    assert.deepStrictEqual(unescape('All&apos;s fair'), 'All\'s fair')
    // &nbsp; is U+00A0 (no-break space), not an ordinary space
    assert.deepStrictEqual(unescape('foo&nbsp;bar'), 'foo\u00A0bar')
  })
  it('handles fake html code points', () => {
    // 0xFFFFFF is above U+10FFFF, so the reference must come back unchanged
    assert.deepStrictEqual(unescape('Hello &#xFFFFFF;'), 'Hello &#xFFFFFF;')
  })
})