From d5dee316f7f53521e7ece23f10200fe566b9c565 Mon Sep 17 00:00:00 2001 From: Kyle Tsang <6854874+kyletsang@users.noreply.github.com> Date: Sun, 30 Apr 2023 14:33:09 -0700 Subject: [PATCH] Update URL sanitizer to allow more protocols (#38531) Co-authored-by: XhmikosR --- js/src/util/sanitizer.js | 78 +++++++++++++--------------- js/tests/unit/util/sanitizer.spec.js | 74 +++++++++++++++++++++++--- 2 files changed, 102 insertions(+), 50 deletions(-) diff --git a/js/src/util/sanitizer.js b/js/src/util/sanitizer.js index 5a07a67c1a..d2b08082ca 100644 --- a/js/src/util/sanitizer.js +++ b/js/src/util/sanitizer.js @@ -5,47 +5,6 @@ * -------------------------------------------------------------------------- */ -const uriAttributes = new Set([ - 'background', - 'cite', - 'href', - 'itemtype', - 'longdesc', - 'poster', - 'src', - 'xlink:href' -]) - -/** - * A pattern that recognizes a commonly useful subset of URLs that are safe. - * - * Shout-out to Angular https://github.com/angular/angular/blob/12.2.x/packages/core/src/sanitization/url_sanitizer.ts - */ -const SAFE_URL_PATTERN = /^(?:(?:https?|mailto|ftp|tel|file|sms):|[^#&/:?]*(?:[#/?]|$))/i - -/** - * A pattern that matches safe data URLs. Only matches image, video and audio types. - * - * Shout-out to Angular https://github.com/angular/angular/blob/12.2.x/packages/core/src/sanitization/url_sanitizer.ts - */ -const DATA_URL_PATTERN = /^data:(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm)|audio\/(?:mp3|oga|ogg|opus));base64,[\d+/a-z]+=*$/i - -const allowedAttribute = (attribute, allowedAttributeList) => { - const attributeName = attribute.nodeName.toLowerCase() - - if (allowedAttributeList.includes(attributeName)) { - if (uriAttributes.has(attributeName)) { - return Boolean(SAFE_URL_PATTERN.test(attribute.nodeValue) || DATA_URL_PATTERN.test(attribute.nodeValue)) - } - - return true - } - - // Check if a regular expression validates the attribute. - return allowedAttributeList.filter(attributeRegex => attributeRegex instanceof RegExp) - .some(regex => regex.test(attributeName)) -} - // js-docs-start allow-list const ARIA_ATTRIBUTE_PATTERN = /^aria-[\w-]*$/i @@ -84,6 +43,42 @@ export const DefaultAllowlist = { } // js-docs-end allow-list +const uriAttributes = new Set([ + 'background', + 'cite', + 'href', + 'itemtype', + 'longdesc', + 'poster', + 'src', + 'xlink:href' +]) + +/** + * A pattern that recognizes URLs that are safe wrt. XSS in URL navigation + * contexts. + * + * Shout-out to Angular https://github.com/angular/angular/blob/15.2.8/packages/core/src/sanitization/url_sanitizer.ts#L38 + */ +// eslint-disable-next-line unicorn/better-regex +const SAFE_URL_PATTERN = /^(?!javascript:)(?:[a-z0-9+.-]+:|[^&:/?#]*(?:[/?#]|$))/i + +const allowedAttribute = (attribute, allowedAttributeList) => { + const attributeName = attribute.nodeName.toLowerCase() + + if (allowedAttributeList.includes(attributeName)) { + if (uriAttributes.has(attributeName)) { + return Boolean(SAFE_URL_PATTERN.test(attribute.nodeValue)) + } + + return true + } + + // Check if a regular expression validates the attribute. + return allowedAttributeList.filter(attributeRegex => attributeRegex instanceof RegExp) + .some(regex => regex.test(attributeName)) +} + export function sanitizeHtml(unsafeHtml, allowList, sanitizeFunction) { if (!unsafeHtml.length) { return unsafeHtml @@ -102,7 +97,6 @@ export function sanitizeHtml(unsafeHtml, allowList, sanitizeFunction) { if (!Object.keys(allowList).includes(elementName)) { element.remove() - continue } diff --git a/js/tests/unit/util/sanitizer.spec.js b/js/tests/unit/util/sanitizer.spec.js index 55e9b63364..2b21ef2e19 100644 --- a/js/tests/unit/util/sanitizer.spec.js +++ b/js/tests/unit/util/sanitizer.spec.js @@ -10,17 +10,75 @@ describe('Sanitizer', () => { expect(result).toEqual(empty) }) + it('should retain tags with valid URLs', () => { + const validUrls = [ + '', + 'http://abc', + 'HTTP://abc', + 'https://abc', + 'HTTPS://abc', + 'ftp://abc', + 'FTP://abc', + 'mailto:me@example.com', + 'MAILTO:me@example.com', + 'tel:123-123-1234', + 'TEL:123-123-1234', + 'sip:me@example.com', + 'SIP:me@example.com', + '#anchor', + '/page1.md', + 'http://JavaScript/my.js', + '', // Truncated. + 'data:video/webm;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/', + 'data:audio/opus;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/', + 'unknown-scheme:abc' + ] + + for (const url of validUrls) { + const template = [ + '
', + ` Click me`, + ' Some content', + '
' + ].join('') + + const result = sanitizeHtml(template, DefaultAllowlist, null) + + expect(result).toContain(`href="${url}"`) + } + }) + it('should sanitize template by removing tags with XSS', () => { - const template = [ - '
', - ' Click me', - ' Some content', - '
' - ].join('') + const invalidUrls = [ + // eslint-disable-next-line no-script-url + 'javascript:alert(7)', + // eslint-disable-next-line no-script-url + 'javascript:evil()', + // eslint-disable-next-line no-script-url + 'JavaScript:abc', + ' javascript:abc', + ' \n Java\n Script:abc', + 'javascript:', + 'javascript:', + 'j avascript:', + 'javascript:', + 'javascript:', + 'jav ascript:alert();', + 'jav\u0000ascript:alert();' + ] - const result = sanitizeHtml(template, DefaultAllowlist, null) + for (const url of invalidUrls) { + const template = [ + '
', + ` Click me`, + ' Some content', + '
' + ].join('') - expect(result).not.toContain('href="javascript:alert(7)') + const result = sanitizeHtml(template, DefaultAllowlist, null) + + expect(result).not.toContain(`href="${url}"`) + } }) it('should sanitize template and work with multiple regex', () => {