/**
* @fileoverview
* An EncodeSet represents a set of characters that should be percent-encoded.
*
* Different characters need to be encoded in different parts of a URL.
* For example, a literal ? question mark in a URL’s path would indicate the
* start of the query string. A question mark meant to be part of the path
* therefore needs to be percent-encoded.
* In the query string however, a question mark does not have any special
* meaning and does not need to be percent-encoded.
*
* A few sets are defined in this module.
* Use the {@link EncodeSet} class to define different ones.
*
* @see {@link https://url.spec.whatwg.org/#simple-encode-set}
*/
/**
* A character (String), or character code (Number).
*
* @typedef {String|Number} Char
*/
/**
* A Set or Array of {@link Char}(s).
*
* @typedef {Set.<Char>|Array.<Char>} CharSet
*/
const
    // characters passed to the EncodeSet constructor to build the query encode set
    QUERY_ENCODE_CHARS = [
        ' ', '"', '#', '\'', '<', '>'
    ],

    // characters the fragment encode set adds on top of the C0 control encode set
    FRAGMENT_EXTEND_CHARS = [
        ' ', '"', '<', '>', '`'
    ],

    // characters the path encode set adds on top of the fragment encode set
    PATH_EXTEND_CHARS = [
        '#', '?', '{', '}'
    ],

    // characters the userinfo encode set adds on top of the path encode set
    USERINFO_EXTEND_CHARS = [
        '/', ':', ';', '=', '@',
        '[', '\\', ']', '^', '|'
    ];
/**
 * Returns a number representing the UTF-16 code unit value of the character.
 *
 * Strings yield the code unit of their first character. Any other value is
 * converted to a number and truncated towards zero. Whatever does not end up
 * within [0, 0xFFFF] (empty string, NaN, negative or oversized numbers)
 * falls back to 0.
 *
 * @private
 * @param {Char} char Character or character code
 * @returns {Number} Character code in the range [0, 0xFFFF]
 */
function charCode (char) {
    const code = (typeof char === 'string') ?
        // get char code from string (NaN for an empty string)
        char.charCodeAt(0) :
        // or, drop the fractional part of a numeric char code.
        // Math.trunc is used instead of a double Bitwise NOT because `~~`
        // coerces to a signed 32-bit integer, which wraps huge values back
        // into the valid range (e.g. 0x100000041 would become 0x41).
        // `|| 0` normalizes NaN and -0 to 0.
        (Math.trunc(Number(char)) || 0);

    // ensure UTF-16 range [0, 0xFFFF]
    return (code >= 0 && code <= 0xFFFF) ? code : 0;
}
/**
 * Adds the given characters to an EncodeSet.
 *
 * A Uint8Array is not treated as a list of characters: it is read as the
 * lookup table of an existing encode set, and every index flagged in it is
 * flagged in the target as well. Any other value exposing a `forEach` method
 * is walked and each item is handed to `encodeSet.add`. Everything else is
 * ignored.
 *
 * @note Mutates the input EncodeSet.
 *
 * @private
 * @param {EncodeSet} encodeSet Instance of EncodeSet
 * @param {CharSet} chars Character set to extend
 * @returns {EncodeSet} Given EncodeSet
 */
function extendEncodeSet (encodeSet, chars) {
    if (chars instanceof Uint8Array) {
        const table = encodeSet._set;

        // walk the target's own (fixed size) table and copy over missing flags
        for (let index = 0; index < table.length; index++) {
            if (chars[index] && !table[index]) {
                table[index] = 1;
            }
        }

        return encodeSet;
    }

    // only values that can be iterated with forEach contribute characters
    if (chars && typeof chars.forEach === 'function') {
        chars.forEach((char) => {
            encodeSet.add(char);
        });
    }

    return encodeSet;
}
/**
 * A set of characters / bytes that should be percent-encoded.
 *
 * Membership is tracked for the ASCII range only. Every instance starts out
 * with the C0 control codes and U+007F, and char codes outside of the ASCII
 * range are always reported as members by {@link EncodeSet#has}.
 */
class EncodeSet {
    /**
     * @param {CharSet} chars Character set to encode
     */
    constructor (chars) {
        /**
         * Lookup table indexed by char code, one slot per ASCII character
         * (128 slots, range [0, 0x7F]).
         *
         * where,
         *      1 -> encode
         *      0 -> don't encode
         *
         * @private
         * @type {Uint8Array}
         */
        this._set = new Uint8Array(0x80);

        // C0 control codes [0x00, 0x1F] are part of every set ...
        for (let code = 0; code < 0x20; code++) {
            this._set[code] = 1;
        }

        // ... and so is 0x7F
        this._set[0x7F] = 1;

        /**
         * Tells whether this EncodeSet has been sealed.
         *
         * @private
         * @type {Boolean}
         */
        this._sealed = false;

        // finally, flag the characters supplied by the caller
        extendEncodeSet(this, chars);
    }

    /**
     * Adds a character to the EncodeSet.
     *
     * Nothing happens when the set is sealed or when the character is not
     * in the ASCII range.
     *
     * @example
     * var xyzEncodeSet = new EncodeSet(['x', 'y', 'z'])
     *
     * xyzEncodeSet
     *  .add('X')
     *  .add(89) // Y
     *  .add(0x5a) // Z
     *
     * @param {Char} char Character or character code
     * @returns {EncodeSet} Current EncodeSet
     */
    add (char) {
        // a sealed set silently ignores additions
        if (!this._sealed) {
            const code = charCode(char);

            // the lookup table only covers the ASCII range
            if (code < 0x80) {
                this._set[code] = 1;
            }
        }

        // chaining
        return this;
    }

    /**
     * Tells whether the character with the given char code gets encoded by
     * this EncodeSet.
     *
     * @note C0 control codes in the range U+0000 to U+001F and U+007F are
     * always encoded.
     * Refer: https://infra.spec.whatwg.org/#c0-control
     *
     * @example
     * var tildeEncodeSet = new EncodeSet(['~'])
     *
     * // returns true
     * tildeEncodeSet.has('~'.charCodeAt(0))
     *
     * // returns false
     * tildeEncodeSet.has(65) // A
     *
     * // returns true
     * tildeEncodeSet.has(31) // \u001f (control character)
     *
     * @param {Number} code Character code
     * @returns {Boolean} Returns true if the character with the specified
     * char code exists in the EncodeSet; otherwise false
     */
    has (code) {
        // anything outside of the ASCII range, (-∞, 0) OR (127, ∞), is
        // encoded; within the range the lookup table decides
        return code < 0 || code > 0x7F || Boolean(this._set[code]);
    }

    /**
     * Creates a copy of the current EncodeSet.
     *
     * The copy is built through the constructor, so it starts out unsealed
     * even when the current set is sealed.
     *
     * @example
     * var set1 = new EncodeSet(['<', '>'])
     * var set1Copy = set1.clone().add('=')
     *
     * @returns {EncodeSet} New EncodeSet instance
     */
    clone () {
        const copy = new EncodeSet(this._set);

        return copy;
    }

    /**
     * Seals the current EncodeSet so that no more characters can be added.
     *
     * @example
     * var set = new EncodeSet()
     *
     * set.add(95)
     * set.has(95) // returns true
     *
     * set.seal()
     * set.add(100)
     * set.has(100) // returns false
     *
     * @returns {EncodeSet} Current EncodeSet
     */
    seal () {
        // this flag is what EncodeSet~add checks before mutating the set
        this._sealed = true;

        try {
            // @note Array buffer views with elements cannot be frozen, so
            // `Object.seal` is attempted on the lookup table instead. This is
            // best effort only: the `_sealed` flag above already stops `add`.
            Object.seal(this._set);
        }
        catch (_) {
            // deliberately ignored, see the note above
        }

        return this;
    }

    /**
     * Creates a new EncodeSet holding everything in the input EncodeSet plus
     * the additional characters. The input EncodeSet is left untouched.
     *
     * @example
     * var fooEncodeSet = new EncodeSet(['f', 'o'])
     * var foobarEncodeSet = EncodeSet.extend(fooEncodeSet, new Set(['b', 'a', 'r']))
     *
     * @param {EncodeSet} encodeSet Instance of EncodeSet
     * @param {CharSet} chars Character set to encode
     * @returns {EncodeSet} Copy of given `encodeSet` with extended `chars`
     * @throws {TypeError} Argument `encodeSet` must be of type {@link EncodeSet}
     */
    static extend (encodeSet, chars) {
        if (EncodeSet.isEncodeSet(encodeSet)) {
            // work on a clone so that the given encodeSet is not mutated
            return extendEncodeSet(encodeSet.clone(), chars);
        }

        throw new TypeError('Argument `encodeSet` must be EncodeSet');
    }

    /**
     * Determines whether the input value is an EncodeSet or not.
     *
     * @example
     * // returns true
     * EncodeSet.isEncodeSet(new EncodeSet([40, 41]))
     *
     * // returns false
     * EncodeSet.isEncodeSet(new Set([28, 5]))
     *
     * @param {*} value The value to be tested
     * @returns {Boolean} true if the given value is an EncodeSet; otherwise, false
     */
    static isEncodeSet (value) {
        // `instanceof` already yields false for null, undefined and primitives
        return value instanceof EncodeSet;
    }
}
const // eslint-disable-line one-var

    /**
     * C0 control percent-encode set: the C0 controls and all code points
     * greater than U+007E (~). This is what an EncodeSet contains when no
     * characters are supplied.
     *
     * @const
     * @type {EncodeSet}
     * @see {@link https://url.spec.whatwg.org/#c0-control-percent-encode-set}
     */
    C0_CONTROL_ENCODE_SET = new EncodeSet().seal(),

    /**
     * Fragment percent-encode set: the C0 control percent-encode set plus
     * U+0020 SPACE, U+0022 ("), U+003C (<), U+003E (>), and U+0060 (`).
     *
     * @const
     * @type {EncodeSet}
     * @see {@link https://url.spec.whatwg.org/#fragment-percent-encode-set}
     */
    FRAGMENT_ENCODE_SET = EncodeSet
        .extend(C0_CONTROL_ENCODE_SET, FRAGMENT_EXTEND_CHARS)
        .seal(),

    /**
     * Path percent-encode set: the fragment percent-encode set plus
     * U+0023 (#), U+003F (?), U+007B ({), and U+007D (}).
     *
     * @const
     * @type {EncodeSet}
     * @see {@link https://url.spec.whatwg.org/#path-percent-encode-set}
     */
    PATH_ENCODE_SET = EncodeSet
        .extend(FRAGMENT_ENCODE_SET, PATH_EXTEND_CHARS)
        .seal(),

    /**
     * Userinfo percent-encode set: the path percent-encode set plus
     * U+002F (/), U+003A (:), U+003B (;), U+003D (=), U+0040 (@), U+005B ([),
     * U+005C (\), U+005D (]), U+005E (^), and U+007C (|).
     *
     * @const
     * @type {EncodeSet}
     * @see {@link https://url.spec.whatwg.org/#userinfo-percent-encode-set}
     */
    USERINFO_ENCODE_SET = EncodeSet
        .extend(PATH_ENCODE_SET, USERINFO_EXTEND_CHARS)
        .seal(),

    /**
     * Query percent-encode set: the C0 control percent-encode set plus
     * U+0020 SPACE, U+0022 ("), U+0023 (#), U+0027 ('), U+003C (<), and
     * U+003E (>).
     *
     * @const
     * @type {EncodeSet}
     * @see {@link https://url.spec.whatwg.org/#query-state}
     */
    QUERY_ENCODE_SET = EncodeSet
        .extend(C0_CONTROL_ENCODE_SET, QUERY_ENCODE_CHARS)
        .seal();
module.exports = {
// EncodeSet class
EncodeSet,
// Constants
PATH_ENCODE_SET,
QUERY_ENCODE_SET,
FRAGMENT_ENCODE_SET,
USERINFO_ENCODE_SET,
C0_CONTROL_ENCODE_SET
};