var util = require('../util'),
_ = util.lodash,
fileType = require('file-type'),
mimeType = require('mime-types'),
mimeFormat = require('mime-format'),
/**
* Empty string; used as the fallback file extension when none can be determined.
*
* @private
* @const
* @type {String}
*/
E = '',
/**
* Separator placed between the default file name and its extension.
*
* @private
* @const
* @type {String}
*/
DOT = '.',
/**
* Replacement character substituted for non-latin characters by the `iso-8859-1` decoder.
*
* @private
* @const
* @type {String}
*/
QUESTION_MARK = '?',
/**
* Double quote character; a file name starting with it is treated as a quoted-string.
*
* @private
* @const
* @type {String}
*/
DOUBLE_QUOTES = '"',
/**
* Replacement pattern referring to the first capture group; used to unescape quoted-pairs.
*
* @private
* @const
* @type {String}
*/
TOKEN_$1 = '$1',
/**
* Encoding name passed to `Buffer.from` when turning the percent-decoded string into bytes.
*
* @private
* @const
* @type {String}
*/
BINARY = 'binary',
/**
* Encoding name used to read the decoded bytes back as a string; also the default charset
* reported when the content type does not specify one.
*
* @private
* @const
* @type {String}
*/
CHARSET_UTF8 = 'utf8',
/**
* Content type assumed when the header is missing and detection yields nothing.
*
* @private
* @const
* @type {String}
*/
CONTENT_TYPE_TEXT_PLAIN = 'text/plain',
/**
* Enum for all the Content Headers read from the response.
*
* @private
* @const
* @enum {String} HEADERS
*/
HEADERS = {
CONTENT_TYPE: 'Content-Type',
CONTENT_DISPOSITION: 'Content-Disposition'
},
/**
* Base file name used when the content-disposition header does not provide one.
*
* @private
* @const
* @type {String}
*/
DEFAULT_RESPONSE_FILENAME = 'response',
/**
* @private
* @type {Boolean}
*/
supportsBuffer = (typeof Buffer !== undefined) && _.isFunction(Buffer.byteLength),
/**
* Regexes for extracting and decoding the filename from content-disposition header
*
* @private
* @type {Object}
*/
regexes = {
/**
* RegExp for extracting filename from content-disposition header.
* Capture group 1 holds the raw value: either a quoted-string (quotes included) or a token.
*
* RFC 2616 grammar
* parameter = token "=" ( token | quoted-string )
* token = 1*<any CHAR except CTLs or separators>
* separators = "(" | ")" | "<" | ">" | "@"
* | "," | ";" | ":" | "\" | <">
* | "/" | "[" | "]" | "?" | "="
* | "{" | "}" | SP | HT
* quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
* qdtext = <any TEXT except <">>
* quoted-pair = "\" CHAR
* CHAR = <any US-ASCII character (octets 0 - 127)>
* TEXT = <any OCTET except CTLs, but including LWS>
* LWS = [CRLF] 1*( SP | HT )
* CRLF = CR LF
* CR = <US-ASCII CR, carriage return (13)>
* LF = <US-ASCII LF, linefeed (10)>
* SP = <US-ASCII SP, space (32)>
* HT = <US-ASCII HT, horizontal-tab (9)>
* CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
* OCTET = <any 8-bit sequence of data>
*
* egHeader: inline; filename=testResponse.json
* egHeader: inline; filename="test Response.json"
* Reference: https://github.com/jshttp/content-disposition
*/
// eslint-disable-next-line max-len
fileNameRegex: /;[ \t]*(?:filename)[ \t]*=[ \t]*("(?:[\x20!\x23-\x5b\x5d-\x7e\x80-\xff]|\\[\x20-\x7e])*"|[!#$%&'*+.0-9A-Z^_`a-z|~-]+)[ \t]*/,
/**
* RegExp for extracting filename* from content-disposition header.
* Capture group 1 holds the charset and capture group 2 the (still percent-encoded) value-chars.
*
* NOTE(review): the language segment between the two single quotes is matched with a greedy `.*`,
* which is looser than the Language-Tag production in the grammar below - confirm this is intended.
*
* RFC 5987 grammar
* parameter = reg-parameter / ext-parameter
* ext-parameter = parmname "*" LWSP "=" LWSP ext-value
* parmname = 1*attr-char
* ext-value = charset "'" [ language ] "'" value-chars
*     ; like RFC 2231's <extended-initial-value>
*     ; (see [RFC2231], Section 7)
* charset = "UTF-8" / "ISO-8859-1" / mime-charset
* mime-charset = 1*mime-charsetc
* mime-charsetc = ALPHA / DIGIT
*     / "!" / "#" / "$" / "%" / "&"
*     / "+" / "-" / "^" / "_" / "`"
*     / "{" / "}" / "~"
*     ; as <mime-charset> in Section 2.3 of [RFC2978]
*     ; except that the single quote is not included
*     ; SHOULD be registered in the IANA charset registry
* language = <Language-Tag, defined in [RFC5646], Section 2.1>
* value-chars = *( pct-encoded / attr-char )
* pct-encoded = "%" HEXDIG HEXDIG
*     ; see [RFC3986], Section 2.1
* attr-char = ALPHA / DIGIT
*     / "!" / "#" / "$" / "&" / "+" / "-" / "."
*     / "^" / "_" / "`" / "|" / "~"
*     ; token except ( "*" / "'" / "%" )
*
* egHeader: attachment;filename*=utf-8''%E4%BD%A0%E5%A5%BD.txt
* Reference: https://github.com/jshttp/content-disposition
*/
// eslint-disable-next-line max-len
encodedFileNameRegex: /;[ \t]*(?:filename\*)[ \t]*=[ \t]*([A-Za-z0-9!#$%&+\-^_`{}~]+)'.*'((?:%[0-9A-Fa-f]{2}|[A-Za-z0-9!#$&+.^_`|~-])+)[ \t]*/,
/**
* RegExp to match quoted-pair in RFC 2616; capture group 1 is the escaped character.
*
* quoted-pair = "\" CHAR
* CHAR = <any US-ASCII character (octets 0 - 127)>
*/
quotedPairRegex: /\\([ -~])/g,
/**
* Regex to match every percent-encoded octet ("%" followed by two hex digits) inside an
* encoded string; capture group 1 is the two hex digits.
*/
hexCharMatchRegex: /%([0-9A-Fa-f]{2})/g,
/**
* Regex to match non-latin characters, i.e. anything outside \x20-\x7e and \xa0-\xff
*/
nonLatinCharMatchRegex: /[^\x20-\x7e\xa0-\xff]/g
},
/**
* Decodes the hexcode to charCode
*
* @private
* @param {String} str - The matched string part of a hexadecimal number
* @param {String} hex - The hexadecimal string which needs to be converted to charCode
* @returns {String} - String with decoded hexcode values
*/
decodeHexcode = function (str, hex) {
return String.fromCharCode(parseInt(hex, 16));
},
/**
* HashMap for decoding string with supported characterSets
* iso-8859-1
* utf-8
*
* @private
* @type {Object}
*/
characterDecoders = {
/**
* Replaces non-latin characters with '?'
*
* @private
* @param {String} val - Input encoded string
* @returns {String} - String with latin characters
*/
'iso-8859-1' (val) {
return val.replace(regexes.nonLatinCharMatchRegex, QUESTION_MARK);
},
/**
* Decodes the given string with utf-8 character set
*
* @private
* @param {?String} encodedString - Input encoded string
* @returns {?String} - String with decoded character with utf-8
*/
'utf-8' (encodedString) {
/* istanbul ignore if */
if (!supportsBuffer) {
return;
}
return Buffer.from(encodedString, BINARY).toString(CHARSET_UTF8);
}
},
/**
* Decodes the given filename with given charset
* The supported character sets are
* iso-8859-1
* utf-8
*
* @private
* @param {String} encodedFileName - Input encoded file name
* @param {String} charset - The character set to be used while decoding
* @returns {String} - Returns the decoded filename
*/
decodeFileName = function (encodedFileName, charset) {
/* istanbul ignore if */
if (!encodedFileName) {
return;
}
if (!characterDecoders[charset]) {
return;
}
// decodes the hexadecimal numbers to charCode in encodedFileName and then decodes with given charset
return characterDecoders[charset](encodedFileName.replace(regexes.hexCharMatchRegex, decodeHexcode));
},
/**
* Takes the content-type header value and performs the mime sniffing with known mime types.
* If content-type header is not present, detects the mime type from the response stream or response body
* If content-type is not provided and not able to detect, then text/plain is taken as default
*
* @private
* @param {?String} contentType - The value of content type header
* @param {Stream|String} response - The response stream or body, for which content-info should be determined
* @returns {Object} - mime information from response headers
*/
getMimeInfo = function (contentType, response) {
var normalized,
detected,
detectedExtension;
if (!contentType) {
detected = fileType(response);
detected && (contentType = detected.mime) && (detectedExtension = detected.ext);
}
// if contentType is not detected set text/plain as default
if (!contentType) {
contentType = CONTENT_TYPE_TEXT_PLAIN;
}
normalized = mimeFormat.lookup(contentType);
return {
contentType: normalized.source,
mimeType: normalized.type, // sanitized mime type base
mimeFormat: normalized.format, // format specific to the type returned
charset: normalized.charset || CHARSET_UTF8,
extension: detectedExtension || mimeType.extension(normalized.source) || E
};
},
/**
 * Extracts the file name from a Content-Disposition header value.
 * The extended `filename*` parameter is preferred; the plain `filename` parameter is used
 * when the extended one is absent or cannot be decoded.
 *
 * @private
 * @param {?String} dispositionHeader - Content-disposition Header from the response
 * @returns {?String} - File name from the header; undefined for an empty header and null when
 * the header carries no usable filename parameter
 */
getFileNameFromDispositionHeader = function (dispositionHeader) {
    var extendedMatch,
        plainMatch,
        resolvedName;

    if (!dispositionHeader) {
        return;
    }

    // first preference: the filename* parameter, group 1 is the charset and group 2 the encoded name
    extendedMatch = regexes.encodedFileNameRegex.exec(dispositionHeader);
    if (extendedMatch) {
        resolvedName = decodeFileName(extendedMatch[2], extendedMatch[1]);
    }

    if (resolvedName) {
        return resolvedName;
    }

    // filename* is not present or unparseable, so look for the plain filename parameter
    plainMatch = regexes.fileNameRegex.exec(dispositionHeader);
    if (!plainMatch) {
        // hand back the (null) exec result as is
        return plainMatch;
    }

    resolvedName = plainMatch[1];

    // a value wrapped in double quotes may contain escaped characters:
    // drop the surrounding quotes and unescape the quoted-pairs
    if (resolvedName && resolvedName.charAt(0) === DOUBLE_QUOTES) {
        resolvedName = resolvedName
            .slice(1, -1)
            .replace(regexes.quotedPairRegex, TOKEN_$1);
    }

    return resolvedName;
};
module.exports = {
/**
* Extracts content related information from response.
* Includes response mime information, character set and file name.
*
* @private
* @param {Response} response - response instance
* @returns {Response.ResponseContentInfo} - Return contentInfo of the response
*/
contentInfo (response) {
var contentType = response.headers.get(HEADERS.CONTENT_TYPE),
contentDisposition = response.headers.get(HEADERS.CONTENT_DISPOSITION),
mimeInfo = getMimeInfo(contentType, response.stream || response.body),
fileName = getFileNameFromDispositionHeader(contentDisposition),
fileExtension = mimeInfo.extension,
/**
* @typedef Response.ResponseContentInfo
*
* @property {String} mimeType sanitized mime type
* @property {String} mimeFormat format for the identified mime type
* @property {String} charset the normalized character set
* @property {String} fileExtension extension identified from the mime type
* @property {String} fileName file name extracted from disposition header
* @property {String} contentType sanitized content-type extracted from header
*/
contentInfo = {};
// if file name is not present in the content disposition headers, use a default file name
if (!fileName) {
fileName = DEFAULT_RESPONSE_FILENAME;
// add extension to default if present
fileExtension && (fileName += (DOT + fileExtension));
}
// create a compacted list of content info from mime info and file name
mimeInfo.contentType && (contentInfo.contentType = mimeInfo.contentType);
mimeInfo.mimeType && (contentInfo.mimeType = mimeInfo.mimeType);
mimeInfo.mimeFormat && (contentInfo.mimeFormat = mimeInfo.mimeFormat);
mimeInfo.charset && (contentInfo.charset = mimeInfo.charset);
fileExtension && (contentInfo.fileExtension = fileExtension);
fileName && (contentInfo.fileName = fileName);
return contentInfo;
},
// regexes are extracted for vulnerability tests
regexes
};