Update Bot
This commit is contained in:
901
node_modules/iconv-lite/encodings/dbcs-codec.js
generated
vendored
901
node_modules/iconv-lite/encodings/dbcs-codec.js
generated
vendored
@@ -1,555 +1,532 @@
|
||||
"use strict";
|
||||
var Buffer = require("safer-buffer").Buffer;
|
||||
"use strict"
|
||||
var Buffer = require("safer-buffer").Buffer
|
||||
|
||||
// Multibyte codec. In this scheme, a character is represented by 1 or more bytes.
|
||||
// Our codec supports UTF-16 surrogates, extensions for GB18030 and unicode sequences.
|
||||
// To save memory and loading time, we read table files only when requested.
|
||||
|
||||
exports._dbcs = DBCSCodec;
|
||||
exports._dbcs = DBCSCodec
|
||||
|
||||
var UNASSIGNED = -1,
|
||||
GB18030_CODE = -2,
|
||||
SEQ_START = -10,
|
||||
NODE_START = -1000,
|
||||
UNASSIGNED_NODE = new Array(0x100),
|
||||
DEF_CHAR = -1;
|
||||
|
||||
for (var i = 0; i < 0x100; i++)
|
||||
UNASSIGNED_NODE[i] = UNASSIGNED;
|
||||
var UNASSIGNED = -1
|
||||
var GB18030_CODE = -2
|
||||
var SEQ_START = -10
|
||||
var NODE_START = -1000
|
||||
var UNASSIGNED_NODE = new Array(0x100)
|
||||
var DEF_CHAR = -1
|
||||
|
||||
for (var i = 0; i < 0x100; i++) { UNASSIGNED_NODE[i] = UNASSIGNED }
|
||||
|
||||
// Class DBCSCodec reads and initializes mapping tables.
|
||||
function DBCSCodec(codecOptions, iconv) {
|
||||
this.encodingName = codecOptions.encodingName;
|
||||
if (!codecOptions)
|
||||
throw new Error("DBCS codec is called without the data.")
|
||||
if (!codecOptions.table)
|
||||
throw new Error("Encoding '" + this.encodingName + "' has no data.");
|
||||
function DBCSCodec (codecOptions, iconv) {
|
||||
this.encodingName = codecOptions.encodingName
|
||||
if (!codecOptions) { throw new Error("DBCS codec is called without the data.") }
|
||||
if (!codecOptions.table) { throw new Error("Encoding '" + this.encodingName + "' has no data.") }
|
||||
|
||||
// Load tables.
|
||||
var mappingTable = codecOptions.table();
|
||||
// Load tables.
|
||||
var mappingTable = codecOptions.table()
|
||||
|
||||
// Decode tables: MBCS -> Unicode.
|
||||
|
||||
// Decode tables: MBCS -> Unicode.
|
||||
// decodeTables is a trie, encoded as an array of arrays of integers. Internal arrays are trie nodes and all have len = 256.
|
||||
// Trie root is decodeTables[0].
|
||||
// Values: >= 0 -> unicode character code. can be > 0xFFFF
|
||||
// == UNASSIGNED -> unknown/unassigned sequence.
|
||||
// == GB18030_CODE -> this is the end of a GB18030 4-byte sequence.
|
||||
// <= NODE_START -> index of the next node in our trie to process next byte.
|
||||
// <= SEQ_START -> index of the start of a character code sequence, in decodeTableSeq.
|
||||
this.decodeTables = []
|
||||
this.decodeTables[0] = UNASSIGNED_NODE.slice(0) // Create root node.
|
||||
|
||||
// decodeTables is a trie, encoded as an array of arrays of integers. Internal arrays are trie nodes and all have len = 256.
|
||||
// Trie root is decodeTables[0].
|
||||
// Values: >= 0 -> unicode character code. can be > 0xFFFF
|
||||
// == UNASSIGNED -> unknown/unassigned sequence.
|
||||
// == GB18030_CODE -> this is the end of a GB18030 4-byte sequence.
|
||||
// <= NODE_START -> index of the next node in our trie to process next byte.
|
||||
// <= SEQ_START -> index of the start of a character code sequence, in decodeTableSeq.
|
||||
this.decodeTables = [];
|
||||
this.decodeTables[0] = UNASSIGNED_NODE.slice(0); // Create root node.
|
||||
// Sometimes a MBCS char corresponds to a sequence of unicode chars. We store them as arrays of integers here.
|
||||
this.decodeTableSeq = []
|
||||
|
||||
// Sometimes a MBCS char corresponds to a sequence of unicode chars. We store them as arrays of integers here.
|
||||
this.decodeTableSeq = [];
|
||||
// Actual mapping tables consist of chunks. Use them to fill up decode tables.
|
||||
for (var i = 0; i < mappingTable.length; i++) { this._addDecodeChunk(mappingTable[i]) }
|
||||
|
||||
// Actual mapping tables consist of chunks. Use them to fill up decode tables.
|
||||
for (var i = 0; i < mappingTable.length; i++)
|
||||
this._addDecodeChunk(mappingTable[i]);
|
||||
// Load & create GB18030 tables when needed.
|
||||
if (typeof codecOptions.gb18030 === "function") {
|
||||
this.gb18030 = codecOptions.gb18030() // Load GB18030 ranges.
|
||||
|
||||
this.defaultCharUnicode = iconv.defaultCharUnicode;
|
||||
// Add GB18030 common decode nodes.
|
||||
var commonThirdByteNodeIdx = this.decodeTables.length
|
||||
this.decodeTables.push(UNASSIGNED_NODE.slice(0))
|
||||
|
||||
|
||||
// Encode tables: Unicode -> DBCS.
|
||||
var commonFourthByteNodeIdx = this.decodeTables.length
|
||||
this.decodeTables.push(UNASSIGNED_NODE.slice(0))
|
||||
|
||||
// `encodeTable` is array mapping from unicode char to encoded char. All its values are integers for performance.
|
||||
// Because it can be sparse, it is represented as array of buckets by 256 chars each. Bucket can be null.
|
||||
// Values: >= 0 -> it is a normal char. Write the value (if <=256 then 1 byte, if <=65536 then 2 bytes, etc.).
|
||||
// == UNASSIGNED -> no conversion found. Output a default char.
|
||||
// <= SEQ_START -> it's an index in encodeTableSeq, see below. The character starts a sequence.
|
||||
this.encodeTable = [];
|
||||
|
||||
// `encodeTableSeq` is used when a sequence of unicode characters is encoded as a single code. We use a tree of
|
||||
// objects where keys correspond to characters in sequence and leafs are the encoded dbcs values. A special DEF_CHAR key
|
||||
// means end of sequence (needed when one sequence is a strict subsequence of another).
|
||||
// Objects are kept separately from encodeTable to increase performance.
|
||||
this.encodeTableSeq = [];
|
||||
|
||||
// Some chars can be decoded, but need not be encoded.
|
||||
var skipEncodeChars = {};
|
||||
if (codecOptions.encodeSkipVals)
|
||||
for (var i = 0; i < codecOptions.encodeSkipVals.length; i++) {
|
||||
var val = codecOptions.encodeSkipVals[i];
|
||||
if (typeof val === 'number')
|
||||
skipEncodeChars[val] = true;
|
||||
else
|
||||
for (var j = val.from; j <= val.to; j++)
|
||||
skipEncodeChars[j] = true;
|
||||
// Fill out the tree
|
||||
var firstByteNode = this.decodeTables[0]
|
||||
for (var i = 0x81; i <= 0xFE; i++) {
|
||||
var secondByteNode = this.decodeTables[NODE_START - firstByteNode[i]]
|
||||
for (var j = 0x30; j <= 0x39; j++) {
|
||||
if (secondByteNode[j] === UNASSIGNED) {
|
||||
secondByteNode[j] = NODE_START - commonThirdByteNodeIdx
|
||||
} else if (secondByteNode[j] > NODE_START) {
|
||||
throw new Error("gb18030 decode tables conflict at byte 2")
|
||||
}
|
||||
|
||||
// Use decode trie to recursively fill out encode tables.
|
||||
this._fillEncodeTable(0, 0, skipEncodeChars);
|
||||
|
||||
// Add more encoding pairs when needed.
|
||||
if (codecOptions.encodeAdd) {
|
||||
for (var uChar in codecOptions.encodeAdd)
|
||||
if (Object.prototype.hasOwnProperty.call(codecOptions.encodeAdd, uChar))
|
||||
this._setEncodeChar(uChar.charCodeAt(0), codecOptions.encodeAdd[uChar]);
|
||||
var thirdByteNode = this.decodeTables[NODE_START - secondByteNode[j]]
|
||||
for (var k = 0x81; k <= 0xFE; k++) {
|
||||
if (thirdByteNode[k] === UNASSIGNED) {
|
||||
thirdByteNode[k] = NODE_START - commonFourthByteNodeIdx
|
||||
} else if (thirdByteNode[k] === NODE_START - commonFourthByteNodeIdx) {
|
||||
continue
|
||||
} else if (thirdByteNode[k] > NODE_START) {
|
||||
throw new Error("gb18030 decode tables conflict at byte 3")
|
||||
}
|
||||
|
||||
var fourthByteNode = this.decodeTables[NODE_START - thirdByteNode[k]]
|
||||
for (var l = 0x30; l <= 0x39; l++) {
|
||||
if (fourthByteNode[l] === UNASSIGNED) { fourthByteNode[l] = GB18030_CODE }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.defCharSB = this.encodeTable[0][iconv.defaultCharSingleByte.charCodeAt(0)];
|
||||
if (this.defCharSB === UNASSIGNED) this.defCharSB = this.encodeTable[0]['?'];
|
||||
if (this.defCharSB === UNASSIGNED) this.defCharSB = "?".charCodeAt(0);
|
||||
this.defaultCharUnicode = iconv.defaultCharUnicode
|
||||
|
||||
// Encode tables: Unicode -> DBCS.
|
||||
|
||||
// Load & create GB18030 tables when needed.
|
||||
if (typeof codecOptions.gb18030 === 'function') {
|
||||
this.gb18030 = codecOptions.gb18030(); // Load GB18030 ranges.
|
||||
// `encodeTable` is array mapping from unicode char to encoded char. All its values are integers for performance.
|
||||
// Because it can be sparse, it is represented as array of buckets by 256 chars each. Bucket can be null.
|
||||
// Values: >= 0 -> it is a normal char. Write the value (if <=256 then 1 byte, if <=65536 then 2 bytes, etc.).
|
||||
// == UNASSIGNED -> no conversion found. Output a default char.
|
||||
// <= SEQ_START -> it's an index in encodeTableSeq, see below. The character starts a sequence.
|
||||
this.encodeTable = []
|
||||
|
||||
// Add GB18030 decode tables.
|
||||
var thirdByteNodeIdx = this.decodeTables.length;
|
||||
var thirdByteNode = this.decodeTables[thirdByteNodeIdx] = UNASSIGNED_NODE.slice(0);
|
||||
// `encodeTableSeq` is used when a sequence of unicode characters is encoded as a single code. We use a tree of
|
||||
// objects where keys correspond to characters in sequence and leafs are the encoded dbcs values. A special DEF_CHAR key
|
||||
// means end of sequence (needed when one sequence is a strict subsequence of another).
|
||||
// Objects are kept separately from encodeTable to increase performance.
|
||||
this.encodeTableSeq = []
|
||||
|
||||
var fourthByteNodeIdx = this.decodeTables.length;
|
||||
var fourthByteNode = this.decodeTables[fourthByteNodeIdx] = UNASSIGNED_NODE.slice(0);
|
||||
// Some chars can be decoded, but need not be encoded.
|
||||
var skipEncodeChars = {}
|
||||
if (codecOptions.encodeSkipVals) {
|
||||
for (var i = 0; i < codecOptions.encodeSkipVals.length; i++) {
|
||||
var val = codecOptions.encodeSkipVals[i]
|
||||
if (typeof val === "number") { skipEncodeChars[val] = true } else {
|
||||
for (var j = val.from; j <= val.to; j++) { skipEncodeChars[j] = true }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (var i = 0x81; i <= 0xFE; i++) {
|
||||
var secondByteNodeIdx = NODE_START - this.decodeTables[0][i];
|
||||
var secondByteNode = this.decodeTables[secondByteNodeIdx];
|
||||
for (var j = 0x30; j <= 0x39; j++)
|
||||
secondByteNode[j] = NODE_START - thirdByteNodeIdx;
|
||||
}
|
||||
for (var i = 0x81; i <= 0xFE; i++)
|
||||
thirdByteNode[i] = NODE_START - fourthByteNodeIdx;
|
||||
for (var i = 0x30; i <= 0x39; i++)
|
||||
fourthByteNode[i] = GB18030_CODE
|
||||
}
|
||||
// Use decode trie to recursively fill out encode tables.
|
||||
this._fillEncodeTable(0, 0, skipEncodeChars)
|
||||
|
||||
// Add more encoding pairs when needed.
|
||||
if (codecOptions.encodeAdd) {
|
||||
for (var uChar in codecOptions.encodeAdd) {
|
||||
if (Object.prototype.hasOwnProperty.call(codecOptions.encodeAdd, uChar)) { this._setEncodeChar(uChar.charCodeAt(0), codecOptions.encodeAdd[uChar]) }
|
||||
}
|
||||
}
|
||||
|
||||
this.defCharSB = this.encodeTable[0][iconv.defaultCharSingleByte.charCodeAt(0)]
|
||||
if (this.defCharSB === UNASSIGNED) this.defCharSB = this.encodeTable[0]["?"]
|
||||
if (this.defCharSB === UNASSIGNED) this.defCharSB = "?".charCodeAt(0)
|
||||
}
|
||||
|
||||
DBCSCodec.prototype.encoder = DBCSEncoder;
|
||||
DBCSCodec.prototype.decoder = DBCSDecoder;
|
||||
DBCSCodec.prototype.encoder = DBCSEncoder
|
||||
DBCSCodec.prototype.decoder = DBCSDecoder
|
||||
|
||||
// Decoder helpers
|
||||
DBCSCodec.prototype._getDecodeTrieNode = function(addr) {
|
||||
var bytes = [];
|
||||
for (; addr > 0; addr >>= 8)
|
||||
bytes.push(addr & 0xFF);
|
||||
if (bytes.length == 0)
|
||||
bytes.push(0);
|
||||
DBCSCodec.prototype._getDecodeTrieNode = function (addr) {
|
||||
var bytes = []
|
||||
for (; addr > 0; addr >>>= 8) { bytes.push(addr & 0xFF) }
|
||||
if (bytes.length == 0) { bytes.push(0) }
|
||||
|
||||
var node = this.decodeTables[0];
|
||||
for (var i = bytes.length-1; i > 0; i--) { // Traverse nodes deeper into the trie.
|
||||
var val = node[bytes[i]];
|
||||
var node = this.decodeTables[0]
|
||||
for (var i = bytes.length - 1; i > 0; i--) { // Traverse nodes deeper into the trie.
|
||||
var val = node[bytes[i]]
|
||||
|
||||
if (val == UNASSIGNED) { // Create new node.
|
||||
node[bytes[i]] = NODE_START - this.decodeTables.length;
|
||||
this.decodeTables.push(node = UNASSIGNED_NODE.slice(0));
|
||||
}
|
||||
else if (val <= NODE_START) { // Existing node.
|
||||
node = this.decodeTables[NODE_START - val];
|
||||
}
|
||||
else
|
||||
throw new Error("Overwrite byte in " + this.encodingName + ", addr: " + addr.toString(16));
|
||||
}
|
||||
return node;
|
||||
if (val == UNASSIGNED) { // Create new node.
|
||||
node[bytes[i]] = NODE_START - this.decodeTables.length
|
||||
this.decodeTables.push(node = UNASSIGNED_NODE.slice(0))
|
||||
} else if (val <= NODE_START) { // Existing node.
|
||||
node = this.decodeTables[NODE_START - val]
|
||||
} else { throw new Error("Overwrite byte in " + this.encodingName + ", addr: " + addr.toString(16)) }
|
||||
}
|
||||
return node
|
||||
}
|
||||
|
||||
DBCSCodec.prototype._addDecodeChunk = function (chunk) {
|
||||
// First element of chunk is the hex mbcs code where we start.
|
||||
var curAddr = parseInt(chunk[0], 16)
|
||||
|
||||
DBCSCodec.prototype._addDecodeChunk = function(chunk) {
|
||||
// First element of chunk is the hex mbcs code where we start.
|
||||
var curAddr = parseInt(chunk[0], 16);
|
||||
// Choose the decoding node where we'll write our chars.
|
||||
var writeTable = this._getDecodeTrieNode(curAddr)
|
||||
curAddr = curAddr & 0xFF
|
||||
|
||||
// Choose the decoding node where we'll write our chars.
|
||||
var writeTable = this._getDecodeTrieNode(curAddr);
|
||||
curAddr = curAddr & 0xFF;
|
||||
// Write all other elements of the chunk to the table.
|
||||
for (var k = 1; k < chunk.length; k++) {
|
||||
var part = chunk[k]
|
||||
if (typeof part === "string") { // String, write as-is.
|
||||
for (var l = 0; l < part.length;) {
|
||||
var code = part.charCodeAt(l++)
|
||||
if (code >= 0xD800 && code < 0xDC00) { // Decode surrogate
|
||||
var codeTrail = part.charCodeAt(l++)
|
||||
if (codeTrail >= 0xDC00 && codeTrail < 0xE000) { writeTable[curAddr++] = 0x10000 + (code - 0xD800) * 0x400 + (codeTrail - 0xDC00) } else { throw new Error("Incorrect surrogate pair in " + this.encodingName + " at chunk " + chunk[0]) }
|
||||
} else if (code > 0x0FF0 && code <= 0x0FFF) { // Character sequence (our own encoding used)
|
||||
var len = 0xFFF - code + 2
|
||||
var seq = []
|
||||
for (var m = 0; m < len; m++) { seq.push(part.charCodeAt(l++)) } // Simple variation: don't support surrogates or subsequences in seq.
|
||||
|
||||
// Write all other elements of the chunk to the table.
|
||||
for (var k = 1; k < chunk.length; k++) {
|
||||
var part = chunk[k];
|
||||
if (typeof part === "string") { // String, write as-is.
|
||||
for (var l = 0; l < part.length;) {
|
||||
var code = part.charCodeAt(l++);
|
||||
if (0xD800 <= code && code < 0xDC00) { // Decode surrogate
|
||||
var codeTrail = part.charCodeAt(l++);
|
||||
if (0xDC00 <= codeTrail && codeTrail < 0xE000)
|
||||
writeTable[curAddr++] = 0x10000 + (code - 0xD800) * 0x400 + (codeTrail - 0xDC00);
|
||||
else
|
||||
throw new Error("Incorrect surrogate pair in " + this.encodingName + " at chunk " + chunk[0]);
|
||||
}
|
||||
else if (0x0FF0 < code && code <= 0x0FFF) { // Character sequence (our own encoding used)
|
||||
var len = 0xFFF - code + 2;
|
||||
var seq = [];
|
||||
for (var m = 0; m < len; m++)
|
||||
seq.push(part.charCodeAt(l++)); // Simple variation: don't support surrogates or subsequences in seq.
|
||||
|
||||
writeTable[curAddr++] = SEQ_START - this.decodeTableSeq.length;
|
||||
this.decodeTableSeq.push(seq);
|
||||
}
|
||||
else
|
||||
writeTable[curAddr++] = code; // Basic char
|
||||
}
|
||||
}
|
||||
else if (typeof part === "number") { // Integer, meaning increasing sequence starting with prev character.
|
||||
var charCode = writeTable[curAddr - 1] + 1;
|
||||
for (var l = 0; l < part; l++)
|
||||
writeTable[curAddr++] = charCode++;
|
||||
}
|
||||
else
|
||||
throw new Error("Incorrect type '" + typeof part + "' given in " + this.encodingName + " at chunk " + chunk[0]);
|
||||
}
|
||||
if (curAddr > 0xFF)
|
||||
throw new Error("Incorrect chunk in " + this.encodingName + " at addr " + chunk[0] + ": too long" + curAddr);
|
||||
writeTable[curAddr++] = SEQ_START - this.decodeTableSeq.length
|
||||
this.decodeTableSeq.push(seq)
|
||||
} else { writeTable[curAddr++] = code } // Basic char
|
||||
}
|
||||
} else if (typeof part === "number") { // Integer, meaning increasing sequence starting with prev character.
|
||||
var charCode = writeTable[curAddr - 1] + 1
|
||||
for (var l = 0; l < part; l++) { writeTable[curAddr++] = charCode++ }
|
||||
} else { throw new Error("Incorrect type '" + typeof part + "' given in " + this.encodingName + " at chunk " + chunk[0]) }
|
||||
}
|
||||
if (curAddr > 0xFF) { throw new Error("Incorrect chunk in " + this.encodingName + " at addr " + chunk[0] + ": too long" + curAddr) }
|
||||
}
|
||||
|
||||
// Encoder helpers
|
||||
DBCSCodec.prototype._getEncodeBucket = function(uCode) {
|
||||
var high = uCode >> 8; // This could be > 0xFF because of astral characters.
|
||||
if (this.encodeTable[high] === undefined)
|
||||
this.encodeTable[high] = UNASSIGNED_NODE.slice(0); // Create bucket on demand.
|
||||
return this.encodeTable[high];
|
||||
DBCSCodec.prototype._getEncodeBucket = function (uCode) {
|
||||
var high = uCode >> 8 // This could be > 0xFF because of astral characters.
|
||||
if (this.encodeTable[high] === undefined) {
|
||||
this.encodeTable[high] = UNASSIGNED_NODE.slice(0)
|
||||
} // Create bucket on demand.
|
||||
return this.encodeTable[high]
|
||||
}
|
||||
|
||||
DBCSCodec.prototype._setEncodeChar = function(uCode, dbcsCode) {
|
||||
var bucket = this._getEncodeBucket(uCode);
|
||||
var low = uCode & 0xFF;
|
||||
if (bucket[low] <= SEQ_START)
|
||||
this.encodeTableSeq[SEQ_START-bucket[low]][DEF_CHAR] = dbcsCode; // There's already a sequence, set a single-char subsequence of it.
|
||||
else if (bucket[low] == UNASSIGNED)
|
||||
bucket[low] = dbcsCode;
|
||||
DBCSCodec.prototype._setEncodeChar = function (uCode, dbcsCode) {
|
||||
var bucket = this._getEncodeBucket(uCode)
|
||||
var low = uCode & 0xFF
|
||||
if (bucket[low] <= SEQ_START) { this.encodeTableSeq[SEQ_START - bucket[low]][DEF_CHAR] = dbcsCode } // There's already a sequence, set a single-char subsequence of it.
|
||||
else if (bucket[low] == UNASSIGNED) { bucket[low] = dbcsCode }
|
||||
}
|
||||
|
||||
DBCSCodec.prototype._setEncodeSequence = function(seq, dbcsCode) {
|
||||
|
||||
// Get the root of character tree according to first character of the sequence.
|
||||
var uCode = seq[0];
|
||||
var bucket = this._getEncodeBucket(uCode);
|
||||
var low = uCode & 0xFF;
|
||||
DBCSCodec.prototype._setEncodeSequence = function (seq, dbcsCode) {
|
||||
// Get the root of character tree according to first character of the sequence.
|
||||
var uCode = seq[0]
|
||||
var bucket = this._getEncodeBucket(uCode)
|
||||
var low = uCode & 0xFF
|
||||
|
||||
var node;
|
||||
if (bucket[low] <= SEQ_START) {
|
||||
// There's already a sequence with - use it.
|
||||
node = this.encodeTableSeq[SEQ_START-bucket[low]];
|
||||
}
|
||||
else {
|
||||
// There was no sequence object - allocate a new one.
|
||||
node = {};
|
||||
if (bucket[low] !== UNASSIGNED) node[DEF_CHAR] = bucket[low]; // If a char was set before - make it a single-char subsequence.
|
||||
bucket[low] = SEQ_START - this.encodeTableSeq.length;
|
||||
this.encodeTableSeq.push(node);
|
||||
}
|
||||
var node
|
||||
if (bucket[low] <= SEQ_START) {
|
||||
// There's already a sequence with - use it.
|
||||
node = this.encodeTableSeq[SEQ_START - bucket[low]]
|
||||
} else {
|
||||
// There was no sequence object - allocate a new one.
|
||||
node = {}
|
||||
if (bucket[low] !== UNASSIGNED) node[DEF_CHAR] = bucket[low] // If a char was set before - make it a single-char subsequence.
|
||||
bucket[low] = SEQ_START - this.encodeTableSeq.length
|
||||
this.encodeTableSeq.push(node)
|
||||
}
|
||||
|
||||
// Traverse the character tree, allocating new nodes as needed.
|
||||
for (var j = 1; j < seq.length-1; j++) {
|
||||
var oldVal = node[uCode];
|
||||
if (typeof oldVal === 'object')
|
||||
node = oldVal;
|
||||
else {
|
||||
node = node[uCode] = {}
|
||||
if (oldVal !== undefined)
|
||||
node[DEF_CHAR] = oldVal
|
||||
}
|
||||
// Traverse the character tree, allocating new nodes as needed.
|
||||
for (var j = 1; j < seq.length - 1; j++) {
|
||||
var oldVal = node[uCode]
|
||||
if (typeof oldVal === "object") { node = oldVal } else {
|
||||
node = node[uCode] = {}
|
||||
if (oldVal !== undefined) { node[DEF_CHAR] = oldVal }
|
||||
}
|
||||
}
|
||||
|
||||
// Set the leaf to given dbcsCode.
|
||||
uCode = seq[seq.length-1];
|
||||
node[uCode] = dbcsCode;
|
||||
// Set the leaf to given dbcsCode.
|
||||
uCode = seq[seq.length - 1]
|
||||
node[uCode] = dbcsCode
|
||||
}
|
||||
|
||||
DBCSCodec.prototype._fillEncodeTable = function(nodeIdx, prefix, skipEncodeChars) {
|
||||
var node = this.decodeTables[nodeIdx];
|
||||
for (var i = 0; i < 0x100; i++) {
|
||||
var uCode = node[i];
|
||||
var mbCode = prefix + i;
|
||||
if (skipEncodeChars[mbCode])
|
||||
continue;
|
||||
DBCSCodec.prototype._fillEncodeTable = function (nodeIdx, prefix, skipEncodeChars) {
|
||||
var node = this.decodeTables[nodeIdx]
|
||||
var hasValues = false
|
||||
var subNodeEmpty = {}
|
||||
for (var i = 0; i < 0x100; i++) {
|
||||
var uCode = node[i]
|
||||
var mbCode = prefix + i
|
||||
if (skipEncodeChars[mbCode]) { continue }
|
||||
|
||||
if (uCode >= 0)
|
||||
this._setEncodeChar(uCode, mbCode);
|
||||
else if (uCode <= NODE_START)
|
||||
this._fillEncodeTable(NODE_START - uCode, mbCode << 8, skipEncodeChars);
|
||||
else if (uCode <= SEQ_START)
|
||||
this._setEncodeSequence(this.decodeTableSeq[SEQ_START - uCode], mbCode);
|
||||
if (uCode >= 0) {
|
||||
this._setEncodeChar(uCode, mbCode)
|
||||
hasValues = true
|
||||
} else if (uCode <= NODE_START) {
|
||||
var subNodeIdx = NODE_START - uCode
|
||||
if (!subNodeEmpty[subNodeIdx]) { // Skip empty subtrees (they are too large in gb18030).
|
||||
var newPrefix = (mbCode << 8) >>> 0 // NOTE: '>>> 0' keeps 32-bit num positive.
|
||||
if (this._fillEncodeTable(subNodeIdx, newPrefix, skipEncodeChars)) { hasValues = true } else { subNodeEmpty[subNodeIdx] = true }
|
||||
}
|
||||
} else if (uCode <= SEQ_START) {
|
||||
this._setEncodeSequence(this.decodeTableSeq[SEQ_START - uCode], mbCode)
|
||||
hasValues = true
|
||||
}
|
||||
}
|
||||
return hasValues
|
||||
}
|
||||
|
||||
|
||||
|
||||
// == Encoder ==================================================================
|
||||
|
||||
function DBCSEncoder(options, codec) {
|
||||
// Encoder state
|
||||
this.leadSurrogate = -1;
|
||||
this.seqObj = undefined;
|
||||
|
||||
// Static data
|
||||
this.encodeTable = codec.encodeTable;
|
||||
this.encodeTableSeq = codec.encodeTableSeq;
|
||||
this.defaultCharSingleByte = codec.defCharSB;
|
||||
this.gb18030 = codec.gb18030;
|
||||
function DBCSEncoder (options, codec) {
|
||||
// Encoder state
|
||||
this.leadSurrogate = -1
|
||||
this.seqObj = undefined
|
||||
|
||||
// Static data
|
||||
this.encodeTable = codec.encodeTable
|
||||
this.encodeTableSeq = codec.encodeTableSeq
|
||||
this.defaultCharSingleByte = codec.defCharSB
|
||||
this.gb18030 = codec.gb18030
|
||||
}
|
||||
|
||||
DBCSEncoder.prototype.write = function(str) {
|
||||
var newBuf = Buffer.alloc(str.length * (this.gb18030 ? 4 : 3)),
|
||||
leadSurrogate = this.leadSurrogate,
|
||||
seqObj = this.seqObj, nextChar = -1,
|
||||
i = 0, j = 0;
|
||||
DBCSEncoder.prototype.write = function (str) {
|
||||
var newBuf = Buffer.alloc(str.length * (this.gb18030 ? 4 : 3))
|
||||
var leadSurrogate = this.leadSurrogate
|
||||
var seqObj = this.seqObj
|
||||
var nextChar = -1
|
||||
var i = 0; var j = 0
|
||||
|
||||
while (true) {
|
||||
// 0. Get next character.
|
||||
if (nextChar === -1) {
|
||||
if (i == str.length) break;
|
||||
var uCode = str.charCodeAt(i++);
|
||||
}
|
||||
else {
|
||||
var uCode = nextChar;
|
||||
nextChar = -1;
|
||||
}
|
||||
|
||||
// 1. Handle surrogates.
|
||||
if (0xD800 <= uCode && uCode < 0xE000) { // Char is one of surrogates.
|
||||
if (uCode < 0xDC00) { // We've got lead surrogate.
|
||||
if (leadSurrogate === -1) {
|
||||
leadSurrogate = uCode;
|
||||
continue;
|
||||
} else {
|
||||
leadSurrogate = uCode;
|
||||
// Double lead surrogate found.
|
||||
uCode = UNASSIGNED;
|
||||
}
|
||||
} else { // We've got trail surrogate.
|
||||
if (leadSurrogate !== -1) {
|
||||
uCode = 0x10000 + (leadSurrogate - 0xD800) * 0x400 + (uCode - 0xDC00);
|
||||
leadSurrogate = -1;
|
||||
} else {
|
||||
// Incomplete surrogate pair - only trail surrogate found.
|
||||
uCode = UNASSIGNED;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else if (leadSurrogate !== -1) {
|
||||
// Incomplete surrogate pair - only lead surrogate found.
|
||||
nextChar = uCode; uCode = UNASSIGNED; // Write an error, then current char.
|
||||
leadSurrogate = -1;
|
||||
}
|
||||
|
||||
// 2. Convert uCode character.
|
||||
var dbcsCode = UNASSIGNED;
|
||||
if (seqObj !== undefined && uCode != UNASSIGNED) { // We are in the middle of the sequence
|
||||
var resCode = seqObj[uCode];
|
||||
if (typeof resCode === 'object') { // Sequence continues.
|
||||
seqObj = resCode;
|
||||
continue;
|
||||
|
||||
} else if (typeof resCode == 'number') { // Sequence finished. Write it.
|
||||
dbcsCode = resCode;
|
||||
|
||||
} else if (resCode == undefined) { // Current character is not part of the sequence.
|
||||
|
||||
// Try default character for this sequence
|
||||
resCode = seqObj[DEF_CHAR];
|
||||
if (resCode !== undefined) {
|
||||
dbcsCode = resCode; // Found. Write it.
|
||||
nextChar = uCode; // Current character will be written too in the next iteration.
|
||||
|
||||
} else {
|
||||
// TODO: What if we have no default? (resCode == undefined)
|
||||
// Then, we should write first char of the sequence as-is and try the rest recursively.
|
||||
// Didn't do it for now because no encoding has this situation yet.
|
||||
// Currently, just skip the sequence and write current char.
|
||||
}
|
||||
}
|
||||
seqObj = undefined;
|
||||
}
|
||||
else if (uCode >= 0) { // Regular character
|
||||
var subtable = this.encodeTable[uCode >> 8];
|
||||
if (subtable !== undefined)
|
||||
dbcsCode = subtable[uCode & 0xFF];
|
||||
|
||||
if (dbcsCode <= SEQ_START) { // Sequence start
|
||||
seqObj = this.encodeTableSeq[SEQ_START-dbcsCode];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (dbcsCode == UNASSIGNED && this.gb18030) {
|
||||
// Use GB18030 algorithm to find character(s) to write.
|
||||
var idx = findIdx(this.gb18030.uChars, uCode);
|
||||
if (idx != -1) {
|
||||
var dbcsCode = this.gb18030.gbChars[idx] + (uCode - this.gb18030.uChars[idx]);
|
||||
newBuf[j++] = 0x81 + Math.floor(dbcsCode / 12600); dbcsCode = dbcsCode % 12600;
|
||||
newBuf[j++] = 0x30 + Math.floor(dbcsCode / 1260); dbcsCode = dbcsCode % 1260;
|
||||
newBuf[j++] = 0x81 + Math.floor(dbcsCode / 10); dbcsCode = dbcsCode % 10;
|
||||
newBuf[j++] = 0x30 + dbcsCode;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Write dbcsCode character.
|
||||
if (dbcsCode === UNASSIGNED)
|
||||
dbcsCode = this.defaultCharSingleByte;
|
||||
|
||||
if (dbcsCode < 0x100) {
|
||||
newBuf[j++] = dbcsCode;
|
||||
}
|
||||
else if (dbcsCode < 0x10000) {
|
||||
newBuf[j++] = dbcsCode >> 8; // high byte
|
||||
newBuf[j++] = dbcsCode & 0xFF; // low byte
|
||||
}
|
||||
else {
|
||||
newBuf[j++] = dbcsCode >> 16;
|
||||
newBuf[j++] = (dbcsCode >> 8) & 0xFF;
|
||||
newBuf[j++] = dbcsCode & 0xFF;
|
||||
}
|
||||
while (true) {
|
||||
// 0. Get next character.
|
||||
if (nextChar === -1) {
|
||||
if (i == str.length) break
|
||||
var uCode = str.charCodeAt(i++)
|
||||
} else {
|
||||
var uCode = nextChar
|
||||
nextChar = -1
|
||||
}
|
||||
|
||||
this.seqObj = seqObj;
|
||||
this.leadSurrogate = leadSurrogate;
|
||||
return newBuf.slice(0, j);
|
||||
}
|
||||
|
||||
DBCSEncoder.prototype.end = function() {
|
||||
if (this.leadSurrogate === -1 && this.seqObj === undefined)
|
||||
return; // All clean. Most often case.
|
||||
|
||||
var newBuf = Buffer.alloc(10), j = 0;
|
||||
|
||||
if (this.seqObj) { // We're in the sequence.
|
||||
var dbcsCode = this.seqObj[DEF_CHAR];
|
||||
if (dbcsCode !== undefined) { // Write beginning of the sequence.
|
||||
if (dbcsCode < 0x100) {
|
||||
newBuf[j++] = dbcsCode;
|
||||
}
|
||||
else {
|
||||
newBuf[j++] = dbcsCode >> 8; // high byte
|
||||
newBuf[j++] = dbcsCode & 0xFF; // low byte
|
||||
}
|
||||
// 1. Handle surrogates.
|
||||
if (uCode >= 0xD800 && uCode < 0xE000) { // Char is one of surrogates.
|
||||
if (uCode < 0xDC00) { // We've got lead surrogate.
|
||||
if (leadSurrogate === -1) {
|
||||
leadSurrogate = uCode
|
||||
continue
|
||||
} else {
|
||||
// See todo above.
|
||||
leadSurrogate = uCode
|
||||
// Double lead surrogate found.
|
||||
uCode = UNASSIGNED
|
||||
}
|
||||
this.seqObj = undefined;
|
||||
} else { // We've got trail surrogate.
|
||||
if (leadSurrogate !== -1) {
|
||||
uCode = 0x10000 + (leadSurrogate - 0xD800) * 0x400 + (uCode - 0xDC00)
|
||||
leadSurrogate = -1
|
||||
} else {
|
||||
// Incomplete surrogate pair - only trail surrogate found.
|
||||
uCode = UNASSIGNED
|
||||
}
|
||||
}
|
||||
} else if (leadSurrogate !== -1) {
|
||||
// Incomplete surrogate pair - only lead surrogate found.
|
||||
nextChar = uCode; uCode = UNASSIGNED // Write an error, then current char.
|
||||
leadSurrogate = -1
|
||||
}
|
||||
|
||||
if (this.leadSurrogate !== -1) {
|
||||
// Incomplete surrogate pair - only lead surrogate found.
|
||||
newBuf[j++] = this.defaultCharSingleByte;
|
||||
this.leadSurrogate = -1;
|
||||
// 2. Convert uCode character.
|
||||
var dbcsCode = UNASSIGNED
|
||||
if (seqObj !== undefined && uCode != UNASSIGNED) { // We are in the middle of the sequence
|
||||
var resCode = seqObj[uCode]
|
||||
if (typeof resCode === "object") { // Sequence continues.
|
||||
seqObj = resCode
|
||||
continue
|
||||
} else if (typeof resCode === "number") { // Sequence finished. Write it.
|
||||
dbcsCode = resCode
|
||||
} else if (resCode == undefined) { // Current character is not part of the sequence.
|
||||
// Try default character for this sequence
|
||||
resCode = seqObj[DEF_CHAR]
|
||||
if (resCode !== undefined) {
|
||||
dbcsCode = resCode // Found. Write it.
|
||||
nextChar = uCode // Current character will be written too in the next iteration.
|
||||
} else {
|
||||
// TODO: What if we have no default? (resCode == undefined)
|
||||
// Then, we should write first char of the sequence as-is and try the rest recursively.
|
||||
// Didn't do it for now because no encoding has this situation yet.
|
||||
// Currently, just skip the sequence and write current char.
|
||||
}
|
||||
}
|
||||
seqObj = undefined
|
||||
} else if (uCode >= 0) { // Regular character
|
||||
var subtable = this.encodeTable[uCode >> 8]
|
||||
if (subtable !== undefined) { dbcsCode = subtable[uCode & 0xFF] }
|
||||
|
||||
if (dbcsCode <= SEQ_START) { // Sequence start
|
||||
seqObj = this.encodeTableSeq[SEQ_START - dbcsCode]
|
||||
continue
|
||||
}
|
||||
|
||||
if (dbcsCode == UNASSIGNED && this.gb18030) {
|
||||
// Use GB18030 algorithm to find character(s) to write.
|
||||
var idx = findIdx(this.gb18030.uChars, uCode)
|
||||
if (idx != -1) {
|
||||
var dbcsCode = this.gb18030.gbChars[idx] + (uCode - this.gb18030.uChars[idx])
|
||||
newBuf[j++] = 0x81 + Math.floor(dbcsCode / 12600); dbcsCode = dbcsCode % 12600
|
||||
newBuf[j++] = 0x30 + Math.floor(dbcsCode / 1260); dbcsCode = dbcsCode % 1260
|
||||
newBuf[j++] = 0x81 + Math.floor(dbcsCode / 10); dbcsCode = dbcsCode % 10
|
||||
newBuf[j++] = 0x30 + dbcsCode
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return newBuf.slice(0, j);
|
||||
|
||||
// 3. Write dbcsCode character.
|
||||
if (dbcsCode === UNASSIGNED) { dbcsCode = this.defaultCharSingleByte }
|
||||
|
||||
if (dbcsCode < 0x100) {
|
||||
newBuf[j++] = dbcsCode
|
||||
} else if (dbcsCode < 0x10000) {
|
||||
newBuf[j++] = dbcsCode >> 8 // high byte
|
||||
newBuf[j++] = dbcsCode & 0xFF // low byte
|
||||
} else if (dbcsCode < 0x1000000) {
|
||||
newBuf[j++] = dbcsCode >> 16
|
||||
newBuf[j++] = (dbcsCode >> 8) & 0xFF
|
||||
newBuf[j++] = dbcsCode & 0xFF
|
||||
} else {
|
||||
newBuf[j++] = dbcsCode >>> 24
|
||||
newBuf[j++] = (dbcsCode >>> 16) & 0xFF
|
||||
newBuf[j++] = (dbcsCode >>> 8) & 0xFF
|
||||
newBuf[j++] = dbcsCode & 0xFF
|
||||
}
|
||||
}
|
||||
|
||||
this.seqObj = seqObj
|
||||
this.leadSurrogate = leadSurrogate
|
||||
return newBuf.slice(0, j)
|
||||
}
|
||||
|
||||
DBCSEncoder.prototype.end = function () {
|
||||
if (this.leadSurrogate === -1 && this.seqObj === undefined) { return } // All clean. Most often case.
|
||||
|
||||
var newBuf = Buffer.alloc(10); var j = 0
|
||||
|
||||
if (this.seqObj) { // We're in the sequence.
|
||||
var dbcsCode = this.seqObj[DEF_CHAR]
|
||||
if (dbcsCode !== undefined) { // Write beginning of the sequence.
|
||||
if (dbcsCode < 0x100) {
|
||||
newBuf[j++] = dbcsCode
|
||||
} else {
|
||||
newBuf[j++] = dbcsCode >> 8 // high byte
|
||||
newBuf[j++] = dbcsCode & 0xFF // low byte
|
||||
}
|
||||
} else {
|
||||
// See todo above.
|
||||
}
|
||||
this.seqObj = undefined
|
||||
}
|
||||
|
||||
if (this.leadSurrogate !== -1) {
|
||||
// Incomplete surrogate pair - only lead surrogate found.
|
||||
newBuf[j++] = this.defaultCharSingleByte
|
||||
this.leadSurrogate = -1
|
||||
}
|
||||
|
||||
return newBuf.slice(0, j)
|
||||
}
|
||||
|
||||
// Export for testing
|
||||
DBCSEncoder.prototype.findIdx = findIdx;
|
||||
|
||||
DBCSEncoder.prototype.findIdx = findIdx
|
||||
|
||||
// == Decoder ==================================================================
|
||||
|
||||
function DBCSDecoder(options, codec) {
|
||||
// Decoder state
|
||||
this.nodeIdx = 0;
|
||||
this.prevBuf = Buffer.alloc(0);
|
||||
function DBCSDecoder (options, codec) {
|
||||
// Decoder state
|
||||
this.nodeIdx = 0
|
||||
this.prevBytes = []
|
||||
|
||||
// Static data
|
||||
this.decodeTables = codec.decodeTables;
|
||||
this.decodeTableSeq = codec.decodeTableSeq;
|
||||
this.defaultCharUnicode = codec.defaultCharUnicode;
|
||||
this.gb18030 = codec.gb18030;
|
||||
// Static data
|
||||
this.decodeTables = codec.decodeTables
|
||||
this.decodeTableSeq = codec.decodeTableSeq
|
||||
this.defaultCharUnicode = codec.defaultCharUnicode
|
||||
this.gb18030 = codec.gb18030
|
||||
}
|
||||
|
||||
DBCSDecoder.prototype.write = function(buf) {
|
||||
var newBuf = Buffer.alloc(buf.length*2),
|
||||
nodeIdx = this.nodeIdx,
|
||||
prevBuf = this.prevBuf, prevBufOffset = this.prevBuf.length,
|
||||
seqStart = -this.prevBuf.length, // idx of the start of current parsed sequence.
|
||||
uCode;
|
||||
DBCSDecoder.prototype.write = function (buf) {
|
||||
var newBuf = Buffer.alloc(buf.length * 2)
|
||||
var nodeIdx = this.nodeIdx
|
||||
var prevBytes = this.prevBytes; var prevOffset = this.prevBytes.length
|
||||
var seqStart = -this.prevBytes.length // idx of the start of current parsed sequence.
|
||||
var uCode
|
||||
|
||||
if (prevBufOffset > 0) // Make prev buf overlap a little to make it easier to slice later.
|
||||
prevBuf = Buffer.concat([prevBuf, buf.slice(0, 10)]);
|
||||
|
||||
for (var i = 0, j = 0; i < buf.length; i++) {
|
||||
var curByte = (i >= 0) ? buf[i] : prevBuf[i + prevBufOffset];
|
||||
for (var i = 0, j = 0; i < buf.length; i++) {
|
||||
var curByte = (i >= 0) ? buf[i] : prevBytes[i + prevOffset]
|
||||
|
||||
// Lookup in current trie node.
|
||||
var uCode = this.decodeTables[nodeIdx][curByte];
|
||||
// Lookup in current trie node.
|
||||
var uCode = this.decodeTables[nodeIdx][curByte]
|
||||
|
||||
if (uCode >= 0) {
|
||||
// Normal character, just use it.
|
||||
}
|
||||
else if (uCode === UNASSIGNED) { // Unknown char.
|
||||
// TODO: Callback with seq.
|
||||
//var curSeq = (seqStart >= 0) ? buf.slice(seqStart, i+1) : prevBuf.slice(seqStart + prevBufOffset, i+1 + prevBufOffset);
|
||||
i = seqStart; // Try to parse again, after skipping first byte of the sequence ('i' will be incremented by 'for' cycle).
|
||||
uCode = this.defaultCharUnicode.charCodeAt(0);
|
||||
}
|
||||
else if (uCode === GB18030_CODE) {
|
||||
var curSeq = (seqStart >= 0) ? buf.slice(seqStart, i+1) : prevBuf.slice(seqStart + prevBufOffset, i+1 + prevBufOffset);
|
||||
var ptr = (curSeq[0]-0x81)*12600 + (curSeq[1]-0x30)*1260 + (curSeq[2]-0x81)*10 + (curSeq[3]-0x30);
|
||||
var idx = findIdx(this.gb18030.gbChars, ptr);
|
||||
uCode = this.gb18030.uChars[idx] + ptr - this.gb18030.gbChars[idx];
|
||||
}
|
||||
else if (uCode <= NODE_START) { // Go to next trie node.
|
||||
nodeIdx = NODE_START - uCode;
|
||||
continue;
|
||||
}
|
||||
else if (uCode <= SEQ_START) { // Output a sequence of chars.
|
||||
var seq = this.decodeTableSeq[SEQ_START - uCode];
|
||||
for (var k = 0; k < seq.length - 1; k++) {
|
||||
uCode = seq[k];
|
||||
newBuf[j++] = uCode & 0xFF;
|
||||
newBuf[j++] = uCode >> 8;
|
||||
}
|
||||
uCode = seq[seq.length-1];
|
||||
}
|
||||
else
|
||||
throw new Error("iconv-lite internal error: invalid decoding table value " + uCode + " at " + nodeIdx + "/" + curByte);
|
||||
if (uCode >= 0) {
|
||||
// Normal character, just use it.
|
||||
} else if (uCode === UNASSIGNED) { // Unknown char.
|
||||
// TODO: Callback with seq.
|
||||
uCode = this.defaultCharUnicode.charCodeAt(0)
|
||||
i = seqStart // Skip one byte ('i' will be incremented by the for loop) and try to parse again.
|
||||
} else if (uCode === GB18030_CODE) {
|
||||
if (i >= 3) {
|
||||
var ptr = (buf[i - 3] - 0x81) * 12600 + (buf[i - 2] - 0x30) * 1260 + (buf[i - 1] - 0x81) * 10 + (curByte - 0x30)
|
||||
} else {
|
||||
var ptr = (prevBytes[i - 3 + prevOffset] - 0x81) * 12600 +
|
||||
(((i - 2 >= 0) ? buf[i - 2] : prevBytes[i - 2 + prevOffset]) - 0x30) * 1260 +
|
||||
(((i - 1 >= 0) ? buf[i - 1] : prevBytes[i - 1 + prevOffset]) - 0x81) * 10 +
|
||||
(curByte - 0x30)
|
||||
}
|
||||
var idx = findIdx(this.gb18030.gbChars, ptr)
|
||||
uCode = this.gb18030.uChars[idx] + ptr - this.gb18030.gbChars[idx]
|
||||
} else if (uCode <= NODE_START) { // Go to next trie node.
|
||||
nodeIdx = NODE_START - uCode
|
||||
continue
|
||||
} else if (uCode <= SEQ_START) { // Output a sequence of chars.
|
||||
var seq = this.decodeTableSeq[SEQ_START - uCode]
|
||||
for (var k = 0; k < seq.length - 1; k++) {
|
||||
uCode = seq[k]
|
||||
newBuf[j++] = uCode & 0xFF
|
||||
newBuf[j++] = uCode >> 8
|
||||
}
|
||||
uCode = seq[seq.length - 1]
|
||||
} else { throw new Error("iconv-lite internal error: invalid decoding table value " + uCode + " at " + nodeIdx + "/" + curByte) }
|
||||
|
||||
// Write the character to buffer, handling higher planes using surrogate pair.
|
||||
if (uCode > 0xFFFF) {
|
||||
uCode -= 0x10000;
|
||||
var uCodeLead = 0xD800 + Math.floor(uCode / 0x400);
|
||||
newBuf[j++] = uCodeLead & 0xFF;
|
||||
newBuf[j++] = uCodeLead >> 8;
|
||||
// Write the character to buffer, handling higher planes using surrogate pair.
|
||||
if (uCode >= 0x10000) {
|
||||
uCode -= 0x10000
|
||||
var uCodeLead = 0xD800 | (uCode >> 10)
|
||||
newBuf[j++] = uCodeLead & 0xFF
|
||||
newBuf[j++] = uCodeLead >> 8
|
||||
|
||||
uCode = 0xDC00 + uCode % 0x400;
|
||||
}
|
||||
newBuf[j++] = uCode & 0xFF;
|
||||
newBuf[j++] = uCode >> 8;
|
||||
|
||||
// Reset trie node.
|
||||
nodeIdx = 0; seqStart = i+1;
|
||||
uCode = 0xDC00 | (uCode & 0x3FF)
|
||||
}
|
||||
newBuf[j++] = uCode & 0xFF
|
||||
newBuf[j++] = uCode >> 8
|
||||
|
||||
this.nodeIdx = nodeIdx;
|
||||
this.prevBuf = (seqStart >= 0) ? buf.slice(seqStart) : prevBuf.slice(seqStart + prevBufOffset);
|
||||
return newBuf.slice(0, j).toString('ucs2');
|
||||
// Reset trie node.
|
||||
nodeIdx = 0; seqStart = i + 1
|
||||
}
|
||||
|
||||
this.nodeIdx = nodeIdx
|
||||
this.prevBytes = (seqStart >= 0)
|
||||
? Array.prototype.slice.call(buf, seqStart)
|
||||
: prevBytes.slice(seqStart + prevOffset).concat(Array.prototype.slice.call(buf))
|
||||
|
||||
return newBuf.slice(0, j).toString("ucs2")
|
||||
}
|
||||
|
||||
DBCSDecoder.prototype.end = function() {
|
||||
var ret = '';
|
||||
DBCSDecoder.prototype.end = function () {
|
||||
var ret = ""
|
||||
|
||||
// Try to parse all remaining chars.
|
||||
while (this.prevBuf.length > 0) {
|
||||
// Skip 1 character in the buffer.
|
||||
ret += this.defaultCharUnicode;
|
||||
var buf = this.prevBuf.slice(1);
|
||||
// Try to parse all remaining chars.
|
||||
while (this.prevBytes.length > 0) {
|
||||
// Skip 1 character in the buffer.
|
||||
ret += this.defaultCharUnicode
|
||||
var bytesArr = this.prevBytes.slice(1)
|
||||
|
||||
// Parse remaining as usual.
|
||||
this.prevBuf = Buffer.alloc(0);
|
||||
this.nodeIdx = 0;
|
||||
if (buf.length > 0)
|
||||
ret += this.write(buf);
|
||||
}
|
||||
// Parse remaining as usual.
|
||||
this.prevBytes = []
|
||||
this.nodeIdx = 0
|
||||
if (bytesArr.length > 0) { ret += this.write(bytesArr) }
|
||||
}
|
||||
|
||||
this.nodeIdx = 0;
|
||||
return ret;
|
||||
this.prevBytes = []
|
||||
this.nodeIdx = 0
|
||||
return ret
|
||||
}
|
||||
|
||||
// Binary search for GB18030. Returns largest i such that table[i] <= val.
|
||||
function findIdx(table, val) {
|
||||
if (table[0] > val)
|
||||
return -1;
|
||||
function findIdx (table, val) {
|
||||
if (table[0] > val) { return -1 }
|
||||
|
||||
var l = 0, r = table.length;
|
||||
while (l < r-1) { // always table[l] <= val < table[r]
|
||||
var mid = l + Math.floor((r-l+1)/2);
|
||||
if (table[mid] <= val)
|
||||
l = mid;
|
||||
else
|
||||
r = mid;
|
||||
}
|
||||
return l;
|
||||
var l = 0; var r = table.length
|
||||
while (l < r - 1) { // always table[l] <= val < table[r]
|
||||
var mid = l + ((r - l + 1) >> 1)
|
||||
if (table[mid] <= val) { l = mid } else { r = mid }
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
|
||||
313
node_modules/iconv-lite/encodings/dbcs-data.js
generated
vendored
313
node_modules/iconv-lite/encodings/dbcs-data.js
generated
vendored
@@ -1,176 +1,185 @@
|
||||
"use strict";
|
||||
"use strict"
|
||||
|
||||
// Description of supported double byte encodings and aliases.
|
||||
// Tables are not require()-d until they are needed to speed up library load.
|
||||
// require()-s are direct to support Browserify.
|
||||
|
||||
module.exports = {
|
||||
|
||||
// == Japanese/ShiftJIS ====================================================
|
||||
// All japanese encodings are based on JIS X set of standards:
|
||||
// JIS X 0201 - Single-byte encoding of ASCII + ¥ + Kana chars at 0xA1-0xDF.
|
||||
// JIS X 0208 - Main set of 6879 characters, placed in 94x94 plane, to be encoded by 2 bytes.
|
||||
// Has several variations in 1978, 1983, 1990 and 1997.
|
||||
// JIS X 0212 - Supplementary plane of 6067 chars in 94x94 plane. 1990. Effectively dead.
|
||||
// JIS X 0213 - Extension and modern replacement of 0208 and 0212. Total chars: 11233.
|
||||
// 2 planes, first is superset of 0208, second - revised 0212.
|
||||
// Introduced in 2000, revised 2004. Some characters are in Unicode Plane 2 (0x2xxxx)
|
||||
|
||||
// Byte encodings are:
|
||||
// * Shift_JIS: Compatible with 0201, uses not defined chars in top half as lead bytes for double-byte
|
||||
// encoding of 0208. Lead byte ranges: 0x81-0x9F, 0xE0-0xEF; Trail byte ranges: 0x40-0x7E, 0x80-0x9E, 0x9F-0xFC.
|
||||
// Windows CP932 is a superset of Shift_JIS. Some companies added more chars, notably KDDI.
|
||||
// * EUC-JP: Up to 3 bytes per character. Used mostly on *nixes.
|
||||
// 0x00-0x7F - lower part of 0201
|
||||
// 0x8E, 0xA1-0xDF - upper part of 0201
|
||||
// (0xA1-0xFE)x2 - 0208 plane (94x94).
|
||||
// 0x8F, (0xA1-0xFE)x2 - 0212 plane (94x94).
|
||||
// * JIS X 208: 7-bit, direct encoding of 0208. Byte ranges: 0x21-0x7E (94 values). Uncommon.
|
||||
// Used as-is in ISO2022 family.
|
||||
// * ISO2022-JP: Stateful encoding, with escape sequences to switch between ASCII,
|
||||
// 0201-1976 Roman, 0208-1978, 0208-1983.
|
||||
// * ISO2022-JP-1: Adds esc seq for 0212-1990.
|
||||
// * ISO2022-JP-2: Adds esc seq for GB2313-1980, KSX1001-1992, ISO8859-1, ISO8859-7.
|
||||
// * ISO2022-JP-3: Adds esc seq for 0201-1976 Kana set, 0213-2000 Planes 1, 2.
|
||||
// * ISO2022-JP-2004: Adds 0213-2004 Plane 1.
|
||||
//
|
||||
// After JIS X 0213 appeared, Shift_JIS-2004, EUC-JISX0213 and ISO2022-JP-2004 followed, with just changing the planes.
|
||||
//
|
||||
// Overall, it seems that it's a mess :( http://www8.plala.or.jp/tkubota1/unicode-symbols-map2.html
|
||||
// == Japanese/ShiftJIS ====================================================
|
||||
// All japanese encodings are based on JIS X set of standards:
|
||||
// JIS X 0201 - Single-byte encoding of ASCII + ¥ + Kana chars at 0xA1-0xDF.
|
||||
// JIS X 0208 - Main set of 6879 characters, placed in 94x94 plane, to be encoded by 2 bytes.
|
||||
// Has several variations in 1978, 1983, 1990 and 1997.
|
||||
// JIS X 0212 - Supplementary plane of 6067 chars in 94x94 plane. 1990. Effectively dead.
|
||||
// JIS X 0213 - Extension and modern replacement of 0208 and 0212. Total chars: 11233.
|
||||
// 2 planes, first is superset of 0208, second - revised 0212.
|
||||
// Introduced in 2000, revised 2004. Some characters are in Unicode Plane 2 (0x2xxxx)
|
||||
|
||||
'shiftjis': {
|
||||
type: '_dbcs',
|
||||
table: function() { return require('./tables/shiftjis.json') },
|
||||
encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
|
||||
encodeSkipVals: [{from: 0xED40, to: 0xF940}],
|
||||
},
|
||||
'csshiftjis': 'shiftjis',
|
||||
'mskanji': 'shiftjis',
|
||||
'sjis': 'shiftjis',
|
||||
'windows31j': 'shiftjis',
|
||||
'ms31j': 'shiftjis',
|
||||
'xsjis': 'shiftjis',
|
||||
'windows932': 'shiftjis',
|
||||
'ms932': 'shiftjis',
|
||||
'932': 'shiftjis',
|
||||
'cp932': 'shiftjis',
|
||||
// Byte encodings are:
|
||||
// * Shift_JIS: Compatible with 0201, uses not defined chars in top half as lead bytes for double-byte
|
||||
// encoding of 0208. Lead byte ranges: 0x81-0x9F, 0xE0-0xEF; Trail byte ranges: 0x40-0x7E, 0x80-0x9E, 0x9F-0xFC.
|
||||
// Windows CP932 is a superset of Shift_JIS. Some companies added more chars, notably KDDI.
|
||||
// * EUC-JP: Up to 3 bytes per character. Used mostly on *nixes.
|
||||
// 0x00-0x7F - lower part of 0201
|
||||
// 0x8E, 0xA1-0xDF - upper part of 0201
|
||||
// (0xA1-0xFE)x2 - 0208 plane (94x94).
|
||||
// 0x8F, (0xA1-0xFE)x2 - 0212 plane (94x94).
|
||||
// * JIS X 208: 7-bit, direct encoding of 0208. Byte ranges: 0x21-0x7E (94 values). Uncommon.
|
||||
// Used as-is in ISO2022 family.
|
||||
// * ISO2022-JP: Stateful encoding, with escape sequences to switch between ASCII,
|
||||
// 0201-1976 Roman, 0208-1978, 0208-1983.
|
||||
// * ISO2022-JP-1: Adds esc seq for 0212-1990.
|
||||
// * ISO2022-JP-2: Adds esc seq for GB2313-1980, KSX1001-1992, ISO8859-1, ISO8859-7.
|
||||
// * ISO2022-JP-3: Adds esc seq for 0201-1976 Kana set, 0213-2000 Planes 1, 2.
|
||||
// * ISO2022-JP-2004: Adds 0213-2004 Plane 1.
|
||||
//
|
||||
// After JIS X 0213 appeared, Shift_JIS-2004, EUC-JISX0213 and ISO2022-JP-2004 followed, with just changing the planes.
|
||||
//
|
||||
// Overall, it seems that it's a mess :( http://www8.plala.or.jp/tkubota1/unicode-symbols-map2.html
|
||||
|
||||
'eucjp': {
|
||||
type: '_dbcs',
|
||||
table: function() { return require('./tables/eucjp.json') },
|
||||
encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
|
||||
},
|
||||
shiftjis: {
|
||||
type: "_dbcs",
|
||||
table: function () { return require("./tables/shiftjis.json") },
|
||||
encodeAdd: { "\u00a5": 0x5C, "\u203E": 0x7E },
|
||||
encodeSkipVals: [{ from: 0xED40, to: 0xF940 }]
|
||||
},
|
||||
csshiftjis: "shiftjis",
|
||||
mskanji: "shiftjis",
|
||||
sjis: "shiftjis",
|
||||
windows31j: "shiftjis",
|
||||
ms31j: "shiftjis",
|
||||
xsjis: "shiftjis",
|
||||
windows932: "shiftjis",
|
||||
ms932: "shiftjis",
|
||||
932: "shiftjis",
|
||||
cp932: "shiftjis",
|
||||
|
||||
// TODO: KDDI extension to Shift_JIS
|
||||
// TODO: IBM CCSID 942 = CP932, but F0-F9 custom chars and other char changes.
|
||||
// TODO: IBM CCSID 943 = Shift_JIS = CP932 with original Shift_JIS lower 128 chars.
|
||||
eucjp: {
|
||||
type: "_dbcs",
|
||||
table: function () { return require("./tables/eucjp.json") },
|
||||
encodeAdd: { "\u00a5": 0x5C, "\u203E": 0x7E }
|
||||
},
|
||||
|
||||
// TODO: KDDI extension to Shift_JIS
|
||||
// TODO: IBM CCSID 942 = CP932, but F0-F9 custom chars and other char changes.
|
||||
// TODO: IBM CCSID 943 = Shift_JIS = CP932 with original Shift_JIS lower 128 chars.
|
||||
|
||||
// == Chinese/GBK ==========================================================
|
||||
// http://en.wikipedia.org/wiki/GBK
|
||||
// We mostly implement W3C recommendation: https://www.w3.org/TR/encoding/#gbk-encoder
|
||||
// == Chinese/GBK ==========================================================
|
||||
// http://en.wikipedia.org/wiki/GBK
|
||||
// We mostly implement W3C recommendation: https://www.w3.org/TR/encoding/#gbk-encoder
|
||||
|
||||
// Oldest GB2312 (1981, ~7600 chars) is a subset of CP936
|
||||
'gb2312': 'cp936',
|
||||
'gb231280': 'cp936',
|
||||
'gb23121980': 'cp936',
|
||||
'csgb2312': 'cp936',
|
||||
'csiso58gb231280': 'cp936',
|
||||
'euccn': 'cp936',
|
||||
// Oldest GB2312 (1981, ~7600 chars) is a subset of CP936
|
||||
gb2312: "cp936",
|
||||
gb231280: "cp936",
|
||||
gb23121980: "cp936",
|
||||
csgb2312: "cp936",
|
||||
csiso58gb231280: "cp936",
|
||||
euccn: "cp936",
|
||||
|
||||
// Microsoft's CP936 is a subset and approximation of GBK.
|
||||
'windows936': 'cp936',
|
||||
'ms936': 'cp936',
|
||||
'936': 'cp936',
|
||||
'cp936': {
|
||||
type: '_dbcs',
|
||||
table: function() { return require('./tables/cp936.json') },
|
||||
},
|
||||
// Microsoft's CP936 is a subset and approximation of GBK.
|
||||
windows936: "cp936",
|
||||
ms936: "cp936",
|
||||
936: "cp936",
|
||||
cp936: {
|
||||
type: "_dbcs",
|
||||
table: function () { return require("./tables/cp936.json") }
|
||||
},
|
||||
|
||||
// GBK (~22000 chars) is an extension of CP936 that added user-mapped chars and some other.
|
||||
'gbk': {
|
||||
type: '_dbcs',
|
||||
table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
|
||||
},
|
||||
'xgbk': 'gbk',
|
||||
'isoir58': 'gbk',
|
||||
// GBK (~22000 chars) is an extension of CP936 that added user-mapped chars and some other.
|
||||
gbk: {
|
||||
type: "_dbcs",
|
||||
table: function () { return require("./tables/cp936.json").concat(require("./tables/gbk-added.json")) }
|
||||
},
|
||||
xgbk: "gbk",
|
||||
isoir58: "gbk",
|
||||
|
||||
// GB18030 is an algorithmic extension of GBK.
|
||||
// Main source: https://www.w3.org/TR/encoding/#gbk-encoder
|
||||
// http://icu-project.org/docs/papers/gb18030.html
|
||||
// http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
|
||||
// http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
|
||||
'gb18030': {
|
||||
type: '_dbcs',
|
||||
table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
|
||||
gb18030: function() { return require('./tables/gb18030-ranges.json') },
|
||||
encodeSkipVals: [0x80],
|
||||
encodeAdd: {'€': 0xA2E3},
|
||||
},
|
||||
// GB18030 is an algorithmic extension of GBK.
|
||||
// Main source: https://www.w3.org/TR/encoding/#gbk-encoder
|
||||
// http://icu-project.org/docs/papers/gb18030.html
|
||||
// http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
|
||||
// http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
|
||||
gb18030: {
|
||||
type: "_dbcs",
|
||||
table: function () { return require("./tables/cp936.json").concat(require("./tables/gbk-added.json")) },
|
||||
gb18030: function () { return require("./tables/gb18030-ranges.json") },
|
||||
encodeSkipVals: [0x80],
|
||||
encodeAdd: { "€": 0xA2E3 }
|
||||
},
|
||||
|
||||
'chinese': 'gb18030',
|
||||
chinese: "gb18030",
|
||||
|
||||
// == Korean ===============================================================
|
||||
// EUC-KR, KS_C_5601 and KS X 1001 are exactly the same.
|
||||
windows949: "cp949",
|
||||
ms949: "cp949",
|
||||
949: "cp949",
|
||||
cp949: {
|
||||
type: "_dbcs",
|
||||
table: function () { return require("./tables/cp949.json") }
|
||||
},
|
||||
|
||||
// == Korean ===============================================================
|
||||
// EUC-KR, KS_C_5601 and KS X 1001 are exactly the same.
|
||||
'windows949': 'cp949',
|
||||
'ms949': 'cp949',
|
||||
'949': 'cp949',
|
||||
'cp949': {
|
||||
type: '_dbcs',
|
||||
table: function() { return require('./tables/cp949.json') },
|
||||
},
|
||||
cseuckr: "cp949",
|
||||
csksc56011987: "cp949",
|
||||
euckr: "cp949",
|
||||
isoir149: "cp949",
|
||||
korean: "cp949",
|
||||
ksc56011987: "cp949",
|
||||
ksc56011989: "cp949",
|
||||
ksc5601: "cp949",
|
||||
|
||||
'cseuckr': 'cp949',
|
||||
'csksc56011987': 'cp949',
|
||||
'euckr': 'cp949',
|
||||
'isoir149': 'cp949',
|
||||
'korean': 'cp949',
|
||||
'ksc56011987': 'cp949',
|
||||
'ksc56011989': 'cp949',
|
||||
'ksc5601': 'cp949',
|
||||
// == Big5/Taiwan/Hong Kong ================================================
|
||||
// There are lots of tables for Big5 and cp950. Please see the following links for history:
|
||||
// http://moztw.org/docs/big5/ http://www.haible.de/bruno/charsets/conversion-tables/Big5.html
|
||||
// Variations, in roughly number of defined chars:
|
||||
// * Windows CP 950: Microsoft variant of Big5. Canonical: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
|
||||
// * Windows CP 951: Microsoft variant of Big5-HKSCS-2001. Seems to be never public. http://me.abelcheung.org/articles/research/what-is-cp951/
|
||||
// * Big5-2003 (Taiwan standard) almost superset of cp950.
|
||||
// * Unicode-at-on (UAO) / Mozilla 1.8. Falling out of use on the Web. Not supported by other browsers.
|
||||
// * Big5-HKSCS (-2001, -2004, -2008). Hong Kong standard.
|
||||
// many unicode code points moved from PUA to Supplementary plane (U+2XXXX) over the years.
|
||||
// Plus, it has 4 combining sequences.
|
||||
// Seems that Mozilla refused to support it for 10 yrs. https://bugzilla.mozilla.org/show_bug.cgi?id=162431 https://bugzilla.mozilla.org/show_bug.cgi?id=310299
|
||||
// because big5-hkscs is the only encoding to include astral characters in non-algorithmic way.
|
||||
// Implementations are not consistent within browsers; sometimes labeled as just big5.
|
||||
// MS Internet Explorer switches from big5 to big5-hkscs when a patch applied.
|
||||
// Great discussion & recap of what's going on https://bugzilla.mozilla.org/show_bug.cgi?id=912470#c31
|
||||
// In the encoder, it might make sense to support encoding old PUA mappings to Big5 bytes seq-s.
|
||||
// Official spec: http://www.ogcio.gov.hk/en/business/tech_promotion/ccli/terms/doc/2003cmp_2008.txt
|
||||
// http://www.ogcio.gov.hk/tc/business/tech_promotion/ccli/terms/doc/hkscs-2008-big5-iso.txt
|
||||
//
|
||||
// Current understanding of how to deal with Big5(-HKSCS) is in the Encoding Standard, http://encoding.spec.whatwg.org/#big5-encoder
|
||||
// Unicode mapping (http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT) is said to be wrong.
|
||||
|
||||
windows950: "cp950",
|
||||
ms950: "cp950",
|
||||
950: "cp950",
|
||||
cp950: {
|
||||
type: "_dbcs",
|
||||
table: function () { return require("./tables/cp950.json") }
|
||||
},
|
||||
|
||||
// == Big5/Taiwan/Hong Kong ================================================
|
||||
// There are lots of tables for Big5 and cp950. Please see the following links for history:
|
||||
// http://moztw.org/docs/big5/ http://www.haible.de/bruno/charsets/conversion-tables/Big5.html
|
||||
// Variations, in roughly number of defined chars:
|
||||
// * Windows CP 950: Microsoft variant of Big5. Canonical: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
|
||||
// * Windows CP 951: Microsoft variant of Big5-HKSCS-2001. Seems to be never public. http://me.abelcheung.org/articles/research/what-is-cp951/
|
||||
// * Big5-2003 (Taiwan standard) almost superset of cp950.
|
||||
// * Unicode-at-on (UAO) / Mozilla 1.8. Falling out of use on the Web. Not supported by other browsers.
|
||||
// * Big5-HKSCS (-2001, -2004, -2008). Hong Kong standard.
|
||||
// many unicode code points moved from PUA to Supplementary plane (U+2XXXX) over the years.
|
||||
// Plus, it has 4 combining sequences.
|
||||
// Seems that Mozilla refused to support it for 10 yrs. https://bugzilla.mozilla.org/show_bug.cgi?id=162431 https://bugzilla.mozilla.org/show_bug.cgi?id=310299
|
||||
// because big5-hkscs is the only encoding to include astral characters in non-algorithmic way.
|
||||
// Implementations are not consistent within browsers; sometimes labeled as just big5.
|
||||
// MS Internet Explorer switches from big5 to big5-hkscs when a patch applied.
|
||||
// Great discussion & recap of what's going on https://bugzilla.mozilla.org/show_bug.cgi?id=912470#c31
|
||||
// In the encoder, it might make sense to support encoding old PUA mappings to Big5 bytes seq-s.
|
||||
// Official spec: http://www.ogcio.gov.hk/en/business/tech_promotion/ccli/terms/doc/2003cmp_2008.txt
|
||||
// http://www.ogcio.gov.hk/tc/business/tech_promotion/ccli/terms/doc/hkscs-2008-big5-iso.txt
|
||||
//
|
||||
// Current understanding of how to deal with Big5(-HKSCS) is in the Encoding Standard, http://encoding.spec.whatwg.org/#big5-encoder
|
||||
// Unicode mapping (http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT) is said to be wrong.
|
||||
// Big5 has many variations and is an extension of cp950. We use Encoding Standard's as a consensus.
|
||||
big5: "big5hkscs",
|
||||
big5hkscs: {
|
||||
type: "_dbcs",
|
||||
table: function () { return require("./tables/cp950.json").concat(require("./tables/big5-added.json")) },
|
||||
encodeSkipVals: [
|
||||
// Although Encoding Standard says we should avoid encoding to HKSCS area (See Step 1 of
|
||||
// https://encoding.spec.whatwg.org/#index-big5-pointer), we still do it to increase compatibility with ICU.
|
||||
// But if a single unicode point can be encoded both as HKSCS and regular Big5, we prefer the latter.
|
||||
0x8e69, 0x8e6f, 0x8e7e, 0x8eab, 0x8eb4, 0x8ecd, 0x8ed0, 0x8f57, 0x8f69, 0x8f6e, 0x8fcb, 0x8ffe,
|
||||
0x906d, 0x907a, 0x90c4, 0x90dc, 0x90f1, 0x91bf, 0x92af, 0x92b0, 0x92b1, 0x92b2, 0x92d1, 0x9447, 0x94ca,
|
||||
0x95d9, 0x96fc, 0x9975, 0x9b76, 0x9b78, 0x9b7b, 0x9bc6, 0x9bde, 0x9bec, 0x9bf6, 0x9c42, 0x9c53, 0x9c62,
|
||||
0x9c68, 0x9c6b, 0x9c77, 0x9cbc, 0x9cbd, 0x9cd0, 0x9d57, 0x9d5a, 0x9dc4, 0x9def, 0x9dfb, 0x9ea9, 0x9eef,
|
||||
0x9efd, 0x9f60, 0x9fcb, 0xa077, 0xa0dc, 0xa0df, 0x8fcc, 0x92c8, 0x9644, 0x96ed,
|
||||
|
||||
'windows950': 'cp950',
|
||||
'ms950': 'cp950',
|
||||
'950': 'cp950',
|
||||
'cp950': {
|
||||
type: '_dbcs',
|
||||
table: function() { return require('./tables/cp950.json') },
|
||||
},
|
||||
// Step 2 of https://encoding.spec.whatwg.org/#index-big5-pointer: Use last pointer for U+2550, U+255E, U+2561, U+256A, U+5341, or U+5345
|
||||
0xa2a4, 0xa2a5, 0xa2a7, 0xa2a6, 0xa2cc, 0xa2ce
|
||||
]
|
||||
},
|
||||
|
||||
// Big5 has many variations and is an extension of cp950. We use Encoding Standard's as a consensus.
|
||||
'big5': 'big5hkscs',
|
||||
'big5hkscs': {
|
||||
type: '_dbcs',
|
||||
table: function() { return require('./tables/cp950.json').concat(require('./tables/big5-added.json')) },
|
||||
encodeSkipVals: [0xa2cc],
|
||||
},
|
||||
|
||||
'cnbig5': 'big5hkscs',
|
||||
'csbig5': 'big5hkscs',
|
||||
'xxbig5': 'big5hkscs',
|
||||
};
|
||||
cnbig5: "big5hkscs",
|
||||
csbig5: "big5hkscs",
|
||||
xxbig5: "big5hkscs"
|
||||
}
|
||||
|
||||
31
node_modules/iconv-lite/encodings/index.js
generated
vendored
31
node_modules/iconv-lite/encodings/index.js
generated
vendored
@@ -1,22 +1,23 @@
|
||||
"use strict";
|
||||
"use strict"
|
||||
|
||||
var mergeModules = require("../lib/helpers/merge-exports")
|
||||
|
||||
// Update this array if you add/rename/remove files in this directory.
|
||||
// We support Browserify by skipping automatic module discovery and requiring modules directly.
|
||||
var modules = [
|
||||
require("./internal"),
|
||||
require("./utf16"),
|
||||
require("./utf7"),
|
||||
require("./sbcs-codec"),
|
||||
require("./sbcs-data"),
|
||||
require("./sbcs-data-generated"),
|
||||
require("./dbcs-codec"),
|
||||
require("./dbcs-data"),
|
||||
];
|
||||
require("./internal"),
|
||||
require("./utf32"),
|
||||
require("./utf16"),
|
||||
require("./utf7"),
|
||||
require("./sbcs-codec"),
|
||||
require("./sbcs-data"),
|
||||
require("./sbcs-data-generated"),
|
||||
require("./dbcs-codec"),
|
||||
require("./dbcs-data")
|
||||
]
|
||||
|
||||
// Put all encoding/alias/codec definitions to single object and export it.
|
||||
// Put all encoding/alias/codec definitions to single object and export it.
|
||||
for (var i = 0; i < modules.length; i++) {
|
||||
var module = modules[i];
|
||||
for (var enc in module)
|
||||
if (Object.prototype.hasOwnProperty.call(module, enc))
|
||||
exports[enc] = module[enc];
|
||||
var module = modules[i]
|
||||
mergeModules(exports, module)
|
||||
}
|
||||
|
||||
300
node_modules/iconv-lite/encodings/internal.js
generated
vendored
300
node_modules/iconv-lite/encodings/internal.js
generated
vendored
@@ -1,188 +1,218 @@
|
||||
"use strict";
|
||||
var Buffer = require("safer-buffer").Buffer;
|
||||
"use strict"
|
||||
var Buffer = require("safer-buffer").Buffer
|
||||
|
||||
// Export Node.js internal encodings.
|
||||
|
||||
module.exports = {
|
||||
// Encodings
|
||||
utf8: { type: "_internal", bomAware: true},
|
||||
cesu8: { type: "_internal", bomAware: true},
|
||||
unicode11utf8: "utf8",
|
||||
// Encodings
|
||||
utf8: { type: "_internal", bomAware: true },
|
||||
cesu8: { type: "_internal", bomAware: true },
|
||||
unicode11utf8: "utf8",
|
||||
|
||||
ucs2: { type: "_internal", bomAware: true},
|
||||
utf16le: "ucs2",
|
||||
ucs2: { type: "_internal", bomAware: true },
|
||||
utf16le: "ucs2",
|
||||
|
||||
binary: { type: "_internal" },
|
||||
base64: { type: "_internal" },
|
||||
hex: { type: "_internal" },
|
||||
binary: { type: "_internal" },
|
||||
base64: { type: "_internal" },
|
||||
hex: { type: "_internal" },
|
||||
|
||||
// Codec.
|
||||
_internal: InternalCodec,
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
function InternalCodec(codecOptions, iconv) {
|
||||
this.enc = codecOptions.encodingName;
|
||||
this.bomAware = codecOptions.bomAware;
|
||||
|
||||
if (this.enc === "base64")
|
||||
this.encoder = InternalEncoderBase64;
|
||||
else if (this.enc === "cesu8") {
|
||||
this.enc = "utf8"; // Use utf8 for decoding.
|
||||
this.encoder = InternalEncoderCesu8;
|
||||
|
||||
// Add decoder for versions of Node not supporting CESU-8
|
||||
if (Buffer.from('eda0bdedb2a9', 'hex').toString() !== '💩') {
|
||||
this.decoder = InternalDecoderCesu8;
|
||||
this.defaultCharUnicode = iconv.defaultCharUnicode;
|
||||
}
|
||||
}
|
||||
// Codec.
|
||||
_internal: InternalCodec
|
||||
}
|
||||
|
||||
InternalCodec.prototype.encoder = InternalEncoder;
|
||||
InternalCodec.prototype.decoder = InternalDecoder;
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
function InternalCodec (codecOptions, iconv) {
|
||||
this.enc = codecOptions.encodingName
|
||||
this.bomAware = codecOptions.bomAware
|
||||
|
||||
if (this.enc === "base64") { this.encoder = InternalEncoderBase64 } else if (this.enc === "utf8") { this.encoder = InternalEncoderUtf8 } else if (this.enc === "cesu8") {
|
||||
this.enc = "utf8" // Use utf8 for decoding.
|
||||
this.encoder = InternalEncoderCesu8
|
||||
|
||||
// Add decoder for versions of Node not supporting CESU-8
|
||||
if (Buffer.from("eda0bdedb2a9", "hex").toString() !== "💩") {
|
||||
this.decoder = InternalDecoderCesu8
|
||||
this.defaultCharUnicode = iconv.defaultCharUnicode
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InternalCodec.prototype.encoder = InternalEncoder
|
||||
InternalCodec.prototype.decoder = InternalDecoder
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
// We use node.js internal decoder. Its signature is the same as ours.
|
||||
var StringDecoder = require('string_decoder').StringDecoder;
|
||||
var StringDecoder = require("string_decoder").StringDecoder
|
||||
|
||||
if (!StringDecoder.prototype.end) // Node v0.8 doesn't have this method.
|
||||
StringDecoder.prototype.end = function() {};
|
||||
|
||||
|
||||
function InternalDecoder(options, codec) {
|
||||
StringDecoder.call(this, codec.enc);
|
||||
function InternalDecoder (options, codec) {
|
||||
this.decoder = new StringDecoder(codec.enc)
|
||||
}
|
||||
|
||||
InternalDecoder.prototype = StringDecoder.prototype;
|
||||
InternalDecoder.prototype.write = function (buf) {
|
||||
if (!Buffer.isBuffer(buf)) {
|
||||
buf = Buffer.from(buf)
|
||||
}
|
||||
|
||||
return this.decoder.write(buf)
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
InternalDecoder.prototype.end = function () {
|
||||
return this.decoder.end()
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
// Encoder is mostly trivial
|
||||
|
||||
function InternalEncoder(options, codec) {
|
||||
this.enc = codec.enc;
|
||||
function InternalEncoder (options, codec) {
|
||||
this.enc = codec.enc
|
||||
}
|
||||
|
||||
InternalEncoder.prototype.write = function(str) {
|
||||
return Buffer.from(str, this.enc);
|
||||
InternalEncoder.prototype.write = function (str) {
|
||||
return Buffer.from(str, this.enc)
|
||||
}
|
||||
|
||||
InternalEncoder.prototype.end = function() {
|
||||
InternalEncoder.prototype.end = function () {
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// ------------------------------------------------------------------------------
|
||||
// Except base64 encoder, which must keep its state.
|
||||
|
||||
function InternalEncoderBase64(options, codec) {
|
||||
this.prevStr = '';
|
||||
function InternalEncoderBase64 (options, codec) {
|
||||
this.prevStr = ""
|
||||
}
|
||||
|
||||
InternalEncoderBase64.prototype.write = function(str) {
|
||||
str = this.prevStr + str;
|
||||
var completeQuads = str.length - (str.length % 4);
|
||||
this.prevStr = str.slice(completeQuads);
|
||||
str = str.slice(0, completeQuads);
|
||||
InternalEncoderBase64.prototype.write = function (str) {
|
||||
str = this.prevStr + str
|
||||
var completeQuads = str.length - (str.length % 4)
|
||||
this.prevStr = str.slice(completeQuads)
|
||||
str = str.slice(0, completeQuads)
|
||||
|
||||
return Buffer.from(str, "base64");
|
||||
return Buffer.from(str, "base64")
|
||||
}
|
||||
|
||||
InternalEncoderBase64.prototype.end = function() {
|
||||
return Buffer.from(this.prevStr, "base64");
|
||||
InternalEncoderBase64.prototype.end = function () {
|
||||
return Buffer.from(this.prevStr, "base64")
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// ------------------------------------------------------------------------------
|
||||
// CESU-8 encoder is also special.
|
||||
|
||||
function InternalEncoderCesu8(options, codec) {
|
||||
function InternalEncoderCesu8 (options, codec) {
|
||||
}
|
||||
|
||||
InternalEncoderCesu8.prototype.write = function(str) {
|
||||
var buf = Buffer.alloc(str.length * 3), bufIdx = 0;
|
||||
for (var i = 0; i < str.length; i++) {
|
||||
var charCode = str.charCodeAt(i);
|
||||
// Naive implementation, but it works because CESU-8 is especially easy
|
||||
// to convert from UTF-16 (which all JS strings are encoded in).
|
||||
if (charCode < 0x80)
|
||||
buf[bufIdx++] = charCode;
|
||||
else if (charCode < 0x800) {
|
||||
buf[bufIdx++] = 0xC0 + (charCode >>> 6);
|
||||
buf[bufIdx++] = 0x80 + (charCode & 0x3f);
|
||||
}
|
||||
else { // charCode will always be < 0x10000 in javascript.
|
||||
buf[bufIdx++] = 0xE0 + (charCode >>> 12);
|
||||
buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f);
|
||||
buf[bufIdx++] = 0x80 + (charCode & 0x3f);
|
||||
}
|
||||
InternalEncoderCesu8.prototype.write = function (str) {
|
||||
var buf = Buffer.alloc(str.length * 3); var bufIdx = 0
|
||||
for (var i = 0; i < str.length; i++) {
|
||||
var charCode = str.charCodeAt(i)
|
||||
// Naive implementation, but it works because CESU-8 is especially easy
|
||||
// to convert from UTF-16 (which all JS strings are encoded in).
|
||||
if (charCode < 0x80) { buf[bufIdx++] = charCode } else if (charCode < 0x800) {
|
||||
buf[bufIdx++] = 0xC0 + (charCode >>> 6)
|
||||
buf[bufIdx++] = 0x80 + (charCode & 0x3f)
|
||||
} else { // charCode will always be < 0x10000 in javascript.
|
||||
buf[bufIdx++] = 0xE0 + (charCode >>> 12)
|
||||
buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f)
|
||||
buf[bufIdx++] = 0x80 + (charCode & 0x3f)
|
||||
}
|
||||
return buf.slice(0, bufIdx);
|
||||
}
|
||||
return buf.slice(0, bufIdx)
|
||||
}
|
||||
|
||||
InternalEncoderCesu8.prototype.end = function() {
|
||||
InternalEncoderCesu8.prototype.end = function () {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// ------------------------------------------------------------------------------
|
||||
// CESU-8 decoder is not implemented in Node v4.0+
|
||||
|
||||
function InternalDecoderCesu8(options, codec) {
|
||||
this.acc = 0;
|
||||
this.contBytes = 0;
|
||||
this.accBytes = 0;
|
||||
this.defaultCharUnicode = codec.defaultCharUnicode;
|
||||
function InternalDecoderCesu8 (options, codec) {
|
||||
this.acc = 0
|
||||
this.contBytes = 0
|
||||
this.accBytes = 0
|
||||
this.defaultCharUnicode = codec.defaultCharUnicode
|
||||
}
|
||||
|
||||
InternalDecoderCesu8.prototype.write = function(buf) {
|
||||
var acc = this.acc, contBytes = this.contBytes, accBytes = this.accBytes,
|
||||
res = '';
|
||||
for (var i = 0; i < buf.length; i++) {
|
||||
var curByte = buf[i];
|
||||
if ((curByte & 0xC0) !== 0x80) { // Leading byte
|
||||
if (contBytes > 0) { // Previous code is invalid
|
||||
res += this.defaultCharUnicode;
|
||||
contBytes = 0;
|
||||
}
|
||||
InternalDecoderCesu8.prototype.write = function (buf) {
|
||||
var acc = this.acc; var contBytes = this.contBytes; var accBytes = this.accBytes
|
||||
var res = ""
|
||||
for (var i = 0; i < buf.length; i++) {
|
||||
var curByte = buf[i]
|
||||
if ((curByte & 0xC0) !== 0x80) { // Leading byte
|
||||
if (contBytes > 0) { // Previous code is invalid
|
||||
res += this.defaultCharUnicode
|
||||
contBytes = 0
|
||||
}
|
||||
|
||||
if (curByte < 0x80) { // Single-byte code
|
||||
res += String.fromCharCode(curByte);
|
||||
} else if (curByte < 0xE0) { // Two-byte code
|
||||
acc = curByte & 0x1F;
|
||||
contBytes = 1; accBytes = 1;
|
||||
} else if (curByte < 0xF0) { // Three-byte code
|
||||
acc = curByte & 0x0F;
|
||||
contBytes = 2; accBytes = 1;
|
||||
} else { // Four or more are not supported for CESU-8.
|
||||
res += this.defaultCharUnicode;
|
||||
}
|
||||
} else { // Continuation byte
|
||||
if (contBytes > 0) { // We're waiting for it.
|
||||
acc = (acc << 6) | (curByte & 0x3f);
|
||||
contBytes--; accBytes++;
|
||||
if (contBytes === 0) {
|
||||
// Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
|
||||
if (accBytes === 2 && acc < 0x80 && acc > 0)
|
||||
res += this.defaultCharUnicode;
|
||||
else if (accBytes === 3 && acc < 0x800)
|
||||
res += this.defaultCharUnicode;
|
||||
else
|
||||
// Actually add character.
|
||||
res += String.fromCharCode(acc);
|
||||
}
|
||||
} else { // Unexpected continuation byte
|
||||
res += this.defaultCharUnicode;
|
||||
}
|
||||
if (curByte < 0x80) { // Single-byte code
|
||||
res += String.fromCharCode(curByte)
|
||||
} else if (curByte < 0xE0) { // Two-byte code
|
||||
acc = curByte & 0x1F
|
||||
contBytes = 1; accBytes = 1
|
||||
} else if (curByte < 0xF0) { // Three-byte code
|
||||
acc = curByte & 0x0F
|
||||
contBytes = 2; accBytes = 1
|
||||
} else { // Four or more are not supported for CESU-8.
|
||||
res += this.defaultCharUnicode
|
||||
}
|
||||
} else { // Continuation byte
|
||||
if (contBytes > 0) { // We're waiting for it.
|
||||
acc = (acc << 6) | (curByte & 0x3f)
|
||||
contBytes--; accBytes++
|
||||
if (contBytes === 0) {
|
||||
// Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
|
||||
if (accBytes === 2 && acc < 0x80 && acc > 0) {
|
||||
res += this.defaultCharUnicode
|
||||
} else if (accBytes === 3 && acc < 0x800) {
|
||||
res += this.defaultCharUnicode
|
||||
} else {
|
||||
// Actually add character.
|
||||
res += String.fromCharCode(acc)
|
||||
}
|
||||
}
|
||||
} else { // Unexpected continuation byte
|
||||
res += this.defaultCharUnicode
|
||||
}
|
||||
}
|
||||
this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes;
|
||||
return res;
|
||||
}
|
||||
this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes
|
||||
return res
|
||||
}
|
||||
|
||||
InternalDecoderCesu8.prototype.end = function() {
|
||||
var res = 0;
|
||||
if (this.contBytes > 0)
|
||||
res += this.defaultCharUnicode;
|
||||
return res;
|
||||
InternalDecoderCesu8.prototype.end = function () {
|
||||
var res = 0
|
||||
if (this.contBytes > 0) { res += this.defaultCharUnicode }
|
||||
return res
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
// check the chunk boundaries for surrogate pair
|
||||
|
||||
function InternalEncoderUtf8 (options, codec) {
|
||||
this.highSurrogate = ""
|
||||
}
|
||||
|
||||
InternalEncoderUtf8.prototype.write = function (str) {
|
||||
if (this.highSurrogate) {
|
||||
str = this.highSurrogate + str
|
||||
this.highSurrogate = ""
|
||||
}
|
||||
|
||||
if (str.length > 0) {
|
||||
var charCode = str.charCodeAt(str.length - 1)
|
||||
if (charCode >= 0xd800 && charCode < 0xdc00) {
|
||||
this.highSurrogate = str[str.length - 1]
|
||||
str = str.slice(0, str.length - 1)
|
||||
}
|
||||
}
|
||||
|
||||
return Buffer.from(str, this.enc)
|
||||
}
|
||||
|
||||
InternalEncoderUtf8.prototype.end = function () {
|
||||
if (this.highSurrogate) {
|
||||
var str = this.highSurrogate
|
||||
this.highSurrogate = ""
|
||||
return Buffer.from(str, this.enc)
|
||||
}
|
||||
}
|
||||
|
||||
105
node_modules/iconv-lite/encodings/sbcs-codec.js
generated
vendored
105
node_modules/iconv-lite/encodings/sbcs-codec.js
generated
vendored
@@ -1,72 +1,75 @@
|
||||
"use strict";
|
||||
var Buffer = require("safer-buffer").Buffer;
|
||||
"use strict"
|
||||
var Buffer = require("safer-buffer").Buffer
|
||||
|
||||
// Single-byte codec. Needs a 'chars' string parameter that contains 256 or 128 chars that
|
||||
// correspond to encoded bytes (if 128 - then lower half is ASCII).
|
||||
// correspond to encoded bytes (if 128 - then lower half is ASCII).
|
||||
|
||||
exports._sbcs = SBCSCodec;
|
||||
function SBCSCodec(codecOptions, iconv) {
|
||||
if (!codecOptions)
|
||||
throw new Error("SBCS codec is called without the data.")
|
||||
|
||||
// Prepare char buffer for decoding.
|
||||
if (!codecOptions.chars || (codecOptions.chars.length !== 128 && codecOptions.chars.length !== 256))
|
||||
throw new Error("Encoding '"+codecOptions.type+"' has incorrect 'chars' (must be of len 128 or 256)");
|
||||
|
||||
if (codecOptions.chars.length === 128) {
|
||||
var asciiString = "";
|
||||
for (var i = 0; i < 128; i++)
|
||||
asciiString += String.fromCharCode(i);
|
||||
codecOptions.chars = asciiString + codecOptions.chars;
|
||||
exports._sbcs = SBCSCodec
|
||||
function SBCSCodec (codecOptions, iconv) {
|
||||
if (!codecOptions) {
|
||||
throw new Error("SBCS codec is called without the data.")
|
||||
}
|
||||
|
||||
// Prepare char buffer for decoding.
|
||||
if (!codecOptions.chars || (codecOptions.chars.length !== 128 && codecOptions.chars.length !== 256)) {
|
||||
throw new Error("Encoding '" + codecOptions.type + "' has incorrect 'chars' (must be of len 128 or 256)")
|
||||
}
|
||||
|
||||
if (codecOptions.chars.length === 128) {
|
||||
var asciiString = ""
|
||||
for (var i = 0; i < 128; i++) {
|
||||
asciiString += String.fromCharCode(i)
|
||||
}
|
||||
codecOptions.chars = asciiString + codecOptions.chars
|
||||
}
|
||||
|
||||
this.decodeBuf = Buffer.from(codecOptions.chars, 'ucs2');
|
||||
|
||||
// Encoding buffer.
|
||||
var encodeBuf = Buffer.alloc(65536, iconv.defaultCharSingleByte.charCodeAt(0));
|
||||
this.decodeBuf = Buffer.from(codecOptions.chars, "ucs2")
|
||||
|
||||
for (var i = 0; i < codecOptions.chars.length; i++)
|
||||
encodeBuf[codecOptions.chars.charCodeAt(i)] = i;
|
||||
// Encoding buffer.
|
||||
var encodeBuf = Buffer.alloc(65536, iconv.defaultCharSingleByte.charCodeAt(0))
|
||||
|
||||
this.encodeBuf = encodeBuf;
|
||||
for (var i = 0; i < codecOptions.chars.length; i++) {
|
||||
encodeBuf[codecOptions.chars.charCodeAt(i)] = i
|
||||
}
|
||||
|
||||
this.encodeBuf = encodeBuf
|
||||
}
|
||||
|
||||
SBCSCodec.prototype.encoder = SBCSEncoder;
|
||||
SBCSCodec.prototype.decoder = SBCSDecoder;
|
||||
SBCSCodec.prototype.encoder = SBCSEncoder
|
||||
SBCSCodec.prototype.decoder = SBCSDecoder
|
||||
|
||||
|
||||
function SBCSEncoder(options, codec) {
|
||||
this.encodeBuf = codec.encodeBuf;
|
||||
function SBCSEncoder (options, codec) {
|
||||
this.encodeBuf = codec.encodeBuf
|
||||
}
|
||||
|
||||
SBCSEncoder.prototype.write = function(str) {
|
||||
var buf = Buffer.alloc(str.length);
|
||||
for (var i = 0; i < str.length; i++)
|
||||
buf[i] = this.encodeBuf[str.charCodeAt(i)];
|
||||
|
||||
return buf;
|
||||
SBCSEncoder.prototype.write = function (str) {
|
||||
var buf = Buffer.alloc(str.length)
|
||||
for (var i = 0; i < str.length; i++) {
|
||||
buf[i] = this.encodeBuf[str.charCodeAt(i)]
|
||||
}
|
||||
|
||||
return buf
|
||||
}
|
||||
|
||||
SBCSEncoder.prototype.end = function() {
|
||||
SBCSEncoder.prototype.end = function () {
|
||||
}
|
||||
|
||||
|
||||
function SBCSDecoder(options, codec) {
|
||||
this.decodeBuf = codec.decodeBuf;
|
||||
function SBCSDecoder (options, codec) {
|
||||
this.decodeBuf = codec.decodeBuf
|
||||
}
|
||||
|
||||
SBCSDecoder.prototype.write = function(buf) {
|
||||
// Strings are immutable in JS -> we use ucs2 buffer to speed up computations.
|
||||
var decodeBuf = this.decodeBuf;
|
||||
var newBuf = Buffer.alloc(buf.length*2);
|
||||
var idx1 = 0, idx2 = 0;
|
||||
for (var i = 0; i < buf.length; i++) {
|
||||
idx1 = buf[i]*2; idx2 = i*2;
|
||||
newBuf[idx2] = decodeBuf[idx1];
|
||||
newBuf[idx2+1] = decodeBuf[idx1+1];
|
||||
}
|
||||
return newBuf.toString('ucs2');
|
||||
SBCSDecoder.prototype.write = function (buf) {
|
||||
// Strings are immutable in JS -> we use ucs2 buffer to speed up computations.
|
||||
var decodeBuf = this.decodeBuf
|
||||
var newBuf = Buffer.alloc(buf.length * 2)
|
||||
var idx1 = 0; var idx2 = 0
|
||||
for (var i = 0; i < buf.length; i++) {
|
||||
idx1 = buf[i] * 2; idx2 = i * 2
|
||||
newBuf[idx2] = decodeBuf[idx1]
|
||||
newBuf[idx2 + 1] = decodeBuf[idx1 + 1]
|
||||
}
|
||||
return newBuf.toString("ucs2")
|
||||
}
|
||||
|
||||
SBCSDecoder.prototype.end = function() {
|
||||
SBCSDecoder.prototype.end = function () {
|
||||
}
|
||||
|
||||
288
node_modules/iconv-lite/encodings/sbcs-data.js
generated
vendored
288
node_modules/iconv-lite/encodings/sbcs-data.js
generated
vendored
@@ -1,174 +1,178 @@
|
||||
"use strict";
|
||||
"use strict"
|
||||
|
||||
// Manually added data to be used by sbcs codec in addition to generated one.
|
||||
|
||||
module.exports = {
|
||||
// Not supported by iconv, not sure why.
|
||||
"10029": "maccenteuro",
|
||||
"maccenteuro": {
|
||||
"type": "_sbcs",
|
||||
"chars": "ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ"
|
||||
},
|
||||
// Not supported by iconv, not sure why.
|
||||
10029: "maccenteuro",
|
||||
maccenteuro: {
|
||||
type: "_sbcs",
|
||||
chars: "ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ"
|
||||
},
|
||||
|
||||
"808": "cp808",
|
||||
"ibm808": "cp808",
|
||||
"cp808": {
|
||||
"type": "_sbcs",
|
||||
"chars": "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№€■ "
|
||||
},
|
||||
808: "cp808",
|
||||
ibm808: "cp808",
|
||||
cp808: {
|
||||
type: "_sbcs",
|
||||
chars: "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№€■ "
|
||||
},
|
||||
|
||||
"mik": {
|
||||
"type": "_sbcs",
|
||||
"chars": "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя└┴┬├─┼╣║╚╔╩╦╠═╬┐░▒▓│┤№§╗╝┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ "
|
||||
},
|
||||
mik: {
|
||||
type: "_sbcs",
|
||||
chars: "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя└┴┬├─┼╣║╚╔╩╦╠═╬┐░▒▓│┤№§╗╝┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ "
|
||||
},
|
||||
|
||||
// Aliases of generated encodings.
|
||||
"ascii8bit": "ascii",
|
||||
"usascii": "ascii",
|
||||
"ansix34": "ascii",
|
||||
"ansix341968": "ascii",
|
||||
"ansix341986": "ascii",
|
||||
"csascii": "ascii",
|
||||
"cp367": "ascii",
|
||||
"ibm367": "ascii",
|
||||
"isoir6": "ascii",
|
||||
"iso646us": "ascii",
|
||||
"iso646irv": "ascii",
|
||||
"us": "ascii",
|
||||
cp720: {
|
||||
type: "_sbcs",
|
||||
chars: "\x80\x81éâ\x84à\x86çêëèïî\x8d\x8e\x8f\x90\u0651\u0652ô¤ـûùءآأؤ£إئابةتثجحخدذرزسشص«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ضطظعغفµقكلمنهوىي≡\u064b\u064c\u064d\u064e\u064f\u0650≈°∙·√ⁿ²■\u00a0"
|
||||
},
|
||||
|
||||
"latin1": "iso88591",
|
||||
"latin2": "iso88592",
|
||||
"latin3": "iso88593",
|
||||
"latin4": "iso88594",
|
||||
"latin5": "iso88599",
|
||||
"latin6": "iso885910",
|
||||
"latin7": "iso885913",
|
||||
"latin8": "iso885914",
|
||||
"latin9": "iso885915",
|
||||
"latin10": "iso885916",
|
||||
// Aliases of generated encodings.
|
||||
ascii8bit: "ascii",
|
||||
usascii: "ascii",
|
||||
ansix34: "ascii",
|
||||
ansix341968: "ascii",
|
||||
ansix341986: "ascii",
|
||||
csascii: "ascii",
|
||||
cp367: "ascii",
|
||||
ibm367: "ascii",
|
||||
isoir6: "ascii",
|
||||
iso646us: "ascii",
|
||||
iso646irv: "ascii",
|
||||
us: "ascii",
|
||||
|
||||
"csisolatin1": "iso88591",
|
||||
"csisolatin2": "iso88592",
|
||||
"csisolatin3": "iso88593",
|
||||
"csisolatin4": "iso88594",
|
||||
"csisolatincyrillic": "iso88595",
|
||||
"csisolatinarabic": "iso88596",
|
||||
"csisolatingreek" : "iso88597",
|
||||
"csisolatinhebrew": "iso88598",
|
||||
"csisolatin5": "iso88599",
|
||||
"csisolatin6": "iso885910",
|
||||
latin1: "iso88591",
|
||||
latin2: "iso88592",
|
||||
latin3: "iso88593",
|
||||
latin4: "iso88594",
|
||||
latin5: "iso88599",
|
||||
latin6: "iso885910",
|
||||
latin7: "iso885913",
|
||||
latin8: "iso885914",
|
||||
latin9: "iso885915",
|
||||
latin10: "iso885916",
|
||||
|
||||
"l1": "iso88591",
|
||||
"l2": "iso88592",
|
||||
"l3": "iso88593",
|
||||
"l4": "iso88594",
|
||||
"l5": "iso88599",
|
||||
"l6": "iso885910",
|
||||
"l7": "iso885913",
|
||||
"l8": "iso885914",
|
||||
"l9": "iso885915",
|
||||
"l10": "iso885916",
|
||||
csisolatin1: "iso88591",
|
||||
csisolatin2: "iso88592",
|
||||
csisolatin3: "iso88593",
|
||||
csisolatin4: "iso88594",
|
||||
csisolatincyrillic: "iso88595",
|
||||
csisolatinarabic: "iso88596",
|
||||
csisolatingreek: "iso88597",
|
||||
csisolatinhebrew: "iso88598",
|
||||
csisolatin5: "iso88599",
|
||||
csisolatin6: "iso885910",
|
||||
|
||||
"isoir14": "iso646jp",
|
||||
"isoir57": "iso646cn",
|
||||
"isoir100": "iso88591",
|
||||
"isoir101": "iso88592",
|
||||
"isoir109": "iso88593",
|
||||
"isoir110": "iso88594",
|
||||
"isoir144": "iso88595",
|
||||
"isoir127": "iso88596",
|
||||
"isoir126": "iso88597",
|
||||
"isoir138": "iso88598",
|
||||
"isoir148": "iso88599",
|
||||
"isoir157": "iso885910",
|
||||
"isoir166": "tis620",
|
||||
"isoir179": "iso885913",
|
||||
"isoir199": "iso885914",
|
||||
"isoir203": "iso885915",
|
||||
"isoir226": "iso885916",
|
||||
l1: "iso88591",
|
||||
l2: "iso88592",
|
||||
l3: "iso88593",
|
||||
l4: "iso88594",
|
||||
l5: "iso88599",
|
||||
l6: "iso885910",
|
||||
l7: "iso885913",
|
||||
l8: "iso885914",
|
||||
l9: "iso885915",
|
||||
l10: "iso885916",
|
||||
|
||||
"cp819": "iso88591",
|
||||
"ibm819": "iso88591",
|
||||
isoir14: "iso646jp",
|
||||
isoir57: "iso646cn",
|
||||
isoir100: "iso88591",
|
||||
isoir101: "iso88592",
|
||||
isoir109: "iso88593",
|
||||
isoir110: "iso88594",
|
||||
isoir144: "iso88595",
|
||||
isoir127: "iso88596",
|
||||
isoir126: "iso88597",
|
||||
isoir138: "iso88598",
|
||||
isoir148: "iso88599",
|
||||
isoir157: "iso885910",
|
||||
isoir166: "tis620",
|
||||
isoir179: "iso885913",
|
||||
isoir199: "iso885914",
|
||||
isoir203: "iso885915",
|
||||
isoir226: "iso885916",
|
||||
|
||||
"cyrillic": "iso88595",
|
||||
cp819: "iso88591",
|
||||
ibm819: "iso88591",
|
||||
|
||||
"arabic": "iso88596",
|
||||
"arabic8": "iso88596",
|
||||
"ecma114": "iso88596",
|
||||
"asmo708": "iso88596",
|
||||
cyrillic: "iso88595",
|
||||
|
||||
"greek" : "iso88597",
|
||||
"greek8" : "iso88597",
|
||||
"ecma118" : "iso88597",
|
||||
"elot928" : "iso88597",
|
||||
arabic: "iso88596",
|
||||
arabic8: "iso88596",
|
||||
ecma114: "iso88596",
|
||||
asmo708: "iso88596",
|
||||
|
||||
"hebrew": "iso88598",
|
||||
"hebrew8": "iso88598",
|
||||
greek: "iso88597",
|
||||
greek8: "iso88597",
|
||||
ecma118: "iso88597",
|
||||
elot928: "iso88597",
|
||||
|
||||
"turkish": "iso88599",
|
||||
"turkish8": "iso88599",
|
||||
hebrew: "iso88598",
|
||||
hebrew8: "iso88598",
|
||||
|
||||
"thai": "iso885911",
|
||||
"thai8": "iso885911",
|
||||
turkish: "iso88599",
|
||||
turkish8: "iso88599",
|
||||
|
||||
"celtic": "iso885914",
|
||||
"celtic8": "iso885914",
|
||||
"isoceltic": "iso885914",
|
||||
thai: "iso885911",
|
||||
thai8: "iso885911",
|
||||
|
||||
"tis6200": "tis620",
|
||||
"tis62025291": "tis620",
|
||||
"tis62025330": "tis620",
|
||||
celtic: "iso885914",
|
||||
celtic8: "iso885914",
|
||||
isoceltic: "iso885914",
|
||||
|
||||
"10000": "macroman",
|
||||
"10006": "macgreek",
|
||||
"10007": "maccyrillic",
|
||||
"10079": "maciceland",
|
||||
"10081": "macturkish",
|
||||
tis6200: "tis620",
|
||||
tis62025291: "tis620",
|
||||
tis62025330: "tis620",
|
||||
|
||||
"cspc8codepage437": "cp437",
|
||||
"cspc775baltic": "cp775",
|
||||
"cspc850multilingual": "cp850",
|
||||
"cspcp852": "cp852",
|
||||
"cspc862latinhebrew": "cp862",
|
||||
"cpgr": "cp869",
|
||||
10000: "macroman",
|
||||
10006: "macgreek",
|
||||
10007: "maccyrillic",
|
||||
10079: "maciceland",
|
||||
10081: "macturkish",
|
||||
|
||||
"msee": "cp1250",
|
||||
"mscyrl": "cp1251",
|
||||
"msansi": "cp1252",
|
||||
"msgreek": "cp1253",
|
||||
"msturk": "cp1254",
|
||||
"mshebr": "cp1255",
|
||||
"msarab": "cp1256",
|
||||
"winbaltrim": "cp1257",
|
||||
cspc8codepage437: "cp437",
|
||||
cspc775baltic: "cp775",
|
||||
cspc850multilingual: "cp850",
|
||||
cspcp852: "cp852",
|
||||
cspc862latinhebrew: "cp862",
|
||||
cpgr: "cp869",
|
||||
|
||||
"cp20866": "koi8r",
|
||||
"20866": "koi8r",
|
||||
"ibm878": "koi8r",
|
||||
"cskoi8r": "koi8r",
|
||||
msee: "cp1250",
|
||||
mscyrl: "cp1251",
|
||||
msansi: "cp1252",
|
||||
msgreek: "cp1253",
|
||||
msturk: "cp1254",
|
||||
mshebr: "cp1255",
|
||||
msarab: "cp1256",
|
||||
winbaltrim: "cp1257",
|
||||
|
||||
"cp21866": "koi8u",
|
||||
"21866": "koi8u",
|
||||
"ibm1168": "koi8u",
|
||||
cp20866: "koi8r",
|
||||
20866: "koi8r",
|
||||
ibm878: "koi8r",
|
||||
cskoi8r: "koi8r",
|
||||
|
||||
"strk10482002": "rk1048",
|
||||
cp21866: "koi8u",
|
||||
21866: "koi8u",
|
||||
ibm1168: "koi8u",
|
||||
|
||||
"tcvn5712": "tcvn",
|
||||
"tcvn57121": "tcvn",
|
||||
strk10482002: "rk1048",
|
||||
|
||||
"gb198880": "iso646cn",
|
||||
"cn": "iso646cn",
|
||||
tcvn5712: "tcvn",
|
||||
tcvn57121: "tcvn",
|
||||
|
||||
"csiso14jisc6220ro": "iso646jp",
|
||||
"jisc62201969ro": "iso646jp",
|
||||
"jp": "iso646jp",
|
||||
gb198880: "iso646cn",
|
||||
cn: "iso646cn",
|
||||
|
||||
"cshproman8": "hproman8",
|
||||
"r8": "hproman8",
|
||||
"roman8": "hproman8",
|
||||
"xroman8": "hproman8",
|
||||
"ibm1051": "hproman8",
|
||||
csiso14jisc6220ro: "iso646jp",
|
||||
jisc62201969ro: "iso646jp",
|
||||
jp: "iso646jp",
|
||||
|
||||
"mac": "macintosh",
|
||||
"csmacintosh": "macintosh",
|
||||
};
|
||||
cshproman8: "hproman8",
|
||||
r8: "hproman8",
|
||||
roman8: "hproman8",
|
||||
xroman8: "hproman8",
|
||||
ibm1051: "hproman8",
|
||||
|
||||
mac: "macintosh",
|
||||
csmacintosh: "macintosh"
|
||||
}
|
||||
|
||||
5
node_modules/iconv-lite/encodings/tables/gbk-added.json
generated
vendored
5
node_modules/iconv-lite/encodings/tables/gbk-added.json
generated
vendored
@@ -27,7 +27,7 @@
|
||||
["a7c2","",14],
|
||||
["a7f2","",12],
|
||||
["a896","",10],
|
||||
["a8bc",""],
|
||||
["a8bc","ḿ"],
|
||||
["a8bf","ǹ"],
|
||||
["a8c1",""],
|
||||
["a8ea","",20],
|
||||
@@ -51,5 +51,6 @@
|
||||
["fca1","",93],
|
||||
["fda1","",93],
|
||||
["fe50","⺁⺄㑳㑇⺈⺋㖞㘚㘎⺌⺗㥮㤘㧏㧟㩳㧐㭎㱮㳠⺧⺪䁖䅟⺮䌷⺳⺶⺷䎱䎬⺻䏝䓖䙡䙌"],
|
||||
["fe80","䜣䜩䝼䞍⻊䥇䥺䥽䦂䦃䦅䦆䦟䦛䦷䦶䲣䲟䲠䲡䱷䲢䴓",6,"䶮",93]
|
||||
["fe80","䜣䜩䝼䞍⻊䥇䥺䥽䦂䦃䦅䦆䦟䦛䦷䦶䲣䲟䲠䲡䱷䲢䴓",6,"䶮",93],
|
||||
["8135f437",""]
|
||||
]
|
||||
|
||||
232
node_modules/iconv-lite/encodings/utf16.js
generated
vendored
232
node_modules/iconv-lite/encodings/utf16.js
generated
vendored
@@ -1,69 +1,66 @@
|
||||
"use strict";
|
||||
var Buffer = require("safer-buffer").Buffer;
|
||||
"use strict"
|
||||
var Buffer = require("safer-buffer").Buffer
|
||||
|
||||
// Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
|
||||
|
||||
// == UTF16-BE codec. ==========================================================
|
||||
|
||||
exports.utf16be = Utf16BECodec;
|
||||
function Utf16BECodec() {
|
||||
exports.utf16be = Utf16BECodec
|
||||
function Utf16BECodec () {
|
||||
}
|
||||
|
||||
Utf16BECodec.prototype.encoder = Utf16BEEncoder;
|
||||
Utf16BECodec.prototype.decoder = Utf16BEDecoder;
|
||||
Utf16BECodec.prototype.bomAware = true;
|
||||
|
||||
Utf16BECodec.prototype.encoder = Utf16BEEncoder
|
||||
Utf16BECodec.prototype.decoder = Utf16BEDecoder
|
||||
Utf16BECodec.prototype.bomAware = true
|
||||
|
||||
// -- Encoding
|
||||
|
||||
function Utf16BEEncoder() {
|
||||
function Utf16BEEncoder () {
|
||||
}
|
||||
|
||||
Utf16BEEncoder.prototype.write = function(str) {
|
||||
var buf = Buffer.from(str, 'ucs2');
|
||||
for (var i = 0; i < buf.length; i += 2) {
|
||||
var tmp = buf[i]; buf[i] = buf[i+1]; buf[i+1] = tmp;
|
||||
}
|
||||
return buf;
|
||||
Utf16BEEncoder.prototype.write = function (str) {
|
||||
var buf = Buffer.from(str, "ucs2")
|
||||
for (var i = 0; i < buf.length; i += 2) {
|
||||
var tmp = buf[i]; buf[i] = buf[i + 1]; buf[i + 1] = tmp
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
Utf16BEEncoder.prototype.end = function() {
|
||||
Utf16BEEncoder.prototype.end = function () {
|
||||
}
|
||||
|
||||
|
||||
// -- Decoding
|
||||
|
||||
function Utf16BEDecoder() {
|
||||
this.overflowByte = -1;
|
||||
function Utf16BEDecoder () {
|
||||
this.overflowByte = -1
|
||||
}
|
||||
|
||||
Utf16BEDecoder.prototype.write = function(buf) {
|
||||
if (buf.length == 0)
|
||||
return '';
|
||||
Utf16BEDecoder.prototype.write = function (buf) {
|
||||
if (buf.length == 0) { return "" }
|
||||
|
||||
var buf2 = Buffer.alloc(buf.length + 1),
|
||||
i = 0, j = 0;
|
||||
var buf2 = Buffer.alloc(buf.length + 1)
|
||||
var i = 0; var j = 0
|
||||
|
||||
if (this.overflowByte !== -1) {
|
||||
buf2[0] = buf[0];
|
||||
buf2[1] = this.overflowByte;
|
||||
i = 1; j = 2;
|
||||
}
|
||||
if (this.overflowByte !== -1) {
|
||||
buf2[0] = buf[0]
|
||||
buf2[1] = this.overflowByte
|
||||
i = 1; j = 2
|
||||
}
|
||||
|
||||
for (; i < buf.length-1; i += 2, j+= 2) {
|
||||
buf2[j] = buf[i+1];
|
||||
buf2[j+1] = buf[i];
|
||||
}
|
||||
for (; i < buf.length - 1; i += 2, j += 2) {
|
||||
buf2[j] = buf[i + 1]
|
||||
buf2[j + 1] = buf[i]
|
||||
}
|
||||
|
||||
this.overflowByte = (i == buf.length-1) ? buf[buf.length-1] : -1;
|
||||
this.overflowByte = (i == buf.length - 1) ? buf[buf.length - 1] : -1
|
||||
|
||||
return buf2.slice(0, j).toString('ucs2');
|
||||
return buf2.slice(0, j).toString("ucs2")
|
||||
}
|
||||
|
||||
Utf16BEDecoder.prototype.end = function() {
|
||||
Utf16BEDecoder.prototype.end = function () {
|
||||
this.overflowByte = -1
|
||||
}
|
||||
|
||||
|
||||
// == UTF-16 codec =============================================================
|
||||
// Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
|
||||
// Defaults to UTF-16LE, as it's prevalent and default in Node.
|
||||
@@ -72,106 +69,119 @@ Utf16BEDecoder.prototype.end = function() {
|
||||
|
||||
// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
|
||||
|
||||
exports.utf16 = Utf16Codec;
|
||||
function Utf16Codec(codecOptions, iconv) {
|
||||
this.iconv = iconv;
|
||||
exports.utf16 = Utf16Codec
|
||||
function Utf16Codec (codecOptions, iconv) {
|
||||
this.iconv = iconv
|
||||
}
|
||||
|
||||
Utf16Codec.prototype.encoder = Utf16Encoder;
|
||||
Utf16Codec.prototype.decoder = Utf16Decoder;
|
||||
|
||||
Utf16Codec.prototype.encoder = Utf16Encoder
|
||||
Utf16Codec.prototype.decoder = Utf16Decoder
|
||||
|
||||
// -- Encoding (pass-through)
|
||||
|
||||
function Utf16Encoder(options, codec) {
|
||||
options = options || {};
|
||||
if (options.addBOM === undefined)
|
||||
options.addBOM = true;
|
||||
this.encoder = codec.iconv.getEncoder('utf-16le', options);
|
||||
function Utf16Encoder (options, codec) {
|
||||
options = options || {}
|
||||
if (options.addBOM === undefined) { options.addBOM = true }
|
||||
this.encoder = codec.iconv.getEncoder("utf-16le", options)
|
||||
}
|
||||
|
||||
Utf16Encoder.prototype.write = function(str) {
|
||||
return this.encoder.write(str);
|
||||
Utf16Encoder.prototype.write = function (str) {
|
||||
return this.encoder.write(str)
|
||||
}
|
||||
|
||||
Utf16Encoder.prototype.end = function() {
|
||||
return this.encoder.end();
|
||||
Utf16Encoder.prototype.end = function () {
|
||||
return this.encoder.end()
|
||||
}
|
||||
|
||||
|
||||
// -- Decoding
|
||||
|
||||
function Utf16Decoder(options, codec) {
|
||||
this.decoder = null;
|
||||
this.initialBytes = [];
|
||||
this.initialBytesLen = 0;
|
||||
function Utf16Decoder (options, codec) {
|
||||
this.decoder = null
|
||||
this.initialBufs = []
|
||||
this.initialBufsLen = 0
|
||||
|
||||
this.options = options || {};
|
||||
this.iconv = codec.iconv;
|
||||
this.options = options || {}
|
||||
this.iconv = codec.iconv
|
||||
}
|
||||
|
||||
Utf16Decoder.prototype.write = function(buf) {
|
||||
if (!this.decoder) {
|
||||
// Codec is not chosen yet. Accumulate initial bytes.
|
||||
this.initialBytes.push(buf);
|
||||
this.initialBytesLen += buf.length;
|
||||
|
||||
if (this.initialBytesLen < 16) // We need more bytes to use space heuristic (see below)
|
||||
return '';
|
||||
Utf16Decoder.prototype.write = function (buf) {
|
||||
if (!this.decoder) {
|
||||
// Codec is not chosen yet. Accumulate initial bytes.
|
||||
this.initialBufs.push(buf)
|
||||
this.initialBufsLen += buf.length
|
||||
|
||||
// We have enough bytes -> detect endianness.
|
||||
var buf = Buffer.concat(this.initialBytes),
|
||||
encoding = detectEncoding(buf, this.options.defaultEncoding);
|
||||
this.decoder = this.iconv.getDecoder(encoding, this.options);
|
||||
this.initialBytes.length = this.initialBytesLen = 0;
|
||||
}
|
||||
if (this.initialBufsLen < 16) // We need more bytes to use space heuristic (see below)
|
||||
{ return "" }
|
||||
|
||||
return this.decoder.write(buf);
|
||||
// We have enough bytes -> detect endianness.
|
||||
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding)
|
||||
this.decoder = this.iconv.getDecoder(encoding, this.options)
|
||||
|
||||
var resStr = ""
|
||||
for (var i = 0; i < this.initialBufs.length; i++) { resStr += this.decoder.write(this.initialBufs[i]) }
|
||||
|
||||
this.initialBufs.length = this.initialBufsLen = 0
|
||||
return resStr
|
||||
}
|
||||
|
||||
return this.decoder.write(buf)
|
||||
}
|
||||
|
||||
Utf16Decoder.prototype.end = function() {
|
||||
if (!this.decoder) {
|
||||
var buf = Buffer.concat(this.initialBytes),
|
||||
encoding = detectEncoding(buf, this.options.defaultEncoding);
|
||||
this.decoder = this.iconv.getDecoder(encoding, this.options);
|
||||
Utf16Decoder.prototype.end = function () {
|
||||
if (!this.decoder) {
|
||||
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding)
|
||||
this.decoder = this.iconv.getDecoder(encoding, this.options)
|
||||
|
||||
var res = this.decoder.write(buf),
|
||||
trail = this.decoder.end();
|
||||
var resStr = ""
|
||||
for (var i = 0; i < this.initialBufs.length; i++) { resStr += this.decoder.write(this.initialBufs[i]) }
|
||||
|
||||
return trail ? (res + trail) : res;
|
||||
}
|
||||
return this.decoder.end();
|
||||
var trail = this.decoder.end()
|
||||
if (trail) { resStr += trail }
|
||||
|
||||
this.initialBufs.length = this.initialBufsLen = 0
|
||||
return resStr
|
||||
}
|
||||
return this.decoder.end()
|
||||
}
|
||||
|
||||
function detectEncoding(buf, defaultEncoding) {
|
||||
var enc = defaultEncoding || 'utf-16le';
|
||||
function detectEncoding (bufs, defaultEncoding) {
|
||||
var b = []
|
||||
var charsProcessed = 0
|
||||
// Number of ASCII chars when decoded as LE or BE.
|
||||
var asciiCharsLE = 0
|
||||
var asciiCharsBE = 0
|
||||
|
||||
if (buf.length >= 2) {
|
||||
// Check BOM.
|
||||
if (buf[0] == 0xFE && buf[1] == 0xFF) // UTF-16BE BOM
|
||||
enc = 'utf-16be';
|
||||
else if (buf[0] == 0xFF && buf[1] == 0xFE) // UTF-16LE BOM
|
||||
enc = 'utf-16le';
|
||||
else {
|
||||
// No BOM found. Try to deduce encoding from initial content.
|
||||
// Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
|
||||
// So, we count ASCII as if it was LE or BE, and decide from that.
|
||||
var asciiCharsLE = 0, asciiCharsBE = 0, // Counts of chars in both positions
|
||||
_len = Math.min(buf.length - (buf.length % 2), 64); // Len is always even.
|
||||
|
||||
for (var i = 0; i < _len; i += 2) {
|
||||
if (buf[i] === 0 && buf[i+1] !== 0) asciiCharsBE++;
|
||||
if (buf[i] !== 0 && buf[i+1] === 0) asciiCharsLE++;
|
||||
}
|
||||
|
||||
if (asciiCharsBE > asciiCharsLE)
|
||||
enc = 'utf-16be';
|
||||
else if (asciiCharsBE < asciiCharsLE)
|
||||
enc = 'utf-16le';
|
||||
outerLoop:
|
||||
for (var i = 0; i < bufs.length; i++) {
|
||||
var buf = bufs[i]
|
||||
for (var j = 0; j < buf.length; j++) {
|
||||
b.push(buf[j])
|
||||
if (b.length === 2) {
|
||||
if (charsProcessed === 0) {
|
||||
// Check BOM first.
|
||||
if (b[0] === 0xFF && b[1] === 0xFE) return "utf-16le"
|
||||
if (b[0] === 0xFE && b[1] === 0xFF) return "utf-16be"
|
||||
}
|
||||
|
||||
if (b[0] === 0 && b[1] !== 0) asciiCharsBE++
|
||||
if (b[0] !== 0 && b[1] === 0) asciiCharsLE++
|
||||
|
||||
b.length = 0
|
||||
charsProcessed++
|
||||
|
||||
if (charsProcessed >= 100) {
|
||||
break outerLoop
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return enc;
|
||||
// Make decisions.
|
||||
// Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
|
||||
// So, we count ASCII as if it was LE or BE, and decide from that.
|
||||
if (asciiCharsBE > asciiCharsLE) return "utf-16be"
|
||||
if (asciiCharsBE < asciiCharsLE) return "utf-16le"
|
||||
|
||||
// Couldn't decide (likely all zeros or not enough data).
|
||||
return defaultEncoding || "utf-16le"
|
||||
}
|
||||
|
||||
|
||||
|
||||
415
node_modules/iconv-lite/encodings/utf7.js
generated
vendored
415
node_modules/iconv-lite/encodings/utf7.js
generated
vendored
@@ -1,122 +1,122 @@
|
||||
"use strict";
|
||||
var Buffer = require("safer-buffer").Buffer;
|
||||
"use strict"
|
||||
var Buffer = require("safer-buffer").Buffer
|
||||
|
||||
// UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
|
||||
// See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
|
||||
|
||||
exports.utf7 = Utf7Codec;
|
||||
exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
|
||||
function Utf7Codec(codecOptions, iconv) {
|
||||
this.iconv = iconv;
|
||||
exports.utf7 = Utf7Codec
|
||||
exports.unicode11utf7 = "utf7" // Alias UNICODE-1-1-UTF-7
|
||||
function Utf7Codec (codecOptions, iconv) {
|
||||
this.iconv = iconv
|
||||
};
|
||||
|
||||
Utf7Codec.prototype.encoder = Utf7Encoder;
|
||||
Utf7Codec.prototype.decoder = Utf7Decoder;
|
||||
Utf7Codec.prototype.bomAware = true;
|
||||
|
||||
Utf7Codec.prototype.encoder = Utf7Encoder
|
||||
Utf7Codec.prototype.decoder = Utf7Decoder
|
||||
Utf7Codec.prototype.bomAware = true
|
||||
|
||||
// -- Encoding
|
||||
|
||||
var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g;
|
||||
// Why scape ()?./?
|
||||
// eslint-disable-next-line no-useless-escape
|
||||
var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g
|
||||
|
||||
function Utf7Encoder(options, codec) {
|
||||
this.iconv = codec.iconv;
|
||||
function Utf7Encoder (options, codec) {
|
||||
this.iconv = codec.iconv
|
||||
}
|
||||
|
||||
Utf7Encoder.prototype.write = function(str) {
|
||||
// Naive implementation.
|
||||
// Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".
|
||||
return Buffer.from(str.replace(nonDirectChars, function(chunk) {
|
||||
return "+" + (chunk === '+' ? '' :
|
||||
this.iconv.encode(chunk, 'utf16-be').toString('base64').replace(/=+$/, ''))
|
||||
+ "-";
|
||||
}.bind(this)));
|
||||
Utf7Encoder.prototype.write = function (str) {
|
||||
// Naive implementation.
|
||||
// Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".
|
||||
return Buffer.from(str.replace(nonDirectChars, function (chunk) {
|
||||
return "+" + (chunk === "+"
|
||||
? ""
|
||||
: this.iconv.encode(chunk, "utf16-be").toString("base64").replace(/=+$/, "")) +
|
||||
"-"
|
||||
}.bind(this)))
|
||||
}
|
||||
|
||||
Utf7Encoder.prototype.end = function() {
|
||||
Utf7Encoder.prototype.end = function () {
|
||||
}
|
||||
|
||||
|
||||
// -- Decoding
|
||||
|
||||
function Utf7Decoder(options, codec) {
|
||||
this.iconv = codec.iconv;
|
||||
this.inBase64 = false;
|
||||
this.base64Accum = '';
|
||||
function Utf7Decoder (options, codec) {
|
||||
this.iconv = codec.iconv
|
||||
this.inBase64 = false
|
||||
this.base64Accum = ""
|
||||
}
|
||||
|
||||
var base64Regex = /[A-Za-z0-9\/+]/;
|
||||
var base64Chars = [];
|
||||
for (var i = 0; i < 256; i++)
|
||||
base64Chars[i] = base64Regex.test(String.fromCharCode(i));
|
||||
// Why scape /?
|
||||
// eslint-disable-next-line no-useless-escape
|
||||
var base64Regex = /[A-Za-z0-9\/+]/
|
||||
var base64Chars = []
|
||||
for (var i = 0; i < 256; i++) { base64Chars[i] = base64Regex.test(String.fromCharCode(i)) }
|
||||
|
||||
var plusChar = '+'.charCodeAt(0),
|
||||
minusChar = '-'.charCodeAt(0),
|
||||
andChar = '&'.charCodeAt(0);
|
||||
var plusChar = "+".charCodeAt(0)
|
||||
var minusChar = "-".charCodeAt(0)
|
||||
var andChar = "&".charCodeAt(0)
|
||||
|
||||
Utf7Decoder.prototype.write = function(buf) {
|
||||
var res = "", lastI = 0,
|
||||
inBase64 = this.inBase64,
|
||||
base64Accum = this.base64Accum;
|
||||
Utf7Decoder.prototype.write = function (buf) {
|
||||
var res = ""; var lastI = 0
|
||||
var inBase64 = this.inBase64
|
||||
var base64Accum = this.base64Accum
|
||||
|
||||
// The decoder is more involved as we must handle chunks in stream.
|
||||
// The decoder is more involved as we must handle chunks in stream.
|
||||
|
||||
for (var i = 0; i < buf.length; i++) {
|
||||
if (!inBase64) { // We're in direct mode.
|
||||
// Write direct chars until '+'
|
||||
if (buf[i] == plusChar) {
|
||||
res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
|
||||
lastI = i+1;
|
||||
inBase64 = true;
|
||||
}
|
||||
} else { // We decode base64.
|
||||
if (!base64Chars[buf[i]]) { // Base64 ended.
|
||||
if (i == lastI && buf[i] == minusChar) {// "+-" -> "+"
|
||||
res += "+";
|
||||
} else {
|
||||
var b64str = base64Accum + buf.slice(lastI, i).toString();
|
||||
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
|
||||
}
|
||||
|
||||
if (buf[i] != minusChar) // Minus is absorbed after base64.
|
||||
i--;
|
||||
|
||||
lastI = i+1;
|
||||
inBase64 = false;
|
||||
base64Accum = '';
|
||||
}
|
||||
for (var i = 0; i < buf.length; i++) {
|
||||
if (!inBase64) { // We're in direct mode.
|
||||
// Write direct chars until '+'
|
||||
if (buf[i] == plusChar) {
|
||||
res += this.iconv.decode(buf.slice(lastI, i), "ascii") // Write direct chars.
|
||||
lastI = i + 1
|
||||
inBase64 = true
|
||||
}
|
||||
} else { // We decode base64.
|
||||
if (!base64Chars[buf[i]]) { // Base64 ended.
|
||||
if (i == lastI && buf[i] == minusChar) { // "+-" -> "+"
|
||||
res += "+"
|
||||
} else {
|
||||
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI, i), "ascii")
|
||||
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
|
||||
}
|
||||
|
||||
if (buf[i] != minusChar) // Minus is absorbed after base64.
|
||||
{ i-- }
|
||||
|
||||
lastI = i + 1
|
||||
inBase64 = false
|
||||
base64Accum = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!inBase64) {
|
||||
res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
|
||||
} else {
|
||||
var b64str = base64Accum + buf.slice(lastI).toString();
|
||||
if (!inBase64) {
|
||||
res += this.iconv.decode(buf.slice(lastI), "ascii") // Write direct chars.
|
||||
} else {
|
||||
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI), "ascii")
|
||||
|
||||
var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
|
||||
base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
|
||||
b64str = b64str.slice(0, canBeDecoded);
|
||||
var canBeDecoded = b64str.length - (b64str.length % 8) // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
|
||||
base64Accum = b64str.slice(canBeDecoded) // The rest will be decoded in future.
|
||||
b64str = b64str.slice(0, canBeDecoded)
|
||||
|
||||
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
|
||||
}
|
||||
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
|
||||
}
|
||||
|
||||
this.inBase64 = inBase64;
|
||||
this.base64Accum = base64Accum;
|
||||
this.inBase64 = inBase64
|
||||
this.base64Accum = base64Accum
|
||||
|
||||
return res;
|
||||
return res
|
||||
}
|
||||
|
||||
Utf7Decoder.prototype.end = function() {
|
||||
var res = "";
|
||||
if (this.inBase64 && this.base64Accum.length > 0)
|
||||
res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");
|
||||
Utf7Decoder.prototype.end = function () {
|
||||
var res = ""
|
||||
if (this.inBase64 && this.base64Accum.length > 0) { res = this.iconv.decode(Buffer.from(this.base64Accum, "base64"), "utf16-be") }
|
||||
|
||||
this.inBase64 = false;
|
||||
this.base64Accum = '';
|
||||
return res;
|
||||
this.inBase64 = false
|
||||
this.base64Accum = ""
|
||||
return res
|
||||
}
|
||||
|
||||
|
||||
// UTF-7-IMAP codec.
|
||||
// RFC3501 Sec. 5.1.3 Modified UTF-7 (http://tools.ietf.org/html/rfc3501#section-5.1.3)
|
||||
// Differences:
|
||||
@@ -128,163 +128,156 @@ Utf7Decoder.prototype.end = function() {
|
||||
// * String must end in non-shifted position.
|
||||
// * "-&" while in base64 is not allowed.
|
||||
|
||||
|
||||
exports.utf7imap = Utf7IMAPCodec;
|
||||
function Utf7IMAPCodec(codecOptions, iconv) {
|
||||
this.iconv = iconv;
|
||||
exports.utf7imap = Utf7IMAPCodec
|
||||
function Utf7IMAPCodec (codecOptions, iconv) {
|
||||
this.iconv = iconv
|
||||
};
|
||||
|
||||
Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder;
|
||||
Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder;
|
||||
Utf7IMAPCodec.prototype.bomAware = true;
|
||||
|
||||
Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder
|
||||
Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder
|
||||
Utf7IMAPCodec.prototype.bomAware = true
|
||||
|
||||
// -- Encoding
|
||||
|
||||
function Utf7IMAPEncoder(options, codec) {
|
||||
this.iconv = codec.iconv;
|
||||
this.inBase64 = false;
|
||||
this.base64Accum = Buffer.alloc(6);
|
||||
this.base64AccumIdx = 0;
|
||||
function Utf7IMAPEncoder (options, codec) {
|
||||
this.iconv = codec.iconv
|
||||
this.inBase64 = false
|
||||
this.base64Accum = Buffer.alloc(6)
|
||||
this.base64AccumIdx = 0
|
||||
}
|
||||
|
||||
Utf7IMAPEncoder.prototype.write = function(str) {
|
||||
var inBase64 = this.inBase64,
|
||||
base64Accum = this.base64Accum,
|
||||
base64AccumIdx = this.base64AccumIdx,
|
||||
buf = Buffer.alloc(str.length*5 + 10), bufIdx = 0;
|
||||
Utf7IMAPEncoder.prototype.write = function (str) {
|
||||
var inBase64 = this.inBase64
|
||||
var base64Accum = this.base64Accum
|
||||
var base64AccumIdx = this.base64AccumIdx
|
||||
var buf = Buffer.alloc(str.length * 5 + 10); var bufIdx = 0
|
||||
|
||||
for (var i = 0; i < str.length; i++) {
|
||||
var uChar = str.charCodeAt(i);
|
||||
if (0x20 <= uChar && uChar <= 0x7E) { // Direct character or '&'.
|
||||
if (inBase64) {
|
||||
if (base64AccumIdx > 0) {
|
||||
bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
|
||||
base64AccumIdx = 0;
|
||||
}
|
||||
|
||||
buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
|
||||
inBase64 = false;
|
||||
}
|
||||
|
||||
if (!inBase64) {
|
||||
buf[bufIdx++] = uChar; // Write direct character
|
||||
|
||||
if (uChar === andChar) // Ampersand -> '&-'
|
||||
buf[bufIdx++] = minusChar;
|
||||
}
|
||||
|
||||
} else { // Non-direct character
|
||||
if (!inBase64) {
|
||||
buf[bufIdx++] = andChar; // Write '&', then go to base64 mode.
|
||||
inBase64 = true;
|
||||
}
|
||||
if (inBase64) {
|
||||
base64Accum[base64AccumIdx++] = uChar >> 8;
|
||||
base64Accum[base64AccumIdx++] = uChar & 0xFF;
|
||||
|
||||
if (base64AccumIdx == base64Accum.length) {
|
||||
bufIdx += buf.write(base64Accum.toString('base64').replace(/\//g, ','), bufIdx);
|
||||
base64AccumIdx = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.inBase64 = inBase64;
|
||||
this.base64AccumIdx = base64AccumIdx;
|
||||
|
||||
return buf.slice(0, bufIdx);
|
||||
}
|
||||
|
||||
Utf7IMAPEncoder.prototype.end = function() {
|
||||
var buf = Buffer.alloc(10), bufIdx = 0;
|
||||
if (this.inBase64) {
|
||||
if (this.base64AccumIdx > 0) {
|
||||
bufIdx += buf.write(this.base64Accum.slice(0, this.base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
|
||||
this.base64AccumIdx = 0;
|
||||
for (var i = 0; i < str.length; i++) {
|
||||
var uChar = str.charCodeAt(i)
|
||||
if (uChar >= 0x20 && uChar <= 0x7E) { // Direct character or '&'.
|
||||
if (inBase64) {
|
||||
if (base64AccumIdx > 0) {
|
||||
bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString("base64").replace(/\//g, ",").replace(/=+$/, ""), bufIdx)
|
||||
base64AccumIdx = 0
|
||||
}
|
||||
|
||||
buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
|
||||
this.inBase64 = false;
|
||||
}
|
||||
buf[bufIdx++] = minusChar // Write '-', then go to direct mode.
|
||||
inBase64 = false
|
||||
}
|
||||
|
||||
return buf.slice(0, bufIdx);
|
||||
if (!inBase64) {
|
||||
buf[bufIdx++] = uChar // Write direct character
|
||||
|
||||
if (uChar === andChar) // Ampersand -> '&-'
|
||||
{ buf[bufIdx++] = minusChar }
|
||||
}
|
||||
} else { // Non-direct character
|
||||
if (!inBase64) {
|
||||
buf[bufIdx++] = andChar // Write '&', then go to base64 mode.
|
||||
inBase64 = true
|
||||
}
|
||||
if (inBase64) {
|
||||
base64Accum[base64AccumIdx++] = uChar >> 8
|
||||
base64Accum[base64AccumIdx++] = uChar & 0xFF
|
||||
|
||||
if (base64AccumIdx == base64Accum.length) {
|
||||
bufIdx += buf.write(base64Accum.toString("base64").replace(/\//g, ","), bufIdx)
|
||||
base64AccumIdx = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.inBase64 = inBase64
|
||||
this.base64AccumIdx = base64AccumIdx
|
||||
|
||||
return buf.slice(0, bufIdx)
|
||||
}
|
||||
|
||||
Utf7IMAPEncoder.prototype.end = function () {
|
||||
var buf = Buffer.alloc(10); var bufIdx = 0
|
||||
if (this.inBase64) {
|
||||
if (this.base64AccumIdx > 0) {
|
||||
bufIdx += buf.write(this.base64Accum.slice(0, this.base64AccumIdx).toString("base64").replace(/\//g, ",").replace(/=+$/, ""), bufIdx)
|
||||
this.base64AccumIdx = 0
|
||||
}
|
||||
|
||||
buf[bufIdx++] = minusChar // Write '-', then go to direct mode.
|
||||
this.inBase64 = false
|
||||
}
|
||||
|
||||
return buf.slice(0, bufIdx)
|
||||
}
|
||||
|
||||
// -- Decoding
|
||||
|
||||
function Utf7IMAPDecoder(options, codec) {
|
||||
this.iconv = codec.iconv;
|
||||
this.inBase64 = false;
|
||||
this.base64Accum = '';
|
||||
function Utf7IMAPDecoder (options, codec) {
|
||||
this.iconv = codec.iconv
|
||||
this.inBase64 = false
|
||||
this.base64Accum = ""
|
||||
}
|
||||
|
||||
var base64IMAPChars = base64Chars.slice();
|
||||
base64IMAPChars[','.charCodeAt(0)] = true;
|
||||
var base64IMAPChars = base64Chars.slice()
|
||||
base64IMAPChars[",".charCodeAt(0)] = true
|
||||
|
||||
Utf7IMAPDecoder.prototype.write = function(buf) {
|
||||
var res = "", lastI = 0,
|
||||
inBase64 = this.inBase64,
|
||||
base64Accum = this.base64Accum;
|
||||
Utf7IMAPDecoder.prototype.write = function (buf) {
|
||||
var res = ""; var lastI = 0
|
||||
var inBase64 = this.inBase64
|
||||
var base64Accum = this.base64Accum
|
||||
|
||||
// The decoder is more involved as we must handle chunks in stream.
|
||||
// It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
|
||||
// The decoder is more involved as we must handle chunks in stream.
|
||||
// It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
|
||||
|
||||
for (var i = 0; i < buf.length; i++) {
|
||||
if (!inBase64) { // We're in direct mode.
|
||||
// Write direct chars until '&'
|
||||
if (buf[i] == andChar) {
|
||||
res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
|
||||
lastI = i+1;
|
||||
inBase64 = true;
|
||||
}
|
||||
} else { // We decode base64.
|
||||
if (!base64IMAPChars[buf[i]]) { // Base64 ended.
|
||||
if (i == lastI && buf[i] == minusChar) { // "&-" -> "&"
|
||||
res += "&";
|
||||
} else {
|
||||
var b64str = base64Accum + buf.slice(lastI, i).toString().replace(/,/g, '/');
|
||||
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
|
||||
}
|
||||
|
||||
if (buf[i] != minusChar) // Minus may be absorbed after base64.
|
||||
i--;
|
||||
|
||||
lastI = i+1;
|
||||
inBase64 = false;
|
||||
base64Accum = '';
|
||||
}
|
||||
for (var i = 0; i < buf.length; i++) {
|
||||
if (!inBase64) { // We're in direct mode.
|
||||
// Write direct chars until '&'
|
||||
if (buf[i] == andChar) {
|
||||
res += this.iconv.decode(buf.slice(lastI, i), "ascii") // Write direct chars.
|
||||
lastI = i + 1
|
||||
inBase64 = true
|
||||
}
|
||||
} else { // We decode base64.
|
||||
if (!base64IMAPChars[buf[i]]) { // Base64 ended.
|
||||
if (i == lastI && buf[i] == minusChar) { // "&-" -> "&"
|
||||
res += "&"
|
||||
} else {
|
||||
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI, i), "ascii").replace(/,/g, "/")
|
||||
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
|
||||
}
|
||||
|
||||
if (buf[i] != minusChar) // Minus may be absorbed after base64.
|
||||
{ i-- }
|
||||
|
||||
lastI = i + 1
|
||||
inBase64 = false
|
||||
base64Accum = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!inBase64) {
|
||||
res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
|
||||
} else {
|
||||
var b64str = base64Accum + buf.slice(lastI).toString().replace(/,/g, '/');
|
||||
if (!inBase64) {
|
||||
res += this.iconv.decode(buf.slice(lastI), "ascii") // Write direct chars.
|
||||
} else {
|
||||
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI), "ascii").replace(/,/g, "/")
|
||||
|
||||
var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
|
||||
base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
|
||||
b64str = b64str.slice(0, canBeDecoded);
|
||||
var canBeDecoded = b64str.length - (b64str.length % 8) // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
|
||||
base64Accum = b64str.slice(canBeDecoded) // The rest will be decoded in future.
|
||||
b64str = b64str.slice(0, canBeDecoded)
|
||||
|
||||
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
|
||||
}
|
||||
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
|
||||
}
|
||||
|
||||
this.inBase64 = inBase64;
|
||||
this.base64Accum = base64Accum;
|
||||
this.inBase64 = inBase64
|
||||
this.base64Accum = base64Accum
|
||||
|
||||
return res;
|
||||
return res
|
||||
}
|
||||
|
||||
Utf7IMAPDecoder.prototype.end = function() {
|
||||
var res = "";
|
||||
if (this.inBase64 && this.base64Accum.length > 0)
|
||||
res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");
|
||||
Utf7IMAPDecoder.prototype.end = function () {
|
||||
var res = ""
|
||||
if (this.inBase64 && this.base64Accum.length > 0) { res = this.iconv.decode(Buffer.from(this.base64Accum, "base64"), "utf16-be") }
|
||||
|
||||
this.inBase64 = false;
|
||||
this.base64Accum = '';
|
||||
return res;
|
||||
this.inBase64 = false
|
||||
this.base64Accum = ""
|
||||
return res
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user