I just updated cutie.js.
Oh right, I also added info.js, which is a mess, by the way.
This commit is contained in:
Syxpi
2025-10-11 22:06:46 +02:00
parent 14d4df5a40
commit b5cba1c318
283 changed files with 15040 additions and 12924 deletions

File diff suppressed because it is too large


@@ -1,188 +1,185 @@
"use strict";
"use strict"
// Description of supported double byte encodings and aliases.
// Tables are not require()-d until they are needed to speed up library load.
// require()-s are direct to support Browserify.
module.exports = {
// == Japanese/ShiftJIS ====================================================
// All japanese encodings are based on JIS X set of standards:
// JIS X 0201 - Single-byte encoding of ASCII + ¥ + Kana chars at 0xA1-0xDF.
// JIS X 0208 - Main set of 6879 characters, placed in 94x94 plane, to be encoded by 2 bytes.
// Has several variations in 1978, 1983, 1990 and 1997.
// JIS X 0212 - Supplementary plane of 6067 chars in 94x94 plane. 1990. Effectively dead.
// JIS X 0213 - Extension and modern replacement of 0208 and 0212. Total chars: 11233.
// 2 planes, first is superset of 0208, second - revised 0212.
// Introduced in 2000, revised 2004. Some characters are in Unicode Plane 2 (0x2xxxx)
// Byte encodings are:
// * Shift_JIS: Compatible with 0201, uses unassigned chars in the top half as lead bytes for double-byte
// encoding of 0208. Lead byte ranges: 0x81-0x9F, 0xE0-0xEF; Trail byte ranges: 0x40-0x7E, 0x80-0x9E, 0x9F-0xFC.
// Windows CP932 is a superset of Shift_JIS. Some companies added more chars, notably KDDI.
// * EUC-JP: Up to 3 bytes per character. Used mostly on *nixes.
// 0x00-0x7F - lower part of 0201
// 0x8E, 0xA1-0xDF - upper part of 0201
// (0xA1-0xFE)x2 - 0208 plane (94x94).
// 0x8F, (0xA1-0xFE)x2 - 0212 plane (94x94).
// * JIS X 208: 7-bit, direct encoding of 0208. Byte ranges: 0x21-0x7E (94 values). Uncommon.
// Used as-is in ISO2022 family.
// * ISO2022-JP: Stateful encoding, with escape sequences to switch between ASCII,
// 0201-1976 Roman, 0208-1978, 0208-1983.
// * ISO2022-JP-1: Adds esc seq for 0212-1990.
// * ISO2022-JP-2: Adds esc seq for GB2312-1980, KSX1001-1992, ISO8859-1, ISO8859-7.
// * ISO2022-JP-3: Adds esc seq for 0201-1976 Kana set, 0213-2000 Planes 1, 2.
// * ISO2022-JP-2004: Adds 0213-2004 Plane 1.
//
// After JIS X 0213 appeared, Shift_JIS-2004, EUC-JISX0213 and ISO2022-JP-2004 followed, with just changing the planes.
//
// Overall, it seems that it's a mess :( http://www8.plala.or.jp/tkubota1/unicode-symbols-map2.html
// == Japanese/ShiftJIS ====================================================
// All japanese encodings are based on JIS X set of standards:
// JIS X 0201 - Single-byte encoding of ASCII + ¥ + Kana chars at 0xA1-0xDF.
// JIS X 0208 - Main set of 6879 characters, placed in 94x94 plane, to be encoded by 2 bytes.
// Has several variations in 1978, 1983, 1990 and 1997.
// JIS X 0212 - Supplementary plane of 6067 chars in 94x94 plane. 1990. Effectively dead.
// JIS X 0213 - Extension and modern replacement of 0208 and 0212. Total chars: 11233.
// 2 planes, first is superset of 0208, second - revised 0212.
// Introduced in 2000, revised 2004. Some characters are in Unicode Plane 2 (0x2xxxx)
'shiftjis': {
type: '_dbcs',
table: function() { return require('./tables/shiftjis.json') },
encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
encodeSkipVals: [{from: 0xED40, to: 0xF940}],
},
'csshiftjis': 'shiftjis',
'mskanji': 'shiftjis',
'sjis': 'shiftjis',
'windows31j': 'shiftjis',
'ms31j': 'shiftjis',
'xsjis': 'shiftjis',
'windows932': 'shiftjis',
'ms932': 'shiftjis',
'932': 'shiftjis',
'cp932': 'shiftjis',
// Byte encodings are:
// * Shift_JIS: Compatible with 0201, uses unassigned chars in the top half as lead bytes for double-byte
// encoding of 0208. Lead byte ranges: 0x81-0x9F, 0xE0-0xEF; Trail byte ranges: 0x40-0x7E, 0x80-0x9E, 0x9F-0xFC.
// Windows CP932 is a superset of Shift_JIS. Some companies added more chars, notably KDDI.
// * EUC-JP: Up to 3 bytes per character. Used mostly on *nixes.
// 0x00-0x7F - lower part of 0201
// 0x8E, 0xA1-0xDF - upper part of 0201
// (0xA1-0xFE)x2 - 0208 plane (94x94).
// 0x8F, (0xA1-0xFE)x2 - 0212 plane (94x94).
// * JIS X 208: 7-bit, direct encoding of 0208. Byte ranges: 0x21-0x7E (94 values). Uncommon.
// Used as-is in ISO2022 family.
// * ISO2022-JP: Stateful encoding, with escape sequences to switch between ASCII,
// 0201-1976 Roman, 0208-1978, 0208-1983.
// * ISO2022-JP-1: Adds esc seq for 0212-1990.
// * ISO2022-JP-2: Adds esc seq for GB2312-1980, KSX1001-1992, ISO8859-1, ISO8859-7.
// * ISO2022-JP-3: Adds esc seq for 0201-1976 Kana set, 0213-2000 Planes 1, 2.
// * ISO2022-JP-2004: Adds 0213-2004 Plane 1.
//
// After JIS X 0213 appeared, Shift_JIS-2004, EUC-JISX0213 and ISO2022-JP-2004 followed, with just changing the planes.
//
// Overall, it seems that it's a mess :( http://www8.plala.or.jp/tkubota1/unicode-symbols-map2.html
'eucjp': {
type: '_dbcs',
table: function() { return require('./tables/eucjp.json') },
encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
},
shiftjis: {
type: "_dbcs",
table: function () { return require("./tables/shiftjis.json") },
encodeAdd: { "\u00a5": 0x5C, "\u203E": 0x7E },
encodeSkipVals: [{ from: 0xED40, to: 0xF940 }]
},
csshiftjis: "shiftjis",
mskanji: "shiftjis",
sjis: "shiftjis",
windows31j: "shiftjis",
ms31j: "shiftjis",
xsjis: "shiftjis",
windows932: "shiftjis",
ms932: "shiftjis",
932: "shiftjis",
cp932: "shiftjis",
// TODO: KDDI extension to Shift_JIS
// TODO: IBM CCSID 942 = CP932, but F0-F9 custom chars and other char changes.
// TODO: IBM CCSID 943 = Shift_JIS = CP932 with original Shift_JIS lower 128 chars.
eucjp: {
type: "_dbcs",
table: function () { return require("./tables/eucjp.json") },
encodeAdd: { "\u00a5": 0x5C, "\u203E": 0x7E }
},
// TODO: KDDI extension to Shift_JIS
// TODO: IBM CCSID 942 = CP932, but F0-F9 custom chars and other char changes.
// TODO: IBM CCSID 943 = Shift_JIS = CP932 with original Shift_JIS lower 128 chars.
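// A hedged worked example (not part of this file) of the byte layouts described above,
// assuming the public iconv-lite API: iconv.encode("日", "shiftjis") yields <Buffer 93 fa>
// (lead byte in 0x81-0x9F/0xE0-0xEF, trail byte in 0x40-0xFC), while iconv.encode("日", "eucjp")
// yields <Buffer c6 fc>, i.e. both bytes in the 0xA1-0xFE range of the 0208 plane.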
// == Chinese/GBK ==========================================================
// http://en.wikipedia.org/wiki/GBK
// We mostly implement W3C recommendation: https://www.w3.org/TR/encoding/#gbk-encoder
// == Chinese/GBK ==========================================================
// http://en.wikipedia.org/wiki/GBK
// We mostly implement W3C recommendation: https://www.w3.org/TR/encoding/#gbk-encoder
// Oldest GB2312 (1981, ~7600 chars) is a subset of CP936
'gb2312': 'cp936',
'gb231280': 'cp936',
'gb23121980': 'cp936',
'csgb2312': 'cp936',
'csiso58gb231280': 'cp936',
'euccn': 'cp936',
// Oldest GB2312 (1981, ~7600 chars) is a subset of CP936
gb2312: "cp936",
gb231280: "cp936",
gb23121980: "cp936",
csgb2312: "cp936",
csiso58gb231280: "cp936",
euccn: "cp936",
// Microsoft's CP936 is a subset and approximation of GBK.
'windows936': 'cp936',
'ms936': 'cp936',
'936': 'cp936',
'cp936': {
type: '_dbcs',
table: function() { return require('./tables/cp936.json') },
},
// Microsoft's CP936 is a subset and approximation of GBK.
windows936: "cp936",
ms936: "cp936",
936: "cp936",
cp936: {
type: "_dbcs",
table: function () { return require("./tables/cp936.json") }
},
// GBK (~22000 chars) is an extension of CP936 that added user-mapped chars and some others.
'gbk': {
type: '_dbcs',
table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
},
'xgbk': 'gbk',
'isoir58': 'gbk',
// GBK (~22000 chars) is an extension of CP936 that added user-mapped chars and some others.
gbk: {
type: "_dbcs",
table: function () { return require("./tables/cp936.json").concat(require("./tables/gbk-added.json")) }
},
xgbk: "gbk",
isoir58: "gbk",
// GB18030 is an algorithmic extension of GBK.
// Main source: https://www.w3.org/TR/encoding/#gbk-encoder
// http://icu-project.org/docs/papers/gb18030.html
// http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
// http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
'gb18030': {
type: '_dbcs',
table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
gb18030: function() { return require('./tables/gb18030-ranges.json') },
encodeSkipVals: [0x80],
encodeAdd: {'€': 0xA2E3},
},
// GB18030 is an algorithmic extension of GBK.
// Main source: https://www.w3.org/TR/encoding/#gbk-encoder
// http://icu-project.org/docs/papers/gb18030.html
// http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
// http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
gb18030: {
type: "_dbcs",
table: function () { return require("./tables/cp936.json").concat(require("./tables/gbk-added.json")) },
gb18030: function () { return require("./tables/gb18030-ranges.json") },
encodeSkipVals: [0x80],
encodeAdd: { "€": 0xA2E3 }
},
'chinese': 'gb18030',
chinese: "gb18030",
// == Korean ===============================================================
// EUC-KR, KS_C_5601 and KS X 1001 are exactly the same.
windows949: "cp949",
ms949: "cp949",
949: "cp949",
cp949: {
type: "_dbcs",
table: function () { return require("./tables/cp949.json") }
},
// == Korean ===============================================================
// EUC-KR, KS_C_5601 and KS X 1001 are exactly the same.
'windows949': 'cp949',
'ms949': 'cp949',
'949': 'cp949',
'cp949': {
type: '_dbcs',
table: function() { return require('./tables/cp949.json') },
},
cseuckr: "cp949",
csksc56011987: "cp949",
euckr: "cp949",
isoir149: "cp949",
korean: "cp949",
ksc56011987: "cp949",
ksc56011989: "cp949",
ksc5601: "cp949",
'cseuckr': 'cp949',
'csksc56011987': 'cp949',
'euckr': 'cp949',
'isoir149': 'cp949',
'korean': 'cp949',
'ksc56011987': 'cp949',
'ksc56011989': 'cp949',
'ksc5601': 'cp949',
// == Big5/Taiwan/Hong Kong ================================================
// There are lots of tables for Big5 and cp950. Please see the following links for history:
// http://moztw.org/docs/big5/ http://www.haible.de/bruno/charsets/conversion-tables/Big5.html
// Variations, in roughly number of defined chars:
// * Windows CP 950: Microsoft variant of Big5. Canonical: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
// * Windows CP 951: Microsoft variant of Big5-HKSCS-2001. Seems to be never public. http://me.abelcheung.org/articles/research/what-is-cp951/
// * Big5-2003 (Taiwan standard) almost superset of cp950.
// * Unicode-at-on (UAO) / Mozilla 1.8. Falling out of use on the Web. Not supported by other browsers.
// * Big5-HKSCS (-2001, -2004, -2008). Hong Kong standard.
// many unicode code points moved from PUA to Supplementary plane (U+2XXXX) over the years.
// Plus, it has 4 combining sequences.
// Seems that Mozilla refused to support it for 10 yrs. https://bugzilla.mozilla.org/show_bug.cgi?id=162431 https://bugzilla.mozilla.org/show_bug.cgi?id=310299
// because big5-hkscs is the only encoding to include astral characters in a non-algorithmic way.
// Implementations are not consistent within browsers; sometimes labeled as just big5.
// MS Internet Explorer switches from big5 to big5-hkscs when a patch is applied.
// Great discussion & recap of what's going on https://bugzilla.mozilla.org/show_bug.cgi?id=912470#c31
// In the encoder, it might make sense to support encoding old PUA mappings to Big5 bytes seq-s.
// Official spec: http://www.ogcio.gov.hk/en/business/tech_promotion/ccli/terms/doc/2003cmp_2008.txt
// http://www.ogcio.gov.hk/tc/business/tech_promotion/ccli/terms/doc/hkscs-2008-big5-iso.txt
//
// Current understanding of how to deal with Big5(-HKSCS) is in the Encoding Standard, http://encoding.spec.whatwg.org/#big5-encoder
// Unicode mapping (http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT) is said to be wrong.
windows950: "cp950",
ms950: "cp950",
950: "cp950",
cp950: {
type: "_dbcs",
table: function () { return require("./tables/cp950.json") }
},
// == Big5/Taiwan/Hong Kong ================================================
// There are lots of tables for Big5 and cp950. Please see the following links for history:
// http://moztw.org/docs/big5/ http://www.haible.de/bruno/charsets/conversion-tables/Big5.html
// Variations, in roughly number of defined chars:
// * Windows CP 950: Microsoft variant of Big5. Canonical: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
// * Windows CP 951: Microsoft variant of Big5-HKSCS-2001. Seems to be never public. http://me.abelcheung.org/articles/research/what-is-cp951/
// * Big5-2003 (Taiwan standard) almost superset of cp950.
// * Unicode-at-on (UAO) / Mozilla 1.8. Falling out of use on the Web. Not supported by other browsers.
// * Big5-HKSCS (-2001, -2004, -2008). Hong Kong standard.
// many unicode code points moved from PUA to Supplementary plane (U+2XXXX) over the years.
// Plus, it has 4 combining sequences.
// Seems that Mozilla refused to support it for 10 yrs. https://bugzilla.mozilla.org/show_bug.cgi?id=162431 https://bugzilla.mozilla.org/show_bug.cgi?id=310299
// because big5-hkscs is the only encoding to include astral characters in a non-algorithmic way.
// Implementations are not consistent within browsers; sometimes labeled as just big5.
// MS Internet Explorer switches from big5 to big5-hkscs when a patch is applied.
// Great discussion & recap of what's going on https://bugzilla.mozilla.org/show_bug.cgi?id=912470#c31
// In the encoder, it might make sense to support encoding old PUA mappings to Big5 bytes seq-s.
// Official spec: http://www.ogcio.gov.hk/en/business/tech_promotion/ccli/terms/doc/2003cmp_2008.txt
// http://www.ogcio.gov.hk/tc/business/tech_promotion/ccli/terms/doc/hkscs-2008-big5-iso.txt
//
// Current understanding of how to deal with Big5(-HKSCS) is in the Encoding Standard, http://encoding.spec.whatwg.org/#big5-encoder
// Unicode mapping (http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT) is said to be wrong.
// Big5 has many variations and is an extension of cp950. We use Encoding Standard's as a consensus.
big5: "big5hkscs",
big5hkscs: {
type: "_dbcs",
table: function () { return require("./tables/cp950.json").concat(require("./tables/big5-added.json")) },
encodeSkipVals: [
// Although Encoding Standard says we should avoid encoding to HKSCS area (See Step 1 of
// https://encoding.spec.whatwg.org/#index-big5-pointer), we still do it to increase compatibility with ICU.
// But if a single unicode point can be encoded both as HKSCS and regular Big5, we prefer the latter.
0x8e69, 0x8e6f, 0x8e7e, 0x8eab, 0x8eb4, 0x8ecd, 0x8ed0, 0x8f57, 0x8f69, 0x8f6e, 0x8fcb, 0x8ffe,
0x906d, 0x907a, 0x90c4, 0x90dc, 0x90f1, 0x91bf, 0x92af, 0x92b0, 0x92b1, 0x92b2, 0x92d1, 0x9447, 0x94ca,
0x95d9, 0x96fc, 0x9975, 0x9b76, 0x9b78, 0x9b7b, 0x9bc6, 0x9bde, 0x9bec, 0x9bf6, 0x9c42, 0x9c53, 0x9c62,
0x9c68, 0x9c6b, 0x9c77, 0x9cbc, 0x9cbd, 0x9cd0, 0x9d57, 0x9d5a, 0x9dc4, 0x9def, 0x9dfb, 0x9ea9, 0x9eef,
0x9efd, 0x9f60, 0x9fcb, 0xa077, 0xa0dc, 0xa0df, 0x8fcc, 0x92c8, 0x9644, 0x96ed,
'windows950': 'cp950',
'ms950': 'cp950',
'950': 'cp950',
'cp950': {
type: '_dbcs',
table: function() { return require('./tables/cp950.json') },
},
// Step 2 of https://encoding.spec.whatwg.org/#index-big5-pointer: Use last pointer for U+2550, U+255E, U+2561, U+256A, U+5341, or U+5345
0xa2a4, 0xa2a5, 0xa2a7, 0xa2a6, 0xa2cc, 0xa2ce
]
},
// Big5 has many variations and is an extension of cp950. We use Encoding Standard's as a consensus.
'big5': 'big5hkscs',
'big5hkscs': {
type: '_dbcs',
table: function() { return require('./tables/cp950.json').concat(require('./tables/big5-added.json')) },
encodeSkipVals: [
// Although Encoding Standard says we should avoid encoding to HKSCS area (See Step 1 of
// https://encoding.spec.whatwg.org/#index-big5-pointer), we still do it to increase compatibility with ICU.
// But if a single unicode point can be encoded both as HKSCS and regular Big5, we prefer the latter.
0x8e69, 0x8e6f, 0x8e7e, 0x8eab, 0x8eb4, 0x8ecd, 0x8ed0, 0x8f57, 0x8f69, 0x8f6e, 0x8fcb, 0x8ffe,
0x906d, 0x907a, 0x90c4, 0x90dc, 0x90f1, 0x91bf, 0x92af, 0x92b0, 0x92b1, 0x92b2, 0x92d1, 0x9447, 0x94ca,
0x95d9, 0x96fc, 0x9975, 0x9b76, 0x9b78, 0x9b7b, 0x9bc6, 0x9bde, 0x9bec, 0x9bf6, 0x9c42, 0x9c53, 0x9c62,
0x9c68, 0x9c6b, 0x9c77, 0x9cbc, 0x9cbd, 0x9cd0, 0x9d57, 0x9d5a, 0x9dc4, 0x9def, 0x9dfb, 0x9ea9, 0x9eef,
0x9efd, 0x9f60, 0x9fcb, 0xa077, 0xa0dc, 0xa0df, 0x8fcc, 0x92c8, 0x9644, 0x96ed,
// Step 2 of https://encoding.spec.whatwg.org/#index-big5-pointer: Use last pointer for U+2550, U+255E, U+2561, U+256A, U+5341, or U+5345
0xa2a4, 0xa2a5, 0xa2a7, 0xa2a6, 0xa2cc, 0xa2ce,
],
},
'cnbig5': 'big5hkscs',
'csbig5': 'big5hkscs',
'xxbig5': 'big5hkscs',
};
cnbig5: "big5hkscs",
csbig5: "big5hkscs",
xxbig5: "big5hkscs"
}
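// A hedged usage sketch (not part of the diff) showing how the alias tables above resolve
// through the public iconv-lite API; byte values are illustrative.
var iconv = require("iconv-lite")
console.log(iconv.encodingExists("euccn"))                      // true (alias of cp936)
console.log(iconv.encode("中", "gbk"))                           // <Buffer d6 d0>
console.log(iconv.decode(Buffer.from([0xd6, 0xd0]), "gb18030")) // "中" (GB18030 is a superset of GBK)
console.log(iconv.decode(Buffer.from([0xa4, 0x40]), "big5"))    // "一" (big5 resolves to big5hkscs)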


@@ -1,23 +1,23 @@
"use strict";
"use strict"
var mergeModules = require("../lib/helpers/merge-exports")
// Update this array if you add/rename/remove files in this directory.
// We support Browserify by skipping automatic module discovery and requiring modules directly.
var modules = [
require("./internal"),
require("./utf32"),
require("./utf16"),
require("./utf7"),
require("./sbcs-codec"),
require("./sbcs-data"),
require("./sbcs-data-generated"),
require("./dbcs-codec"),
require("./dbcs-data"),
];
require("./internal"),
require("./utf32"),
require("./utf16"),
require("./utf7"),
require("./sbcs-codec"),
require("./sbcs-data"),
require("./sbcs-data-generated"),
require("./dbcs-codec"),
require("./dbcs-data")
]
// Put all encoding/alias/codec definitions to single object and export it.
for (var i = 0; i < modules.length; i++) {
var module = modules[i];
for (var enc in module)
if (Object.prototype.hasOwnProperty.call(module, enc))
exports[enc] = module[enc];
var module = modules[i]
mergeModules(exports, module)
}
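// The merge-exports helper required above is not shown in this hunk; a minimal sketch of
// what it would need to do, inferred from the for-in loop it replaces (hypothetical, not
// the actual helper source):
function mergeModules (target, module) {
  // Copy every own encoding/alias/codec definition onto the target object.
  for (var enc in module) {
    if (Object.prototype.hasOwnProperty.call(module, enc)) {
      target[enc] = module[enc]
    }
  }
}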


@@ -1,198 +1,218 @@
"use strict";
var Buffer = require("safer-buffer").Buffer;
"use strict"
var Buffer = require("safer-buffer").Buffer
// Export Node.js internal encodings.
module.exports = {
// Encodings
utf8: { type: "_internal", bomAware: true},
cesu8: { type: "_internal", bomAware: true},
unicode11utf8: "utf8",
// Encodings
utf8: { type: "_internal", bomAware: true },
cesu8: { type: "_internal", bomAware: true },
unicode11utf8: "utf8",
ucs2: { type: "_internal", bomAware: true},
utf16le: "ucs2",
ucs2: { type: "_internal", bomAware: true },
utf16le: "ucs2",
binary: { type: "_internal" },
base64: { type: "_internal" },
hex: { type: "_internal" },
binary: { type: "_internal" },
base64: { type: "_internal" },
hex: { type: "_internal" },
// Codec.
_internal: InternalCodec,
};
//------------------------------------------------------------------------------
function InternalCodec(codecOptions, iconv) {
this.enc = codecOptions.encodingName;
this.bomAware = codecOptions.bomAware;
if (this.enc === "base64")
this.encoder = InternalEncoderBase64;
else if (this.enc === "cesu8") {
this.enc = "utf8"; // Use utf8 for decoding.
this.encoder = InternalEncoderCesu8;
// Add decoder for versions of Node not supporting CESU-8
if (Buffer.from('eda0bdedb2a9', 'hex').toString() !== '💩') {
this.decoder = InternalDecoderCesu8;
this.defaultCharUnicode = iconv.defaultCharUnicode;
}
}
// Codec.
_internal: InternalCodec
}
InternalCodec.prototype.encoder = InternalEncoder;
InternalCodec.prototype.decoder = InternalDecoder;
// ------------------------------------------------------------------------------
//------------------------------------------------------------------------------
function InternalCodec (codecOptions, iconv) {
this.enc = codecOptions.encodingName
this.bomAware = codecOptions.bomAware
if (this.enc === "base64") { this.encoder = InternalEncoderBase64 } else if (this.enc === "utf8") { this.encoder = InternalEncoderUtf8 } else if (this.enc === "cesu8") {
this.enc = "utf8" // Use utf8 for decoding.
this.encoder = InternalEncoderCesu8
// Add decoder for versions of Node not supporting CESU-8
if (Buffer.from("eda0bdedb2a9", "hex").toString() !== "💩") {
this.decoder = InternalDecoderCesu8
this.defaultCharUnicode = iconv.defaultCharUnicode
}
}
}
InternalCodec.prototype.encoder = InternalEncoder
InternalCodec.prototype.decoder = InternalDecoder
// ------------------------------------------------------------------------------
// We use node.js internal decoder. Its signature is the same as ours.
var StringDecoder = require('string_decoder').StringDecoder;
var StringDecoder = require("string_decoder").StringDecoder
if (!StringDecoder.prototype.end) // Node v0.8 doesn't have this method.
StringDecoder.prototype.end = function() {};
function InternalDecoder(options, codec) {
this.decoder = new StringDecoder(codec.enc);
function InternalDecoder (options, codec) {
this.decoder = new StringDecoder(codec.enc)
}
InternalDecoder.prototype.write = function(buf) {
if (!Buffer.isBuffer(buf)) {
buf = Buffer.from(buf);
}
InternalDecoder.prototype.write = function (buf) {
if (!Buffer.isBuffer(buf)) {
buf = Buffer.from(buf)
}
return this.decoder.write(buf);
return this.decoder.write(buf)
}
InternalDecoder.prototype.end = function() {
return this.decoder.end();
InternalDecoder.prototype.end = function () {
return this.decoder.end()
}
//------------------------------------------------------------------------------
// ------------------------------------------------------------------------------
// Encoder is mostly trivial
function InternalEncoder(options, codec) {
this.enc = codec.enc;
function InternalEncoder (options, codec) {
this.enc = codec.enc
}
InternalEncoder.prototype.write = function(str) {
return Buffer.from(str, this.enc);
InternalEncoder.prototype.write = function (str) {
return Buffer.from(str, this.enc)
}
InternalEncoder.prototype.end = function() {
InternalEncoder.prototype.end = function () {
}
//------------------------------------------------------------------------------
// ------------------------------------------------------------------------------
// Except base64 encoder, which must keep its state.
function InternalEncoderBase64(options, codec) {
this.prevStr = '';
function InternalEncoderBase64 (options, codec) {
this.prevStr = ""
}
InternalEncoderBase64.prototype.write = function(str) {
str = this.prevStr + str;
var completeQuads = str.length - (str.length % 4);
this.prevStr = str.slice(completeQuads);
str = str.slice(0, completeQuads);
InternalEncoderBase64.prototype.write = function (str) {
str = this.prevStr + str
var completeQuads = str.length - (str.length % 4)
this.prevStr = str.slice(completeQuads)
str = str.slice(0, completeQuads)
return Buffer.from(str, "base64");
return Buffer.from(str, "base64")
}
InternalEncoderBase64.prototype.end = function() {
return Buffer.from(this.prevStr, "base64");
InternalEncoderBase64.prototype.end = function () {
return Buffer.from(this.prevStr, "base64")
}
//------------------------------------------------------------------------------
// ------------------------------------------------------------------------------
// CESU-8 encoder is also special.
function InternalEncoderCesu8(options, codec) {
function InternalEncoderCesu8 (options, codec) {
}
InternalEncoderCesu8.prototype.write = function(str) {
var buf = Buffer.alloc(str.length * 3), bufIdx = 0;
for (var i = 0; i < str.length; i++) {
var charCode = str.charCodeAt(i);
// Naive implementation, but it works because CESU-8 is especially easy
// to convert from UTF-16 (which all JS strings are encoded in).
if (charCode < 0x80)
buf[bufIdx++] = charCode;
else if (charCode < 0x800) {
buf[bufIdx++] = 0xC0 + (charCode >>> 6);
buf[bufIdx++] = 0x80 + (charCode & 0x3f);
}
else { // charCode will always be < 0x10000 in javascript.
buf[bufIdx++] = 0xE0 + (charCode >>> 12);
buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f);
buf[bufIdx++] = 0x80 + (charCode & 0x3f);
}
InternalEncoderCesu8.prototype.write = function (str) {
var buf = Buffer.alloc(str.length * 3); var bufIdx = 0
for (var i = 0; i < str.length; i++) {
var charCode = str.charCodeAt(i)
// Naive implementation, but it works because CESU-8 is especially easy
// to convert from UTF-16 (which all JS strings are encoded in).
if (charCode < 0x80) { buf[bufIdx++] = charCode } else if (charCode < 0x800) {
buf[bufIdx++] = 0xC0 + (charCode >>> 6)
buf[bufIdx++] = 0x80 + (charCode & 0x3f)
} else { // charCode will always be < 0x10000 in javascript.
buf[bufIdx++] = 0xE0 + (charCode >>> 12)
buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f)
buf[bufIdx++] = 0x80 + (charCode & 0x3f)
}
return buf.slice(0, bufIdx);
}
return buf.slice(0, bufIdx)
}
InternalEncoderCesu8.prototype.end = function() {
InternalEncoderCesu8.prototype.end = function () {
}
//------------------------------------------------------------------------------
// ------------------------------------------------------------------------------
// CESU-8 decoder is not implemented in Node v4.0+
function InternalDecoderCesu8(options, codec) {
this.acc = 0;
this.contBytes = 0;
this.accBytes = 0;
this.defaultCharUnicode = codec.defaultCharUnicode;
function InternalDecoderCesu8 (options, codec) {
this.acc = 0
this.contBytes = 0
this.accBytes = 0
this.defaultCharUnicode = codec.defaultCharUnicode
}
InternalDecoderCesu8.prototype.write = function(buf) {
var acc = this.acc, contBytes = this.contBytes, accBytes = this.accBytes,
res = '';
for (var i = 0; i < buf.length; i++) {
var curByte = buf[i];
if ((curByte & 0xC0) !== 0x80) { // Leading byte
if (contBytes > 0) { // Previous code is invalid
res += this.defaultCharUnicode;
contBytes = 0;
}
InternalDecoderCesu8.prototype.write = function (buf) {
var acc = this.acc; var contBytes = this.contBytes; var accBytes = this.accBytes
var res = ""
for (var i = 0; i < buf.length; i++) {
var curByte = buf[i]
if ((curByte & 0xC0) !== 0x80) { // Leading byte
if (contBytes > 0) { // Previous code is invalid
res += this.defaultCharUnicode
contBytes = 0
}
if (curByte < 0x80) { // Single-byte code
res += String.fromCharCode(curByte);
} else if (curByte < 0xE0) { // Two-byte code
acc = curByte & 0x1F;
contBytes = 1; accBytes = 1;
} else if (curByte < 0xF0) { // Three-byte code
acc = curByte & 0x0F;
contBytes = 2; accBytes = 1;
} else { // Four or more are not supported for CESU-8.
res += this.defaultCharUnicode;
}
} else { // Continuation byte
if (contBytes > 0) { // We're waiting for it.
acc = (acc << 6) | (curByte & 0x3f);
contBytes--; accBytes++;
if (contBytes === 0) {
// Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
if (accBytes === 2 && acc < 0x80 && acc > 0)
res += this.defaultCharUnicode;
else if (accBytes === 3 && acc < 0x800)
res += this.defaultCharUnicode;
else
// Actually add character.
res += String.fromCharCode(acc);
}
} else { // Unexpected continuation byte
res += this.defaultCharUnicode;
}
if (curByte < 0x80) { // Single-byte code
res += String.fromCharCode(curByte)
} else if (curByte < 0xE0) { // Two-byte code
acc = curByte & 0x1F
contBytes = 1; accBytes = 1
} else if (curByte < 0xF0) { // Three-byte code
acc = curByte & 0x0F
contBytes = 2; accBytes = 1
} else { // Four or more are not supported for CESU-8.
res += this.defaultCharUnicode
}
} else { // Continuation byte
if (contBytes > 0) { // We're waiting for it.
acc = (acc << 6) | (curByte & 0x3f)
contBytes--; accBytes++
if (contBytes === 0) {
// Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
if (accBytes === 2 && acc < 0x80 && acc > 0) {
res += this.defaultCharUnicode
} else if (accBytes === 3 && acc < 0x800) {
res += this.defaultCharUnicode
} else {
// Actually add character.
res += String.fromCharCode(acc)
}
}
} else { // Unexpected continuation byte
res += this.defaultCharUnicode
}
}
this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes;
return res;
}
this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes
return res
}
InternalDecoderCesu8.prototype.end = function() {
var res = 0;
if (this.contBytes > 0)
res += this.defaultCharUnicode;
return res;
InternalDecoderCesu8.prototype.end = function () {
var res = 0
if (this.contBytes > 0) { res += this.defaultCharUnicode }
return res
}
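// For reference, a hedged worked example of what the decoder above handles: CESU-8 stores
// astral characters as the UTF-8 form of each UTF-16 surrogate, so U+1F4A9 ("💩") is the six
// bytes ED A0 BD ED B2 A9 rather than UTF-8's four bytes F0 9F 92 A9, which is exactly what
// the Buffer.from("eda0bdedb2a9", "hex") feature test relies on. Assuming the public
// iconv-lite API, iconv.decode(Buffer.from("eda0bdedb2a9", "hex"), "cesu8") === "💩".
// The overlong-encoding check deliberately lets C0 80 through so Modified UTF-8's encoding
// of NULL still decodes.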
// ------------------------------------------------------------------------------
// check the chunk boundaries for surrogate pair
function InternalEncoderUtf8 (options, codec) {
this.highSurrogate = ""
}
InternalEncoderUtf8.prototype.write = function (str) {
if (this.highSurrogate) {
str = this.highSurrogate + str
this.highSurrogate = ""
}
if (str.length > 0) {
var charCode = str.charCodeAt(str.length - 1)
if (charCode >= 0xd800 && charCode < 0xdc00) {
this.highSurrogate = str[str.length - 1]
str = str.slice(0, str.length - 1)
}
}
return Buffer.from(str, this.enc)
}
InternalEncoderUtf8.prototype.end = function () {
if (this.highSurrogate) {
var str = this.highSurrogate
this.highSurrogate = ""
return Buffer.from(str, this.enc)
}
}
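// A hedged illustration (not part of the file) of the chunk-boundary problem the UTF-8
// encoder above solves: a high surrogate at the end of one write() would otherwise be
// replaced before its low surrogate arrives in the next chunk.
console.log(Buffer.from("\uD83D", "utf8"))       // <Buffer ef bf bd> -- lone surrogate becomes U+FFFD
console.log(Buffer.from("\uD83D\uDE00", "utf8")) // <Buffer f0 9f 98 80> -- "😀" kept intact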


@@ -1,72 +1,75 @@
"use strict";
var Buffer = require("safer-buffer").Buffer;
"use strict"
var Buffer = require("safer-buffer").Buffer
// Single-byte codec. Needs a 'chars' string parameter that contains 256 or 128 chars that
// correspond to encoded bytes (if 128 - then lower half is ASCII).
// correspond to encoded bytes (if 128 - then lower half is ASCII).
exports._sbcs = SBCSCodec;
function SBCSCodec(codecOptions, iconv) {
if (!codecOptions)
throw new Error("SBCS codec is called without the data.")
// Prepare char buffer for decoding.
if (!codecOptions.chars || (codecOptions.chars.length !== 128 && codecOptions.chars.length !== 256))
throw new Error("Encoding '"+codecOptions.type+"' has incorrect 'chars' (must be of len 128 or 256)");
if (codecOptions.chars.length === 128) {
var asciiString = "";
for (var i = 0; i < 128; i++)
asciiString += String.fromCharCode(i);
codecOptions.chars = asciiString + codecOptions.chars;
exports._sbcs = SBCSCodec
function SBCSCodec (codecOptions, iconv) {
if (!codecOptions) {
throw new Error("SBCS codec is called without the data.")
}
// Prepare char buffer for decoding.
if (!codecOptions.chars || (codecOptions.chars.length !== 128 && codecOptions.chars.length !== 256)) {
throw new Error("Encoding '" + codecOptions.type + "' has incorrect 'chars' (must be of len 128 or 256)")
}
if (codecOptions.chars.length === 128) {
var asciiString = ""
for (var i = 0; i < 128; i++) {
asciiString += String.fromCharCode(i)
}
codecOptions.chars = asciiString + codecOptions.chars
}
this.decodeBuf = Buffer.from(codecOptions.chars, 'ucs2');
// Encoding buffer.
var encodeBuf = Buffer.alloc(65536, iconv.defaultCharSingleByte.charCodeAt(0));
this.decodeBuf = Buffer.from(codecOptions.chars, "ucs2")
for (var i = 0; i < codecOptions.chars.length; i++)
encodeBuf[codecOptions.chars.charCodeAt(i)] = i;
// Encoding buffer.
var encodeBuf = Buffer.alloc(65536, iconv.defaultCharSingleByte.charCodeAt(0))
this.encodeBuf = encodeBuf;
for (var i = 0; i < codecOptions.chars.length; i++) {
encodeBuf[codecOptions.chars.charCodeAt(i)] = i
}
this.encodeBuf = encodeBuf
}
SBCSCodec.prototype.encoder = SBCSEncoder;
SBCSCodec.prototype.decoder = SBCSDecoder;
SBCSCodec.prototype.encoder = SBCSEncoder
SBCSCodec.prototype.decoder = SBCSDecoder
function SBCSEncoder(options, codec) {
this.encodeBuf = codec.encodeBuf;
function SBCSEncoder (options, codec) {
this.encodeBuf = codec.encodeBuf
}
SBCSEncoder.prototype.write = function(str) {
var buf = Buffer.alloc(str.length);
for (var i = 0; i < str.length; i++)
buf[i] = this.encodeBuf[str.charCodeAt(i)];
return buf;
SBCSEncoder.prototype.write = function (str) {
var buf = Buffer.alloc(str.length)
for (var i = 0; i < str.length; i++) {
buf[i] = this.encodeBuf[str.charCodeAt(i)]
}
return buf
}
SBCSEncoder.prototype.end = function() {
SBCSEncoder.prototype.end = function () {
}
function SBCSDecoder(options, codec) {
this.decodeBuf = codec.decodeBuf;
function SBCSDecoder (options, codec) {
this.decodeBuf = codec.decodeBuf
}
SBCSDecoder.prototype.write = function(buf) {
// Strings are immutable in JS -> we use ucs2 buffer to speed up computations.
var decodeBuf = this.decodeBuf;
var newBuf = Buffer.alloc(buf.length*2);
var idx1 = 0, idx2 = 0;
for (var i = 0; i < buf.length; i++) {
idx1 = buf[i]*2; idx2 = i*2;
newBuf[idx2] = decodeBuf[idx1];
newBuf[idx2+1] = decodeBuf[idx1+1];
}
return newBuf.toString('ucs2');
SBCSDecoder.prototype.write = function (buf) {
// Strings are immutable in JS -> we use ucs2 buffer to speed up computations.
var decodeBuf = this.decodeBuf
var newBuf = Buffer.alloc(buf.length * 2)
var idx1 = 0; var idx2 = 0
for (var i = 0; i < buf.length; i++) {
idx1 = buf[i] * 2; idx2 = i * 2
newBuf[idx2] = decodeBuf[idx1]
newBuf[idx2 + 1] = decodeBuf[idx1 + 1]
}
return newBuf.toString("ucs2")
}
SBCSDecoder.prototype.end = function() {
SBCSDecoder.prototype.end = function () {
}
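// A hedged usage sketch (not in the file): the two lookup tables built above make encode and
// decode a single array access per char/byte, e.g. through the public iconv-lite API:
var iconv = require("iconv-lite")
console.log(iconv.decode(Buffer.from([0xe9]), "latin1")) // "é"  (decodeBuf lookup)
console.log(iconv.encode("é", "latin1"))                 // <Buffer e9> (encodeBuf lookup)
console.log(iconv.encode("€", "latin1"))                 // <Buffer 3f> -- defaultCharSingleByte "?"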


@@ -1,179 +1,178 @@
"use strict";
"use strict"
// Manually added data to be used by sbcs codec in addition to generated one.
module.exports = {
// Not supported by iconv, not sure why.
"10029": "maccenteuro",
"maccenteuro": {
"type": "_sbcs",
"chars": "ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ"
},
// Not supported by iconv, not sure why.
10029: "maccenteuro",
maccenteuro: {
type: "_sbcs",
chars: "ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ"
},
"808": "cp808",
"ibm808": "cp808",
"cp808": {
"type": "_sbcs",
"chars": "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№€■ "
},
808: "cp808",
ibm808: "cp808",
cp808: {
type: "_sbcs",
chars: "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№€■ "
},
"mik": {
"type": "_sbcs",
"chars": "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя└┴┬├─┼╣║╚╔╩╦╠═╬┐░▒▓│┤№§╗╝┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ "
},
mik: {
type: "_sbcs",
chars: "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя└┴┬├─┼╣║╚╔╩╦╠═╬┐░▒▓│┤№§╗╝┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ "
},
"cp720": {
"type": "_sbcs",
"chars": "\x80\x81éâ\x84à\x86çêëèïî\x8d\x8e\x8f\x90\u0651\u0652ô¤ـûùءآأؤ£إئابةتثجحخدذرزسشص«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ضطظعغفµقكلمنهوىي≡\u064b\u064c\u064d\u064e\u064f\u0650≈°∙·√ⁿ²■\u00a0"
},
cp720: {
type: "_sbcs",
chars: "\x80\x81éâ\x84à\x86çêëèïî\x8d\x8e\x8f\x90\u0651\u0652ô¤ـûùءآأؤ£إئابةتثجحخدذرزسشص«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ضطظعغفµقكلمنهوىي≡\u064b\u064c\u064d\u064e\u064f\u0650≈°∙·√ⁿ²■\u00a0"
},
// Aliases of generated encodings.
"ascii8bit": "ascii",
"usascii": "ascii",
"ansix34": "ascii",
"ansix341968": "ascii",
"ansix341986": "ascii",
"csascii": "ascii",
"cp367": "ascii",
"ibm367": "ascii",
"isoir6": "ascii",
"iso646us": "ascii",
"iso646irv": "ascii",
"us": "ascii",
// Aliases of generated encodings.
ascii8bit: "ascii",
usascii: "ascii",
ansix34: "ascii",
ansix341968: "ascii",
ansix341986: "ascii",
csascii: "ascii",
cp367: "ascii",
ibm367: "ascii",
isoir6: "ascii",
iso646us: "ascii",
iso646irv: "ascii",
us: "ascii",
"latin1": "iso88591",
"latin2": "iso88592",
"latin3": "iso88593",
"latin4": "iso88594",
"latin5": "iso88599",
"latin6": "iso885910",
"latin7": "iso885913",
"latin8": "iso885914",
"latin9": "iso885915",
"latin10": "iso885916",
latin1: "iso88591",
latin2: "iso88592",
latin3: "iso88593",
latin4: "iso88594",
latin5: "iso88599",
latin6: "iso885910",
latin7: "iso885913",
latin8: "iso885914",
latin9: "iso885915",
latin10: "iso885916",
"csisolatin1": "iso88591",
"csisolatin2": "iso88592",
"csisolatin3": "iso88593",
"csisolatin4": "iso88594",
"csisolatincyrillic": "iso88595",
"csisolatinarabic": "iso88596",
"csisolatingreek" : "iso88597",
"csisolatinhebrew": "iso88598",
"csisolatin5": "iso88599",
"csisolatin6": "iso885910",
csisolatin1: "iso88591",
csisolatin2: "iso88592",
csisolatin3: "iso88593",
csisolatin4: "iso88594",
csisolatincyrillic: "iso88595",
csisolatinarabic: "iso88596",
csisolatingreek: "iso88597",
csisolatinhebrew: "iso88598",
csisolatin5: "iso88599",
csisolatin6: "iso885910",
"l1": "iso88591",
"l2": "iso88592",
"l3": "iso88593",
"l4": "iso88594",
"l5": "iso88599",
"l6": "iso885910",
"l7": "iso885913",
"l8": "iso885914",
"l9": "iso885915",
"l10": "iso885916",
l1: "iso88591",
l2: "iso88592",
l3: "iso88593",
l4: "iso88594",
l5: "iso88599",
l6: "iso885910",
l7: "iso885913",
l8: "iso885914",
l9: "iso885915",
l10: "iso885916",
"isoir14": "iso646jp",
"isoir57": "iso646cn",
"isoir100": "iso88591",
"isoir101": "iso88592",
"isoir109": "iso88593",
"isoir110": "iso88594",
"isoir144": "iso88595",
"isoir127": "iso88596",
"isoir126": "iso88597",
"isoir138": "iso88598",
"isoir148": "iso88599",
"isoir157": "iso885910",
"isoir166": "tis620",
"isoir179": "iso885913",
"isoir199": "iso885914",
"isoir203": "iso885915",
"isoir226": "iso885916",
isoir14: "iso646jp",
isoir57: "iso646cn",
isoir100: "iso88591",
isoir101: "iso88592",
isoir109: "iso88593",
isoir110: "iso88594",
isoir144: "iso88595",
isoir127: "iso88596",
isoir126: "iso88597",
isoir138: "iso88598",
isoir148: "iso88599",
isoir157: "iso885910",
isoir166: "tis620",
isoir179: "iso885913",
isoir199: "iso885914",
isoir203: "iso885915",
isoir226: "iso885916",
"cp819": "iso88591",
"ibm819": "iso88591",
cp819: "iso88591",
ibm819: "iso88591",
"cyrillic": "iso88595",
cyrillic: "iso88595",
"arabic": "iso88596",
"arabic8": "iso88596",
"ecma114": "iso88596",
"asmo708": "iso88596",
arabic: "iso88596",
arabic8: "iso88596",
ecma114: "iso88596",
asmo708: "iso88596",
"greek" : "iso88597",
"greek8" : "iso88597",
"ecma118" : "iso88597",
"elot928" : "iso88597",
greek: "iso88597",
greek8: "iso88597",
ecma118: "iso88597",
elot928: "iso88597",
"hebrew": "iso88598",
"hebrew8": "iso88598",
hebrew: "iso88598",
hebrew8: "iso88598",
"turkish": "iso88599",
"turkish8": "iso88599",
turkish: "iso88599",
turkish8: "iso88599",
"thai": "iso885911",
"thai8": "iso885911",
thai: "iso885911",
thai8: "iso885911",
"celtic": "iso885914",
"celtic8": "iso885914",
"isoceltic": "iso885914",
celtic: "iso885914",
celtic8: "iso885914",
isoceltic: "iso885914",
"tis6200": "tis620",
"tis62025291": "tis620",
"tis62025330": "tis620",
tis6200: "tis620",
tis62025291: "tis620",
tis62025330: "tis620",
"10000": "macroman",
"10006": "macgreek",
"10007": "maccyrillic",
"10079": "maciceland",
"10081": "macturkish",
10000: "macroman",
10006: "macgreek",
10007: "maccyrillic",
10079: "maciceland",
10081: "macturkish",
"cspc8codepage437": "cp437",
"cspc775baltic": "cp775",
"cspc850multilingual": "cp850",
"cspcp852": "cp852",
"cspc862latinhebrew": "cp862",
"cpgr": "cp869",
cspc8codepage437: "cp437",
cspc775baltic: "cp775",
cspc850multilingual: "cp850",
cspcp852: "cp852",
cspc862latinhebrew: "cp862",
cpgr: "cp869",
"msee": "cp1250",
"mscyrl": "cp1251",
"msansi": "cp1252",
"msgreek": "cp1253",
"msturk": "cp1254",
"mshebr": "cp1255",
"msarab": "cp1256",
"winbaltrim": "cp1257",
msee: "cp1250",
mscyrl: "cp1251",
msansi: "cp1252",
msgreek: "cp1253",
msturk: "cp1254",
mshebr: "cp1255",
msarab: "cp1256",
winbaltrim: "cp1257",
"cp20866": "koi8r",
"20866": "koi8r",
"ibm878": "koi8r",
"cskoi8r": "koi8r",
cp20866: "koi8r",
20866: "koi8r",
ibm878: "koi8r",
cskoi8r: "koi8r",
"cp21866": "koi8u",
"21866": "koi8u",
"ibm1168": "koi8u",
cp21866: "koi8u",
21866: "koi8u",
ibm1168: "koi8u",
"strk10482002": "rk1048",
strk10482002: "rk1048",
"tcvn5712": "tcvn",
"tcvn57121": "tcvn",
tcvn5712: "tcvn",
tcvn57121: "tcvn",
"gb198880": "iso646cn",
"cn": "iso646cn",
gb198880: "iso646cn",
cn: "iso646cn",
"csiso14jisc6220ro": "iso646jp",
"jisc62201969ro": "iso646jp",
"jp": "iso646jp",
csiso14jisc6220ro: "iso646jp",
jisc62201969ro: "iso646jp",
jp: "iso646jp",
"cshproman8": "hproman8",
"r8": "hproman8",
"roman8": "hproman8",
"xroman8": "hproman8",
"ibm1051": "hproman8",
"mac": "macintosh",
"csmacintosh": "macintosh",
};
cshproman8: "hproman8",
r8: "hproman8",
roman8: "hproman8",
xroman8: "hproman8",
ibm1051: "hproman8",
mac: "macintosh",
csmacintosh: "macintosh"
}


@@ -1,70 +1,66 @@
"use strict";
var Buffer = require("safer-buffer").Buffer;
"use strict"
var Buffer = require("safer-buffer").Buffer
// Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
// == UTF16-BE codec. ==========================================================
exports.utf16be = Utf16BECodec;
function Utf16BECodec() {
exports.utf16be = Utf16BECodec
function Utf16BECodec () {
}
Utf16BECodec.prototype.encoder = Utf16BEEncoder;
Utf16BECodec.prototype.decoder = Utf16BEDecoder;
Utf16BECodec.prototype.bomAware = true;
Utf16BECodec.prototype.encoder = Utf16BEEncoder
Utf16BECodec.prototype.decoder = Utf16BEDecoder
Utf16BECodec.prototype.bomAware = true
// -- Encoding
function Utf16BEEncoder() {
function Utf16BEEncoder () {
}
Utf16BEEncoder.prototype.write = function(str) {
var buf = Buffer.from(str, 'ucs2');
for (var i = 0; i < buf.length; i += 2) {
var tmp = buf[i]; buf[i] = buf[i+1]; buf[i+1] = tmp;
}
return buf;
Utf16BEEncoder.prototype.write = function (str) {
var buf = Buffer.from(str, "ucs2")
for (var i = 0; i < buf.length; i += 2) {
var tmp = buf[i]; buf[i] = buf[i + 1]; buf[i + 1] = tmp
}
return buf
}
Utf16BEEncoder.prototype.end = function() {
Utf16BEEncoder.prototype.end = function () {
}
// -- Decoding
function Utf16BEDecoder() {
this.overflowByte = -1;
function Utf16BEDecoder () {
this.overflowByte = -1
}
Utf16BEDecoder.prototype.write = function(buf) {
if (buf.length == 0)
return '';
Utf16BEDecoder.prototype.write = function (buf) {
if (buf.length == 0) { return "" }
var buf2 = Buffer.alloc(buf.length + 1),
i = 0, j = 0;
var buf2 = Buffer.alloc(buf.length + 1)
var i = 0; var j = 0
if (this.overflowByte !== -1) {
buf2[0] = buf[0];
buf2[1] = this.overflowByte;
i = 1; j = 2;
}
if (this.overflowByte !== -1) {
buf2[0] = buf[0]
buf2[1] = this.overflowByte
i = 1; j = 2
}
for (; i < buf.length-1; i += 2, j+= 2) {
buf2[j] = buf[i+1];
buf2[j+1] = buf[i];
}
for (; i < buf.length - 1; i += 2, j += 2) {
buf2[j] = buf[i + 1]
buf2[j + 1] = buf[i]
}
this.overflowByte = (i == buf.length-1) ? buf[buf.length-1] : -1;
this.overflowByte = (i == buf.length - 1) ? buf[buf.length - 1] : -1
return buf2.slice(0, j).toString('ucs2');
return buf2.slice(0, j).toString("ucs2")
}
Utf16BEDecoder.prototype.end = function() {
this.overflowByte = -1;
Utf16BEDecoder.prototype.end = function () {
this.overflowByte = -1
}
// == UTF-16 codec =============================================================
// Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
// Defaults to UTF-16LE, as it's prevalent and default in Node.
@@ -73,125 +69,119 @@ Utf16BEDecoder.prototype.end = function() {
// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
exports.utf16 = Utf16Codec;
function Utf16Codec(codecOptions, iconv) {
this.iconv = iconv;
exports.utf16 = Utf16Codec
function Utf16Codec (codecOptions, iconv) {
this.iconv = iconv
}
Utf16Codec.prototype.encoder = Utf16Encoder;
Utf16Codec.prototype.decoder = Utf16Decoder;
Utf16Codec.prototype.encoder = Utf16Encoder
Utf16Codec.prototype.decoder = Utf16Decoder
// -- Encoding (pass-through)
function Utf16Encoder(options, codec) {
options = options || {};
if (options.addBOM === undefined)
options.addBOM = true;
this.encoder = codec.iconv.getEncoder('utf-16le', options);
function Utf16Encoder (options, codec) {
options = options || {}
if (options.addBOM === undefined) { options.addBOM = true }
this.encoder = codec.iconv.getEncoder("utf-16le", options)
}
Utf16Encoder.prototype.write = function(str) {
return this.encoder.write(str);
Utf16Encoder.prototype.write = function (str) {
return this.encoder.write(str)
}
Utf16Encoder.prototype.end = function() {
return this.encoder.end();
Utf16Encoder.prototype.end = function () {
return this.encoder.end()
}
// -- Decoding
function Utf16Decoder(options, codec) {
this.decoder = null;
this.initialBufs = [];
this.initialBufsLen = 0;
function Utf16Decoder (options, codec) {
this.decoder = null
this.initialBufs = []
this.initialBufsLen = 0
this.options = options || {};
this.iconv = codec.iconv;
this.options = options || {}
this.iconv = codec.iconv
}
Utf16Decoder.prototype.write = function(buf) {
if (!this.decoder) {
// Codec is not chosen yet. Accumulate initial bytes.
this.initialBufs.push(buf);
this.initialBufsLen += buf.length;
if (this.initialBufsLen < 16) // We need more bytes to use space heuristic (see below)
return '';
Utf16Decoder.prototype.write = function (buf) {
if (!this.decoder) {
// Codec is not chosen yet. Accumulate initial bytes.
this.initialBufs.push(buf)
this.initialBufsLen += buf.length
// We have enough bytes -> detect endianness.
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
if (this.initialBufsLen < 16) // We need more bytes to use space heuristic (see below)
{ return "" }
var resStr = '';
for (var i = 0; i < this.initialBufs.length; i++)
resStr += this.decoder.write(this.initialBufs[i]);
// We have enough bytes -> detect endianness.
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding)
this.decoder = this.iconv.getDecoder(encoding, this.options)
this.initialBufs.length = this.initialBufsLen = 0;
return resStr;
}
var resStr = ""
for (var i = 0; i < this.initialBufs.length; i++) { resStr += this.decoder.write(this.initialBufs[i]) }
return this.decoder.write(buf);
this.initialBufs.length = this.initialBufsLen = 0
return resStr
}
return this.decoder.write(buf)
}
Utf16Decoder.prototype.end = function() {
if (!this.decoder) {
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
Utf16Decoder.prototype.end = function () {
if (!this.decoder) {
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding)
this.decoder = this.iconv.getDecoder(encoding, this.options)
var resStr = '';
for (var i = 0; i < this.initialBufs.length; i++)
resStr += this.decoder.write(this.initialBufs[i]);
var resStr = ""
for (var i = 0; i < this.initialBufs.length; i++) { resStr += this.decoder.write(this.initialBufs[i]) }
var trail = this.decoder.end();
if (trail)
resStr += trail;
var trail = this.decoder.end()
if (trail) { resStr += trail }
this.initialBufs.length = this.initialBufsLen = 0;
return resStr;
}
return this.decoder.end();
this.initialBufs.length = this.initialBufsLen = 0
return resStr
}
return this.decoder.end()
}
function detectEncoding(bufs, defaultEncoding) {
var b = [];
var charsProcessed = 0;
var asciiCharsLE = 0, asciiCharsBE = 0; // Number of ASCII chars when decoded as LE or BE.
function detectEncoding (bufs, defaultEncoding) {
var b = []
var charsProcessed = 0
// Number of ASCII chars when decoded as LE or BE.
var asciiCharsLE = 0
var asciiCharsBE = 0
outer_loop:
for (var i = 0; i < bufs.length; i++) {
var buf = bufs[i];
for (var j = 0; j < buf.length; j++) {
b.push(buf[j]);
if (b.length === 2) {
if (charsProcessed === 0) {
// Check BOM first.
if (b[0] === 0xFF && b[1] === 0xFE) return 'utf-16le';
if (b[0] === 0xFE && b[1] === 0xFF) return 'utf-16be';
}
if (b[0] === 0 && b[1] !== 0) asciiCharsBE++;
if (b[0] !== 0 && b[1] === 0) asciiCharsLE++;
b.length = 0;
charsProcessed++;
if (charsProcessed >= 100) {
break outer_loop;
}
}
outerLoop:
for (var i = 0; i < bufs.length; i++) {
var buf = bufs[i]
for (var j = 0; j < buf.length; j++) {
b.push(buf[j])
if (b.length === 2) {
if (charsProcessed === 0) {
// Check BOM first.
if (b[0] === 0xFF && b[1] === 0xFE) return "utf-16le"
if (b[0] === 0xFE && b[1] === 0xFF) return "utf-16be"
}
if (b[0] === 0 && b[1] !== 0) asciiCharsBE++
if (b[0] !== 0 && b[1] === 0) asciiCharsLE++
b.length = 0
charsProcessed++
if (charsProcessed >= 100) {
break outerLoop
}
}
}
}
// Make decisions.
// Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
// So, we count ASCII as if it was LE or BE, and decide from that.
if (asciiCharsBE > asciiCharsLE) return 'utf-16be';
if (asciiCharsBE < asciiCharsLE) return 'utf-16le';
// Make decisions.
// Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
// So, we count ASCII as if it was LE or BE, and decide from that.
if (asciiCharsBE > asciiCharsLE) return "utf-16be"
if (asciiCharsBE < asciiCharsLE) return "utf-16le"
// Couldn't decide (likely all zeros or not enough data).
return defaultEncoding || 'utf-16le';
// Couldn't decide (likely all zeros or not enough data).
return defaultEncoding || "utf-16le"
}
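// A hedged illustration (not part of the file) of the space/ASCII heuristic above: for
// mostly-ASCII text the zero byte sits second in each pair under UTF-16LE and first under
// UTF-16BE, so counting which pattern dominates picks the endianness when no BOM is present.
console.log(Buffer.from("Hi", "utf16le")) // <Buffer 48 00 69 00> -> each pair counted as asciiCharsLE
// The byte-swapped form <Buffer 00 48 00 69> would instead increment asciiCharsBE;
// ties fall back to options.defaultEncoding or "utf-16le".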


@@ -1,186 +1,176 @@
'use strict';
"use strict"
var Buffer = require('safer-buffer').Buffer;
var Buffer = require("safer-buffer").Buffer
// == UTF32-LE/BE codec. ==========================================================
exports._utf32 = Utf32Codec;
exports._utf32 = Utf32Codec
function Utf32Codec(codecOptions, iconv) {
this.iconv = iconv;
this.bomAware = true;
this.isLE = codecOptions.isLE;
function Utf32Codec (codecOptions, iconv) {
this.iconv = iconv
this.bomAware = true
this.isLE = codecOptions.isLE
}
exports.utf32le = { type: '_utf32', isLE: true };
exports.utf32be = { type: '_utf32', isLE: false };
exports.utf32le = { type: "_utf32", isLE: true }
exports.utf32be = { type: "_utf32", isLE: false }
// Aliases
exports.ucs4le = 'utf32le';
exports.ucs4be = 'utf32be';
exports.ucs4le = "utf32le"
exports.ucs4be = "utf32be"
Utf32Codec.prototype.encoder = Utf32Encoder;
Utf32Codec.prototype.decoder = Utf32Decoder;
Utf32Codec.prototype.encoder = Utf32Encoder
Utf32Codec.prototype.decoder = Utf32Decoder
// -- Encoding
function Utf32Encoder(options, codec) {
this.isLE = codec.isLE;
this.highSurrogate = 0;
function Utf32Encoder (options, codec) {
this.isLE = codec.isLE
this.highSurrogate = 0
}
Utf32Encoder.prototype.write = function(str) {
var src = Buffer.from(str, 'ucs2');
var dst = Buffer.alloc(src.length * 2);
var write32 = this.isLE ? dst.writeUInt32LE : dst.writeUInt32BE;
var offset = 0;
Utf32Encoder.prototype.write = function (str) {
var src = Buffer.from(str, "ucs2")
var dst = Buffer.alloc(src.length * 2)
var write32 = this.isLE ? dst.writeUInt32LE : dst.writeUInt32BE
var offset = 0
for (var i = 0; i < src.length; i += 2) {
var code = src.readUInt16LE(i);
var isHighSurrogate = (0xD800 <= code && code < 0xDC00);
var isLowSurrogate = (0xDC00 <= code && code < 0xE000);
for (var i = 0; i < src.length; i += 2) {
var code = src.readUInt16LE(i)
var isHighSurrogate = (code >= 0xD800 && code < 0xDC00)
var isLowSurrogate = (code >= 0xDC00 && code < 0xE000)
if (this.highSurrogate) {
if (isHighSurrogate || !isLowSurrogate) {
// There shouldn't be two high surrogates in a row, nor a high surrogate which isn't followed by a low
// surrogate. If this happens, keep the pending high surrogate as a stand-alone semi-invalid character
// (technically wrong, but expected by some applications, like Windows file names).
write32.call(dst, this.highSurrogate, offset);
offset += 4;
}
else {
// Create 32-bit value from high and low surrogates;
var codepoint = (((this.highSurrogate - 0xD800) << 10) | (code - 0xDC00)) + 0x10000;
if (this.highSurrogate) {
if (isHighSurrogate || !isLowSurrogate) {
// There shouldn't be two high surrogates in a row, nor a high surrogate which isn't followed by a low
// surrogate. If this happens, keep the pending high surrogate as a stand-alone semi-invalid character
// (technically wrong, but expected by some applications, like Windows file names).
write32.call(dst, this.highSurrogate, offset)
offset += 4
} else {
// Create 32-bit value from high and low surrogates;
var codepoint = (((this.highSurrogate - 0xD800) << 10) | (code - 0xDC00)) + 0x10000
write32.call(dst, codepoint, offset);
offset += 4;
this.highSurrogate = 0;
write32.call(dst, codepoint, offset)
offset += 4
this.highSurrogate = 0
continue;
}
}
if (isHighSurrogate)
this.highSurrogate = code;
else {
// Even if the current character is a low surrogate, with no previous high surrogate, we'll
// encode it as a semi-invalid stand-alone character for the same reasons expressed above for
// unpaired high surrogates.
write32.call(dst, code, offset);
offset += 4;
this.highSurrogate = 0;
}
continue
}
}
if (offset < dst.length)
dst = dst.slice(0, offset);
if (isHighSurrogate) { this.highSurrogate = code } else {
// Even if the current character is a low surrogate, with no previous high surrogate, we'll
// encode it as a semi-invalid stand-alone character for the same reasons expressed above for
// unpaired high surrogates.
write32.call(dst, code, offset)
offset += 4
this.highSurrogate = 0
}
}
return dst;
};
if (offset < dst.length) { dst = dst.slice(0, offset) }
Utf32Encoder.prototype.end = function() {
// Treat any leftover high surrogate as a semi-valid independent character.
if (!this.highSurrogate)
return;
return dst
}
var buf = Buffer.alloc(4);
Utf32Encoder.prototype.end = function () {
// Treat any leftover high surrogate as a semi-valid independent character.
if (!this.highSurrogate) { return }
if (this.isLE)
buf.writeUInt32LE(this.highSurrogate, 0);
else
buf.writeUInt32BE(this.highSurrogate, 0);
var buf = Buffer.alloc(4)
this.highSurrogate = 0;
if (this.isLE) { buf.writeUInt32LE(this.highSurrogate, 0) } else { buf.writeUInt32BE(this.highSurrogate, 0) }
return buf;
};
this.highSurrogate = 0
return buf
}
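// A hedged worked example (not in the file) of the surrogate arithmetic in write() above:
// "😀" is U+1F600, held in UCS-2 as the pair D83D/DE00, and
//   (((0xD83D - 0xD800) << 10) | (0xDE00 - 0xDC00)) + 0x10000 === 0x1F600
// so the encoder emits it as the single 32-bit unit 00 F6 01 00 (LE) or 00 01 F6 00 (BE).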
// -- Decoding
function Utf32Decoder(options, codec) {
this.isLE = codec.isLE;
this.badChar = codec.iconv.defaultCharUnicode.charCodeAt(0);
this.overflow = [];
function Utf32Decoder (options, codec) {
this.isLE = codec.isLE
this.badChar = codec.iconv.defaultCharUnicode.charCodeAt(0)
this.overflow = []
}
Utf32Decoder.prototype.write = function(src) {
if (src.length === 0)
return '';
Utf32Decoder.prototype.write = function (src) {
if (src.length === 0) { return "" }
var i = 0;
var codepoint = 0;
var dst = Buffer.alloc(src.length + 4);
var offset = 0;
var isLE = this.isLE;
var overflow = this.overflow;
var badChar = this.badChar;
var i = 0
var codepoint = 0
var dst = Buffer.alloc(src.length + 4)
var offset = 0
var isLE = this.isLE
var overflow = this.overflow
var badChar = this.badChar
if (overflow.length > 0) {
for (; i < src.length && overflow.length < 4; i++)
overflow.push(src[i]);
if (overflow.length === 4) {
// NOTE: codepoint is a signed int32 and can be negative.
// NOTE: We copied this block from below to help V8 optimize it (it works with array, not buffer).
if (isLE) {
codepoint = overflow[i] | (overflow[i+1] << 8) | (overflow[i+2] << 16) | (overflow[i+3] << 24);
} else {
codepoint = overflow[i+3] | (overflow[i+2] << 8) | (overflow[i+1] << 16) | (overflow[i] << 24);
}
overflow.length = 0;
if (overflow.length > 0) {
for (; i < src.length && overflow.length < 4; i++) { overflow.push(src[i]) }
offset = _writeCodepoint(dst, offset, codepoint, badChar);
}
if (overflow.length === 4) {
// NOTE: codepoint is a signed int32 and can be negative.
// NOTE: We copied this block from below to help V8 optimize it (it works with array, not buffer).
if (isLE) {
codepoint = overflow[i] | (overflow[i + 1] << 8) | (overflow[i + 2] << 16) | (overflow[i + 3] << 24)
} else {
codepoint = overflow[i + 3] | (overflow[i + 2] << 8) | (overflow[i + 1] << 16) | (overflow[i] << 24)
}
overflow.length = 0
offset = _writeCodepoint(dst, offset, codepoint, badChar)
}
}
// Main loop. Should be as optimized as possible.
for (; i < src.length - 3; i += 4) {
// NOTE: codepoint is a signed int32 and can be negative.
if (isLE) {
codepoint = src[i] | (src[i+1] << 8) | (src[i+2] << 16) | (src[i+3] << 24);
} else {
codepoint = src[i+3] | (src[i+2] << 8) | (src[i+1] << 16) | (src[i] << 24);
}
offset = _writeCodepoint(dst, offset, codepoint, badChar);
// Main loop. Should be as optimized as possible.
for (; i < src.length - 3; i += 4) {
// NOTE: codepoint is a signed int32 and can be negative.
if (isLE) {
codepoint = src[i] | (src[i + 1] << 8) | (src[i + 2] << 16) | (src[i + 3] << 24)
} else {
codepoint = src[i + 3] | (src[i + 2] << 8) | (src[i + 1] << 16) | (src[i] << 24)
}
offset = _writeCodepoint(dst, offset, codepoint, badChar)
}
// Keep overflowing bytes.
for (; i < src.length; i++) {
overflow.push(src[i]);
}
// Keep overflowing bytes.
for (; i < src.length; i++) {
overflow.push(src[i])
}
return dst.slice(0, offset).toString('ucs2');
return dst.slice(0, offset).toString("ucs2")
}
function _writeCodepoint (dst, offset, codepoint, badChar) {
// NOTE: codepoint is signed int32 and can be negative. We keep it that way to help V8 with optimizations.
if (codepoint < 0 || codepoint > 0x10FFFF) {
// Not a valid Unicode codepoint
codepoint = badChar
}
// Supplementary planes: write the high surrogate.
if (codepoint >= 0x10000) {
codepoint -= 0x10000
var high = 0xD800 | (codepoint >> 10)
dst[offset++] = high & 0xff
dst[offset++] = high >> 8
// Low surrogate is written below.
var codepoint = 0xDC00 | (codepoint & 0x3FF)
}
// Write BMP char or low surrogate.
dst[offset++] = codepoint & 0xff
dst[offset++] = codepoint >> 8
return offset
};
Utf32Decoder.prototype.end = function () {
this.overflow.length = 0
}
// == UTF-32 Auto codec =============================================================
// Decoder chooses automatically from UTF-32LE and UTF-32BE using the BOM and a heuristic on the decoded characters.
@@ -189,131 +179,129 @@ Utf32Decoder.prototype.end = function() {
// Encoder prepends BOM (which can be overridden with {addBOM: false}).
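// Rough usage sketch (illustration only, assuming iconv-lite's top-level API):
//   var iconv = require("iconv-lite")
//   iconv.encode("abc", "utf-32")   // defaults to UTF-32LE and prepends the BOM FF FE 00 00
//   iconv.decode(buf, "utf-32")     // endianness picked from the BOM, else the heuristic below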
exports.utf32 = Utf32AutoCodec
exports.ucs4 = "utf32"
function Utf32AutoCodec (options, iconv) {
this.iconv = iconv
}
Utf32AutoCodec.prototype.encoder = Utf32AutoEncoder
Utf32AutoCodec.prototype.decoder = Utf32AutoDecoder
// -- Encoding
function Utf32AutoEncoder (options, codec) {
options = options || {}
if (options.addBOM === undefined) {
options.addBOM = true
}
this.encoder = codec.iconv.getEncoder(options.defaultEncoding || "utf-32le", options)
}
Utf32AutoEncoder.prototype.write = function (str) {
return this.encoder.write(str)
}
Utf32AutoEncoder.prototype.end = function () {
return this.encoder.end()
}
// -- Decoding
function Utf32AutoDecoder (options, codec) {
this.decoder = null
this.initialBufs = []
this.initialBufsLen = 0
this.options = options || {}
this.iconv = codec.iconv
}
Utf32AutoDecoder.prototype.write = function (buf) {
if (!this.decoder) {
// Codec is not chosen yet. Accumulate initial bytes.
this.initialBufs.push(buf)
this.initialBufsLen += buf.length
if (this.initialBufsLen < 32) // We need more bytes before the detection heuristic can run (see detectEncoding below)
{ return "" }
// We have enough bytes -> detect endianness.
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding)
this.decoder = this.iconv.getDecoder(encoding, this.options)
var resStr = ""
for (var i = 0; i < this.initialBufs.length; i++) { resStr += this.decoder.write(this.initialBufs[i]) }
this.initialBufs.length = this.initialBufsLen = 0
return resStr
}
return this.decoder.write(buf)
}
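// NOTE: the 32-byte threshold above corresponds to 8 UTF-32 characters, which is
// enough input for detectEncoding() below to count valid/BMP characters per endianness.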
Utf32AutoDecoder.prototype.end = function () {
if (!this.decoder) {
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding)
this.decoder = this.iconv.getDecoder(encoding, this.options)
var resStr = ""
for (var i = 0; i < this.initialBufs.length; i++) { resStr += this.decoder.write(this.initialBufs[i]) }
var trail = this.decoder.end()
if (trail) { resStr += trail }
this.initialBufs.length = this.initialBufsLen = 0
return resStr
}
return this.decoder.end()
}
function detectEncoding (bufs, defaultEncoding) {
var b = []
var charsProcessed = 0
var invalidLE = 0; var invalidBE = 0 // Number of invalid chars when decoded as LE or BE.
var bmpCharsLE = 0; var bmpCharsBE = 0 // Number of BMP chars when decoded as LE or BE.
outerLoop:
for (var i = 0; i < bufs.length; i++) {
var buf = bufs[i]
for (var j = 0; j < buf.length; j++) {
b.push(buf[j])
if (b.length === 4) {
if (charsProcessed === 0) {
// Check BOM first.
if (b[0] === 0xFF && b[1] === 0xFE && b[2] === 0 && b[3] === 0) {
return "utf-32le"
}
if (b[0] === 0 && b[1] === 0 && b[2] === 0xFE && b[3] === 0xFF) {
return "utf-32be"
}
}
if (b[0] !== 0 || b[1] > 0x10) invalidBE++
if (b[3] !== 0 || b[2] > 0x10) invalidLE++
if (b[0] === 0 && b[1] === 0 && (b[2] !== 0 || b[3] !== 0)) bmpCharsBE++
if ((b[0] !== 0 || b[1] !== 0) && b[2] === 0 && b[3] === 0) bmpCharsLE++
b.length = 0
charsProcessed++
if (charsProcessed >= 100) {
break outerLoop
}
}
}
}
// Make decisions.
if (bmpCharsBE - invalidBE > bmpCharsLE - invalidLE) return "utf-32be"
if (bmpCharsBE - invalidBE < bmpCharsLE - invalidLE) return "utf-32le"
// Couldn't decide (likely all zeros or not enough data).
return defaultEncoding || "utf-32le"
}
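// Worked example of the heuristic (illustration only): plain ASCII "Hi" in UTF-32LE is
// 48 00 00 00 69 00 00 00. Each 4-byte group has b[0] !== 0, so invalidBE grows, while
// b[2] === b[3] === 0 makes bmpCharsLE grow; the LE score wins and "utf-32le" is returned.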

View File

@@ -1,122 +1,122 @@
"use strict";
var Buffer = require("safer-buffer").Buffer;
"use strict"
var Buffer = require("safer-buffer").Buffer
// UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
// See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
exports.utf7 = Utf7Codec
exports.unicode11utf7 = "utf7" // Alias UNICODE-1-1-UTF-7
function Utf7Codec (codecOptions, iconv) {
this.iconv = iconv
}
Utf7Codec.prototype.encoder = Utf7Encoder
Utf7Codec.prototype.decoder = Utf7Decoder
Utf7Codec.prototype.bomAware = true
// -- Encoding
// NOTE: the backslash escapes inside the character class below are redundant; eslint flags them, hence the exception.
// eslint-disable-next-line no-useless-escape
var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g
function Utf7Encoder (options, codec) {
this.iconv = codec.iconv
}
Utf7Encoder.prototype.write = function (str) {
// Naive implementation.
// Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".
return Buffer.from(str.replace(nonDirectChars, function (chunk) {
return "+" + (chunk === "+"
? ""
: this.iconv.encode(chunk, "utf16-be").toString("base64").replace(/=+$/, "")) +
"-"
}.bind(this)))
}
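// Illustration of the rule above (not normative): encoding "1 + 1 = 2" yields
// "1 +- 1 +AD0- 2" - the lone "+" becomes "+-", and "=" (U+003D) becomes the base64
// of its UTF-16BE bytes 00 3D with padding stripped, i.e. "+AD0-".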
Utf7Encoder.prototype.end = function () {
}
// -- Decoding
function Utf7Decoder (options, codec) {
this.iconv = codec.iconv
this.inBase64 = false
this.base64Accum = ""
}
// NOTE: the escaped "/" in the character class below is redundant; eslint flags it, hence the exception.
// eslint-disable-next-line no-useless-escape
var base64Regex = /[A-Za-z0-9\/+]/
var base64Chars = []
for (var i = 0; i < 256; i++) { base64Chars[i] = base64Regex.test(String.fromCharCode(i)) }
var plusChar = "+".charCodeAt(0)
var minusChar = "-".charCodeAt(0)
var andChar = "&".charCodeAt(0)
Utf7Decoder.prototype.write = function (buf) {
var res = ""; var lastI = 0
var inBase64 = this.inBase64
var base64Accum = this.base64Accum
// The decoder is more involved as we must handle chunks in stream.
for (var i = 0; i < buf.length; i++) {
if (!inBase64) { // We're in direct mode.
// Write direct chars until '+'
if (buf[i] == plusChar) {
res += this.iconv.decode(buf.slice(lastI, i), "ascii") // Write direct chars.
lastI = i + 1
inBase64 = true
}
} else { // We decode base64.
if (!base64Chars[buf[i]]) { // Base64 ended.
if (i == lastI && buf[i] == minusChar) { // "+-" -> "+"
res += "+"
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI, i), "ascii")
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
}
if (buf[i] != minusChar) // Minus is absorbed after base64.
{ i-- }
lastI = i + 1
inBase64 = false
base64Accum = ""
}
}
}
if (!inBase64) {
res += this.iconv.decode(buf.slice(lastI), "ascii") // Write direct chars.
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI), "ascii")
var canBeDecoded = b64str.length - (b64str.length % 8) // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
base64Accum = b64str.slice(canBeDecoded) // The rest will be decoded in future.
b64str = b64str.slice(0, canBeDecoded)
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
}
this.inBase64 = inBase64
this.base64Accum = base64Accum
return res
}
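// NOTE on the "minimal chunk" math above: 8 base64 characters decode to exactly 6 bytes,
// i.e. 3 UTF-16BE code units, so any shorter remainder is held in base64Accum until the
// next write() call completes it.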
Utf7Decoder.prototype.end = function () {
var res = ""
if (this.inBase64 && this.base64Accum.length > 0) { res = this.iconv.decode(Buffer.from(this.base64Accum, "base64"), "utf16-be") }
this.inBase64 = false
this.base64Accum = ""
return res
}
// UTF-7-IMAP codec.
// RFC3501 Sec. 5.1.3 Modified UTF-7 (http://tools.ietf.org/html/rfc3501#section-5.1.3)
// Differences:
@@ -128,163 +128,156 @@ Utf7Decoder.prototype.end = function() {
// * String must end in non-shifted position.
// * "-&" while in base64 is not allowed.
exports.utf7imap = Utf7IMAPCodec
function Utf7IMAPCodec (codecOptions, iconv) {
this.iconv = iconv
}
Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder
Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder
Utf7IMAPCodec.prototype.bomAware = true
// -- Encoding
function Utf7IMAPEncoder (options, codec) {
this.iconv = codec.iconv
this.inBase64 = false
this.base64Accum = Buffer.alloc(6)
this.base64AccumIdx = 0
}
Utf7IMAPEncoder.prototype.write = function (str) {
var inBase64 = this.inBase64
var base64Accum = this.base64Accum
var base64AccumIdx = this.base64AccumIdx
var buf = Buffer.alloc(str.length * 5 + 10); var bufIdx = 0
for (var i = 0; i < str.length; i++) {
var uChar = str.charCodeAt(i)
if (uChar >= 0x20 && uChar <= 0x7E) { // Direct character or '&'.
if (inBase64) {
if (base64AccumIdx > 0) {
bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString("base64").replace(/\//g, ",").replace(/=+$/, ""), bufIdx)
base64AccumIdx = 0
}
buf[bufIdx++] = minusChar // Write '-', then go to direct mode.
inBase64 = false
}
if (!inBase64) {
buf[bufIdx++] = uChar // Write direct character
if (uChar === andChar) // Ampersand -> '&-'
{ buf[bufIdx++] = minusChar }
}
} else { // Non-direct character
if (!inBase64) {
buf[bufIdx++] = andChar // Write '&', then go to base64 mode.
inBase64 = true
}
if (inBase64) {
base64Accum[base64AccumIdx++] = uChar >> 8
base64Accum[base64AccumIdx++] = uChar & 0xFF
if (base64AccumIdx == base64Accum.length) {
bufIdx += buf.write(base64Accum.toString("base64").replace(/\//g, ","), bufIdx)
base64AccumIdx = 0
}
}
}
}
this.inBase64 = inBase64
this.base64AccumIdx = base64AccumIdx
return buf.slice(0, bufIdx)
}
Utf7IMAPEncoder.prototype.end = function () {
var buf = Buffer.alloc(10); var bufIdx = 0
if (this.inBase64) {
if (this.base64AccumIdx > 0) {
bufIdx += buf.write(this.base64Accum.slice(0, this.base64AccumIdx).toString("base64").replace(/\//g, ",").replace(/=+$/, ""), bufIdx)
this.base64AccumIdx = 0
}
buf[bufIdx++] = minusChar // Write '-', then go to direct mode.
this.inBase64 = false
}
return buf.slice(0, bufIdx)
}
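// Illustration (not part of the codec): write("Hi &") produces "Hi &-" (ampersand escaped
// as "&-"), while write("日本語") buffers the UTF-16BE bytes and, together with end(),
// flushes them as "&ZeVnLIqe-" using the ","-for-"/" modified base64.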
// -- Decoding
function Utf7IMAPDecoder (options, codec) {
this.iconv = codec.iconv
this.inBase64 = false
this.base64Accum = ""
}
var base64IMAPChars = base64Chars.slice()
base64IMAPChars[",".charCodeAt(0)] = true
Utf7IMAPDecoder.prototype.write = function (buf) {
var res = ""; var lastI = 0
var inBase64 = this.inBase64
var base64Accum = this.base64Accum
// The decoder is more involved as we must handle chunks in stream.
// It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
for (var i = 0; i < buf.length; i++) {
if (!inBase64) { // We're in direct mode.
// Write direct chars until '&'
if (buf[i] == andChar) {
res += this.iconv.decode(buf.slice(lastI, i), "ascii") // Write direct chars.
lastI = i + 1
inBase64 = true
}
} else { // We decode base64.
if (!base64IMAPChars[buf[i]]) { // Base64 ended.
if (i == lastI && buf[i] == minusChar) { // "&-" -> "&"
res += "&"
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI, i), "ascii").replace(/,/g, "/")
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
}
if (buf[i] != minusChar) // Minus may be absorbed after base64.
{ i-- }
lastI = i + 1
inBase64 = false
base64Accum = ""
}
}
}
if (!inBase64) {
res += this.iconv.decode(buf.slice(lastI), "ascii") // Write direct chars.
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI), "ascii").replace(/,/g, "/")
var canBeDecoded = b64str.length - (b64str.length % 8) // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
base64Accum = b64str.slice(canBeDecoded) // The rest will be decoded in future.
b64str = b64str.slice(0, canBeDecoded)
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
}
this.inBase64 = inBase64
this.base64Accum = base64Accum
return res
}
Utf7IMAPDecoder.prototype.end = function () {
var res = ""
if (this.inBase64 && this.base64Accum.length > 0) { res = this.iconv.decode(Buffer.from(this.base64Accum, "base64"), "utf16-be") }
this.inBase64 = false
this.base64Accum = ""
return res
}
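// Rough usage sketch (illustration only, assuming iconv-lite's top-level API):
//   var iconv = require("iconv-lite")
//   iconv.decode(Buffer.from("&ZeVnLIqe-"), "utf7imap")   // -> "日本語"
//   iconv.encode("日本語", "utf7imap").toString()          // -> "&ZeVnLIqe-"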