This commit is contained in:
2026-03-15 12:22:42 +01:00
parent cd99275933
commit 311ba5e7f3
558 changed files with 55182 additions and 22981 deletions

124
node_modules/iconv-lite/Changelog.md generated vendored
View File

@@ -1,124 +1,50 @@
## 0.7.0
### 🐞 Bug fixes
* Handle split surrogate pairs when encoding utf8 - by [@yosion-p](https://github.com/yosion-p) and [@ashtuchkin](https://github.com/ashtuchkin) in [#282](https://github.com/ashtuchkin/iconv-lite/pull/282):
Handle a case where streaming utf8 encoder (converting js strings -> buffers) encounters
surrogate pairs split between chunks (last character of one chunk is high surrogate and first
character of the next chunk is a low surrogate).
* Avoid false positives in encodingExists by using objects without a prototype - by [@bjohansebas](https://github.com/bjohansebas) in [#328](https://github.com/ashtuchkin/iconv-lite/pull/328)
The encodingExists method could return incorrect results if the lookup matched properties inherited
from the prototype of the object that stores the encodings, such as constructor and others. This change
replaces that object with one that has no prototype, ensuring that only explicitly defined valid encodings
in the library are considered. In addition, the fix is applied to the internal cache system to avoid the same
kind of false positives.
### 🚀 Improvements
* Make explicit that decode() method supports Uint8Array input - by [@jardicc](https://github.com/jardicc) in [#271](https://github.com/ashtuchkin/iconv-lite/pull/271)
* Remove compatibility check for StringDecoder.end method - by [@bjohansebas](https://github.com/bjohansebas) in [#331](https://github.com/ashtuchkin/iconv-lite/pull/331)
## 0.6.3 / 2021-05-23
* Fix HKSCS encoding to prefer Big5 codes if both Big5 and HKSCS codes are possible (#264)
## 0.6.2 / 2020-07-08
* Support Uint8Array-s decoding without conversion to Buffers, plus fix an edge case.
## 0.6.1 / 2020-06-28
* Support Uint8Array-s directly when decoding (#246, by @gyzerok)
* Unify package.json version ranges to be strictly semver-compatible (#241)
* Fix minor issue in UTF-32 decoder's endianness detection code.
## 0.6.0 / 2020-06-08
* Updated 'gb18030' encoding to :2005 edition (see https://github.com/whatwg/encoding/issues/22).
* Removed `iconv.extendNodeEncodings()` mechanism. It was deprecated 5 years ago and didn't work
in recent Node versions.
* Reworked Streaming API behavior in browser environments to fix #204. Streaming API will be
excluded by default in browser packs, saving ~100Kb bundle size, unless enabled explicitly using
`iconv.enableStreamingAPI(require('stream'))`.
* Updates to development environment & tests:
* Added ./test/webpack private package to test complex new use cases that need custom environment.
It's tested as a separate job in Travis CI.
* Updated generation code for the new EUC-KR index file format from Encoding Standard.
* Removed Buffer() constructor in tests (#197 by @gabrielschulhof).
## 0.5.2 / 2020-06-08
* Added `iconv.getEncoder()` and `iconv.getDecoder()` methods to typescript definitions (#229).
* Fixed semver version to 6.1.2 to support Node 8.x (by @tanandara).
* Capped iconv version to 2.x as 3.x has dropped support for older Node versions.
* Switched from istanbul to c8 for code coverage.
## 0.5.1 / 2020-01-18
* Added cp720 encoding (#221, by @kr-deps)
* (minor) Changed Changelog.md formatting to use h2.
## 0.5.0 / 2019-06-26
* Added UTF-32 encoding, both little-endian and big-endian variants (UTF-32LE, UTF-32BE). If endianness
is not provided for decoding, it's deduced automatically from the stream using a heuristic similar to
what we use in UTF-16. (great work in #216 by @kshetline)
* Several minor updates to README (#217 by @oldj, plus some more)
* Added Node versions 10 and 12 to Travis test harness.
## 0.4.24 / 2018-08-22
# 0.4.24 / 2018-08-22
* Added MIK encoding (#196, by @Ivan-Kalatchev)
## 0.4.23 / 2018-05-07
# 0.4.23 / 2018-05-07
* Fix deprecation warning in Node v10 due to the last usage of `new Buffer` (#185, by @felixbuenemann)
* Switched from NodeBuffer to Buffer in typings (#155 by @felixfbecker, #186 by @larssn)
## 0.4.22 / 2018-05-05
# 0.4.22 / 2018-05-05
* Use older semver style for dependencies to be compatible with Node version 0.10 (#182, by @dougwilson)
* Fix tests to accommodate fixes in Node v10 (#182, by @dougwilson)
## 0.4.21 / 2018-04-06
# 0.4.21 / 2018-04-06
* Fix encoding canonicalization (#156)
* Fix the paths in the "browser" field in package.json (#174 by @LMLB)
* Removed "contributors" section in package.json - see Git history instead.
## 0.4.20 / 2018-04-06
# 0.4.20 / 2018-04-06
* Updated `new Buffer()` usages with recommended replacements as it's being deprecated in Node v10 (#176, #178 by @ChALkeR)
## 0.4.19 / 2017-09-09
# 0.4.19 / 2017-09-09
* Fixed iso8859-1 codec regression in handling untranslatable characters (#162, caused by #147)
* Re-generated windows1255 codec, because it was updated in iconv project
* Fixed grammar in error message when iconv-lite is loaded with encoding other than utf8
## 0.4.18 / 2017-06-13
# 0.4.18 / 2017-06-13
* Fixed CESU-8 regression in Node v8.
## 0.4.17 / 2017-04-22
# 0.4.17 / 2017-04-22
* Updated typescript definition file to support Angular 2 AoT mode (#153 by @larssn)
## 0.4.16 / 2017-04-22
# 0.4.16 / 2017-04-22
* Added support for React Native (#150)
* Changed iso8859-1 encoding to use the internal 'binary' encoding, as it's the same thing (#147 by @mscdex)
@@ -127,12 +53,12 @@
* Added a warning if iconv-lite is loaded not as utf-8 (see #142)
## 0.4.15 / 2016-11-21
# 0.4.15 / 2016-11-21
* Fixed typescript type definition (#137)
## 0.4.14 / 2016-11-20
# 0.4.14 / 2016-11-20
* Preparation for v1.0
* Added Node v6 and latest Node versions to Travis CI test rig
@@ -142,12 +68,12 @@
* Add ms prefix to dbcs windows encodings (@rokoroku)
## 0.4.13 / 2015-10-01
# 0.4.13 / 2015-10-01
* Fix silly mistake in deprecation notice.
## 0.4.12 / 2015-09-26
# 0.4.12 / 2015-09-26
* Node v4 support:
* Added CESU-8 decoding (#106)
@@ -155,18 +81,18 @@
* Added Travis tests for Node v4 and io.js latest (#105 by @Mithgol)
## 0.4.11 / 2015-07-03
# 0.4.11 / 2015-07-03
* Added CESU-8 encoding.
## 0.4.10 / 2015-05-26
# 0.4.10 / 2015-05-26
* Changed UTF-16 endianness heuristic to take into account any ASCII chars, not
just spaces. This should minimize the importance of "default" endianness.
## 0.4.9 / 2015-05-24
# 0.4.9 / 2015-05-24
* Streamlined BOM handling: strip BOM by default, add BOM when encoding if
addBOM: true. Added docs to Readme.
@@ -178,12 +104,12 @@
* Use strict mode in all files.
## 0.4.8 / 2015-04-14
# 0.4.8 / 2015-04-14
* added alias UNICODE-1-1-UTF-7 for UTF-7 encoding (#94)
## 0.4.7 / 2015-02-05
# 0.4.7 / 2015-02-05
* stop official support of Node.js v0.8. Should still work, but no guarantees.
reason: Packages needed for testing are hard to get on Travis CI.
@@ -191,40 +117,40 @@
props (#89).
## 0.4.6 / 2015-01-12
# 0.4.6 / 2015-01-12
* fix rare aliases of single-byte encodings (thanks @mscdex)
* double the timeout for dbcs tests to make them less flaky on travis
## 0.4.5 / 2014-11-20
# 0.4.5 / 2014-11-20
* fix windows-31j and x-sjis encoding support (@nleush)
* minor fix: undefined variable reference when internal error happens
## 0.4.4 / 2014-07-16
# 0.4.4 / 2014-07-16
* added encodings UTF-7 (RFC2152) and UTF-7-IMAP (RFC3501 Section 5.1.3)
* fixed streaming base64 encoding
## 0.4.3 / 2014-06-14
# 0.4.3 / 2014-06-14
* added encodings UTF-16BE and UTF-16 with BOM
## 0.4.2 / 2014-06-12
# 0.4.2 / 2014-06-12
* don't throw exception if `extendNodeEncodings()` is called more than once
## 0.4.1 / 2014-06-11
# 0.4.1 / 2014-06-11
* codepage 808 added
## 0.4.0 / 2014-06-10
# 0.4.0 / 2014-06-10
* code is rewritten from scratch
* all widespread encodings are supported

112
node_modules/iconv-lite/README.md generated vendored
View File

@@ -1,40 +1,38 @@
## iconv-lite: Pure JS character encoding conversion
## Pure JS character encoding conversion [![Build Status](https://travis-ci.org/ashtuchkin/iconv-lite.svg?branch=master)](https://travis-ci.org/ashtuchkin/iconv-lite)
[![NPM Version][npm-version-image]][npm-url]
[![NPM Downloads][npm-downloads-image]][npm-downloads-url]
[![License][license-image]][license-url]
[![NPM Install Size][npm-install-size-image]][npm-install-size-url]
* Doesn't need native code compilation. Works on Windows and in sandboxed environments like [Cloud9](http://c9.io).
* Used in popular projects like [Express.js (body_parser)](https://github.com/expressjs/body-parser),
[Grunt](http://gruntjs.com/), [Nodemailer](http://www.nodemailer.com/), [Yeoman](http://yeoman.io/) and others.
* Faster than [node-iconv](https://github.com/bnoordhuis/node-iconv) (see below for performance comparison).
* Intuitive encode/decode API
* Streaming support for Node v0.10+
* [Deprecated] Can extend Node.js primitives (buffers, streams) to support all iconv-lite encodings.
* In-browser usage via [Browserify](https://github.com/substack/node-browserify) (~180k gzip compressed with Buffer shim included).
* Typescript [type definition file](https://github.com/ashtuchkin/iconv-lite/blob/master/lib/index.d.ts) included.
* React Native is supported (need to explicitly `npm install` two more modules: `buffer` and `stream`).
* License: MIT.
* No need for native code compilation. Quick to install, works on Windows, Web, and in sandboxed environments.
* Used in popular projects like [Express.js (body_parser)](https://github.com/expressjs/body-parser),
[Grunt](http://gruntjs.com/), [Nodemailer](http://www.nodemailer.com/), [Yeoman](http://yeoman.io/) and others.
* Faster than [node-iconv](https://github.com/bnoordhuis/node-iconv) (see below for performance comparison).
* Intuitive encode/decode API, including Streaming support.
* In-browser usage via [browserify](https://github.com/substack/node-browserify) or [webpack](https://webpack.js.org/) (~180kb gzip compressed with Buffer shim included).
* Typescript [type definition file](https://github.com/ashtuchkin/iconv-lite/blob/master/lib/index.d.ts) included.
* React Native is supported (need to install `stream` module to enable Streaming API).
[![NPM Stats](https://nodei.co/npm/iconv-lite.png?downloads=true&downloadRank=true)](https://npmjs.org/packages/iconv-lite/)
## Usage
### Basic API
```javascript
var iconv = require('iconv-lite');
// Convert from an encoded buffer to a js string.
// Convert from an encoded buffer to js string.
str = iconv.decode(Buffer.from([0x68, 0x65, 0x6c, 0x6c, 0x6f]), 'win1251');
// Convert from a js string to an encoded buffer.
// Convert from js string to an encoded buffer.
buf = iconv.encode("Sample input string", 'win1251');
// Check if encoding is supported
iconv.encodingExists("us-ascii")
```
### Streaming API
### Streaming API (Node v0.10+)
```javascript
// Decode stream (from binary data stream to js strings)
// Decode stream (from binary stream to js strings)
http.createServer(function(req, res) {
var converterStream = iconv.decodeStream('win1251');
req.pipe(converterStream);
@@ -59,10 +57,44 @@ http.createServer(function(req, res) {
});
```
### [Deprecated] Extend Node.js own encodings
> NOTE: This doesn't work on latest Node versions. See [details](https://github.com/ashtuchkin/iconv-lite/wiki/Node-v4-compatibility).
```javascript
// After this call all Node basic primitives will understand iconv-lite encodings.
iconv.extendNodeEncodings();
// Examples:
buf = new Buffer(str, 'win1251');
buf.write(str, 'gbk');
str = buf.toString('latin1');
assert(Buffer.isEncoding('iso-8859-15'));
Buffer.byteLength(str, 'us-ascii');
http.createServer(function(req, res) {
req.setEncoding('big5');
req.collect(function(err, body) {
console.log(body);
});
});
fs.createReadStream("file.txt", "shift_jis");
// External modules are also supported (if they use Node primitives, which they probably do).
request = require('request');
request({
url: "http://github.com/",
encoding: "cp932"
});
// To remove extensions
iconv.undoExtendNodeEncodings();
```
## Supported encodings
* All node.js native encodings: utf8, ucs2 / utf16-le, ascii, binary, base64, hex.
* Additional unicode encodings: utf16, utf16-be, utf-7, utf-7-imap, utf32, utf32-le, and utf32-be.
* Additional unicode encodings: utf16, utf16-be, utf-7, utf-7-imap.
* All widespread singlebyte encodings: Windows 125x family, ISO-8859 family,
IBM/DOS codepages, Macintosh family, KOI8 family, all others supported by iconv library.
Aliases like 'latin1', 'us-ascii' also supported.
@@ -74,6 +106,7 @@ Most singlebyte encodings are generated automatically from [node-iconv](https://
Multibyte encodings are generated from [Unicode.org mappings](http://www.unicode.org/Public/MAPPINGS/) and [WHATWG Encoding Standard mappings](http://encoding.spec.whatwg.org/). Thank you, respective authors!
## Encoding/decoding speed
Comparison with node-iconv module (1000x256kb, on MacBook Pro, Core i5/2.6 GHz, Node v0.12.0).
@@ -100,39 +133,24 @@ smart about endianness in the following ways:
overridden with `defaultEncoding: 'utf-16be'` option. Strips BOM unless `stripBOM: false`.
* Encoding: uses UTF-16LE and writes BOM by default. Use `addBOM: false` to override.
## UTF-32 Encodings
This library supports UTF-32LE, UTF-32BE and UTF-32 encodings. Like the UTF-16 encoding above, UTF-32 defaults to UTF-32LE, but uses BOM and 'spaces heuristics' to determine input endianness.
* The default of UTF-32LE can be overridden with the `defaultEncoding: 'utf-32be'` option. Strips BOM unless `stripBOM: false`.
* Encoding: uses UTF-32LE and writes BOM by default. Use `addBOM: false` to override. (`defaultEncoding: 'utf-32be'` can also be used here to change encoding.)
## Other notes
When decoding, be sure to supply a Buffer to decode() method, otherwise [bad things usually happen](https://github.com/ashtuchkin/iconv-lite/wiki/Use-Buffers-when-decoding).
Untranslatable characters are set to <20> or ?. No transliteration is currently supported.
Node versions 0.10.31 and 0.11.13 are buggy, don't use them (see [#65](https://github.com/ashtuchkin/iconv-lite/issues/65), [#77](https://github.com/ashtuchkin/iconv-lite/issues/77)).
Node versions 0.10.31 and 0.11.13 are buggy, don't use them (see #65, #77).
## Testing
```sh
git clone git@github.com:ashtuchkin/iconv-lite.git
cd iconv-lite
npm install
npm test
```bash
$ git clone git@github.com:ashtuchkin/iconv-lite.git
$ cd iconv-lite
$ npm install
$ npm test
# To view performance:
npm run test:performance
$ # To view performance:
$ node test/performance.js
# To view test coverage:
npm run test:cov
open coverage/index.html
$ # To view test coverage:
$ npm run coverage
$ open coverage/lcov-report/index.html
```
[npm-downloads-image]: https://badgen.net/npm/dm/iconv-lite
[npm-downloads-url]: https://npmcharts.com/compare/iconv-lite?minimal=true
[npm-url]: https://npmjs.org/package/iconv-lite
[npm-version-image]: https://badgen.net/npm/v/iconv-lite
[npm-install-size-image]: https://badgen.net/packagephobia/install/iconv-lite
[npm-install-size-url]: https://packagephobia.com/result?p=iconv-lite
[license-image]: https://img.shields.io/npm/l/iconv-lite.svg
[license-url]: https://github.com/ashtuchkin/iconv-lite/blob/HEAD/LICENSE

File diff suppressed because it is too large Load Diff

View File

@@ -1,185 +1,176 @@
"use strict"
"use strict";
// Description of supported double byte encodings and aliases.
// Tables are not require()-d until they are needed to speed up library load.
// require()-s are direct to support Browserify.
module.exports = {
// == Japanese/ShiftJIS ====================================================
// All japanese encodings are based on JIS X set of standards:
// JIS X 0201 - Single-byte encoding of ASCII + ¥ + Kana chars at 0xA1-0xDF.
// JIS X 0208 - Main set of 6879 characters, placed in 94x94 plane, to be encoded by 2 bytes.
// Has several variations in 1978, 1983, 1990 and 1997.
// JIS X 0212 - Supplementary plane of 6067 chars in 94x94 plane. 1990. Effectively dead.
// JIS X 0213 - Extension and modern replacement of 0208 and 0212. Total chars: 11233.
// 2 planes, first is superset of 0208, second - revised 0212.
// Introduced in 2000, revised 2004. Some characters are in Unicode Plane 2 (0x2xxxx)
// == Japanese/ShiftJIS ====================================================
// All japanese encodings are based on JIS X set of standards:
// JIS X 0201 - Single-byte encoding of ASCII + ¥ + Kana chars at 0xA1-0xDF.
// JIS X 0208 - Main set of 6879 characters, placed in 94x94 plane, to be encoded by 2 bytes.
// Has several variations in 1978, 1983, 1990 and 1997.
// JIS X 0212 - Supplementary plane of 6067 chars in 94x94 plane. 1990. Effectively dead.
// JIS X 0213 - Extension and modern replacement of 0208 and 0212. Total chars: 11233.
// 2 planes, first is superset of 0208, second - revised 0212.
// Introduced in 2000, revised 2004. Some characters are in Unicode Plane 2 (0x2xxxx)
// Byte encodings are:
// * Shift_JIS: Compatible with 0201, uses not defined chars in top half as lead bytes for double-byte
// encoding of 0208. Lead byte ranges: 0x81-0x9F, 0xE0-0xEF; Trail byte ranges: 0x40-0x7E, 0x80-0x9E, 0x9F-0xFC.
// Windows CP932 is a superset of Shift_JIS. Some companies added more chars, notably KDDI.
// * EUC-JP: Up to 3 bytes per character. Used mostly on *nixes.
// 0x00-0x7F - lower part of 0201
// 0x8E, 0xA1-0xDF - upper part of 0201
// (0xA1-0xFE)x2 - 0208 plane (94x94).
// 0x8F, (0xA1-0xFE)x2 - 0212 plane (94x94).
// * JIS X 208: 7-bit, direct encoding of 0208. Byte ranges: 0x21-0x7E (94 values). Uncommon.
// Used as-is in ISO2022 family.
// * ISO2022-JP: Stateful encoding, with escape sequences to switch between ASCII,
// 0201-1976 Roman, 0208-1978, 0208-1983.
// * ISO2022-JP-1: Adds esc seq for 0212-1990.
// * ISO2022-JP-2: Adds esc seq for GB2312-1980, KSX1001-1992, ISO8859-1, ISO8859-7.
// * ISO2022-JP-3: Adds esc seq for 0201-1976 Kana set, 0213-2000 Planes 1, 2.
// * ISO2022-JP-2004: Adds 0213-2004 Plane 1.
//
// After JIS X 0213 appeared, Shift_JIS-2004, EUC-JISX0213 and ISO2022-JP-2004 followed, with just changing the planes.
//
// Overall, it seems that it's a mess :( http://www8.plala.or.jp/tkubota1/unicode-symbols-map2.html
// Byte encodings are:
// * Shift_JIS: Compatible with 0201, uses not defined chars in top half as lead bytes for double-byte
// encoding of 0208. Lead byte ranges: 0x81-0x9F, 0xE0-0xEF; Trail byte ranges: 0x40-0x7E, 0x80-0x9E, 0x9F-0xFC.
// Windows CP932 is a superset of Shift_JIS. Some companies added more chars, notably KDDI.
// * EUC-JP: Up to 3 bytes per character. Used mostly on *nixes.
// 0x00-0x7F - lower part of 0201
// 0x8E, 0xA1-0xDF - upper part of 0201
// (0xA1-0xFE)x2 - 0208 plane (94x94).
// 0x8F, (0xA1-0xFE)x2 - 0212 plane (94x94).
// * JIS X 208: 7-bit, direct encoding of 0208. Byte ranges: 0x21-0x7E (94 values). Uncommon.
// Used as-is in ISO2022 family.
// * ISO2022-JP: Stateful encoding, with escape sequences to switch between ASCII,
// 0201-1976 Roman, 0208-1978, 0208-1983.
// * ISO2022-JP-1: Adds esc seq for 0212-1990.
// * ISO2022-JP-2: Adds esc seq for GB2312-1980, KSX1001-1992, ISO8859-1, ISO8859-7.
// * ISO2022-JP-3: Adds esc seq for 0201-1976 Kana set, 0213-2000 Planes 1, 2.
// * ISO2022-JP-2004: Adds 0213-2004 Plane 1.
//
// After JIS X 0213 appeared, Shift_JIS-2004, EUC-JISX0213 and ISO2022-JP-2004 followed, with just changing the planes.
//
// Overall, it seems that it's a mess :( http://www8.plala.or.jp/tkubota1/unicode-symbols-map2.html
'shiftjis': {
type: '_dbcs',
table: function() { return require('./tables/shiftjis.json') },
encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
encodeSkipVals: [{from: 0xED40, to: 0xF940}],
},
'csshiftjis': 'shiftjis',
'mskanji': 'shiftjis',
'sjis': 'shiftjis',
'windows31j': 'shiftjis',
'ms31j': 'shiftjis',
'xsjis': 'shiftjis',
'windows932': 'shiftjis',
'ms932': 'shiftjis',
'932': 'shiftjis',
'cp932': 'shiftjis',
shiftjis: {
type: "_dbcs",
table: function () { return require("./tables/shiftjis.json") },
encodeAdd: { "\u00a5": 0x5C, "\u203E": 0x7E },
encodeSkipVals: [{ from: 0xED40, to: 0xF940 }]
},
csshiftjis: "shiftjis",
mskanji: "shiftjis",
sjis: "shiftjis",
windows31j: "shiftjis",
ms31j: "shiftjis",
xsjis: "shiftjis",
windows932: "shiftjis",
ms932: "shiftjis",
932: "shiftjis",
cp932: "shiftjis",
'eucjp': {
type: '_dbcs',
table: function() { return require('./tables/eucjp.json') },
encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
},
eucjp: {
type: "_dbcs",
table: function () { return require("./tables/eucjp.json") },
encodeAdd: { "\u00a5": 0x5C, "\u203E": 0x7E }
},
// TODO: KDDI extension to Shift_JIS
// TODO: IBM CCSID 942 = CP932, but F0-F9 custom chars and other char changes.
// TODO: IBM CCSID 943 = Shift_JIS = CP932 with original Shift_JIS lower 128 chars.
// TODO: KDDI extension to Shift_JIS
// TODO: IBM CCSID 942 = CP932, but F0-F9 custom chars and other char changes.
// TODO: IBM CCSID 943 = Shift_JIS = CP932 with original Shift_JIS lower 128 chars.
// == Chinese/GBK ==========================================================
// http://en.wikipedia.org/wiki/GBK
// We mostly implement W3C recommendation: https://www.w3.org/TR/encoding/#gbk-encoder
// == Chinese/GBK ==========================================================
// http://en.wikipedia.org/wiki/GBK
// We mostly implement W3C recommendation: https://www.w3.org/TR/encoding/#gbk-encoder
// Oldest GB2312 (1981, ~7600 chars) is a subset of CP936
gb2312: "cp936",
gb231280: "cp936",
gb23121980: "cp936",
csgb2312: "cp936",
csiso58gb231280: "cp936",
euccn: "cp936",
// Oldest GB2312 (1981, ~7600 chars) is a subset of CP936
'gb2312': 'cp936',
'gb231280': 'cp936',
'gb23121980': 'cp936',
'csgb2312': 'cp936',
'csiso58gb231280': 'cp936',
'euccn': 'cp936',
// Microsoft's CP936 is a subset and approximation of GBK.
windows936: "cp936",
ms936: "cp936",
936: "cp936",
cp936: {
type: "_dbcs",
table: function () { return require("./tables/cp936.json") }
},
// Microsoft's CP936 is a subset and approximation of GBK.
'windows936': 'cp936',
'ms936': 'cp936',
'936': 'cp936',
'cp936': {
type: '_dbcs',
table: function() { return require('./tables/cp936.json') },
},
// GBK (~22000 chars) is an extension of CP936 that added user-mapped chars and some other.
gbk: {
type: "_dbcs",
table: function () { return require("./tables/cp936.json").concat(require("./tables/gbk-added.json")) }
},
xgbk: "gbk",
isoir58: "gbk",
// GBK (~22000 chars) is an extension of CP936 that added user-mapped chars and some other.
'gbk': {
type: '_dbcs',
table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
},
'xgbk': 'gbk',
'isoir58': 'gbk',
// GB18030 is an algorithmic extension of GBK.
// Main source: https://www.w3.org/TR/encoding/#gbk-encoder
// http://icu-project.org/docs/papers/gb18030.html
// http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
// http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
gb18030: {
type: "_dbcs",
table: function () { return require("./tables/cp936.json").concat(require("./tables/gbk-added.json")) },
gb18030: function () { return require("./tables/gb18030-ranges.json") },
encodeSkipVals: [0x80],
encodeAdd: { "€": 0xA2E3 }
},
// GB18030 is an algorithmic extension of GBK.
// Main source: https://www.w3.org/TR/encoding/#gbk-encoder
// http://icu-project.org/docs/papers/gb18030.html
// http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
// http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
'gb18030': {
type: '_dbcs',
table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
gb18030: function() { return require('./tables/gb18030-ranges.json') },
encodeSkipVals: [0x80],
encodeAdd: {'€': 0xA2E3},
},
chinese: "gb18030",
'chinese': 'gb18030',
// == Korean ===============================================================
// EUC-KR, KS_C_5601 and KS X 1001 are exactly the same.
windows949: "cp949",
ms949: "cp949",
949: "cp949",
cp949: {
type: "_dbcs",
table: function () { return require("./tables/cp949.json") }
},
cseuckr: "cp949",
csksc56011987: "cp949",
euckr: "cp949",
isoir149: "cp949",
korean: "cp949",
ksc56011987: "cp949",
ksc56011989: "cp949",
ksc5601: "cp949",
// == Korean ===============================================================
// EUC-KR, KS_C_5601 and KS X 1001 are exactly the same.
'windows949': 'cp949',
'ms949': 'cp949',
'949': 'cp949',
'cp949': {
type: '_dbcs',
table: function() { return require('./tables/cp949.json') },
},
// == Big5/Taiwan/Hong Kong ================================================
// There are lots of tables for Big5 and cp950. Please see the following links for history:
// http://moztw.org/docs/big5/ http://www.haible.de/bruno/charsets/conversion-tables/Big5.html
// Variations, in roughly number of defined chars:
// * Windows CP 950: Microsoft variant of Big5. Canonical: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
// * Windows CP 951: Microsoft variant of Big5-HKSCS-2001. Seems to be never public. http://me.abelcheung.org/articles/research/what-is-cp951/
// * Big5-2003 (Taiwan standard) almost superset of cp950.
// * Unicode-at-on (UAO) / Mozilla 1.8. Falling out of use on the Web. Not supported by other browsers.
// * Big5-HKSCS (-2001, -2004, -2008). Hong Kong standard.
// many unicode code points moved from PUA to Supplementary plane (U+2XXXX) over the years.
// Plus, it has 4 combining sequences.
// Seems that Mozilla refused to support it for 10 yrs. https://bugzilla.mozilla.org/show_bug.cgi?id=162431 https://bugzilla.mozilla.org/show_bug.cgi?id=310299
// because big5-hkscs is the only encoding to include astral characters in non-algorithmic way.
// Implementations are not consistent within browsers; sometimes labeled as just big5.
// MS Internet Explorer switches from big5 to big5-hkscs when a patch applied.
// Great discussion & recap of what's going on https://bugzilla.mozilla.org/show_bug.cgi?id=912470#c31
// In the encoder, it might make sense to support encoding old PUA mappings to Big5 bytes seq-s.
// Official spec: http://www.ogcio.gov.hk/en/business/tech_promotion/ccli/terms/doc/2003cmp_2008.txt
// http://www.ogcio.gov.hk/tc/business/tech_promotion/ccli/terms/doc/hkscs-2008-big5-iso.txt
//
// Current understanding of how to deal with Big5(-HKSCS) is in the Encoding Standard, http://encoding.spec.whatwg.org/#big5-encoder
// Unicode mapping (http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT) is said to be wrong.
'cseuckr': 'cp949',
'csksc56011987': 'cp949',
'euckr': 'cp949',
'isoir149': 'cp949',
'korean': 'cp949',
'ksc56011987': 'cp949',
'ksc56011989': 'cp949',
'ksc5601': 'cp949',
windows950: "cp950",
ms950: "cp950",
950: "cp950",
cp950: {
type: "_dbcs",
table: function () { return require("./tables/cp950.json") }
},
// Big5 has many variations and is an extension of cp950. We use Encoding Standard's as a consensus.
big5: "big5hkscs",
big5hkscs: {
type: "_dbcs",
table: function () { return require("./tables/cp950.json").concat(require("./tables/big5-added.json")) },
encodeSkipVals: [
// Although Encoding Standard says we should avoid encoding to HKSCS area (See Step 1 of
// https://encoding.spec.whatwg.org/#index-big5-pointer), we still do it to increase compatibility with ICU.
// But if a single unicode point can be encoded both as HKSCS and regular Big5, we prefer the latter.
0x8e69, 0x8e6f, 0x8e7e, 0x8eab, 0x8eb4, 0x8ecd, 0x8ed0, 0x8f57, 0x8f69, 0x8f6e, 0x8fcb, 0x8ffe,
0x906d, 0x907a, 0x90c4, 0x90dc, 0x90f1, 0x91bf, 0x92af, 0x92b0, 0x92b1, 0x92b2, 0x92d1, 0x9447, 0x94ca,
0x95d9, 0x96fc, 0x9975, 0x9b76, 0x9b78, 0x9b7b, 0x9bc6, 0x9bde, 0x9bec, 0x9bf6, 0x9c42, 0x9c53, 0x9c62,
0x9c68, 0x9c6b, 0x9c77, 0x9cbc, 0x9cbd, 0x9cd0, 0x9d57, 0x9d5a, 0x9dc4, 0x9def, 0x9dfb, 0x9ea9, 0x9eef,
0x9efd, 0x9f60, 0x9fcb, 0xa077, 0xa0dc, 0xa0df, 0x8fcc, 0x92c8, 0x9644, 0x96ed,
// == Big5/Taiwan/Hong Kong ================================================
// There are lots of tables for Big5 and cp950. Please see the following links for history:
// http://moztw.org/docs/big5/ http://www.haible.de/bruno/charsets/conversion-tables/Big5.html
// Variations, in roughly number of defined chars:
// * Windows CP 950: Microsoft variant of Big5. Canonical: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
// * Windows CP 951: Microsoft variant of Big5-HKSCS-2001. Seems to be never public. http://me.abelcheung.org/articles/research/what-is-cp951/
// * Big5-2003 (Taiwan standard) almost superset of cp950.
// * Unicode-at-on (UAO) / Mozilla 1.8. Falling out of use on the Web. Not supported by other browsers.
// * Big5-HKSCS (-2001, -2004, -2008). Hong Kong standard.
// many unicode code points moved from PUA to Supplementary plane (U+2XXXX) over the years.
// Plus, it has 4 combining sequences.
// Seems that Mozilla refused to support it for 10 yrs. https://bugzilla.mozilla.org/show_bug.cgi?id=162431 https://bugzilla.mozilla.org/show_bug.cgi?id=310299
// because big5-hkscs is the only encoding to include astral characters in non-algorithmic way.
// Implementations are not consistent within browsers; sometimes labeled as just big5.
// MS Internet Explorer switches from big5 to big5-hkscs when a patch applied.
// Great discussion & recap of what's going on https://bugzilla.mozilla.org/show_bug.cgi?id=912470#c31
// In the encoder, it might make sense to support encoding old PUA mappings to Big5 bytes seq-s.
// Official spec: http://www.ogcio.gov.hk/en/business/tech_promotion/ccli/terms/doc/2003cmp_2008.txt
// http://www.ogcio.gov.hk/tc/business/tech_promotion/ccli/terms/doc/hkscs-2008-big5-iso.txt
//
// Current understanding of how to deal with Big5(-HKSCS) is in the Encoding Standard, http://encoding.spec.whatwg.org/#big5-encoder
// Unicode mapping (http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT) is said to be wrong.
// Step 2 of https://encoding.spec.whatwg.org/#index-big5-pointer: Use last pointer for U+2550, U+255E, U+2561, U+256A, U+5341, or U+5345
0xa2a4, 0xa2a5, 0xa2a7, 0xa2a6, 0xa2cc, 0xa2ce
]
},
'windows950': 'cp950',
'ms950': 'cp950',
'950': 'cp950',
'cp950': {
type: '_dbcs',
table: function() { return require('./tables/cp950.json') },
},
cnbig5: "big5hkscs",
csbig5: "big5hkscs",
xxbig5: "big5hkscs"
}
// Big5 has many variations and is an extension of cp950. We use Encoding Standard's as a consensus.
'big5': 'big5hkscs',
'big5hkscs': {
type: '_dbcs',
table: function() { return require('./tables/cp950.json').concat(require('./tables/big5-added.json')) },
encodeSkipVals: [0xa2cc],
},
'cnbig5': 'big5hkscs',
'csbig5': 'big5hkscs',
'xxbig5': 'big5hkscs',
};

View File

@@ -1,23 +1,22 @@
"use strict"
var mergeModules = require("../lib/helpers/merge-exports")
"use strict";
// Update this array if you add/rename/remove files in this directory.
// We support Browserify by skipping automatic module discovery and requiring modules directly.
var modules = [
require("./internal"),
require("./utf32"),
require("./utf16"),
require("./utf7"),
require("./sbcs-codec"),
require("./sbcs-data"),
require("./sbcs-data-generated"),
require("./dbcs-codec"),
require("./dbcs-data")
]
require("./internal"),
require("./utf16"),
require("./utf7"),
require("./sbcs-codec"),
require("./sbcs-data"),
require("./sbcs-data-generated"),
require("./dbcs-codec"),
require("./dbcs-data"),
];
// Put all encoding/alias/codec definitions to single object and export it.
// Put all encoding/alias/codec definitions to single object and export it.
for (var i = 0; i < modules.length; i++) {
var module = modules[i]
mergeModules(exports, module)
var module = modules[i];
for (var enc in module)
if (Object.prototype.hasOwnProperty.call(module, enc))
exports[enc] = module[enc];
}

View File

@@ -1,218 +1,188 @@
"use strict"
var Buffer = require("safer-buffer").Buffer
"use strict";
var Buffer = require("safer-buffer").Buffer;
// Export Node.js internal encodings.
module.exports = {
// Encodings
utf8: { type: "_internal", bomAware: true },
cesu8: { type: "_internal", bomAware: true },
unicode11utf8: "utf8",
// Encodings
utf8: { type: "_internal", bomAware: true},
cesu8: { type: "_internal", bomAware: true},
unicode11utf8: "utf8",
ucs2: { type: "_internal", bomAware: true },
utf16le: "ucs2",
ucs2: { type: "_internal", bomAware: true},
utf16le: "ucs2",
binary: { type: "_internal" },
base64: { type: "_internal" },
hex: { type: "_internal" },
binary: { type: "_internal" },
base64: { type: "_internal" },
hex: { type: "_internal" },
// Codec.
_internal: InternalCodec
}
// Codec.
_internal: InternalCodec,
};
// ------------------------------------------------------------------------------
//------------------------------------------------------------------------------
function InternalCodec (codecOptions, iconv) {
this.enc = codecOptions.encodingName
this.bomAware = codecOptions.bomAware
function InternalCodec(codecOptions, iconv) {
this.enc = codecOptions.encodingName;
this.bomAware = codecOptions.bomAware;
if (this.enc === "base64") { this.encoder = InternalEncoderBase64 } else if (this.enc === "utf8") { this.encoder = InternalEncoderUtf8 } else if (this.enc === "cesu8") {
this.enc = "utf8" // Use utf8 for decoding.
this.encoder = InternalEncoderCesu8
if (this.enc === "base64")
this.encoder = InternalEncoderBase64;
else if (this.enc === "cesu8") {
this.enc = "utf8"; // Use utf8 for decoding.
this.encoder = InternalEncoderCesu8;
// Add decoder for versions of Node not supporting CESU-8
if (Buffer.from("eda0bdedb2a9", "hex").toString() !== "💩") {
this.decoder = InternalDecoderCesu8
this.defaultCharUnicode = iconv.defaultCharUnicode
// Add decoder for versions of Node not supporting CESU-8
if (Buffer.from('eda0bdedb2a9', 'hex').toString() !== '💩') {
this.decoder = InternalDecoderCesu8;
this.defaultCharUnicode = iconv.defaultCharUnicode;
}
}
}
}
InternalCodec.prototype.encoder = InternalEncoder
InternalCodec.prototype.decoder = InternalDecoder
InternalCodec.prototype.encoder = InternalEncoder;
InternalCodec.prototype.decoder = InternalDecoder;
// ------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// We use node.js internal decoder. Its signature is the same as ours.
var StringDecoder = require("string_decoder").StringDecoder
var StringDecoder = require('string_decoder').StringDecoder;
function InternalDecoder (options, codec) {
this.decoder = new StringDecoder(codec.enc)
if (!StringDecoder.prototype.end) // Node v0.8 doesn't have this method.
StringDecoder.prototype.end = function() {};
function InternalDecoder(options, codec) {
StringDecoder.call(this, codec.enc);
}
InternalDecoder.prototype.write = function (buf) {
if (!Buffer.isBuffer(buf)) {
buf = Buffer.from(buf)
}
InternalDecoder.prototype = StringDecoder.prototype;
return this.decoder.write(buf)
}
InternalDecoder.prototype.end = function () {
return this.decoder.end()
}
// ------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Encoder is mostly trivial
function InternalEncoder (options, codec) {
this.enc = codec.enc
function InternalEncoder(options, codec) {
this.enc = codec.enc;
}
InternalEncoder.prototype.write = function (str) {
return Buffer.from(str, this.enc)
InternalEncoder.prototype.write = function(str) {
return Buffer.from(str, this.enc);
}
InternalEncoder.prototype.end = function () {
InternalEncoder.prototype.end = function() {
}
// ------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Except base64 encoder, which must keep its state.
function InternalEncoderBase64 (options, codec) {
this.prevStr = ""
function InternalEncoderBase64(options, codec) {
this.prevStr = '';
}
InternalEncoderBase64.prototype.write = function (str) {
str = this.prevStr + str
var completeQuads = str.length - (str.length % 4)
this.prevStr = str.slice(completeQuads)
str = str.slice(0, completeQuads)
InternalEncoderBase64.prototype.write = function(str) {
str = this.prevStr + str;
var completeQuads = str.length - (str.length % 4);
this.prevStr = str.slice(completeQuads);
str = str.slice(0, completeQuads);
return Buffer.from(str, "base64")
return Buffer.from(str, "base64");
}
InternalEncoderBase64.prototype.end = function () {
return Buffer.from(this.prevStr, "base64")
InternalEncoderBase64.prototype.end = function() {
return Buffer.from(this.prevStr, "base64");
}
// ------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// CESU-8 encoder is also special.
function InternalEncoderCesu8 (options, codec) {
function InternalEncoderCesu8(options, codec) {
}
InternalEncoderCesu8.prototype.write = function (str) {
var buf = Buffer.alloc(str.length * 3); var bufIdx = 0
for (var i = 0; i < str.length; i++) {
var charCode = str.charCodeAt(i)
// Naive implementation, but it works because CESU-8 is especially easy
// to convert from UTF-16 (which all JS strings are encoded in).
if (charCode < 0x80) { buf[bufIdx++] = charCode } else if (charCode < 0x800) {
buf[bufIdx++] = 0xC0 + (charCode >>> 6)
buf[bufIdx++] = 0x80 + (charCode & 0x3f)
} else { // charCode will always be < 0x10000 in javascript.
buf[bufIdx++] = 0xE0 + (charCode >>> 12)
buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f)
buf[bufIdx++] = 0x80 + (charCode & 0x3f)
InternalEncoderCesu8.prototype.write = function(str) {
var buf = Buffer.alloc(str.length * 3), bufIdx = 0;
for (var i = 0; i < str.length; i++) {
var charCode = str.charCodeAt(i);
// Naive implementation, but it works because CESU-8 is especially easy
// to convert from UTF-16 (which all JS strings are encoded in).
if (charCode < 0x80)
buf[bufIdx++] = charCode;
else if (charCode < 0x800) {
buf[bufIdx++] = 0xC0 + (charCode >>> 6);
buf[bufIdx++] = 0x80 + (charCode & 0x3f);
}
else { // charCode will always be < 0x10000 in javascript.
buf[bufIdx++] = 0xE0 + (charCode >>> 12);
buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f);
buf[bufIdx++] = 0x80 + (charCode & 0x3f);
}
}
}
return buf.slice(0, bufIdx)
return buf.slice(0, bufIdx);
}
InternalEncoderCesu8.prototype.end = function () {
InternalEncoderCesu8.prototype.end = function() {
}
// ------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// CESU-8 decoder is not implemented in Node v4.0+
function InternalDecoderCesu8 (options, codec) {
this.acc = 0
this.contBytes = 0
this.accBytes = 0
this.defaultCharUnicode = codec.defaultCharUnicode
function InternalDecoderCesu8(options, codec) {
this.acc = 0;
this.contBytes = 0;
this.accBytes = 0;
this.defaultCharUnicode = codec.defaultCharUnicode;
}
InternalDecoderCesu8.prototype.write = function (buf) {
var acc = this.acc; var contBytes = this.contBytes; var accBytes = this.accBytes
var res = ""
for (var i = 0; i < buf.length; i++) {
var curByte = buf[i]
if ((curByte & 0xC0) !== 0x80) { // Leading byte
if (contBytes > 0) { // Previous code is invalid
res += this.defaultCharUnicode
contBytes = 0
}
InternalDecoderCesu8.prototype.write = function(buf) {
var acc = this.acc, contBytes = this.contBytes, accBytes = this.accBytes,
res = '';
for (var i = 0; i < buf.length; i++) {
var curByte = buf[i];
if ((curByte & 0xC0) !== 0x80) { // Leading byte
if (contBytes > 0) { // Previous code is invalid
res += this.defaultCharUnicode;
contBytes = 0;
}
if (curByte < 0x80) { // Single-byte code
res += String.fromCharCode(curByte)
} else if (curByte < 0xE0) { // Two-byte code
acc = curByte & 0x1F
contBytes = 1; accBytes = 1
} else if (curByte < 0xF0) { // Three-byte code
acc = curByte & 0x0F
contBytes = 2; accBytes = 1
} else { // Four or more are not supported for CESU-8.
res += this.defaultCharUnicode
}
} else { // Continuation byte
if (contBytes > 0) { // We're waiting for it.
acc = (acc << 6) | (curByte & 0x3f)
contBytes--; accBytes++
if (contBytes === 0) {
// Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
if (accBytes === 2 && acc < 0x80 && acc > 0) {
res += this.defaultCharUnicode
} else if (accBytes === 3 && acc < 0x800) {
res += this.defaultCharUnicode
} else {
// Actually add character.
res += String.fromCharCode(acc)
}
if (curByte < 0x80) { // Single-byte code
res += String.fromCharCode(curByte);
} else if (curByte < 0xE0) { // Two-byte code
acc = curByte & 0x1F;
contBytes = 1; accBytes = 1;
} else if (curByte < 0xF0) { // Three-byte code
acc = curByte & 0x0F;
contBytes = 2; accBytes = 1;
} else { // Four or more are not supported for CESU-8.
res += this.defaultCharUnicode;
}
} else { // Continuation byte
if (contBytes > 0) { // We're waiting for it.
acc = (acc << 6) | (curByte & 0x3f);
contBytes--; accBytes++;
if (contBytes === 0) {
// Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
if (accBytes === 2 && acc < 0x80 && acc > 0)
res += this.defaultCharUnicode;
else if (accBytes === 3 && acc < 0x800)
res += this.defaultCharUnicode;
else
// Actually add character.
res += String.fromCharCode(acc);
}
} else { // Unexpected continuation byte
res += this.defaultCharUnicode;
}
}
} else { // Unexpected continuation byte
res += this.defaultCharUnicode
}
}
}
this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes
return res
this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes;
return res;
}
// Flushes the CESU-8 decoder at end of input.
// Returns `defaultCharUnicode` when the input stopped while continuation bytes
// were still expected (contBytes > 0); otherwise returns an empty string.
InternalDecoderCesu8.prototype.end = function () {
  // Seed with an empty string. Seeding with the number 0 made the `+=` below
  // produce "0" followed by the replacement character. The empty result stays
  // falsy, exactly as the previous 0 was.
  var res = ""
  if (this.contBytes > 0) { res += this.defaultCharUnicode }
  return res
}
// ------------------------------------------------------------------------------
// check the chunk boundaries for surrogate pair
// Encoder state for UTF-8: `highSurrogate` starts out as an empty string and is
// read and updated by the prototype `write`/`end` methods.
// NOTE(review): `options` and `codec` are unused and `this.enc` is never assigned
// here, although `write`/`end` pass `this.enc` to Buffer.from — presumably relying
// on Buffer.from's default encoding; confirm.
function InternalEncoderUtf8 (options, codec) {
this.highSurrogate = ""
}
// Encodes one chunk of a string stream.
// A high surrogate (0xD800..0xDBFF) sitting at the very end of the chunk is
// withheld in `this.highSurrogate` and glued to the front of the next chunk, so
// a surrogate pair split across two writes is handed to Buffer.from together.
InternalEncoderUtf8.prototype.write = function (str) {
  var text = str

  // Re-attach whatever was withheld by the previous call.
  if (this.highSurrogate) {
    text = this.highSurrogate + text
    this.highSurrogate = ""
  }

  if (text.length > 0) {
    var lastIdx = text.length - 1
    var tailCode = text.charCodeAt(lastIdx)
    var endsWithHighSurrogate = tailCode >= 0xd800 && tailCode < 0xdc00

    if (endsWithHighSurrogate) {
      // Hold the dangling high surrogate back until its partner arrives.
      this.highSurrogate = text[lastIdx]
      text = text.slice(0, lastIdx)
    }
  }

  return Buffer.from(text, this.enc)
}
InternalEncoderUtf8.prototype.end = function () {
if (this.highSurrogate) {
var str = this.highSurrogate
this.highSurrogate = ""
return Buffer.from(str, this.enc)
}
// Flushes the CESU-8 decoder at end of input.
// Returns `defaultCharUnicode` when the input stopped while continuation bytes
// were still expected (contBytes > 0); otherwise returns an empty string.
InternalDecoderCesu8.prototype.end = function() {
    // Seed with an empty string. Seeding with the number 0 made the `+=` below
    // produce "0" followed by the replacement character. The empty result stays
    // falsy, exactly as the previous 0 was.
    var res = '';
    if (this.contBytes > 0)
        res += this.defaultCharUnicode;
    return res;
}

View File

@@ -1,75 +1,72 @@
"use strict"
var Buffer = require("safer-buffer").Buffer
"use strict";
var Buffer = require("safer-buffer").Buffer;
// Single-byte codec. Needs a 'chars' string parameter that contains 256 or 128 chars that
// correspond to encoded bytes (if 128 - then lower half is ASCII).
// correspond to encoded bytes (if 128 - then lower half is ASCII).
exports._sbcs = SBCSCodec
function SBCSCodec (codecOptions, iconv) {
if (!codecOptions) {
throw new Error("SBCS codec is called without the data.")
}
// Prepare char buffer for decoding.
if (!codecOptions.chars || (codecOptions.chars.length !== 128 && codecOptions.chars.length !== 256)) {
throw new Error("Encoding '" + codecOptions.type + "' has incorrect 'chars' (must be of len 128 or 256)")
}
if (codecOptions.chars.length === 128) {
var asciiString = ""
for (var i = 0; i < 128; i++) {
asciiString += String.fromCharCode(i)
exports._sbcs = SBCSCodec;
function SBCSCodec(codecOptions, iconv) {
if (!codecOptions)
throw new Error("SBCS codec is called without the data.")
// Prepare char buffer for decoding.
if (!codecOptions.chars || (codecOptions.chars.length !== 128 && codecOptions.chars.length !== 256))
throw new Error("Encoding '"+codecOptions.type+"' has incorrect 'chars' (must be of len 128 or 256)");
if (codecOptions.chars.length === 128) {
var asciiString = "";
for (var i = 0; i < 128; i++)
asciiString += String.fromCharCode(i);
codecOptions.chars = asciiString + codecOptions.chars;
}
codecOptions.chars = asciiString + codecOptions.chars
}
this.decodeBuf = Buffer.from(codecOptions.chars, "ucs2")
this.decodeBuf = Buffer.from(codecOptions.chars, 'ucs2');
// Encoding buffer.
var encodeBuf = Buffer.alloc(65536, iconv.defaultCharSingleByte.charCodeAt(0));
// Encoding buffer.
var encodeBuf = Buffer.alloc(65536, iconv.defaultCharSingleByte.charCodeAt(0))
for (var i = 0; i < codecOptions.chars.length; i++)
encodeBuf[codecOptions.chars.charCodeAt(i)] = i;
for (var i = 0; i < codecOptions.chars.length; i++) {
encodeBuf[codecOptions.chars.charCodeAt(i)] = i
}
this.encodeBuf = encodeBuf
this.encodeBuf = encodeBuf;
}
SBCSCodec.prototype.encoder = SBCSEncoder
SBCSCodec.prototype.decoder = SBCSDecoder
SBCSCodec.prototype.encoder = SBCSEncoder;
SBCSCodec.prototype.decoder = SBCSDecoder;
function SBCSEncoder (options, codec) {
this.encodeBuf = codec.encodeBuf
function SBCSEncoder(options, codec) {
this.encodeBuf = codec.encodeBuf;
}
SBCSEncoder.prototype.write = function (str) {
var buf = Buffer.alloc(str.length)
for (var i = 0; i < str.length; i++) {
buf[i] = this.encodeBuf[str.charCodeAt(i)]
}
return buf
SBCSEncoder.prototype.write = function(str) {
var buf = Buffer.alloc(str.length);
for (var i = 0; i < str.length; i++)
buf[i] = this.encodeBuf[str.charCodeAt(i)];
return buf;
}
SBCSEncoder.prototype.end = function () {
SBCSEncoder.prototype.end = function() {
}
function SBCSDecoder (options, codec) {
this.decodeBuf = codec.decodeBuf
function SBCSDecoder(options, codec) {
this.decodeBuf = codec.decodeBuf;
}
SBCSDecoder.prototype.write = function (buf) {
// Strings are immutable in JS -> we use ucs2 buffer to speed up computations.
var decodeBuf = this.decodeBuf
var newBuf = Buffer.alloc(buf.length * 2)
var idx1 = 0; var idx2 = 0
for (var i = 0; i < buf.length; i++) {
idx1 = buf[i] * 2; idx2 = i * 2
newBuf[idx2] = decodeBuf[idx1]
newBuf[idx2 + 1] = decodeBuf[idx1 + 1]
}
return newBuf.toString("ucs2")
SBCSDecoder.prototype.write = function(buf) {
// Strings are immutable in JS -> we use ucs2 buffer to speed up computations.
var decodeBuf = this.decodeBuf;
var newBuf = Buffer.alloc(buf.length*2);
var idx1 = 0, idx2 = 0;
for (var i = 0; i < buf.length; i++) {
idx1 = buf[i]*2; idx2 = i*2;
newBuf[idx2] = decodeBuf[idx1];
newBuf[idx2+1] = decodeBuf[idx1+1];
}
return newBuf.toString('ucs2');
}
SBCSDecoder.prototype.end = function () {
SBCSDecoder.prototype.end = function() {
}

View File

@@ -1,178 +1,174 @@
"use strict"
"use strict";
// Manually added data to be used by sbcs codec in addition to generated one.
module.exports = {
// Not supported by iconv, not sure why.
10029: "maccenteuro",
maccenteuro: {
type: "_sbcs",
chars: "ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ"
},
// Not supported by iconv, not sure why.
"10029": "maccenteuro",
"maccenteuro": {
"type": "_sbcs",
"chars": "ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ"
},
808: "cp808",
ibm808: "cp808",
cp808: {
type: "_sbcs",
chars: "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№€■ "
},
"808": "cp808",
"ibm808": "cp808",
"cp808": {
"type": "_sbcs",
"chars": "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№€■ "
},
mik: {
type: "_sbcs",
chars: "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя└┴┬├─┼╣║╚╔╩╦╠═╬┐░▒▓│┤№§╗╝┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ "
},
"mik": {
"type": "_sbcs",
"chars": "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя└┴┬├─┼╣║╚╔╩╦╠═╬┐░▒▓│┤№§╗╝┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ "
},
cp720: {
type: "_sbcs",
chars: "\x80\x81éâ\x84à\x86çêëèïî\x8d\x8e\x8f\x90\u0651\u0652ô¤ـûùءآأؤ£إئابةتثجحخدذرزسشص«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ضطظعغفµقكلمنهوىي≡\u064b\u064c\u064d\u064e\u064f\u0650≈°∙·√ⁿ²■\u00a0"
},
// Aliases of generated encodings.
"ascii8bit": "ascii",
"usascii": "ascii",
"ansix34": "ascii",
"ansix341968": "ascii",
"ansix341986": "ascii",
"csascii": "ascii",
"cp367": "ascii",
"ibm367": "ascii",
"isoir6": "ascii",
"iso646us": "ascii",
"iso646irv": "ascii",
"us": "ascii",
// Aliases of generated encodings.
ascii8bit: "ascii",
usascii: "ascii",
ansix34: "ascii",
ansix341968: "ascii",
ansix341986: "ascii",
csascii: "ascii",
cp367: "ascii",
ibm367: "ascii",
isoir6: "ascii",
iso646us: "ascii",
iso646irv: "ascii",
us: "ascii",
"latin1": "iso88591",
"latin2": "iso88592",
"latin3": "iso88593",
"latin4": "iso88594",
"latin5": "iso88599",
"latin6": "iso885910",
"latin7": "iso885913",
"latin8": "iso885914",
"latin9": "iso885915",
"latin10": "iso885916",
latin1: "iso88591",
latin2: "iso88592",
latin3: "iso88593",
latin4: "iso88594",
latin5: "iso88599",
latin6: "iso885910",
latin7: "iso885913",
latin8: "iso885914",
latin9: "iso885915",
latin10: "iso885916",
"csisolatin1": "iso88591",
"csisolatin2": "iso88592",
"csisolatin3": "iso88593",
"csisolatin4": "iso88594",
"csisolatincyrillic": "iso88595",
"csisolatinarabic": "iso88596",
"csisolatingreek" : "iso88597",
"csisolatinhebrew": "iso88598",
"csisolatin5": "iso88599",
"csisolatin6": "iso885910",
csisolatin1: "iso88591",
csisolatin2: "iso88592",
csisolatin3: "iso88593",
csisolatin4: "iso88594",
csisolatincyrillic: "iso88595",
csisolatinarabic: "iso88596",
csisolatingreek: "iso88597",
csisolatinhebrew: "iso88598",
csisolatin5: "iso88599",
csisolatin6: "iso885910",
"l1": "iso88591",
"l2": "iso88592",
"l3": "iso88593",
"l4": "iso88594",
"l5": "iso88599",
"l6": "iso885910",
"l7": "iso885913",
"l8": "iso885914",
"l9": "iso885915",
"l10": "iso885916",
l1: "iso88591",
l2: "iso88592",
l3: "iso88593",
l4: "iso88594",
l5: "iso88599",
l6: "iso885910",
l7: "iso885913",
l8: "iso885914",
l9: "iso885915",
l10: "iso885916",
"isoir14": "iso646jp",
"isoir57": "iso646cn",
"isoir100": "iso88591",
"isoir101": "iso88592",
"isoir109": "iso88593",
"isoir110": "iso88594",
"isoir144": "iso88595",
"isoir127": "iso88596",
"isoir126": "iso88597",
"isoir138": "iso88598",
"isoir148": "iso88599",
"isoir157": "iso885910",
"isoir166": "tis620",
"isoir179": "iso885913",
"isoir199": "iso885914",
"isoir203": "iso885915",
"isoir226": "iso885916",
isoir14: "iso646jp",
isoir57: "iso646cn",
isoir100: "iso88591",
isoir101: "iso88592",
isoir109: "iso88593",
isoir110: "iso88594",
isoir144: "iso88595",
isoir127: "iso88596",
isoir126: "iso88597",
isoir138: "iso88598",
isoir148: "iso88599",
isoir157: "iso885910",
isoir166: "tis620",
isoir179: "iso885913",
isoir199: "iso885914",
isoir203: "iso885915",
isoir226: "iso885916",
"cp819": "iso88591",
"ibm819": "iso88591",
cp819: "iso88591",
ibm819: "iso88591",
"cyrillic": "iso88595",
cyrillic: "iso88595",
"arabic": "iso88596",
"arabic8": "iso88596",
"ecma114": "iso88596",
"asmo708": "iso88596",
arabic: "iso88596",
arabic8: "iso88596",
ecma114: "iso88596",
asmo708: "iso88596",
"greek" : "iso88597",
"greek8" : "iso88597",
"ecma118" : "iso88597",
"elot928" : "iso88597",
greek: "iso88597",
greek8: "iso88597",
ecma118: "iso88597",
elot928: "iso88597",
"hebrew": "iso88598",
"hebrew8": "iso88598",
hebrew: "iso88598",
hebrew8: "iso88598",
"turkish": "iso88599",
"turkish8": "iso88599",
turkish: "iso88599",
turkish8: "iso88599",
"thai": "iso885911",
"thai8": "iso885911",
thai: "iso885911",
thai8: "iso885911",
"celtic": "iso885914",
"celtic8": "iso885914",
"isoceltic": "iso885914",
celtic: "iso885914",
celtic8: "iso885914",
isoceltic: "iso885914",
"tis6200": "tis620",
"tis62025291": "tis620",
"tis62025330": "tis620",
tis6200: "tis620",
tis62025291: "tis620",
tis62025330: "tis620",
"10000": "macroman",
"10006": "macgreek",
"10007": "maccyrillic",
"10079": "maciceland",
"10081": "macturkish",
10000: "macroman",
10006: "macgreek",
10007: "maccyrillic",
10079: "maciceland",
10081: "macturkish",
"cspc8codepage437": "cp437",
"cspc775baltic": "cp775",
"cspc850multilingual": "cp850",
"cspcp852": "cp852",
"cspc862latinhebrew": "cp862",
"cpgr": "cp869",
cspc8codepage437: "cp437",
cspc775baltic: "cp775",
cspc850multilingual: "cp850",
cspcp852: "cp852",
cspc862latinhebrew: "cp862",
cpgr: "cp869",
"msee": "cp1250",
"mscyrl": "cp1251",
"msansi": "cp1252",
"msgreek": "cp1253",
"msturk": "cp1254",
"mshebr": "cp1255",
"msarab": "cp1256",
"winbaltrim": "cp1257",
msee: "cp1250",
mscyrl: "cp1251",
msansi: "cp1252",
msgreek: "cp1253",
msturk: "cp1254",
mshebr: "cp1255",
msarab: "cp1256",
winbaltrim: "cp1257",
"cp20866": "koi8r",
"20866": "koi8r",
"ibm878": "koi8r",
"cskoi8r": "koi8r",
cp20866: "koi8r",
20866: "koi8r",
ibm878: "koi8r",
cskoi8r: "koi8r",
"cp21866": "koi8u",
"21866": "koi8u",
"ibm1168": "koi8u",
cp21866: "koi8u",
21866: "koi8u",
ibm1168: "koi8u",
"strk10482002": "rk1048",
strk10482002: "rk1048",
"tcvn5712": "tcvn",
"tcvn57121": "tcvn",
tcvn5712: "tcvn",
tcvn57121: "tcvn",
"gb198880": "iso646cn",
"cn": "iso646cn",
gb198880: "iso646cn",
cn: "iso646cn",
"csiso14jisc6220ro": "iso646jp",
"jisc62201969ro": "iso646jp",
"jp": "iso646jp",
csiso14jisc6220ro: "iso646jp",
jisc62201969ro: "iso646jp",
jp: "iso646jp",
"cshproman8": "hproman8",
"r8": "hproman8",
"roman8": "hproman8",
"xroman8": "hproman8",
"ibm1051": "hproman8",
cshproman8: "hproman8",
r8: "hproman8",
roman8: "hproman8",
xroman8: "hproman8",
ibm1051: "hproman8",
"mac": "macintosh",
"csmacintosh": "macintosh",
};
mac: "macintosh",
csmacintosh: "macintosh"
}

View File

@@ -27,7 +27,7 @@
["a7c2","",14],
["a7f2","",12],
["a896","",10],
["a8bc","ḿ"],
["a8bc",""],
["a8bf","ǹ"],
["a8c1",""],
["a8ea","",20],
@@ -51,6 +51,5 @@
["fca1","",93],
["fda1","",93],
["fe50","⺁⺄㑳㑇⺈⺋㖞㘚㘎⺌⺗㥮㤘㧏㧟㩳㧐㭎㱮㳠⺧⺪䁖䅟⺮䌷⺳⺶⺷䎱䎬⺻䏝䓖䙡䙌"],
["fe80","䜣䜩䝼䞍⻊䥇䥺䥽䦂䦃䦅䦆䦟䦛䦷䦶䲣䲟䲠䲡䱷䲢䴓",6,"䶮",93],
["8135f437",""]
["fe80","䜣䜩䝼䞍⻊䥇䥺䥽䦂䦃䦅䦆䦟䦛䦷䦶䲣䲟䲠䲡䱷䲢䴓",6,"䶮",93]
]

View File

@@ -1,66 +1,69 @@
"use strict"
var Buffer = require("safer-buffer").Buffer
"use strict";
var Buffer = require("safer-buffer").Buffer;
// Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
// == UTF16-BE codec. ==========================================================
exports.utf16be = Utf16BECodec
function Utf16BECodec () {
exports.utf16be = Utf16BECodec;
function Utf16BECodec() {
}
Utf16BECodec.prototype.encoder = Utf16BEEncoder
Utf16BECodec.prototype.decoder = Utf16BEDecoder
Utf16BECodec.prototype.bomAware = true
Utf16BECodec.prototype.encoder = Utf16BEEncoder;
Utf16BECodec.prototype.decoder = Utf16BEDecoder;
Utf16BECodec.prototype.bomAware = true;
// -- Encoding
function Utf16BEEncoder () {
function Utf16BEEncoder() {
}
Utf16BEEncoder.prototype.write = function (str) {
var buf = Buffer.from(str, "ucs2")
for (var i = 0; i < buf.length; i += 2) {
var tmp = buf[i]; buf[i] = buf[i + 1]; buf[i + 1] = tmp
}
return buf
Utf16BEEncoder.prototype.write = function(str) {
var buf = Buffer.from(str, 'ucs2');
for (var i = 0; i < buf.length; i += 2) {
var tmp = buf[i]; buf[i] = buf[i+1]; buf[i+1] = tmp;
}
return buf;
}
Utf16BEEncoder.prototype.end = function () {
Utf16BEEncoder.prototype.end = function() {
}
// -- Decoding
function Utf16BEDecoder () {
this.overflowByte = -1
function Utf16BEDecoder() {
this.overflowByte = -1;
}
Utf16BEDecoder.prototype.write = function (buf) {
if (buf.length == 0) { return "" }
Utf16BEDecoder.prototype.write = function(buf) {
if (buf.length == 0)
return '';
var buf2 = Buffer.alloc(buf.length + 1)
var i = 0; var j = 0
var buf2 = Buffer.alloc(buf.length + 1),
i = 0, j = 0;
if (this.overflowByte !== -1) {
buf2[0] = buf[0]
buf2[1] = this.overflowByte
i = 1; j = 2
}
if (this.overflowByte !== -1) {
buf2[0] = buf[0];
buf2[1] = this.overflowByte;
i = 1; j = 2;
}
for (; i < buf.length - 1; i += 2, j += 2) {
buf2[j] = buf[i + 1]
buf2[j + 1] = buf[i]
}
for (; i < buf.length-1; i += 2, j+= 2) {
buf2[j] = buf[i+1];
buf2[j+1] = buf[i];
}
this.overflowByte = (i == buf.length - 1) ? buf[buf.length - 1] : -1
this.overflowByte = (i == buf.length-1) ? buf[buf.length-1] : -1;
return buf2.slice(0, j).toString("ucs2")
return buf2.slice(0, j).toString('ucs2');
}
Utf16BEDecoder.prototype.end = function () {
this.overflowByte = -1
Utf16BEDecoder.prototype.end = function() {
}
// == UTF-16 codec =============================================================
// Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
// Defaults to UTF-16LE, as it's prevalent and default in Node.
@@ -69,119 +72,106 @@ Utf16BEDecoder.prototype.end = function () {
// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
exports.utf16 = Utf16Codec
function Utf16Codec (codecOptions, iconv) {
this.iconv = iconv
exports.utf16 = Utf16Codec;
function Utf16Codec(codecOptions, iconv) {
this.iconv = iconv;
}
Utf16Codec.prototype.encoder = Utf16Encoder
Utf16Codec.prototype.decoder = Utf16Decoder
Utf16Codec.prototype.encoder = Utf16Encoder;
Utf16Codec.prototype.decoder = Utf16Decoder;
// -- Encoding (pass-through)
function Utf16Encoder (options, codec) {
options = options || {}
if (options.addBOM === undefined) { options.addBOM = true }
this.encoder = codec.iconv.getEncoder("utf-16le", options)
function Utf16Encoder(options, codec) {
options = options || {};
if (options.addBOM === undefined)
options.addBOM = true;
this.encoder = codec.iconv.getEncoder('utf-16le', options);
}
Utf16Encoder.prototype.write = function (str) {
return this.encoder.write(str)
Utf16Encoder.prototype.write = function(str) {
return this.encoder.write(str);
}
Utf16Encoder.prototype.end = function () {
return this.encoder.end()
Utf16Encoder.prototype.end = function() {
return this.encoder.end();
}
// -- Decoding
function Utf16Decoder (options, codec) {
this.decoder = null
this.initialBufs = []
this.initialBufsLen = 0
function Utf16Decoder(options, codec) {
this.decoder = null;
this.initialBytes = [];
this.initialBytesLen = 0;
this.options = options || {}
this.iconv = codec.iconv
this.options = options || {};
this.iconv = codec.iconv;
}
Utf16Decoder.prototype.write = function (buf) {
if (!this.decoder) {
// Codec is not chosen yet. Accumulate initial bytes.
this.initialBufs.push(buf)
this.initialBufsLen += buf.length
Utf16Decoder.prototype.write = function(buf) {
if (!this.decoder) {
// Codec is not chosen yet. Accumulate initial bytes.
this.initialBytes.push(buf);
this.initialBytesLen += buf.length;
if (this.initialBytesLen < 16) // We need more bytes to use space heuristic (see below)
return '';
if (this.initialBufsLen < 16) // We need more bytes to use space heuristic (see below)
{ return "" }
// We have enough bytes -> detect endianness.
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding)
this.decoder = this.iconv.getDecoder(encoding, this.options)
var resStr = ""
for (var i = 0; i < this.initialBufs.length; i++) { resStr += this.decoder.write(this.initialBufs[i]) }
this.initialBufs.length = this.initialBufsLen = 0
return resStr
}
return this.decoder.write(buf)
}
Utf16Decoder.prototype.end = function () {
if (!this.decoder) {
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding)
this.decoder = this.iconv.getDecoder(encoding, this.options)
var resStr = ""
for (var i = 0; i < this.initialBufs.length; i++) { resStr += this.decoder.write(this.initialBufs[i]) }
var trail = this.decoder.end()
if (trail) { resStr += trail }
this.initialBufs.length = this.initialBufsLen = 0
return resStr
}
return this.decoder.end()
}
function detectEncoding (bufs, defaultEncoding) {
var b = []
var charsProcessed = 0
// Number of ASCII chars when decoded as LE or BE.
var asciiCharsLE = 0
var asciiCharsBE = 0
outerLoop:
for (var i = 0; i < bufs.length; i++) {
var buf = bufs[i]
for (var j = 0; j < buf.length; j++) {
b.push(buf[j])
if (b.length === 2) {
if (charsProcessed === 0) {
// Check BOM first.
if (b[0] === 0xFF && b[1] === 0xFE) return "utf-16le"
if (b[0] === 0xFE && b[1] === 0xFF) return "utf-16be"
}
if (b[0] === 0 && b[1] !== 0) asciiCharsBE++
if (b[0] !== 0 && b[1] === 0) asciiCharsLE++
b.length = 0
charsProcessed++
if (charsProcessed >= 100) {
break outerLoop
}
}
// We have enough bytes -> detect endianness.
var buf = Buffer.concat(this.initialBytes),
encoding = detectEncoding(buf, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
this.initialBytes.length = this.initialBytesLen = 0;
}
}
// Make decisions.
// Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
// So, we count ASCII as if it was LE or BE, and decide from that.
if (asciiCharsBE > asciiCharsLE) return "utf-16be"
if (asciiCharsBE < asciiCharsLE) return "utf-16le"
// Couldn't decide (likely all zeros or not enough data).
return defaultEncoding || "utf-16le"
return this.decoder.write(buf);
}
// Finishes decoding.
// If a concrete decoder was already chosen, its end() result is returned as is.
// Otherwise the accumulated initial chunks are concatenated, the encoding is
// detected from them, a decoder is created and stored, and everything buffered
// is decoded in one go (plus whatever the decoder emits on end()).
Utf16Decoder.prototype.end = function() {
    if (this.decoder)
        return this.decoder.end();

    var pending = Buffer.concat(this.initialBytes);
    var chosenEncoding = detectEncoding(pending, this.options.defaultEncoding);
    this.decoder = this.iconv.getDecoder(chosenEncoding, this.options);

    var head = this.decoder.write(pending);
    var tail = this.decoder.end();
    if (tail)
        return head + tail;
    return head;
}
// Picks 'utf-16le' or 'utf-16be' for the given bytes.
//  1. Fewer than 2 bytes: return defaultEncoding (or 'utf-16le' if not given).
//  2. A BOM in the first two bytes decides immediately.
//  3. Otherwise look at up to the first 64 bytes (whole 2-byte units only) and
//     count units that look like ASCII in each byte order: 00 xx counts for BE,
//     xx 00 counts for LE. The more frequent one wins; a tie falls back to the
//     default.
function detectEncoding(buf, defaultEncoding) {
    var fallback = defaultEncoding || 'utf-16le';

    if (!(buf.length >= 2))
        return fallback;

    // Check BOM.
    if (buf[0] == 0xFE && buf[1] == 0xFF)
        return 'utf-16be';
    if (buf[0] == 0xFF && buf[1] == 0xFE)
        return 'utf-16le';

    // No BOM found. Most content has ASCII chars (U+00**), while the opposite
    // (U+**00) is uncommon, so tally which byte order yields more of them.
    var evenLength = buf.length - (buf.length % 2);
    var scanLimit = Math.min(evenLength, 64);
    var beVotes = 0;
    var leVotes = 0;
    for (var pos = 0; pos < scanLimit; pos += 2) {
        var first = buf[pos];
        var second = buf[pos + 1];
        if (first === 0 && second !== 0)
            beVotes++;
        else if (first !== 0 && second === 0)
            leVotes++;
    }

    if (beVotes > leVotes)
        return 'utf-16be';
    if (beVotes < leVotes)
        return 'utf-16le';
    return fallback;
}

View File

@@ -1,122 +1,122 @@
"use strict"
var Buffer = require("safer-buffer").Buffer
"use strict";
var Buffer = require("safer-buffer").Buffer;
// UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
// See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
exports.utf7 = Utf7Codec
exports.unicode11utf7 = "utf7" // Alias UNICODE-1-1-UTF-7
function Utf7Codec (codecOptions, iconv) {
this.iconv = iconv
exports.utf7 = Utf7Codec;
exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
function Utf7Codec(codecOptions, iconv) {
this.iconv = iconv;
};
Utf7Codec.prototype.encoder = Utf7Encoder
Utf7Codec.prototype.decoder = Utf7Decoder
Utf7Codec.prototype.bomAware = true
Utf7Codec.prototype.encoder = Utf7Encoder;
Utf7Codec.prototype.decoder = Utf7Decoder;
Utf7Codec.prototype.bomAware = true;
// -- Encoding
// Why escape ()?./?
// eslint-disable-next-line no-useless-escape
var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g
var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g;
function Utf7Encoder (options, codec) {
this.iconv = codec.iconv
function Utf7Encoder(options, codec) {
this.iconv = codec.iconv;
}
Utf7Encoder.prototype.write = function (str) {
// Naive implementation.
// Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".
return Buffer.from(str.replace(nonDirectChars, function (chunk) {
return "+" + (chunk === "+"
? ""
: this.iconv.encode(chunk, "utf16-be").toString("base64").replace(/=+$/, "")) +
"-"
}.bind(this)))
Utf7Encoder.prototype.write = function(str) {
// Naive implementation.
// Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".
return Buffer.from(str.replace(nonDirectChars, function(chunk) {
return "+" + (chunk === '+' ? '' :
this.iconv.encode(chunk, 'utf16-be').toString('base64').replace(/=+$/, ''))
+ "-";
}.bind(this)));
}
Utf7Encoder.prototype.end = function () {
Utf7Encoder.prototype.end = function() {
}
// -- Decoding
function Utf7Decoder (options, codec) {
this.iconv = codec.iconv
this.inBase64 = false
this.base64Accum = ""
function Utf7Decoder(options, codec) {
this.iconv = codec.iconv;
this.inBase64 = false;
this.base64Accum = '';
}
// Why escape /?
// eslint-disable-next-line no-useless-escape
var base64Regex = /[A-Za-z0-9\/+]/
var base64Chars = []
for (var i = 0; i < 256; i++) { base64Chars[i] = base64Regex.test(String.fromCharCode(i)) }
var base64Regex = /[A-Za-z0-9\/+]/;
var base64Chars = [];
for (var i = 0; i < 256; i++)
base64Chars[i] = base64Regex.test(String.fromCharCode(i));
var plusChar = "+".charCodeAt(0)
var minusChar = "-".charCodeAt(0)
var andChar = "&".charCodeAt(0)
var plusChar = '+'.charCodeAt(0),
minusChar = '-'.charCodeAt(0),
andChar = '&'.charCodeAt(0);
Utf7Decoder.prototype.write = function (buf) {
var res = ""; var lastI = 0
var inBase64 = this.inBase64
var base64Accum = this.base64Accum
Utf7Decoder.prototype.write = function(buf) {
var res = "", lastI = 0,
inBase64 = this.inBase64,
base64Accum = this.base64Accum;
// The decoder is more involved as we must handle chunks in stream.
// The decoder is more involved as we must handle chunks in stream.
for (var i = 0; i < buf.length; i++) {
if (!inBase64) { // We're in direct mode.
// Write direct chars until '+'
if (buf[i] == plusChar) {
res += this.iconv.decode(buf.slice(lastI, i), "ascii") // Write direct chars.
lastI = i + 1
inBase64 = true
}
} else { // We decode base64.
if (!base64Chars[buf[i]]) { // Base64 ended.
if (i == lastI && buf[i] == minusChar) { // "+-" -> "+"
res += "+"
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI, i), "ascii")
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
for (var i = 0; i < buf.length; i++) {
if (!inBase64) { // We're in direct mode.
// Write direct chars until '+'
if (buf[i] == plusChar) {
res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
lastI = i+1;
inBase64 = true;
}
} else { // We decode base64.
if (!base64Chars[buf[i]]) { // Base64 ended.
if (i == lastI && buf[i] == minusChar) {// "+-" -> "+"
res += "+";
} else {
var b64str = base64Accum + buf.slice(lastI, i).toString();
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
}
if (buf[i] != minusChar) // Minus is absorbed after base64.
i--;
lastI = i+1;
inBase64 = false;
base64Accum = '';
}
}
if (buf[i] != minusChar) // Minus is absorbed after base64.
{ i-- }
lastI = i + 1
inBase64 = false
base64Accum = ""
}
}
}
if (!inBase64) {
res += this.iconv.decode(buf.slice(lastI), "ascii") // Write direct chars.
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI), "ascii")
if (!inBase64) {
res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
} else {
var b64str = base64Accum + buf.slice(lastI).toString();
var canBeDecoded = b64str.length - (b64str.length % 8) // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
base64Accum = b64str.slice(canBeDecoded) // The rest will be decoded in future.
b64str = b64str.slice(0, canBeDecoded)
var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
b64str = b64str.slice(0, canBeDecoded);
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
}
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
}
this.inBase64 = inBase64
this.base64Accum = base64Accum
this.inBase64 = inBase64;
this.base64Accum = base64Accum;
return res
return res;
}
Utf7Decoder.prototype.end = function () {
var res = ""
if (this.inBase64 && this.base64Accum.length > 0) { res = this.iconv.decode(Buffer.from(this.base64Accum, "base64"), "utf16-be") }
Utf7Decoder.prototype.end = function() {
var res = "";
if (this.inBase64 && this.base64Accum.length > 0)
res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");
this.inBase64 = false
this.base64Accum = ""
return res
this.inBase64 = false;
this.base64Accum = '';
return res;
}
// UTF-7-IMAP codec.
// RFC3501 Sec. 5.1.3 Modified UTF-7 (http://tools.ietf.org/html/rfc3501#section-5.1.3)
// Differences:
@@ -128,156 +128,163 @@ Utf7Decoder.prototype.end = function () {
// * String must end in non-shifted position.
// * "-&" while in base64 is not allowed.
exports.utf7imap = Utf7IMAPCodec
function Utf7IMAPCodec (codecOptions, iconv) {
this.iconv = iconv
exports.utf7imap = Utf7IMAPCodec;
function Utf7IMAPCodec(codecOptions, iconv) {
this.iconv = iconv;
};
Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder
Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder
Utf7IMAPCodec.prototype.bomAware = true
Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder;
Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder;
Utf7IMAPCodec.prototype.bomAware = true;
// -- Encoding
function Utf7IMAPEncoder (options, codec) {
this.iconv = codec.iconv
this.inBase64 = false
this.base64Accum = Buffer.alloc(6)
this.base64AccumIdx = 0
function Utf7IMAPEncoder(options, codec) {
this.iconv = codec.iconv;
this.inBase64 = false;
this.base64Accum = Buffer.alloc(6);
this.base64AccumIdx = 0;
}
Utf7IMAPEncoder.prototype.write = function (str) {
var inBase64 = this.inBase64
var base64Accum = this.base64Accum
var base64AccumIdx = this.base64AccumIdx
var buf = Buffer.alloc(str.length * 5 + 10); var bufIdx = 0
Utf7IMAPEncoder.prototype.write = function(str) {
var inBase64 = this.inBase64,
base64Accum = this.base64Accum,
base64AccumIdx = this.base64AccumIdx,
buf = Buffer.alloc(str.length*5 + 10), bufIdx = 0;
for (var i = 0; i < str.length; i++) {
var uChar = str.charCodeAt(i)
if (uChar >= 0x20 && uChar <= 0x7E) { // Direct character or '&'.
if (inBase64) {
if (base64AccumIdx > 0) {
bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString("base64").replace(/\//g, ",").replace(/=+$/, ""), bufIdx)
base64AccumIdx = 0
for (var i = 0; i < str.length; i++) {
var uChar = str.charCodeAt(i);
if (0x20 <= uChar && uChar <= 0x7E) { // Direct character or '&'.
if (inBase64) {
if (base64AccumIdx > 0) {
bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
base64AccumIdx = 0;
}
buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
inBase64 = false;
}
if (!inBase64) {
buf[bufIdx++] = uChar; // Write direct character
if (uChar === andChar) // Ampersand -> '&-'
buf[bufIdx++] = minusChar;
}
} else { // Non-direct character
if (!inBase64) {
buf[bufIdx++] = andChar; // Write '&', then go to base64 mode.
inBase64 = true;
}
if (inBase64) {
base64Accum[base64AccumIdx++] = uChar >> 8;
base64Accum[base64AccumIdx++] = uChar & 0xFF;
if (base64AccumIdx == base64Accum.length) {
bufIdx += buf.write(base64Accum.toString('base64').replace(/\//g, ','), bufIdx);
base64AccumIdx = 0;
}
}
}
buf[bufIdx++] = minusChar // Write '-', then go to direct mode.
inBase64 = false
}
if (!inBase64) {
buf[bufIdx++] = uChar // Write direct character
if (uChar === andChar) // Ampersand -> '&-'
{ buf[bufIdx++] = minusChar }
}
} else { // Non-direct character
if (!inBase64) {
buf[bufIdx++] = andChar // Write '&', then go to base64 mode.
inBase64 = true
}
if (inBase64) {
base64Accum[base64AccumIdx++] = uChar >> 8
base64Accum[base64AccumIdx++] = uChar & 0xFF
if (base64AccumIdx == base64Accum.length) {
bufIdx += buf.write(base64Accum.toString("base64").replace(/\//g, ","), bufIdx)
base64AccumIdx = 0
}
}
}
}
this.inBase64 = inBase64
this.base64AccumIdx = base64AccumIdx
return buf.slice(0, bufIdx)
}
Utf7IMAPEncoder.prototype.end = function () {
var buf = Buffer.alloc(10); var bufIdx = 0
if (this.inBase64) {
if (this.base64AccumIdx > 0) {
bufIdx += buf.write(this.base64Accum.slice(0, this.base64AccumIdx).toString("base64").replace(/\//g, ",").replace(/=+$/, ""), bufIdx)
this.base64AccumIdx = 0
}
buf[bufIdx++] = minusChar // Write '-', then go to direct mode.
this.inBase64 = false
}
this.inBase64 = inBase64;
this.base64AccumIdx = base64AccumIdx;
return buf.slice(0, bufIdx)
return buf.slice(0, bufIdx);
}
// Finishes the encoded output: if the encoder is still inside a "&...-" base64 run,
// emits the bytes left in the accumulator (base64 with '/' swapped for ',' and the
// '=' padding removed) followed by the closing '-'. Returns an empty Buffer otherwise.
Utf7IMAPEncoder.prototype.end = function() {
    var out = Buffer.alloc(10);
    var written = 0;

    // Direct mode: nothing is pending.
    if (!this.inBase64)
        return out.slice(0, written);

    if (this.base64AccumIdx > 0) {
        var pending = this.base64Accum.slice(0, this.base64AccumIdx);
        var tail = pending.toString('base64').replace(/\//g, ',').replace(/=+$/, '');
        written += out.write(tail, written);
        this.base64AccumIdx = 0;
    }

    out[written++] = minusChar; // Write '-', then go to direct mode.
    this.inBase64 = false;

    return out.slice(0, written);
};
// -- Decoding
function Utf7IMAPDecoder (options, codec) {
this.iconv = codec.iconv
this.inBase64 = false
this.base64Accum = ""
function Utf7IMAPDecoder(options, codec) {
this.iconv = codec.iconv;
this.inBase64 = false;
this.base64Accum = '';
}
var base64IMAPChars = base64Chars.slice()
base64IMAPChars[",".charCodeAt(0)] = true
var base64IMAPChars = base64Chars.slice();
base64IMAPChars[','.charCodeAt(0)] = true;
Utf7IMAPDecoder.prototype.write = function (buf) {
var res = ""; var lastI = 0
var inBase64 = this.inBase64
var base64Accum = this.base64Accum
Utf7IMAPDecoder.prototype.write = function(buf) {
var res = "", lastI = 0,
inBase64 = this.inBase64,
base64Accum = this.base64Accum;
// The decoder is more involved as we must handle chunks in stream.
// It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
// The decoder is more involved as we must handle chunks in stream.
// It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
for (var i = 0; i < buf.length; i++) {
if (!inBase64) { // We're in direct mode.
// Write direct chars until '&'
if (buf[i] == andChar) {
res += this.iconv.decode(buf.slice(lastI, i), "ascii") // Write direct chars.
lastI = i + 1
inBase64 = true
}
} else { // We decode base64.
if (!base64IMAPChars[buf[i]]) { // Base64 ended.
if (i == lastI && buf[i] == minusChar) { // "&-" -> "&"
res += "&"
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI, i), "ascii").replace(/,/g, "/")
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
for (var i = 0; i < buf.length; i++) {
if (!inBase64) { // We're in direct mode.
// Write direct chars until '&'
if (buf[i] == andChar) {
res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
lastI = i+1;
inBase64 = true;
}
} else { // We decode base64.
if (!base64IMAPChars[buf[i]]) { // Base64 ended.
if (i == lastI && buf[i] == minusChar) { // "&-" -> "&"
res += "&";
} else {
var b64str = base64Accum + buf.slice(lastI, i).toString().replace(/,/g, '/');
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
}
if (buf[i] != minusChar) // Minus may be absorbed after base64.
i--;
lastI = i+1;
inBase64 = false;
base64Accum = '';
}
}
if (buf[i] != minusChar) // Minus may be absorbed after base64.
{ i-- }
lastI = i + 1
inBase64 = false
base64Accum = ""
}
}
}
if (!inBase64) {
res += this.iconv.decode(buf.slice(lastI), "ascii") // Write direct chars.
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI), "ascii").replace(/,/g, "/")
if (!inBase64) {
res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
} else {
var b64str = base64Accum + buf.slice(lastI).toString().replace(/,/g, '/');
var canBeDecoded = b64str.length - (b64str.length % 8) // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
base64Accum = b64str.slice(canBeDecoded) // The rest will be decoded in future.
b64str = b64str.slice(0, canBeDecoded)
var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
b64str = b64str.slice(0, canBeDecoded);
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
}
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
}
this.inBase64 = inBase64
this.base64Accum = base64Accum
this.inBase64 = inBase64;
this.base64Accum = base64Accum;
return res
return res;
}
Utf7IMAPDecoder.prototype.end = function () {
var res = ""
if (this.inBase64 && this.base64Accum.length > 0) { res = this.iconv.decode(Buffer.from(this.base64Accum, "base64"), "utf16-be") }
Utf7IMAPDecoder.prototype.end = function() {
var res = "";
if (this.inBase64 && this.base64Accum.length > 0)
res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");
this.inBase64 = false
this.base64Accum = ""
return res
this.inBase64 = false;
this.base64Accum = '';
return res;
}

View File

@@ -1,48 +1,52 @@
"use strict"
"use strict";
var BOMChar = "\uFEFF"
var BOMChar = '\uFEFF';
exports.PrependBOM = PrependBOMWrapper
function PrependBOMWrapper (encoder, options) {
this.encoder = encoder
this.addBOM = true
function PrependBOMWrapper(encoder, options) {
this.encoder = encoder;
this.addBOM = true;
}
PrependBOMWrapper.prototype.write = function (str) {
if (this.addBOM) {
str = BOMChar + str
this.addBOM = false
}
PrependBOMWrapper.prototype.write = function(str) {
if (this.addBOM) {
str = BOMChar + str;
this.addBOM = false;
}
return this.encoder.write(str)
return this.encoder.write(str);
}
PrependBOMWrapper.prototype.end = function () {
return this.encoder.end()
PrependBOMWrapper.prototype.end = function() {
return this.encoder.end();
}
// ------------------------------------------------------------------------------
exports.StripBOM = StripBOMWrapper
function StripBOMWrapper (decoder, options) {
this.decoder = decoder
this.pass = false
this.options = options || {}
//------------------------------------------------------------------------------
exports.StripBOM = StripBOMWrapper;
function StripBOMWrapper(decoder, options) {
this.decoder = decoder;
this.pass = false;
this.options = options || {};
}
StripBOMWrapper.prototype.write = function (buf) {
var res = this.decoder.write(buf)
if (this.pass || !res) { return res }
StripBOMWrapper.prototype.write = function(buf) {
var res = this.decoder.write(buf);
if (this.pass || !res)
return res;
if (res[0] === BOMChar) {
res = res.slice(1)
if (typeof this.options.stripBOM === "function") { this.options.stripBOM() }
}
if (res[0] === BOMChar) {
res = res.slice(1);
if (typeof this.options.stripBOM === 'function')
this.options.stripBOM();
}
this.pass = true
return res
this.pass = true;
return res;
}
StripBOMWrapper.prototype.end = function () {
return this.decoder.end()
StripBOMWrapper.prototype.end = function() {
return this.decoder.end();
}

View File

@@ -6,22 +6,15 @@
*--------------------------------------------------------------------------------------------*/
declare module 'iconv-lite' {
// Basic API
export function decode(buffer: Buffer | Uint8Array, encoding: string, options?: Options): string;
export function decode(buffer: Buffer, encoding: string, options?: Options): string;
export function encode(content: string, encoding: string, options?: Options): Buffer;
export function encodingExists(encoding: string): boolean;
// Stream API
export function decodeStream(encoding: string, options?: Options): NodeJS.ReadWriteStream;
export function encodeStream(encoding: string, options?: Options): NodeJS.ReadWriteStream;
// Low-level stream APIs
export function getEncoder(encoding: string, options?: Options): EncoderStream;
export function getDecoder(encoding: string, options?: Options): DecoderStream;
}
export interface Options {
@@ -29,13 +22,3 @@ export interface Options {
addBOM?: boolean;
defaultEncoding?: string;
}
export interface EncoderStream {
write(str: string): Buffer;
end(): Buffer | undefined;
}
export interface DecoderStream {
write(buf: Buffer): string;
end(): string | undefined;
}

232
node_modules/iconv-lite/lib/index.js generated vendored
View File

@@ -1,183 +1,153 @@
"use strict"
"use strict";
var Buffer = require("safer-buffer").Buffer
// Some environments don't have global Buffer (e.g. React Native).
// Solution would be installing npm modules "buffer" and "stream" explicitly.
var Buffer = require("safer-buffer").Buffer;
var bomHandling = require("./bom-handling")
var mergeModules = require("./helpers/merge-exports")
var iconv = module.exports
var bomHandling = require("./bom-handling"),
iconv = module.exports;
// All codecs and aliases are kept here, keyed by encoding name/alias.
// They are lazy loaded in `iconv.getCodec` from `encodings/index.js`.
// Cannot initialize with { __proto__: null } because Boolean({ __proto__: null }) === true
iconv.encodings = null
iconv.encodings = null;
// Characters emitted in case of error.
iconv.defaultCharUnicode = "<22>"
iconv.defaultCharSingleByte = "?"
iconv.defaultCharUnicode = '<27>';
iconv.defaultCharSingleByte = '?';
// Public API.
iconv.encode = function encode (str, encoding, options) {
str = "" + (str || "") // Ensure string.
iconv.encode = function encode(str, encoding, options) {
str = "" + (str || ""); // Ensure string.
var encoder = iconv.getEncoder(encoding, options)
var encoder = iconv.getEncoder(encoding, options);
var res = encoder.write(str)
var trail = encoder.end()
return (trail && trail.length > 0) ? Buffer.concat([res, trail]) : res
var res = encoder.write(str);
var trail = encoder.end();
return (trail && trail.length > 0) ? Buffer.concat([res, trail]) : res;
}
iconv.decode = function decode (buf, encoding, options) {
if (typeof buf === "string") {
if (!iconv.skipDecodeWarning) {
console.error("Iconv-lite warning: decode()-ing strings is deprecated. Refer to https://github.com/ashtuchkin/iconv-lite/wiki/Use-Buffers-when-decoding")
iconv.skipDecodeWarning = true
iconv.decode = function decode(buf, encoding, options) {
if (typeof buf === 'string') {
if (!iconv.skipDecodeWarning) {
console.error('Iconv-lite warning: decode()-ing strings is deprecated. Refer to https://github.com/ashtuchkin/iconv-lite/wiki/Use-Buffers-when-decoding');
iconv.skipDecodeWarning = true;
}
buf = Buffer.from("" + (buf || ""), "binary"); // Ensure buffer.
}
buf = Buffer.from("" + (buf || ""), "binary") // Ensure buffer.
}
var decoder = iconv.getDecoder(encoding, options);
var decoder = iconv.getDecoder(encoding, options)
var res = decoder.write(buf);
var trail = decoder.end();
var res = decoder.write(buf)
var trail = decoder.end()
return trail ? (res + trail) : res
return trail ? (res + trail) : res;
}
iconv.encodingExists = function encodingExists (enc) {
try {
iconv.getCodec(enc)
return true
} catch (e) {
return false
}
iconv.encodingExists = function encodingExists(enc) {
try {
iconv.getCodec(enc);
return true;
} catch (e) {
return false;
}
}
// Legacy aliases to convert functions
iconv.toEncoding = iconv.encode
iconv.fromEncoding = iconv.decode
iconv.toEncoding = iconv.encode;
iconv.fromEncoding = iconv.decode;
// Search for a codec in iconv.encodings. Cache codec data in iconv._codecDataCache.
iconv._codecDataCache = { __proto__: null }
iconv._codecDataCache = {};
iconv.getCodec = function getCodec(encoding) {
if (!iconv.encodings)
iconv.encodings = require("../encodings"); // Lazy load all encoding definitions.
// Canonicalize encoding name: strip all non-alphanumeric chars and appended year.
var enc = iconv._canonicalizeEncoding(encoding);
iconv.getCodec = function getCodec (encoding) {
if (!iconv.encodings) {
var raw = require("../encodings")
// TODO: In future versions when old nodejs support is removed can use object.assign
iconv.encodings = { __proto__: null } // Initialize as empty object.
mergeModules(iconv.encodings, raw)
}
// Traverse iconv.encodings to find actual codec.
var codecOptions = {};
while (true) {
var codec = iconv._codecDataCache[enc];
if (codec)
return codec;
// Canonicalize encoding name: strip all non-alphanumeric chars and appended year.
var enc = iconv._canonicalizeEncoding(encoding)
var codecDef = iconv.encodings[enc];
// Traverse iconv.encodings to find actual codec.
var codecOptions = {}
while (true) {
var codec = iconv._codecDataCache[enc]
switch (typeof codecDef) {
case "string": // Direct alias to other encoding.
enc = codecDef;
break;
if (codec) { return codec }
case "object": // Alias with options. Can be layered.
for (var key in codecDef)
codecOptions[key] = codecDef[key];
var codecDef = iconv.encodings[enc]
if (!codecOptions.encodingName)
codecOptions.encodingName = enc;
enc = codecDef.type;
break;
switch (typeof codecDef) {
case "string": // Direct alias to other encoding.
enc = codecDef
break
case "function": // Codec itself.
if (!codecOptions.encodingName)
codecOptions.encodingName = enc;
case "object": // Alias with options. Can be layered.
for (var key in codecDef) { codecOptions[key] = codecDef[key] }
// The codec function must load all tables and return object with .encoder and .decoder methods.
// It'll be called only once (for each different options object).
codec = new codecDef(codecOptions, iconv);
if (!codecOptions.encodingName) { codecOptions.encodingName = enc }
iconv._codecDataCache[codecOptions.encodingName] = codec; // Save it to be reused later.
return codec;
enc = codecDef.type
break
case "function": // Codec itself.
if (!codecOptions.encodingName) { codecOptions.encodingName = enc }
// The codec function must load all tables and return object with .encoder and .decoder methods.
// It'll be called only once (for each different options object).
//
codec = new codecDef(codecOptions, iconv)
iconv._codecDataCache[codecOptions.encodingName] = codec // Save it to be reused later.
return codec
default:
throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '" + enc + "')")
default:
throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '"+enc+"')");
}
}
}
}
iconv._canonicalizeEncoding = function (encoding) {
// Canonicalize encoding name: strip all non-alphanumeric chars and appended year.
return ("" + encoding).toLowerCase().replace(/:\d{4}$|[^0-9a-z]/g, "")
iconv._canonicalizeEncoding = function(encoding) {
// Canonicalize encoding name: strip all non-alphanumeric chars and appended year.
return (''+encoding).toLowerCase().replace(/:\d{4}$|[^0-9a-z]/g, "");
}
iconv.getEncoder = function getEncoder (encoding, options) {
var codec = iconv.getCodec(encoding)
var encoder = new codec.encoder(options, codec)
iconv.getEncoder = function getEncoder(encoding, options) {
var codec = iconv.getCodec(encoding),
encoder = new codec.encoder(options, codec);
if (codec.bomAware && options && options.addBOM) { encoder = new bomHandling.PrependBOM(encoder, options) }
if (codec.bomAware && options && options.addBOM)
encoder = new bomHandling.PrependBOM(encoder, options);
return encoder
return encoder;
}
iconv.getDecoder = function getDecoder (encoding, options) {
var codec = iconv.getCodec(encoding)
var decoder = new codec.decoder(options, codec)
iconv.getDecoder = function getDecoder(encoding, options) {
var codec = iconv.getCodec(encoding),
decoder = new codec.decoder(options, codec);
if (codec.bomAware && !(options && options.stripBOM === false)) { decoder = new bomHandling.StripBOM(decoder, options) }
if (codec.bomAware && !(options && options.stripBOM === false))
decoder = new bomHandling.StripBOM(decoder, options);
return decoder
return decoder;
}
// Streaming API
// NOTE: Streaming API naturally depends on 'stream' module from Node.js. Unfortunately in browser environments this module can add
// up to 100Kb to the output bundle. To avoid unnecessary code bloat, we don't enable Streaming API in browser by default.
// If you would like to enable it explicitly, please add the following code to your app:
// > iconv.enableStreamingAPI(require('stream'));
iconv.enableStreamingAPI = function enableStreamingAPI (streamModule) {
if (iconv.supportsStreams) { return }
// Dependency-inject stream module to create IconvLite stream classes.
var streams = require("./streams")(streamModule)
// Load extensions in Node. All of them are omitted in Browserify build via 'browser' field in package.json.
var nodeVer = typeof process !== 'undefined' && process.versions && process.versions.node;
if (nodeVer) {
// Not public API yet, but expose the stream classes.
iconv.IconvLiteEncoderStream = streams.IconvLiteEncoderStream
iconv.IconvLiteDecoderStream = streams.IconvLiteDecoderStream
// Load streaming support in Node v0.10+
var nodeVerArr = nodeVer.split(".").map(Number);
if (nodeVerArr[0] > 0 || nodeVerArr[1] >= 10) {
require("./streams")(iconv);
}
// Streaming API.
iconv.encodeStream = function encodeStream (encoding, options) {
return new iconv.IconvLiteEncoderStream(iconv.getEncoder(encoding, options), options)
}
iconv.decodeStream = function decodeStream (encoding, options) {
return new iconv.IconvLiteDecoderStream(iconv.getDecoder(encoding, options), options)
}
iconv.supportsStreams = true
// Load Node primitive extensions.
require("./extend-node")(iconv);
}
// Enable Streaming API automatically if 'stream' module is available and non-empty (the majority of environments).
var streamModule
try {
streamModule = require("stream")
} catch (e) {}
if (streamModule && streamModule.Transform) {
iconv.enableStreamingAPI(streamModule)
} else {
// In rare cases where 'stream' module is not available by default, throw a helpful exception.
iconv.encodeStream = iconv.decodeStream = function () {
throw new Error("iconv-lite Streaming API is not enabled. Use iconv.enableStreamingAPI(require('stream')); to enable it.")
}
}
// Some environments, such as browsers, may not load JavaScript files as UTF-8
// eslint-disable-next-line no-constant-condition
if ("Ā" !== "\u0100") {
console.error("iconv-lite warning: js files use non-utf8 encoding. See https://github.com/ashtuchkin/iconv-lite/wiki/Javascript-source-file-encodings for more info.")
if ("Ā" != "\u0100") {
console.error("iconv-lite warning: javascript files use encoding different from utf-8. See https://github.com/ashtuchkin/iconv-lite/wiki/Javascript-source-file-encodings for more info.");
}

View File

@@ -1,105 +1,121 @@
"use strict"
"use strict";
var Buffer = require("safer-buffer").Buffer
var Buffer = require("buffer").Buffer,
Transform = require("stream").Transform;
// NOTE: Due to 'stream' module being pretty large (~100Kb, significant in browser environments),
// we opt to dependency-inject it instead of creating a hard dependency.
module.exports = function (streamModule) {
var Transform = streamModule.Transform
// == Encoder stream =======================================================
function IconvLiteEncoderStream (conv, options) {
this.conv = conv
options = options || {}
options.decodeStrings = false // We accept only strings, so we don't need to decode them.
Transform.call(this, options)
}
IconvLiteEncoderStream.prototype = Object.create(Transform.prototype, {
constructor: { value: IconvLiteEncoderStream }
})
IconvLiteEncoderStream.prototype._transform = function (chunk, encoding, done) {
if (typeof chunk !== "string") {
return done(new Error("Iconv encoding stream needs strings as its input."))
// == Exports ==================================================================
module.exports = function(iconv) {
// Additional Public API.
iconv.encodeStream = function encodeStream(encoding, options) {
return new IconvLiteEncoderStream(iconv.getEncoder(encoding, options), options);
}
try {
var res = this.conv.write(chunk)
if (res && res.length) this.push(res)
done()
} catch (e) {
done(e)
iconv.decodeStream = function decodeStream(encoding, options) {
return new IconvLiteDecoderStream(iconv.getDecoder(encoding, options), options);
}
}
IconvLiteEncoderStream.prototype._flush = function (done) {
try {
var res = this.conv.end()
if (res && res.length) this.push(res)
done()
} catch (e) {
done(e)
}
}
iconv.supportsStreams = true;
IconvLiteEncoderStream.prototype.collect = function (cb) {
var chunks = []
this.on("error", cb)
this.on("data", function (chunk) { chunks.push(chunk) })
this.on("end", function () {
cb(null, Buffer.concat(chunks))
})
return this
}
// == Decoder stream =======================================================
// Not published yet.
iconv.IconvLiteEncoderStream = IconvLiteEncoderStream;
iconv.IconvLiteDecoderStream = IconvLiteDecoderStream;
iconv._collect = IconvLiteDecoderStream.prototype.collect;
};
function IconvLiteDecoderStream (conv, options) {
this.conv = conv
options = options || {}
options.encoding = this.encoding = "utf8" // We output strings.
Transform.call(this, options)
}
IconvLiteDecoderStream.prototype = Object.create(Transform.prototype, {
constructor: { value: IconvLiteDecoderStream }
})
IconvLiteDecoderStream.prototype._transform = function (chunk, encoding, done) {
if (!Buffer.isBuffer(chunk) && !(chunk instanceof Uint8Array)) { return done(new Error("Iconv decoding stream needs buffers as its input.")) }
try {
var res = this.conv.write(chunk)
if (res && res.length) this.push(res, this.encoding)
done()
} catch (e) {
done(e)
}
}
IconvLiteDecoderStream.prototype._flush = function (done) {
try {
var res = this.conv.end()
if (res && res.length) this.push(res, this.encoding)
done()
} catch (e) {
done(e)
}
}
IconvLiteDecoderStream.prototype.collect = function (cb) {
var res = ""
this.on("error", cb)
this.on("data", function (chunk) { res += chunk })
this.on("end", function () {
cb(null, res)
})
return this
}
return {
IconvLiteEncoderStream: IconvLiteEncoderStream,
IconvLiteDecoderStream: IconvLiteDecoderStream
}
// == Encoder stream =======================================================

/**
 * Transform stream that accepts JS strings and pushes whatever the wrapped
 * converter returns for them.
 *
 * @param {Object} conv - Converter exposing write(str) and end(); any result
 *     with a non-zero length is pushed downstream.
 * @param {Object} [options] - Options forwarded to stream.Transform. The object
 *     is copied rather than modified, so the caller can safely reuse it.
 *     `decodeStrings` is always forced to false.
 */
function IconvLiteEncoderStream(conv, options) {
    this.conv = conv;

    // Copy instead of writing into the caller's object: the same options object
    // is also handed to the converter factory and may be reused for other streams.
    var streamOptions = {};
    for (var key in options)
        streamOptions[key] = options[key];
    streamOptions.decodeStrings = false; // We accept only strings, so we don't need to decode them.

    Transform.call(this, streamOptions);
}

IconvLiteEncoderStream.prototype = Object.create(Transform.prototype, {
    constructor: { value: IconvLiteEncoderStream }
});

// Converts one string chunk. Non-string chunks and converter exceptions are
// reported through done(err).
IconvLiteEncoderStream.prototype._transform = function(chunk, encoding, done) {
    if (typeof chunk !== 'string')
        return done(new Error("Iconv encoding stream needs strings as its input."));
    try {
        var res = this.conv.write(chunk);
        if (res && res.length) this.push(res);
        done();
    }
    catch (e) {
        done(e);
    }
};

// Pushes whatever the converter returns from end() once the input is exhausted.
IconvLiteEncoderStream.prototype._flush = function(done) {
    try {
        var res = this.conv.end();
        if (res && res.length) this.push(res);
        done();
    }
    catch (e) {
        done(e);
    }
};

/**
 * Gathers all output and hands it to a node-style callback.
 *
 * @param {Function} cb - Called as cb(err) on 'error', or as
 *     cb(null, Buffer.concat(chunks)) on 'end'.
 * @returns {IconvLiteEncoderStream} this, for chaining.
 */
IconvLiteEncoderStream.prototype.collect = function(cb) {
    var chunks = [];
    this.on('error', cb);
    this.on('data', function(chunk) { chunks.push(chunk); });
    this.on('end', function() {
        cb(null, Buffer.concat(chunks));
    });
    return this;
};
// == Decoder stream =======================================================

/**
 * Transform stream that accepts Buffers and pushes the strings the wrapped
 * converter returns for them.
 *
 * @param {Object} conv - Converter exposing write(buf) and end(); any result
 *     with a non-zero length is pushed downstream.
 * @param {Object} [options] - Options forwarded to stream.Transform. The object
 *     is copied rather than modified, so the caller can safely reuse it.
 *     `encoding` is always forced to 'utf8'.
 */
function IconvLiteDecoderStream(conv, options) {
    this.conv = conv;

    // Copy instead of writing into the caller's object: a leaked `encoding`
    // setting would change the output type of any other stream later built
    // from the same options object.
    var streamOptions = {};
    for (var key in options)
        streamOptions[key] = options[key];
    streamOptions.encoding = this.encoding = 'utf8'; // We output strings.

    Transform.call(this, streamOptions);
}

IconvLiteDecoderStream.prototype = Object.create(Transform.prototype, {
    constructor: { value: IconvLiteDecoderStream }
});

// Converts one Buffer chunk. Non-Buffer chunks and converter exceptions are
// reported through done(err).
IconvLiteDecoderStream.prototype._transform = function(chunk, encoding, done) {
    if (!Buffer.isBuffer(chunk))
        return done(new Error("Iconv decoding stream needs buffers as its input."));
    try {
        var res = this.conv.write(chunk);
        if (res && res.length) this.push(res, this.encoding);
        done();
    }
    catch (e) {
        done(e);
    }
};

// Pushes whatever the converter returns from end() once the input is exhausted.
IconvLiteDecoderStream.prototype._flush = function(done) {
    try {
        var res = this.conv.end();
        if (res && res.length) this.push(res, this.encoding);
        done();
    }
    catch (e) {
        done(e);
    }
};

/**
 * Gathers all output and hands it to a node-style callback.
 *
 * @param {Function} cb - Called as cb(err) on 'error', or as cb(null, text)
 *     on 'end', where text is the concatenation of every emitted chunk.
 * @returns {IconvLiteDecoderStream} this, for chaining.
 */
IconvLiteDecoderStream.prototype.collect = function(cb) {
    var res = '';
    this.on('error', cb);
    this.on('data', function(chunk) { res += chunk; });
    this.on('end', function() {
        cb(null, res);
    });
    return this;
};

49
node_modules/iconv-lite/package.json generated vendored
View File

@@ -1,7 +1,7 @@
{
"name": "iconv-lite",
"description": "Convert character encodings in pure javascript.",
"version": "0.7.0",
"version": "0.4.24",
"license": "MIT",
"keywords": [
"iconv",
@@ -12,48 +12,35 @@
"author": "Alexander Shtuchkin <ashtuchkin@gmail.com>",
"main": "./lib/index.js",
"typings": "./lib/index.d.ts",
"homepage": "https://github.com/pillarjs/iconv-lite",
"bugs": "https://github.com/pillarjs/iconv-lite/issues",
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/express"
},
"homepage": "https://github.com/ashtuchkin/iconv-lite",
"bugs": "https://github.com/ashtuchkin/iconv-lite/issues",
"repository": {
"type": "git",
"url": "https://github.com/pillarjs/iconv-lite.git"
"url": "git://github.com/ashtuchkin/iconv-lite.git"
},
"engines": {
"node": ">=0.10.0"
},
"scripts": {
"lint": "eslint",
"lint:fix": "eslint --fix",
"test": "mocha --reporter spec --check-leaks --grep .",
"test:ci": "nyc --exclude test --reporter=lcovonly --reporter=text npm test",
"test:cov": "nyc --exclude test --reporter=html --reporter=text npm test",
"test:performance": "node --allow-natives-syntax performance/index.js",
"test:tap": "mocha --reporter tap --check-leaks --grep .",
"test:webpack": "npm pack && mv iconv-lite-*.tgz test/webpack/iconv-lite.tgz && cd test/webpack && npm install && npm run test && rm iconv-lite.tgz"
"coverage": "istanbul cover _mocha -- --grep .",
"coverage-open": "open coverage/lcov-report/index.html",
"test": "mocha --reporter spec --grep ."
},
"browser": {
"stream": false
"./lib/extend-node": false,
"./lib/streams": false
},
"devDependencies": {
"@stylistic/eslint-plugin": "^5.1.0",
"@stylistic/eslint-plugin-js": "^4.1.0",
"async": "^3.2.0",
"bench-node": "^0.10.0",
"eslint": "^9.0.0",
"errto": "^0.2.1",
"iconv": "^2.3.5",
"mocha": "^6.2.2",
"neostandard": "^0.12.0",
"nyc": "^14.1.1",
"request": "^2.88.2",
"semver": "^6.3.0",
"unorm": "^1.6.0"
"mocha": "^3.1.0",
"request": "~2.87.0",
"unorm": "*",
"errto": "*",
"async": "*",
"istanbul": "*",
"semver": "*",
"iconv": "*"
},
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3.0.0"
"safer-buffer": ">= 2.1.2 < 3"
}
}