Update Bot

This commit is contained in:
2026-03-15 11:58:43 +01:00
parent b67c111ffc
commit cd99275933
560 changed files with 23173 additions and 55113 deletions

124
node_modules/iconv-lite/Changelog.md generated vendored
View File

@@ -1,50 +1,124 @@
# 0.4.24 / 2018-08-22
## 0.7.0
### 🐞 Bug fixes
* Handle split surrogate pairs when encoding utf8 - by [@yosion-p](https://github.com/yosion-p) and [@ashtuchkin](https://github.com/ashtuchkin) in [#282](https://github.com/ashtuchkin/iconv-lite/pull/282):
Handle a case where streaming utf8 encoder (converting js strings -> buffers) encounters
surrogate pairs split between chunks (last character of one chunk is high surrogate and first
character of the next chunk is a low surrogate).
* Avoid false positives in encodingExists by using objects without a prototype - by [@bjohansebas](https://github.com/bjohansebas) in [#328](https://github.com/ashtuchkin/iconv-lite/pull/328)
The encodingExists method could return incorrect results if the lookup matched properties inherited
from the prototype of the object that stores the encodings, such as constructor and others. This change
replaces that object with one that has no prototype, ensuring that only explicitly defined valid encodings
in the library are considered. In addition, the fix is applied to the internal cache system to avoid the same
kind of false positives
### 🚀 Improvements
* Make explicit that decode() method supports Uint8Array input - by [@jardicc](https://github.com/jardicc) in [#271](https://github.com/ashtuchkin/iconv-lite/pull/271)
* Remove compatibility check for StringDecoder.end method - by [@bjohansebas](https://github.com/bjohansebas) in [#331](https://github.com/ashtuchkin/iconv-lite/pull/331)
## 0.6.3 / 2021-05-23
* Fix HKSCS encoding to prefer Big5 codes if both Big5 and HKSCS codes are possible (#264)
## 0.6.2 / 2020-07-08
* Support Uint8Array-s decoding without conversion to Buffers, plus fix an edge case.
## 0.6.1 / 2020-06-28
* Support Uint8Array-s directly when decoding (#246, by @gyzerok)
* Unify package.json version ranges to be strictly semver-compatible (#241)
* Fix minor issue in UTF-32 decoder's endianness detection code.
## 0.6.0 / 2020-06-08
* Updated 'gb18030' encoding to :2005 edition (see https://github.com/whatwg/encoding/issues/22).
* Removed `iconv.extendNodeEncodings()` mechanism. It was deprecated 5 years ago and didn't work
in recent Node versions.
* Reworked Streaming API behavior in browser environments to fix #204. Streaming API will be
excluded by default in browser packs, saving ~100Kb bundle size, unless enabled explicitly using
`iconv.enableStreamingAPI(require('stream'))`.
* Updates to development environment & tests:
* Added ./test/webpack private package to test complex new use cases that need custom environment.
It's tested as a separate job in Travis CI.
* Updated generation code for the new EUC-KR index file format from Encoding Standard.
* Removed Buffer() constructor in tests (#197 by @gabrielschulhof).
## 0.5.2 / 2020-06-08
* Added `iconv.getEncoder()` and `iconv.getDecoder()` methods to typescript definitions (#229).
* Fixed semver version to 6.1.2 to support Node 8.x (by @tanandara).
* Capped iconv version to 2.x as 3.x has dropped support for older Node versions.
* Switched from istanbul to c8 for code coverage.
## 0.5.1 / 2020-01-18
* Added cp720 encoding (#221, by @kr-deps)
* (minor) Changed Changelog.md formatting to use h2.
## 0.5.0 / 2019-06-26
* Added UTF-32 encoding, both little-endian and big-endian variants (UTF-32LE, UTF32-BE). If endianness
is not provided for decoding, it's deduced automatically from the stream using a heuristic similar to
what we use in UTF-16. (great work in #216 by @kshetline)
* Several minor updates to README (#217 by @oldj, plus some more)
* Added Node versions 10 and 12 to Travis test harness.
## 0.4.24 / 2018-08-22
* Added MIK encoding (#196, by @Ivan-Kalatchev)
# 0.4.23 / 2018-05-07
## 0.4.23 / 2018-05-07
* Fix deprecation warning in Node v10 due to the last usage of `new Buffer` (#185, by @felixbuenemann)
* Switched from NodeBuffer to Buffer in typings (#155 by @felixfbecker, #186 by @larssn)
# 0.4.22 / 2018-05-05
## 0.4.22 / 2018-05-05
* Use older semver style for dependencies to be compatible with Node version 0.10 (#182, by @dougwilson)
* Fix tests to accommodate fixes in Node v10 (#182, by @dougwilson)
# 0.4.21 / 2018-04-06
## 0.4.21 / 2018-04-06
* Fix encoding canonicalization (#156)
* Fix the paths in the "browser" field in package.json (#174 by @LMLB)
* Removed "contributors" section in package.json - see Git history instead.
# 0.4.20 / 2018-04-06
## 0.4.20 / 2018-04-06
* Updated `new Buffer()` usages with recommended replacements as it's being deprecated in Node v10 (#176, #178 by @ChALkeR)
# 0.4.19 / 2017-09-09
## 0.4.19 / 2017-09-09
* Fixed iso8859-1 codec regression in handling untranslatable characters (#162, caused by #147)
* Re-generated windows1255 codec, because it was updated in iconv project
* Fixed grammar in error message when iconv-lite is loaded with encoding other than utf8
# 0.4.18 / 2017-06-13
## 0.4.18 / 2017-06-13
* Fixed CESU-8 regression in Node v8.
# 0.4.17 / 2017-04-22
## 0.4.17 / 2017-04-22
* Updated typescript definition file to support Angular 2 AoT mode (#153 by @larssn)
# 0.4.16 / 2017-04-22
## 0.4.16 / 2017-04-22
* Added support for React Native (#150)
* Changed iso8859-1 encoding to using internal 'binary' encoding, as it's the same thing (#147 by @mscdex)
@@ -53,12 +127,12 @@
* Added a warning if iconv-lite is loaded not as utf-8 (see #142)
# 0.4.15 / 2016-11-21
## 0.4.15 / 2016-11-21
* Fixed typescript type definition (#137)
# 0.4.14 / 2016-11-20
## 0.4.14 / 2016-11-20
* Preparation for v1.0
* Added Node v6 and latest Node versions to Travis CI test rig
@@ -68,12 +142,12 @@
* Add ms prefix to dbcs windows encodings (@rokoroku)
# 0.4.13 / 2015-10-01
## 0.4.13 / 2015-10-01
* Fix silly mistake in deprecation notice.
# 0.4.12 / 2015-09-26
## 0.4.12 / 2015-09-26
* Node v4 support:
* Added CESU-8 decoding (#106)
@@ -81,18 +155,18 @@
* Added Travis tests for Node v4 and io.js latest (#105 by @Mithgol)
# 0.4.11 / 2015-07-03
## 0.4.11 / 2015-07-03
* Added CESU-8 encoding.
# 0.4.10 / 2015-05-26
## 0.4.10 / 2015-05-26
* Changed UTF-16 endianness heuristic to take into account any ASCII chars, not
just spaces. This should minimize the importance of "default" endianness.
# 0.4.9 / 2015-05-24
## 0.4.9 / 2015-05-24
* Streamlined BOM handling: strip BOM by default, add BOM when encoding if
addBOM: true. Added docs to Readme.
@@ -104,12 +178,12 @@
* Use strict mode in all files.
# 0.4.8 / 2015-04-14
## 0.4.8 / 2015-04-14
* added alias UNICODE-1-1-UTF-7 for UTF-7 encoding (#94)
# 0.4.7 / 2015-02-05
## 0.4.7 / 2015-02-05
* stop official support of Node.js v0.8. Should still work, but no guarantees.
reason: Packages needed for testing are hard to get on Travis CI.
@@ -117,40 +191,40 @@
props (#89).
# 0.4.6 / 2015-01-12
## 0.4.6 / 2015-01-12
* fix rare aliases of single-byte encodings (thanks @mscdex)
* double the timeout for dbcs tests to make them less flaky on travis
# 0.4.5 / 2014-11-20
## 0.4.5 / 2014-11-20
* fix windows-31j and x-sjis encoding support (@nleush)
* minor fix: undefined variable reference when internal error happens
# 0.4.4 / 2014-07-16
## 0.4.4 / 2014-07-16
* added encodings UTF-7 (RFC2152) and UTF-7-IMAP (RFC3501 Section 5.1.3)
* fixed streaming base64 encoding
# 0.4.3 / 2014-06-14
## 0.4.3 / 2014-06-14
* added encodings UTF-16BE and UTF-16 with BOM
# 0.4.2 / 2014-06-12
## 0.4.2 / 2014-06-12
* don't throw exception if `extendNodeEncodings()` is called more than once
# 0.4.1 / 2014-06-11
## 0.4.1 / 2014-06-11
* codepage 808 added
# 0.4.0 / 2014-06-10
## 0.4.0 / 2014-06-10
* code is rewritten from scratch
* all widespread encodings are supported

112
node_modules/iconv-lite/README.md generated vendored
View File

@@ -1,38 +1,40 @@
## Pure JS character encoding conversion [![Build Status](https://travis-ci.org/ashtuchkin/iconv-lite.svg?branch=master)](https://travis-ci.org/ashtuchkin/iconv-lite)
## iconv-lite: Pure JS character encoding conversion
* Doesn't need native code compilation. Works on Windows and in sandboxed environments like [Cloud9](http://c9.io).
* Used in popular projects like [Express.js (body_parser)](https://github.com/expressjs/body-parser),
[Grunt](http://gruntjs.com/), [Nodemailer](http://www.nodemailer.com/), [Yeoman](http://yeoman.io/) and others.
* Faster than [node-iconv](https://github.com/bnoordhuis/node-iconv) (see below for performance comparison).
* Intuitive encode/decode API
* Streaming support for Node v0.10+
* [Deprecated] Can extend Node.js primitives (buffers, streams) to support all iconv-lite encodings.
* In-browser usage via [Browserify](https://github.com/substack/node-browserify) (~180k gzip compressed with Buffer shim included).
* Typescript [type definition file](https://github.com/ashtuchkin/iconv-lite/blob/master/lib/index.d.ts) included.
* React Native is supported (need to explicitly `npm install` two more modules: `buffer` and `stream`).
* License: MIT.
[![NPM Version][npm-version-image]][npm-url]
[![NPM Downloads][npm-downloads-image]][npm-downloads-url]
[![License][license-image]][license-url]
[![NPM Install Size][npm-install-size-image]][npm-install-size-url]
[![NPM Stats](https://nodei.co/npm/iconv-lite.png?downloads=true&downloadRank=true)](https://npmjs.org/packages/iconv-lite/)
* No need for native code compilation. Quick to install, works on Windows, Web, and in sandboxed environments.
* Used in popular projects like [Express.js (body_parser)](https://github.com/expressjs/body-parser),
[Grunt](http://gruntjs.com/), [Nodemailer](http://www.nodemailer.com/), [Yeoman](http://yeoman.io/) and others.
* Faster than [node-iconv](https://github.com/bnoordhuis/node-iconv) (see below for performance comparison).
* Intuitive encode/decode API, including Streaming support.
* In-browser usage via [browserify](https://github.com/substack/node-browserify) or [webpack](https://webpack.js.org/) (~180kb gzip compressed with Buffer shim included).
* Typescript [type definition file](https://github.com/ashtuchkin/iconv-lite/blob/master/lib/index.d.ts) included.
* React Native is supported (need to install `stream` module to enable Streaming API).
## Usage
### Basic API
```javascript
var iconv = require('iconv-lite');
// Convert from an encoded buffer to js string.
// Convert from an encoded buffer to a js string.
str = iconv.decode(Buffer.from([0x68, 0x65, 0x6c, 0x6c, 0x6f]), 'win1251');
// Convert from js string to an encoded buffer.
// Convert from a js string to an encoded buffer.
buf = iconv.encode("Sample input string", 'win1251');
// Check if encoding is supported
iconv.encodingExists("us-ascii")
```
### Streaming API (Node v0.10+)
```javascript
### Streaming API
// Decode stream (from binary stream to js strings)
```javascript
// Decode stream (from binary data stream to js strings)
http.createServer(function(req, res) {
var converterStream = iconv.decodeStream('win1251');
req.pipe(converterStream);
@@ -57,44 +59,10 @@ http.createServer(function(req, res) {
});
```
### [Deprecated] Extend Node.js own encodings
> NOTE: This doesn't work on latest Node versions. See [details](https://github.com/ashtuchkin/iconv-lite/wiki/Node-v4-compatibility).
```javascript
// After this call all Node basic primitives will understand iconv-lite encodings.
iconv.extendNodeEncodings();
// Examples:
buf = new Buffer(str, 'win1251');
buf.write(str, 'gbk');
str = buf.toString('latin1');
assert(Buffer.isEncoding('iso-8859-15'));
Buffer.byteLength(str, 'us-ascii');
http.createServer(function(req, res) {
req.setEncoding('big5');
req.collect(function(err, body) {
console.log(body);
});
});
fs.createReadStream("file.txt", "shift_jis");
// External modules are also supported (if they use Node primitives, which they probably do).
request = require('request');
request({
url: "http://github.com/",
encoding: "cp932"
});
// To remove extensions
iconv.undoExtendNodeEncodings();
```
## Supported encodings
* All node.js native encodings: utf8, ucs2 / utf16-le, ascii, binary, base64, hex.
* Additional unicode encodings: utf16, utf16-be, utf-7, utf-7-imap.
* Additional unicode encodings: utf16, utf16-be, utf-7, utf-7-imap, utf32, utf32-le, and utf32-be.
* All widespread singlebyte encodings: Windows 125x family, ISO-8859 family,
IBM/DOS codepages, Macintosh family, KOI8 family, all others supported by iconv library.
Aliases like 'latin1', 'us-ascii' also supported.
@@ -106,7 +74,6 @@ Most singlebyte encodings are generated automatically from [node-iconv](https://
Multibyte encodings are generated from [Unicode.org mappings](http://www.unicode.org/Public/MAPPINGS/) and [WHATWG Encoding Standard mappings](http://encoding.spec.whatwg.org/). Thank you, respective authors!
## Encoding/decoding speed
Comparison with node-iconv module (1000x256kb, on MacBook Pro, Core i5/2.6 GHz, Node v0.12.0).
@@ -133,24 +100,39 @@ smart about endianness in the following ways:
overridden with `defaultEncoding: 'utf-16be'` option. Strips BOM unless `stripBOM: false`.
* Encoding: uses UTF-16LE and writes BOM by default. Use `addBOM: false` to override.
## UTF-32 Encodings
This library supports UTF-32LE, UTF-32BE and UTF-32 encodings. Like the UTF-16 encoding above, UTF-32 defaults to UTF-32LE, but uses BOM and 'spaces heuristics' to determine input endianness.
* The default of UTF-32LE can be overridden with the `defaultEncoding: 'utf-32be'` option. Strips BOM unless `stripBOM: false`.
* Encoding: uses UTF-32LE and writes BOM by default. Use `addBOM: false` to override. (`defaultEncoding: 'utf-32be'` can also be used here to change encoding.)
## Other notes
When decoding, be sure to supply a Buffer to decode() method, otherwise [bad things usually happen](https://github.com/ashtuchkin/iconv-lite/wiki/Use-Buffers-when-decoding).
Untranslatable characters are set to <20> or ?. No transliteration is currently supported.
Node versions 0.10.31 and 0.11.13 are buggy, don't use them (see #65, #77).
Node versions 0.10.31 and 0.11.13 are buggy, don't use them (see [#65](https://github.com/ashtuchkin/iconv-lite/issues/65), [#77](https://github.com/ashtuchkin/iconv-lite/issues/77)).
## Testing
```bash
$ git clone git@github.com:ashtuchkin/iconv-lite.git
$ cd iconv-lite
$ npm install
$ npm test
```sh
git clone git@github.com:ashtuchkin/iconv-lite.git
cd iconv-lite
npm install
npm test
$ # To view performance:
$ node test/performance.js
# To view performance:
npm run test:performance
$ # To view test coverage:
$ npm run coverage
$ open coverage/lcov-report/index.html
# To view test coverage:
npm run test:cov
open coverage/index.html
```
[npm-downloads-image]: https://badgen.net/npm/dm/iconv-lite
[npm-downloads-url]: https://npmcharts.com/compare/iconv-lite?minimal=true
[npm-url]: https://npmjs.org/package/iconv-lite
[npm-version-image]: https://badgen.net/npm/v/iconv-lite
[npm-install-size-image]: https://badgen.net/packagephobia/install/iconv-lite
[npm-install-size-url]: https://packagephobia.com/result?p=iconv-lite
[license-image]: https://img.shields.io/npm/l/iconv-lite.svg
[license-url]: https://github.com/ashtuchkin/iconv-lite/blob/HEAD/LICENSE

View File

@@ -1,555 +1,532 @@
"use strict";
var Buffer = require("safer-buffer").Buffer;
"use strict"
var Buffer = require("safer-buffer").Buffer
// Multibyte codec. In this scheme, a character is represented by 1 or more bytes.
// Our codec supports UTF-16 surrogates, extensions for GB18030 and unicode sequences.
// To save memory and loading time, we read table files only when requested.
exports._dbcs = DBCSCodec;
exports._dbcs = DBCSCodec
var UNASSIGNED = -1,
GB18030_CODE = -2,
SEQ_START = -10,
NODE_START = -1000,
UNASSIGNED_NODE = new Array(0x100),
DEF_CHAR = -1;
for (var i = 0; i < 0x100; i++)
UNASSIGNED_NODE[i] = UNASSIGNED;
var UNASSIGNED = -1
var GB18030_CODE = -2
var SEQ_START = -10
var NODE_START = -1000
var UNASSIGNED_NODE = new Array(0x100)
var DEF_CHAR = -1
for (var i = 0; i < 0x100; i++) { UNASSIGNED_NODE[i] = UNASSIGNED }
// Class DBCSCodec reads and initializes mapping tables.
function DBCSCodec(codecOptions, iconv) {
this.encodingName = codecOptions.encodingName;
if (!codecOptions)
throw new Error("DBCS codec is called without the data.")
if (!codecOptions.table)
throw new Error("Encoding '" + this.encodingName + "' has no data.");
function DBCSCodec (codecOptions, iconv) {
this.encodingName = codecOptions.encodingName
if (!codecOptions) { throw new Error("DBCS codec is called without the data.") }
if (!codecOptions.table) { throw new Error("Encoding '" + this.encodingName + "' has no data.") }
// Load tables.
var mappingTable = codecOptions.table();
// Load tables.
var mappingTable = codecOptions.table()
// Decode tables: MBCS -> Unicode.
// Decode tables: MBCS -> Unicode.
// decodeTables is a trie, encoded as an array of arrays of integers. Internal arrays are trie nodes and all have len = 256.
// Trie root is decodeTables[0].
// Values: >= 0 -> unicode character code. can be > 0xFFFF
// == UNASSIGNED -> unknown/unassigned sequence.
// == GB18030_CODE -> this is the end of a GB18030 4-byte sequence.
// <= NODE_START -> index of the next node in our trie to process next byte.
// <= SEQ_START -> index of the start of a character code sequence, in decodeTableSeq.
this.decodeTables = []
this.decodeTables[0] = UNASSIGNED_NODE.slice(0) // Create root node.
// decodeTables is a trie, encoded as an array of arrays of integers. Internal arrays are trie nodes and all have len = 256.
// Trie root is decodeTables[0].
// Values: >= 0 -> unicode character code. can be > 0xFFFF
// == UNASSIGNED -> unknown/unassigned sequence.
// == GB18030_CODE -> this is the end of a GB18030 4-byte sequence.
// <= NODE_START -> index of the next node in our trie to process next byte.
// <= SEQ_START -> index of the start of a character code sequence, in decodeTableSeq.
this.decodeTables = [];
this.decodeTables[0] = UNASSIGNED_NODE.slice(0); // Create root node.
// Sometimes a MBCS char corresponds to a sequence of unicode chars. We store them as arrays of integers here.
this.decodeTableSeq = []
// Sometimes a MBCS char corresponds to a sequence of unicode chars. We store them as arrays of integers here.
this.decodeTableSeq = [];
// Actual mapping tables consist of chunks. Use them to fill up decode tables.
for (var i = 0; i < mappingTable.length; i++) { this._addDecodeChunk(mappingTable[i]) }
// Actual mapping tables consist of chunks. Use them to fill up decode tables.
for (var i = 0; i < mappingTable.length; i++)
this._addDecodeChunk(mappingTable[i]);
// Load & create GB18030 tables when needed.
if (typeof codecOptions.gb18030 === "function") {
this.gb18030 = codecOptions.gb18030() // Load GB18030 ranges.
this.defaultCharUnicode = iconv.defaultCharUnicode;
// Add GB18030 common decode nodes.
var commonThirdByteNodeIdx = this.decodeTables.length
this.decodeTables.push(UNASSIGNED_NODE.slice(0))
// Encode tables: Unicode -> DBCS.
var commonFourthByteNodeIdx = this.decodeTables.length
this.decodeTables.push(UNASSIGNED_NODE.slice(0))
// `encodeTable` is array mapping from unicode char to encoded char. All its values are integers for performance.
// Because it can be sparse, it is represented as array of buckets by 256 chars each. Bucket can be null.
// Values: >= 0 -> it is a normal char. Write the value (if <=256 then 1 byte, if <=65536 then 2 bytes, etc.).
// == UNASSIGNED -> no conversion found. Output a default char.
// <= SEQ_START -> it's an index in encodeTableSeq, see below. The character starts a sequence.
this.encodeTable = [];
// `encodeTableSeq` is used when a sequence of unicode characters is encoded as a single code. We use a tree of
// objects where keys correspond to characters in sequence and leafs are the encoded dbcs values. A special DEF_CHAR key
// means end of sequence (needed when one sequence is a strict subsequence of another).
// Objects are kept separately from encodeTable to increase performance.
this.encodeTableSeq = [];
// Some chars can be decoded, but need not be encoded.
var skipEncodeChars = {};
if (codecOptions.encodeSkipVals)
for (var i = 0; i < codecOptions.encodeSkipVals.length; i++) {
var val = codecOptions.encodeSkipVals[i];
if (typeof val === 'number')
skipEncodeChars[val] = true;
else
for (var j = val.from; j <= val.to; j++)
skipEncodeChars[j] = true;
// Fill out the tree
var firstByteNode = this.decodeTables[0]
for (var i = 0x81; i <= 0xFE; i++) {
var secondByteNode = this.decodeTables[NODE_START - firstByteNode[i]]
for (var j = 0x30; j <= 0x39; j++) {
if (secondByteNode[j] === UNASSIGNED) {
secondByteNode[j] = NODE_START - commonThirdByteNodeIdx
} else if (secondByteNode[j] > NODE_START) {
throw new Error("gb18030 decode tables conflict at byte 2")
}
// Use decode trie to recursively fill out encode tables.
this._fillEncodeTable(0, 0, skipEncodeChars);
// Add more encoding pairs when needed.
if (codecOptions.encodeAdd) {
for (var uChar in codecOptions.encodeAdd)
if (Object.prototype.hasOwnProperty.call(codecOptions.encodeAdd, uChar))
this._setEncodeChar(uChar.charCodeAt(0), codecOptions.encodeAdd[uChar]);
var thirdByteNode = this.decodeTables[NODE_START - secondByteNode[j]]
for (var k = 0x81; k <= 0xFE; k++) {
if (thirdByteNode[k] === UNASSIGNED) {
thirdByteNode[k] = NODE_START - commonFourthByteNodeIdx
} else if (thirdByteNode[k] === NODE_START - commonFourthByteNodeIdx) {
continue
} else if (thirdByteNode[k] > NODE_START) {
throw new Error("gb18030 decode tables conflict at byte 3")
}
var fourthByteNode = this.decodeTables[NODE_START - thirdByteNode[k]]
for (var l = 0x30; l <= 0x39; l++) {
if (fourthByteNode[l] === UNASSIGNED) { fourthByteNode[l] = GB18030_CODE }
}
}
}
}
}
this.defCharSB = this.encodeTable[0][iconv.defaultCharSingleByte.charCodeAt(0)];
if (this.defCharSB === UNASSIGNED) this.defCharSB = this.encodeTable[0]['?'];
if (this.defCharSB === UNASSIGNED) this.defCharSB = "?".charCodeAt(0);
this.defaultCharUnicode = iconv.defaultCharUnicode
// Encode tables: Unicode -> DBCS.
// Load & create GB18030 tables when needed.
if (typeof codecOptions.gb18030 === 'function') {
this.gb18030 = codecOptions.gb18030(); // Load GB18030 ranges.
// `encodeTable` is array mapping from unicode char to encoded char. All its values are integers for performance.
// Because it can be sparse, it is represented as array of buckets by 256 chars each. Bucket can be null.
// Values: >= 0 -> it is a normal char. Write the value (if <=256 then 1 byte, if <=65536 then 2 bytes, etc.).
// == UNASSIGNED -> no conversion found. Output a default char.
// <= SEQ_START -> it's an index in encodeTableSeq, see below. The character starts a sequence.
this.encodeTable = []
// Add GB18030 decode tables.
var thirdByteNodeIdx = this.decodeTables.length;
var thirdByteNode = this.decodeTables[thirdByteNodeIdx] = UNASSIGNED_NODE.slice(0);
// `encodeTableSeq` is used when a sequence of unicode characters is encoded as a single code. We use a tree of
// objects where keys correspond to characters in sequence and leafs are the encoded dbcs values. A special DEF_CHAR key
// means end of sequence (needed when one sequence is a strict subsequence of another).
// Objects are kept separately from encodeTable to increase performance.
this.encodeTableSeq = []
var fourthByteNodeIdx = this.decodeTables.length;
var fourthByteNode = this.decodeTables[fourthByteNodeIdx] = UNASSIGNED_NODE.slice(0);
// Some chars can be decoded, but need not be encoded.
var skipEncodeChars = {}
if (codecOptions.encodeSkipVals) {
for (var i = 0; i < codecOptions.encodeSkipVals.length; i++) {
var val = codecOptions.encodeSkipVals[i]
if (typeof val === "number") { skipEncodeChars[val] = true } else {
for (var j = val.from; j <= val.to; j++) { skipEncodeChars[j] = true }
}
}
}
for (var i = 0x81; i <= 0xFE; i++) {
var secondByteNodeIdx = NODE_START - this.decodeTables[0][i];
var secondByteNode = this.decodeTables[secondByteNodeIdx];
for (var j = 0x30; j <= 0x39; j++)
secondByteNode[j] = NODE_START - thirdByteNodeIdx;
}
for (var i = 0x81; i <= 0xFE; i++)
thirdByteNode[i] = NODE_START - fourthByteNodeIdx;
for (var i = 0x30; i <= 0x39; i++)
fourthByteNode[i] = GB18030_CODE
}
// Use decode trie to recursively fill out encode tables.
this._fillEncodeTable(0, 0, skipEncodeChars)
// Add more encoding pairs when needed.
if (codecOptions.encodeAdd) {
for (var uChar in codecOptions.encodeAdd) {
if (Object.prototype.hasOwnProperty.call(codecOptions.encodeAdd, uChar)) { this._setEncodeChar(uChar.charCodeAt(0), codecOptions.encodeAdd[uChar]) }
}
}
this.defCharSB = this.encodeTable[0][iconv.defaultCharSingleByte.charCodeAt(0)]
if (this.defCharSB === UNASSIGNED) this.defCharSB = this.encodeTable[0]["?"]
if (this.defCharSB === UNASSIGNED) this.defCharSB = "?".charCodeAt(0)
}
DBCSCodec.prototype.encoder = DBCSEncoder;
DBCSCodec.prototype.decoder = DBCSDecoder;
DBCSCodec.prototype.encoder = DBCSEncoder
DBCSCodec.prototype.decoder = DBCSDecoder
// Decoder helpers
DBCSCodec.prototype._getDecodeTrieNode = function(addr) {
var bytes = [];
for (; addr > 0; addr >>= 8)
bytes.push(addr & 0xFF);
if (bytes.length == 0)
bytes.push(0);
DBCSCodec.prototype._getDecodeTrieNode = function (addr) {
var bytes = []
for (; addr > 0; addr >>>= 8) { bytes.push(addr & 0xFF) }
if (bytes.length == 0) { bytes.push(0) }
var node = this.decodeTables[0];
for (var i = bytes.length-1; i > 0; i--) { // Traverse nodes deeper into the trie.
var val = node[bytes[i]];
var node = this.decodeTables[0]
for (var i = bytes.length - 1; i > 0; i--) { // Traverse nodes deeper into the trie.
var val = node[bytes[i]]
if (val == UNASSIGNED) { // Create new node.
node[bytes[i]] = NODE_START - this.decodeTables.length;
this.decodeTables.push(node = UNASSIGNED_NODE.slice(0));
}
else if (val <= NODE_START) { // Existing node.
node = this.decodeTables[NODE_START - val];
}
else
throw new Error("Overwrite byte in " + this.encodingName + ", addr: " + addr.toString(16));
}
return node;
if (val == UNASSIGNED) { // Create new node.
node[bytes[i]] = NODE_START - this.decodeTables.length
this.decodeTables.push(node = UNASSIGNED_NODE.slice(0))
} else if (val <= NODE_START) { // Existing node.
node = this.decodeTables[NODE_START - val]
} else { throw new Error("Overwrite byte in " + this.encodingName + ", addr: " + addr.toString(16)) }
}
return node
}
DBCSCodec.prototype._addDecodeChunk = function (chunk) {
// First element of chunk is the hex mbcs code where we start.
var curAddr = parseInt(chunk[0], 16)
DBCSCodec.prototype._addDecodeChunk = function(chunk) {
// First element of chunk is the hex mbcs code where we start.
var curAddr = parseInt(chunk[0], 16);
// Choose the decoding node where we'll write our chars.
var writeTable = this._getDecodeTrieNode(curAddr)
curAddr = curAddr & 0xFF
// Choose the decoding node where we'll write our chars.
var writeTable = this._getDecodeTrieNode(curAddr);
curAddr = curAddr & 0xFF;
// Write all other elements of the chunk to the table.
for (var k = 1; k < chunk.length; k++) {
var part = chunk[k]
if (typeof part === "string") { // String, write as-is.
for (var l = 0; l < part.length;) {
var code = part.charCodeAt(l++)
if (code >= 0xD800 && code < 0xDC00) { // Decode surrogate
var codeTrail = part.charCodeAt(l++)
if (codeTrail >= 0xDC00 && codeTrail < 0xE000) { writeTable[curAddr++] = 0x10000 + (code - 0xD800) * 0x400 + (codeTrail - 0xDC00) } else { throw new Error("Incorrect surrogate pair in " + this.encodingName + " at chunk " + chunk[0]) }
} else if (code > 0x0FF0 && code <= 0x0FFF) { // Character sequence (our own encoding used)
var len = 0xFFF - code + 2
var seq = []
for (var m = 0; m < len; m++) { seq.push(part.charCodeAt(l++)) } // Simple variation: don't support surrogates or subsequences in seq.
// Write all other elements of the chunk to the table.
for (var k = 1; k < chunk.length; k++) {
var part = chunk[k];
if (typeof part === "string") { // String, write as-is.
for (var l = 0; l < part.length;) {
var code = part.charCodeAt(l++);
if (0xD800 <= code && code < 0xDC00) { // Decode surrogate
var codeTrail = part.charCodeAt(l++);
if (0xDC00 <= codeTrail && codeTrail < 0xE000)
writeTable[curAddr++] = 0x10000 + (code - 0xD800) * 0x400 + (codeTrail - 0xDC00);
else
throw new Error("Incorrect surrogate pair in " + this.encodingName + " at chunk " + chunk[0]);
}
else if (0x0FF0 < code && code <= 0x0FFF) { // Character sequence (our own encoding used)
var len = 0xFFF - code + 2;
var seq = [];
for (var m = 0; m < len; m++)
seq.push(part.charCodeAt(l++)); // Simple variation: don't support surrogates or subsequences in seq.
writeTable[curAddr++] = SEQ_START - this.decodeTableSeq.length;
this.decodeTableSeq.push(seq);
}
else
writeTable[curAddr++] = code; // Basic char
}
}
else if (typeof part === "number") { // Integer, meaning increasing sequence starting with prev character.
var charCode = writeTable[curAddr - 1] + 1;
for (var l = 0; l < part; l++)
writeTable[curAddr++] = charCode++;
}
else
throw new Error("Incorrect type '" + typeof part + "' given in " + this.encodingName + " at chunk " + chunk[0]);
}
if (curAddr > 0xFF)
throw new Error("Incorrect chunk in " + this.encodingName + " at addr " + chunk[0] + ": too long" + curAddr);
writeTable[curAddr++] = SEQ_START - this.decodeTableSeq.length
this.decodeTableSeq.push(seq)
} else { writeTable[curAddr++] = code } // Basic char
}
} else if (typeof part === "number") { // Integer, meaning increasing sequence starting with prev character.
var charCode = writeTable[curAddr - 1] + 1
for (var l = 0; l < part; l++) { writeTable[curAddr++] = charCode++ }
} else { throw new Error("Incorrect type '" + typeof part + "' given in " + this.encodingName + " at chunk " + chunk[0]) }
}
if (curAddr > 0xFF) { throw new Error("Incorrect chunk in " + this.encodingName + " at addr " + chunk[0] + ": too long" + curAddr) }
}
// Encoder helpers
DBCSCodec.prototype._getEncodeBucket = function(uCode) {
var high = uCode >> 8; // This could be > 0xFF because of astral characters.
if (this.encodeTable[high] === undefined)
this.encodeTable[high] = UNASSIGNED_NODE.slice(0); // Create bucket on demand.
return this.encodeTable[high];
// Return the encode-table bucket that holds the entry for unicode code `uCode`.
// Buckets are keyed by `uCode >> 8`; the slot inside a bucket is chosen by the caller.
// A bucket that does not exist yet is created as a copy of UNASSIGNED_NODE and stored
// in `this.encodeTable`, so the same array is returned on later calls.
DBCSCodec.prototype._getEncodeBucket = function (uCode) {
var high = uCode >> 8 // This could be > 0xFF because of astral characters.
if (this.encodeTable[high] === undefined) {
this.encodeTable[high] = UNASSIGNED_NODE.slice(0)
} // Create bucket on demand.
return this.encodeTable[high]
}
DBCSCodec.prototype._setEncodeChar = function(uCode, dbcsCode) {
var bucket = this._getEncodeBucket(uCode);
var low = uCode & 0xFF;
if (bucket[low] <= SEQ_START)
this.encodeTableSeq[SEQ_START-bucket[low]][DEF_CHAR] = dbcsCode; // There's already a sequence, set a single-char subsequence of it.
else if (bucket[low] == UNASSIGNED)
bucket[low] = dbcsCode;
// Record the mapping unicode code `uCode` -> encoded value `dbcsCode` in the encode table.
//  * If the slot already refers to a sequence node (value <= SEQ_START), `dbcsCode` is
//    stored under the DEF_CHAR key of that node in `this.encodeTableSeq`.
//  * If the slot is UNASSIGNED, `dbcsCode` is written into it.
//  * Any other slot value is left unchanged, so an earlier mapping is not overwritten.
DBCSCodec.prototype._setEncodeChar = function (uCode, dbcsCode) {
var bucket = this._getEncodeBucket(uCode)
var low = uCode & 0xFF // Slot index inside the bucket.
if (bucket[low] <= SEQ_START) { this.encodeTableSeq[SEQ_START - bucket[low]][DEF_CHAR] = dbcsCode } // There's already a sequence, set a single-char subsequence of it.
else if (bucket[low] == UNASSIGNED) { bucket[low] = dbcsCode }
}
// Registers dbcsCode as the encoding of a multi-code-point sequence `seq` (array of code points).
// The slot of the first code point is turned into a reference to a node in this.encodeTableSeq
// (stored as SEQ_START - index); the node maps following code points to either nested nodes or
// final codes. A single-char mapping that was already in the slot is preserved as node[DEF_CHAR].
// NOTE(review): two revisions of this definition are interleaved below (formatting-only differences).
DBCSCodec.prototype._setEncodeSequence = function(seq, dbcsCode) {
// Get the root of character tree according to first character of the sequence.
var uCode = seq[0];
var bucket = this._getEncodeBucket(uCode);
var low = uCode & 0xFF;
DBCSCodec.prototype._setEncodeSequence = function (seq, dbcsCode) {
// Get the root of character tree according to first character of the sequence.
var uCode = seq[0]
var bucket = this._getEncodeBucket(uCode)
var low = uCode & 0xFF
var node;
if (bucket[low] <= SEQ_START) {
// There's already a sequence starting with this character - use it.
node = this.encodeTableSeq[SEQ_START-bucket[low]];
}
else {
// There was no sequence object - allocate a new one.
node = {};
if (bucket[low] !== UNASSIGNED) node[DEF_CHAR] = bucket[low]; // If a char was set before - make it a single-char subsequence.
bucket[low] = SEQ_START - this.encodeTableSeq.length;
this.encodeTableSeq.push(node);
}
var node
if (bucket[low] <= SEQ_START) {
// There's already a sequence starting with this character - use it.
node = this.encodeTableSeq[SEQ_START - bucket[low]]
} else {
// There was no sequence object - allocate a new one.
node = {}
if (bucket[low] !== UNASSIGNED) node[DEF_CHAR] = bucket[low] // If a char was set before - make it a single-char subsequence.
bucket[low] = SEQ_START - this.encodeTableSeq.length
this.encodeTableSeq.push(node)
}
// Traverse the character tree, allocating new nodes as needed.
// NOTE(review): the loop body indexes with uCode, which still holds seq[0] here rather than seq[j];
// this only matters for sequences longer than two code points - confirm whether that is intended.
for (var j = 1; j < seq.length-1; j++) {
var oldVal = node[uCode];
if (typeof oldVal === 'object')
node = oldVal;
else {
node = node[uCode] = {}
if (oldVal !== undefined)
node[DEF_CHAR] = oldVal
}
// Traverse the character tree, allocating new nodes as needed.
for (var j = 1; j < seq.length - 1; j++) {
var oldVal = node[uCode]
if (typeof oldVal === "object") { node = oldVal } else {
node = node[uCode] = {}
if (oldVal !== undefined) { node[DEF_CHAR] = oldVal }
}
}
// Set the leaf to given dbcsCode.
uCode = seq[seq.length-1];
node[uCode] = dbcsCode;
// Set the leaf to given dbcsCode.
uCode = seq[seq.length - 1]
node[uCode] = dbcsCode
}
// Builds the encode tables by walking the decode trie node this.decodeTables[nodeIdx].
//   nodeIdx         - index of the decode trie node to walk.
//   prefix          - multibyte code accumulated so far; each entry's code is prefix + byte value.
//   skipEncodeChars - lookup keyed by multibyte code; truthy entries are not added to the encode table.
// For each of the 256 entries: a non-negative value is a code point (-> _setEncodeChar), a value
// <= NODE_START refers to a child trie node (-> recursion with the code shifted left by 8 bits),
// and a value <= SEQ_START refers to a code point sequence (-> _setEncodeSequence).
// NOTE(review): two revisions are interleaved below and they differ in behaviour: the older one
// returns nothing, while the newer one returns true when anything was registered in this node or
// its subtree and remembers empty child nodes in subNodeEmpty so they are walked only once.
DBCSCodec.prototype._fillEncodeTable = function(nodeIdx, prefix, skipEncodeChars) {
var node = this.decodeTables[nodeIdx];
for (var i = 0; i < 0x100; i++) {
var uCode = node[i];
var mbCode = prefix + i;
if (skipEncodeChars[mbCode])
continue;
DBCSCodec.prototype._fillEncodeTable = function (nodeIdx, prefix, skipEncodeChars) {
var node = this.decodeTables[nodeIdx]
var hasValues = false
var subNodeEmpty = {}
for (var i = 0; i < 0x100; i++) {
var uCode = node[i]
var mbCode = prefix + i
if (skipEncodeChars[mbCode]) { continue }
if (uCode >= 0)
this._setEncodeChar(uCode, mbCode);
else if (uCode <= NODE_START)
this._fillEncodeTable(NODE_START - uCode, mbCode << 8, skipEncodeChars);
else if (uCode <= SEQ_START)
this._setEncodeSequence(this.decodeTableSeq[SEQ_START - uCode], mbCode);
if (uCode >= 0) {
this._setEncodeChar(uCode, mbCode)
hasValues = true
} else if (uCode <= NODE_START) {
var subNodeIdx = NODE_START - uCode
if (!subNodeEmpty[subNodeIdx]) { // Skip empty subtrees (they are too large in gb18030).
var newPrefix = (mbCode << 8) >>> 0 // NOTE: '>>> 0' keeps 32-bit num positive.
if (this._fillEncodeTable(subNodeIdx, newPrefix, skipEncodeChars)) { hasValues = true } else { subNodeEmpty[subNodeIdx] = true }
}
} else if (uCode <= SEQ_START) {
this._setEncodeSequence(this.decodeTableSeq[SEQ_START - uCode], mbCode)
hasValues = true
}
}
return hasValues
}
// == Encoder ==================================================================
// Streaming encoder (JS string -> bytes) for a DBCS codec.
//   options - not read by this constructor.
//   codec   - codec instance supplying the shared lookup tables (encodeTable, encodeTableSeq),
//             the single-byte default character (defCharSB) and the optional gb18030 range data.
// NOTE(review): two revisions of this definition are interleaved below (formatting-only differences).
function DBCSEncoder(options, codec) {
// Encoder state
this.leadSurrogate = -1; // Pending lead surrogate carried between characters/chunks; -1 means none.
this.seqObj = undefined; // Current node of a partially matched character sequence, if any.
// Static data
this.encodeTable = codec.encodeTable;
this.encodeTableSeq = codec.encodeTableSeq;
this.defaultCharSingleByte = codec.defCharSB;
this.gb18030 = codec.gb18030;
function DBCSEncoder (options, codec) {
// Encoder state
this.leadSurrogate = -1
this.seqObj = undefined
// Static data
this.encodeTable = codec.encodeTable
this.encodeTableSeq = codec.encodeTableSeq
this.defaultCharSingleByte = codec.defCharSB
this.gb18030 = codec.gb18030
}
// DBCSEncoder.prototype.write(str): encodes one chunk of a JS string and returns the bytes as a Buffer.
// - The output buffer is sized at 3 bytes per UTF-16 code unit (4 when gb18030 data is present).
// - Surrogate pairs are combined into one code point; an unpaired surrogate is treated as
//   UNASSIGNED and written as this.defaultCharSingleByte.
// - seqObj tracks a partially matched multi-character sequence; leadSurrogate tracks a pending lead
//   surrogate. Both are stored back on `this` at the end so the next chunk can continue.
// DBCSEncoder.prototype.end(): flushes any pending state left by write(); returns undefined when
// there is nothing pending, otherwise a Buffer with the flushed bytes.
// NOTE(review): old and new revisions of BOTH write() and end() are interleaved in this span
// (lines of the old end() appear inside the new write()), so it is not valid JavaScript as it stands.
DBCSEncoder.prototype.write = function(str) {
var newBuf = Buffer.alloc(str.length * (this.gb18030 ? 4 : 3)),
leadSurrogate = this.leadSurrogate,
seqObj = this.seqObj, nextChar = -1,
i = 0, j = 0;
DBCSEncoder.prototype.write = function (str) {
var newBuf = Buffer.alloc(str.length * (this.gb18030 ? 4 : 3))
var leadSurrogate = this.leadSurrogate
var seqObj = this.seqObj
var nextChar = -1
var i = 0; var j = 0
while (true) {
// 0. Get next character.
if (nextChar === -1) {
if (i == str.length) break;
var uCode = str.charCodeAt(i++);
}
else {
var uCode = nextChar;
nextChar = -1;
}
// 1. Handle surrogates.
if (0xD800 <= uCode && uCode < 0xE000) { // Char is one of surrogates.
if (uCode < 0xDC00) { // We've got lead surrogate.
if (leadSurrogate === -1) {
leadSurrogate = uCode;
continue;
} else {
leadSurrogate = uCode;
// Double lead surrogate found.
uCode = UNASSIGNED;
}
} else { // We've got trail surrogate.
if (leadSurrogate !== -1) {
uCode = 0x10000 + (leadSurrogate - 0xD800) * 0x400 + (uCode - 0xDC00);
leadSurrogate = -1;
} else {
// Incomplete surrogate pair - only trail surrogate found.
uCode = UNASSIGNED;
}
}
}
else if (leadSurrogate !== -1) {
// Incomplete surrogate pair - only lead surrogate found.
nextChar = uCode; uCode = UNASSIGNED; // Write an error, then current char.
leadSurrogate = -1;
}
// 2. Convert uCode character.
var dbcsCode = UNASSIGNED;
if (seqObj !== undefined && uCode != UNASSIGNED) { // We are in the middle of the sequence
var resCode = seqObj[uCode];
if (typeof resCode === 'object') { // Sequence continues.
seqObj = resCode;
continue;
} else if (typeof resCode == 'number') { // Sequence finished. Write it.
dbcsCode = resCode;
} else if (resCode == undefined) { // Current character is not part of the sequence.
// Try default character for this sequence
resCode = seqObj[DEF_CHAR];
if (resCode !== undefined) {
dbcsCode = resCode; // Found. Write it.
nextChar = uCode; // Current character will be written too in the next iteration.
} else {
// TODO: What if we have no default? (resCode == undefined)
// Then, we should write first char of the sequence as-is and try the rest recursively.
// Didn't do it for now because no encoding has this situation yet.
// Currently, just skip the sequence and write current char.
}
}
seqObj = undefined;
}
else if (uCode >= 0) { // Regular character
var subtable = this.encodeTable[uCode >> 8];
if (subtable !== undefined)
dbcsCode = subtable[uCode & 0xFF];
if (dbcsCode <= SEQ_START) { // Sequence start
seqObj = this.encodeTableSeq[SEQ_START-dbcsCode];
continue;
}
if (dbcsCode == UNASSIGNED && this.gb18030) {
// Use GB18030 algorithm to find character(s) to write.
var idx = findIdx(this.gb18030.uChars, uCode);
if (idx != -1) {
var dbcsCode = this.gb18030.gbChars[idx] + (uCode - this.gb18030.uChars[idx]);
newBuf[j++] = 0x81 + Math.floor(dbcsCode / 12600); dbcsCode = dbcsCode % 12600;
newBuf[j++] = 0x30 + Math.floor(dbcsCode / 1260); dbcsCode = dbcsCode % 1260;
newBuf[j++] = 0x81 + Math.floor(dbcsCode / 10); dbcsCode = dbcsCode % 10;
newBuf[j++] = 0x30 + dbcsCode;
continue;
}
}
}
// 3. Write dbcsCode character.
if (dbcsCode === UNASSIGNED)
dbcsCode = this.defaultCharSingleByte;
if (dbcsCode < 0x100) {
newBuf[j++] = dbcsCode;
}
else if (dbcsCode < 0x10000) {
newBuf[j++] = dbcsCode >> 8; // high byte
newBuf[j++] = dbcsCode & 0xFF; // low byte
}
else {
newBuf[j++] = dbcsCode >> 16;
newBuf[j++] = (dbcsCode >> 8) & 0xFF;
newBuf[j++] = dbcsCode & 0xFF;
}
while (true) {
// 0. Get next character.
if (nextChar === -1) {
if (i == str.length) break
var uCode = str.charCodeAt(i++)
} else {
var uCode = nextChar
nextChar = -1
}
this.seqObj = seqObj;
this.leadSurrogate = leadSurrogate;
return newBuf.slice(0, j);
}
// Older revision of end(); its remaining lines are scattered through the newer write() below.
DBCSEncoder.prototype.end = function() {
if (this.leadSurrogate === -1 && this.seqObj === undefined)
return; // All clean. Most often case.
var newBuf = Buffer.alloc(10), j = 0;
if (this.seqObj) { // We're in the sequence.
var dbcsCode = this.seqObj[DEF_CHAR];
if (dbcsCode !== undefined) { // Write beginning of the sequence.
if (dbcsCode < 0x100) {
newBuf[j++] = dbcsCode;
}
else {
newBuf[j++] = dbcsCode >> 8; // high byte
newBuf[j++] = dbcsCode & 0xFF; // low byte
}
// 1. Handle surrogates.
if (uCode >= 0xD800 && uCode < 0xE000) { // Char is one of surrogates.
if (uCode < 0xDC00) { // We've got lead surrogate.
if (leadSurrogate === -1) {
leadSurrogate = uCode
continue
} else {
// See todo above.
leadSurrogate = uCode
// Double lead surrogate found.
uCode = UNASSIGNED
}
this.seqObj = undefined;
} else { // We've got trail surrogate.
if (leadSurrogate !== -1) {
uCode = 0x10000 + (leadSurrogate - 0xD800) * 0x400 + (uCode - 0xDC00)
leadSurrogate = -1
} else {
// Incomplete surrogate pair - only trail surrogate found.
uCode = UNASSIGNED
}
}
} else if (leadSurrogate !== -1) {
// Incomplete surrogate pair - only lead surrogate found.
nextChar = uCode; uCode = UNASSIGNED // Write an error, then current char.
leadSurrogate = -1
}
if (this.leadSurrogate !== -1) {
// Incomplete surrogate pair - only lead surrogate found.
newBuf[j++] = this.defaultCharSingleByte;
this.leadSurrogate = -1;
// 2. Convert uCode character.
var dbcsCode = UNASSIGNED
if (seqObj !== undefined && uCode != UNASSIGNED) { // We are in the middle of the sequence
var resCode = seqObj[uCode]
if (typeof resCode === "object") { // Sequence continues.
seqObj = resCode
continue
} else if (typeof resCode === "number") { // Sequence finished. Write it.
dbcsCode = resCode
} else if (resCode == undefined) { // Current character is not part of the sequence.
// Try default character for this sequence
resCode = seqObj[DEF_CHAR]
if (resCode !== undefined) {
dbcsCode = resCode // Found. Write it.
nextChar = uCode // Current character will be written too in the next iteration.
} else {
// TODO: What if we have no default? (resCode == undefined)
// Then, we should write first char of the sequence as-is and try the rest recursively.
// Didn't do it for now because no encoding has this situation yet.
// Currently, just skip the sequence and write current char.
}
}
seqObj = undefined
} else if (uCode >= 0) { // Regular character
var subtable = this.encodeTable[uCode >> 8]
if (subtable !== undefined) { dbcsCode = subtable[uCode & 0xFF] }
if (dbcsCode <= SEQ_START) { // Sequence start
seqObj = this.encodeTableSeq[SEQ_START - dbcsCode]
continue
}
if (dbcsCode == UNASSIGNED && this.gb18030) {
// Use GB18030 algorithm to find character(s) to write.
var idx = findIdx(this.gb18030.uChars, uCode)
if (idx != -1) {
var dbcsCode = this.gb18030.gbChars[idx] + (uCode - this.gb18030.uChars[idx])
newBuf[j++] = 0x81 + Math.floor(dbcsCode / 12600); dbcsCode = dbcsCode % 12600
newBuf[j++] = 0x30 + Math.floor(dbcsCode / 1260); dbcsCode = dbcsCode % 1260
newBuf[j++] = 0x81 + Math.floor(dbcsCode / 10); dbcsCode = dbcsCode % 10
newBuf[j++] = 0x30 + dbcsCode
continue
}
}
}
return newBuf.slice(0, j);
// 3. Write dbcsCode character.
// The newer revision adds a 4-byte branch: codes >= 0x1000000 are written big-endian using
// unsigned shifts (>>>).
if (dbcsCode === UNASSIGNED) { dbcsCode = this.defaultCharSingleByte }
if (dbcsCode < 0x100) {
newBuf[j++] = dbcsCode
} else if (dbcsCode < 0x10000) {
newBuf[j++] = dbcsCode >> 8 // high byte
newBuf[j++] = dbcsCode & 0xFF // low byte
} else if (dbcsCode < 0x1000000) {
newBuf[j++] = dbcsCode >> 16
newBuf[j++] = (dbcsCode >> 8) & 0xFF
newBuf[j++] = dbcsCode & 0xFF
} else {
newBuf[j++] = dbcsCode >>> 24
newBuf[j++] = (dbcsCode >>> 16) & 0xFF
newBuf[j++] = (dbcsCode >>> 8) & 0xFF
newBuf[j++] = dbcsCode & 0xFF
}
}
this.seqObj = seqObj
this.leadSurrogate = leadSurrogate
return newBuf.slice(0, j)
}
// Newer revision of end(): flushes the DEF_CHAR of a pending sequence and/or the default character
// for a dangling lead surrogate, then clears both pieces of state.
// NOTE(review): the DEF_CHAR flush writes at most two bytes, whereas write() also handles 3- and
// 4-byte codes - confirm DEF_CHAR values never exceed 0xFFFF.
DBCSEncoder.prototype.end = function () {
if (this.leadSurrogate === -1 && this.seqObj === undefined) { return } // All clean. Most often case.
var newBuf = Buffer.alloc(10); var j = 0
if (this.seqObj) { // We're in the sequence.
var dbcsCode = this.seqObj[DEF_CHAR]
if (dbcsCode !== undefined) { // Write beginning of the sequence.
if (dbcsCode < 0x100) {
newBuf[j++] = dbcsCode
} else {
newBuf[j++] = dbcsCode >> 8 // high byte
newBuf[j++] = dbcsCode & 0xFF // low byte
}
} else {
// See todo above.
}
this.seqObj = undefined
}
if (this.leadSurrogate !== -1) {
// Incomplete surrogate pair - only lead surrogate found.
newBuf[j++] = this.defaultCharSingleByte
this.leadSurrogate = -1
}
return newBuf.slice(0, j)
}
// Export for testing: exposes the module-level findIdx binary search on the encoder prototype.
// NOTE(review): the assignment appears twice because two revisions (with/without semicolon) are interleaved.
DBCSEncoder.prototype.findIdx = findIdx;
DBCSEncoder.prototype.findIdx = findIdx
// == Decoder ==================================================================
// Streaming decoder (bytes -> JS string) for a DBCS codec.
//   options - not read by this constructor.
//   codec   - codec instance supplying the decode trie (decodeTables), decoded sequences
//             (decodeTableSeq), the default replacement character and optional gb18030 range data.
// NOTE(review): two revisions are interleaved below; the older one keeps the unparsed tail of the
// previous chunk in a Buffer (prevBuf), the newer one in a plain array (prevBytes).
function DBCSDecoder(options, codec) {
// Decoder state
this.nodeIdx = 0;
this.prevBuf = Buffer.alloc(0);
function DBCSDecoder (options, codec) {
// Decoder state
this.nodeIdx = 0 // Index of the current decode trie node; 0 is the root.
this.prevBytes = [] // Bytes of an incomplete multi-byte sequence carried over from the previous chunk.
// Static data
this.decodeTables = codec.decodeTables;
this.decodeTableSeq = codec.decodeTableSeq;
this.defaultCharUnicode = codec.defaultCharUnicode;
this.gb18030 = codec.gb18030;
// Static data
this.decodeTables = codec.decodeTables
this.decodeTableSeq = codec.decodeTableSeq
this.defaultCharUnicode = codec.defaultCharUnicode
this.gb18030 = codec.gb18030
}
// Decodes one chunk of bytes and returns the decoded text as a string.
// - Each byte is looked up in the current trie node (this.decodeTables[nodeIdx]); the result is a
//   code point, a reference to the next trie node, a reference to a code point sequence, the
//   GB18030_CODE marker (four-byte algorithmic range) or UNASSIGNED.
// - On UNASSIGNED the default character is emitted and parsing restarts one byte after the start of
//   the failed sequence; `i` can therefore become negative, in which case bytes are read from the
//   tail kept from the previous chunk.
// - Output is written as UTF-16 code units, low byte first, and converted with toString("ucs2").
// - The unparsed tail and the current trie node are stored on `this` for the next call.
// NOTE(review): two revisions are interleaved below (prevBuf/Buffer based vs prevBytes/array based),
// so this span is not valid JavaScript as it stands.
// NOTE(review): newBuf is sized from buf.length alone; bytes replayed from the previous chunk are not
// counted - confirm the output can never exceed buf.length * 2 bytes.
DBCSDecoder.prototype.write = function(buf) {
var newBuf = Buffer.alloc(buf.length*2),
nodeIdx = this.nodeIdx,
prevBuf = this.prevBuf, prevBufOffset = this.prevBuf.length,
seqStart = -this.prevBuf.length, // idx of the start of current parsed sequence.
uCode;
DBCSDecoder.prototype.write = function (buf) {
var newBuf = Buffer.alloc(buf.length * 2)
var nodeIdx = this.nodeIdx
var prevBytes = this.prevBytes; var prevOffset = this.prevBytes.length
var seqStart = -this.prevBytes.length // idx of the start of current parsed sequence.
var uCode
if (prevBufOffset > 0) // Make prev buf overlap a little to make it easier to slice later.
prevBuf = Buffer.concat([prevBuf, buf.slice(0, 10)]);
for (var i = 0, j = 0; i < buf.length; i++) {
var curByte = (i >= 0) ? buf[i] : prevBuf[i + prevBufOffset];
for (var i = 0, j = 0; i < buf.length; i++) {
var curByte = (i >= 0) ? buf[i] : prevBytes[i + prevOffset]
// Lookup in current trie node.
var uCode = this.decodeTables[nodeIdx][curByte];
// Lookup in current trie node.
var uCode = this.decodeTables[nodeIdx][curByte]
if (uCode >= 0) {
// Normal character, just use it.
}
else if (uCode === UNASSIGNED) { // Unknown char.
// TODO: Callback with seq.
//var curSeq = (seqStart >= 0) ? buf.slice(seqStart, i+1) : prevBuf.slice(seqStart + prevBufOffset, i+1 + prevBufOffset);
i = seqStart; // Try to parse again, after skipping first byte of the sequence ('i' will be incremented by 'for' cycle).
uCode = this.defaultCharUnicode.charCodeAt(0);
}
else if (uCode === GB18030_CODE) {
var curSeq = (seqStart >= 0) ? buf.slice(seqStart, i+1) : prevBuf.slice(seqStart + prevBufOffset, i+1 + prevBufOffset);
var ptr = (curSeq[0]-0x81)*12600 + (curSeq[1]-0x30)*1260 + (curSeq[2]-0x81)*10 + (curSeq[3]-0x30);
var idx = findIdx(this.gb18030.gbChars, ptr);
uCode = this.gb18030.uChars[idx] + ptr - this.gb18030.gbChars[idx];
}
else if (uCode <= NODE_START) { // Go to next trie node.
nodeIdx = NODE_START - uCode;
continue;
}
else if (uCode <= SEQ_START) { // Output a sequence of chars.
var seq = this.decodeTableSeq[SEQ_START - uCode];
for (var k = 0; k < seq.length - 1; k++) {
uCode = seq[k];
newBuf[j++] = uCode & 0xFF;
newBuf[j++] = uCode >> 8;
}
uCode = seq[seq.length-1];
}
else
throw new Error("iconv-lite internal error: invalid decoding table value " + uCode + " at " + nodeIdx + "/" + curByte);
if (uCode >= 0) {
// Normal character, just use it.
} else if (uCode === UNASSIGNED) { // Unknown char.
// TODO: Callback with seq.
uCode = this.defaultCharUnicode.charCodeAt(0)
i = seqStart // Skip one byte ('i' will be incremented by the for loop) and try to parse again.
} else if (uCode === GB18030_CODE) {
// Rebuild the linear GB18030 pointer from the last four bytes; when the sequence started in the
// previous chunk (i < 3), the missing leading bytes are taken from prevBytes.
if (i >= 3) {
var ptr = (buf[i - 3] - 0x81) * 12600 + (buf[i - 2] - 0x30) * 1260 + (buf[i - 1] - 0x81) * 10 + (curByte - 0x30)
} else {
var ptr = (prevBytes[i - 3 + prevOffset] - 0x81) * 12600 +
(((i - 2 >= 0) ? buf[i - 2] : prevBytes[i - 2 + prevOffset]) - 0x30) * 1260 +
(((i - 1 >= 0) ? buf[i - 1] : prevBytes[i - 1 + prevOffset]) - 0x81) * 10 +
(curByte - 0x30)
}
var idx = findIdx(this.gb18030.gbChars, ptr)
uCode = this.gb18030.uChars[idx] + ptr - this.gb18030.gbChars[idx]
} else if (uCode <= NODE_START) { // Go to next trie node.
nodeIdx = NODE_START - uCode
continue
} else if (uCode <= SEQ_START) { // Output a sequence of chars.
var seq = this.decodeTableSeq[SEQ_START - uCode]
for (var k = 0; k < seq.length - 1; k++) {
uCode = seq[k]
newBuf[j++] = uCode & 0xFF
newBuf[j++] = uCode >> 8
}
uCode = seq[seq.length - 1]
} else { throw new Error("iconv-lite internal error: invalid decoding table value " + uCode + " at " + nodeIdx + "/" + curByte) }
// Write the character to buffer, handling higher planes using surrogate pair.
if (uCode > 0xFFFF) {
uCode -= 0x10000;
var uCodeLead = 0xD800 + Math.floor(uCode / 0x400);
newBuf[j++] = uCodeLead & 0xFF;
newBuf[j++] = uCodeLead >> 8;
// Write the character to buffer, handling higher planes using surrogate pair.
if (uCode >= 0x10000) {
uCode -= 0x10000
var uCodeLead = 0xD800 | (uCode >> 10)
newBuf[j++] = uCodeLead & 0xFF
newBuf[j++] = uCodeLead >> 8
uCode = 0xDC00 + uCode % 0x400;
}
newBuf[j++] = uCode & 0xFF;
newBuf[j++] = uCode >> 8;
// Reset trie node.
nodeIdx = 0; seqStart = i+1;
uCode = 0xDC00 | (uCode & 0x3FF)
}
newBuf[j++] = uCode & 0xFF
newBuf[j++] = uCode >> 8
this.nodeIdx = nodeIdx;
this.prevBuf = (seqStart >= 0) ? buf.slice(seqStart) : prevBuf.slice(seqStart + prevBufOffset);
return newBuf.slice(0, j).toString('ucs2');
// Reset trie node.
nodeIdx = 0; seqStart = i + 1
}
this.nodeIdx = nodeIdx
// Keep the bytes of the unfinished sequence: either the tail of buf, or (when the sequence started
// in an earlier chunk) the relevant tail of prevBytes followed by all of buf.
this.prevBytes = (seqStart >= 0)
? Array.prototype.slice.call(buf, seqStart)
: prevBytes.slice(seqStart + prevOffset).concat(Array.prototype.slice.call(buf))
return newBuf.slice(0, j).toString("ucs2")
}
// Flushes the decoder at end of input and returns the remaining decoded text (possibly "").
// While unparsed bytes remain, the first one is replaced by the default character and the rest are
// re-parsed from the trie root via write(); write() may leave a new unparsed tail, hence the loop.
// State (pending bytes, trie node) is reset before returning.
// NOTE(review): two revisions are interleaved below (prevBuf/Buffer based vs prevBytes/array based),
// so this span is not valid JavaScript as it stands.
DBCSDecoder.prototype.end = function() {
var ret = '';
DBCSDecoder.prototype.end = function () {
var ret = ""
// Try to parse all remaining chars.
while (this.prevBuf.length > 0) {
// Skip 1 character in the buffer.
ret += this.defaultCharUnicode;
var buf = this.prevBuf.slice(1);
// Try to parse all remaining chars.
while (this.prevBytes.length > 0) {
// Skip 1 character in the buffer.
ret += this.defaultCharUnicode
var bytesArr = this.prevBytes.slice(1)
// Parse remaining as usual.
this.prevBuf = Buffer.alloc(0);
this.nodeIdx = 0;
if (buf.length > 0)
ret += this.write(buf);
}
// Parse remaining as usual.
this.prevBytes = []
this.nodeIdx = 0
if (bytesArr.length > 0) { ret += this.write(bytesArr) }
}
this.nodeIdx = 0;
return ret;
this.prevBytes = []
this.nodeIdx = 0
return ret
}
// Binary search used for the GB18030 range tables.
//   table - array-like of numbers sorted in ascending order.
//   val   - value to locate.
// Returns the largest index i such that table[i] <= val, or -1 when there is no such index
// (val is smaller than the first element, or the table is empty).
function findIdx (table, val) {
  // An empty table has no valid index; without this guard the function would return 0.
  if (table.length === 0 || table[0] > val) { return -1 }

  var l = 0
  var r = table.length
  // Invariant: table[l] <= val < table[r], treating table[table.length] as +Infinity.
  while (l < r - 1) {
    var mid = l + ((r - l + 1) >> 1)
    if (table[mid] <= val) { l = mid } else { r = mid }
  }
  return l
}

View File

@@ -1,176 +1,185 @@
"use strict";
"use strict"
// Description of supported double byte encodings and aliases.
// Tables are not require()-d until they are needed to speed up library load.
// require()-s are direct to support Browserify.
// Table of double-byte encodings and their aliases. An entry is either an object describing a
// codec (type "_dbcs", a lazy `table` loader and optional encodeAdd / encodeSkipVals / gb18030
// settings) or a string naming the entry it is an alias of.
// NOTE(review): two revisions of this table are interleaved below (quoted keys with single-quoted
// strings vs unquoted keys with double-quoted strings), so entries and comments appear twice and the
// span is not valid JavaScript as it stands. The revisions also differ in big5hkscs.encodeSkipVals.
module.exports = {
// == Japanese/ShiftJIS ====================================================
// All japanese encodings are based on JIS X set of standards:
// JIS X 0201 - Single-byte encoding of ASCII + ¥ + Kana chars at 0xA1-0xDF.
// JIS X 0208 - Main set of 6879 characters, placed in 94x94 plane, to be encoded by 2 bytes.
// Has several variations in 1978, 1983, 1990 and 1997.
// JIS X 0212 - Supplementary plane of 6067 chars in 94x94 plane. 1990. Effectively dead.
// JIS X 0213 - Extension and modern replacement of 0208 and 0212. Total chars: 11233.
// 2 planes, first is superset of 0208, second - revised 0212.
// Introduced in 2000, revised 2004. Some characters are in Unicode Plane 2 (0x2xxxx)
// Byte encodings are:
// * Shift_JIS: Compatible with 0201, uses not defined chars in top half as lead bytes for double-byte
// encoding of 0208. Lead byte ranges: 0x81-0x9F, 0xE0-0xEF; Trail byte ranges: 0x40-0x7E, 0x80-0x9E, 0x9F-0xFC.
// Windows CP932 is a superset of Shift_JIS. Some companies added more chars, notably KDDI.
// * EUC-JP: Up to 3 bytes per character. Used mostly on *nixes.
// 0x00-0x7F - lower part of 0201
// 0x8E, 0xA1-0xDF - upper part of 0201
// (0xA1-0xFE)x2 - 0208 plane (94x94).
// 0x8F, (0xA1-0xFE)x2 - 0212 plane (94x94).
// * JIS X 0208: 7-bit, direct encoding of 0208. Byte ranges: 0x21-0x7E (94 values). Uncommon.
// Used as-is in ISO2022 family.
// * ISO2022-JP: Stateful encoding, with escape sequences to switch between ASCII,
// 0201-1976 Roman, 0208-1978, 0208-1983.
// * ISO2022-JP-1: Adds esc seq for 0212-1990.
// * ISO2022-JP-2: Adds esc seq for GB2312-1980, KSX1001-1992, ISO8859-1, ISO8859-7.
// * ISO2022-JP-3: Adds esc seq for 0201-1976 Kana set, 0213-2000 Planes 1, 2.
// * ISO2022-JP-2004: Adds 0213-2004 Plane 1.
//
// After JIS X 0213 appeared, Shift_JIS-2004, EUC-JISX0213 and ISO2022-JP-2004 followed, with just changing the planes.
//
// Overall, it seems that it's a mess :( http://www8.plala.or.jp/tkubota1/unicode-symbols-map2.html
// == Japanese/ShiftJIS ====================================================
// All japanese encodings are based on JIS X set of standards:
// JIS X 0201 - Single-byte encoding of ASCII + ¥ + Kana chars at 0xA1-0xDF.
// JIS X 0208 - Main set of 6879 characters, placed in 94x94 plane, to be encoded by 2 bytes.
// Has several variations in 1978, 1983, 1990 and 1997.
// JIS X 0212 - Supplementary plane of 6067 chars in 94x94 plane. 1990. Effectively dead.
// JIS X 0213 - Extension and modern replacement of 0208 and 0212. Total chars: 11233.
// 2 planes, first is superset of 0208, second - revised 0212.
// Introduced in 2000, revised 2004. Some characters are in Unicode Plane 2 (0x2xxxx)
'shiftjis': {
type: '_dbcs',
table: function() { return require('./tables/shiftjis.json') },
encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
encodeSkipVals: [{from: 0xED40, to: 0xF940}],
},
'csshiftjis': 'shiftjis',
'mskanji': 'shiftjis',
'sjis': 'shiftjis',
'windows31j': 'shiftjis',
'ms31j': 'shiftjis',
'xsjis': 'shiftjis',
'windows932': 'shiftjis',
'ms932': 'shiftjis',
'932': 'shiftjis',
'cp932': 'shiftjis',
// Byte encodings are:
// * Shift_JIS: Compatible with 0201, uses not defined chars in top half as lead bytes for double-byte
// encoding of 0208. Lead byte ranges: 0x81-0x9F, 0xE0-0xEF; Trail byte ranges: 0x40-0x7E, 0x80-0x9E, 0x9F-0xFC.
// Windows CP932 is a superset of Shift_JIS. Some companies added more chars, notably KDDI.
// * EUC-JP: Up to 3 bytes per character. Used mostly on *nixes.
// 0x00-0x7F - lower part of 0201
// 0x8E, 0xA1-0xDF - upper part of 0201
// (0xA1-0xFE)x2 - 0208 plane (94x94).
// 0x8F, (0xA1-0xFE)x2 - 0212 plane (94x94).
// * JIS X 0208: 7-bit, direct encoding of 0208. Byte ranges: 0x21-0x7E (94 values). Uncommon.
// Used as-is in ISO2022 family.
// * ISO2022-JP: Stateful encoding, with escape sequences to switch between ASCII,
// 0201-1976 Roman, 0208-1978, 0208-1983.
// * ISO2022-JP-1: Adds esc seq for 0212-1990.
// * ISO2022-JP-2: Adds esc seq for GB2312-1980, KSX1001-1992, ISO8859-1, ISO8859-7.
// * ISO2022-JP-3: Adds esc seq for 0201-1976 Kana set, 0213-2000 Planes 1, 2.
// * ISO2022-JP-2004: Adds 0213-2004 Plane 1.
//
// After JIS X 0213 appeared, Shift_JIS-2004, EUC-JISX0213 and ISO2022-JP-2004 followed, with just changing the planes.
//
// Overall, it seems that it's a mess :( http://www8.plala.or.jp/tkubota1/unicode-symbols-map2.html
'eucjp': {
type: '_dbcs',
table: function() { return require('./tables/eucjp.json') },
encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
},
shiftjis: {
type: "_dbcs",
table: function () { return require("./tables/shiftjis.json") },
encodeAdd: { "\u00a5": 0x5C, "\u203E": 0x7E },
encodeSkipVals: [{ from: 0xED40, to: 0xF940 }]
},
csshiftjis: "shiftjis",
mskanji: "shiftjis",
sjis: "shiftjis",
windows31j: "shiftjis",
ms31j: "shiftjis",
xsjis: "shiftjis",
windows932: "shiftjis",
ms932: "shiftjis",
932: "shiftjis",
cp932: "shiftjis",
// TODO: KDDI extension to Shift_JIS
// TODO: IBM CCSID 942 = CP932, but F0-F9 custom chars and other char changes.
// TODO: IBM CCSID 943 = Shift_JIS = CP932 with original Shift_JIS lower 128 chars.
eucjp: {
type: "_dbcs",
table: function () { return require("./tables/eucjp.json") },
encodeAdd: { "\u00a5": 0x5C, "\u203E": 0x7E }
},
// TODO: KDDI extension to Shift_JIS
// TODO: IBM CCSID 942 = CP932, but F0-F9 custom chars and other char changes.
// TODO: IBM CCSID 943 = Shift_JIS = CP932 with original Shift_JIS lower 128 chars.
// == Chinese/GBK ==========================================================
// http://en.wikipedia.org/wiki/GBK
// We mostly implement W3C recommendation: https://www.w3.org/TR/encoding/#gbk-encoder
// == Chinese/GBK ==========================================================
// http://en.wikipedia.org/wiki/GBK
// We mostly implement W3C recommendation: https://www.w3.org/TR/encoding/#gbk-encoder
// Oldest GB2312 (1981, ~7600 chars) is a subset of CP936
'gb2312': 'cp936',
'gb231280': 'cp936',
'gb23121980': 'cp936',
'csgb2312': 'cp936',
'csiso58gb231280': 'cp936',
'euccn': 'cp936',
// Oldest GB2312 (1981, ~7600 chars) is a subset of CP936
gb2312: "cp936",
gb231280: "cp936",
gb23121980: "cp936",
csgb2312: "cp936",
csiso58gb231280: "cp936",
euccn: "cp936",
// Microsoft's CP936 is a subset and approximation of GBK.
'windows936': 'cp936',
'ms936': 'cp936',
'936': 'cp936',
'cp936': {
type: '_dbcs',
table: function() { return require('./tables/cp936.json') },
},
// Microsoft's CP936 is a subset and approximation of GBK.
windows936: "cp936",
ms936: "cp936",
936: "cp936",
cp936: {
type: "_dbcs",
table: function () { return require("./tables/cp936.json") }
},
// GBK (~22000 chars) is an extension of CP936 that added user-mapped chars and some other.
'gbk': {
type: '_dbcs',
table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
},
'xgbk': 'gbk',
'isoir58': 'gbk',
// GBK (~22000 chars) is an extension of CP936 that added user-mapped chars and some other.
gbk: {
type: "_dbcs",
table: function () { return require("./tables/cp936.json").concat(require("./tables/gbk-added.json")) }
},
xgbk: "gbk",
isoir58: "gbk",
// GB18030 is an algorithmic extension of GBK.
// Main source: https://www.w3.org/TR/encoding/#gbk-encoder
// http://icu-project.org/docs/papers/gb18030.html
// http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
// http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
'gb18030': {
type: '_dbcs',
table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
gb18030: function() { return require('./tables/gb18030-ranges.json') },
encodeSkipVals: [0x80],
encodeAdd: {'€': 0xA2E3},
},
// GB18030 is an algorithmic extension of GBK.
// Main source: https://www.w3.org/TR/encoding/#gbk-encoder
// http://icu-project.org/docs/papers/gb18030.html
// http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
// http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
gb18030: {
type: "_dbcs",
table: function () { return require("./tables/cp936.json").concat(require("./tables/gbk-added.json")) },
gb18030: function () { return require("./tables/gb18030-ranges.json") },
encodeSkipVals: [0x80],
encodeAdd: { "€": 0xA2E3 }
},
'chinese': 'gb18030',
chinese: "gb18030",
// == Korean ===============================================================
// EUC-KR, KS_C_5601 and KS X 1001 are exactly the same.
windows949: "cp949",
ms949: "cp949",
949: "cp949",
cp949: {
type: "_dbcs",
table: function () { return require("./tables/cp949.json") }
},
// == Korean ===============================================================
// EUC-KR, KS_C_5601 and KS X 1001 are exactly the same.
'windows949': 'cp949',
'ms949': 'cp949',
'949': 'cp949',
'cp949': {
type: '_dbcs',
table: function() { return require('./tables/cp949.json') },
},
cseuckr: "cp949",
csksc56011987: "cp949",
euckr: "cp949",
isoir149: "cp949",
korean: "cp949",
ksc56011987: "cp949",
ksc56011989: "cp949",
ksc5601: "cp949",
'cseuckr': 'cp949',
'csksc56011987': 'cp949',
'euckr': 'cp949',
'isoir149': 'cp949',
'korean': 'cp949',
'ksc56011987': 'cp949',
'ksc56011989': 'cp949',
'ksc5601': 'cp949',
// == Big5/Taiwan/Hong Kong ================================================
// There are lots of tables for Big5 and cp950. Please see the following links for history:
// http://moztw.org/docs/big5/ http://www.haible.de/bruno/charsets/conversion-tables/Big5.html
// Variations, in roughly number of defined chars:
// * Windows CP 950: Microsoft variant of Big5. Canonical: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
// * Windows CP 951: Microsoft variant of Big5-HKSCS-2001. Seems to be never public. http://me.abelcheung.org/articles/research/what-is-cp951/
// * Big5-2003 (Taiwan standard) almost superset of cp950.
// * Unicode-at-on (UAO) / Mozilla 1.8. Falling out of use on the Web. Not supported by other browsers.
// * Big5-HKSCS (-2001, -2004, -2008). Hong Kong standard.
// many unicode code points moved from PUA to Supplementary plane (U+2XXXX) over the years.
// Plus, it has 4 combining sequences.
// Seems that Mozilla refused to support it for 10 yrs. https://bugzilla.mozilla.org/show_bug.cgi?id=162431 https://bugzilla.mozilla.org/show_bug.cgi?id=310299
// because big5-hkscs is the only encoding to include astral characters in non-algorithmic way.
// Implementations are not consistent within browsers; sometimes labeled as just big5.
// MS Internet Explorer switches from big5 to big5-hkscs when a patch applied.
// Great discussion & recap of what's going on https://bugzilla.mozilla.org/show_bug.cgi?id=912470#c31
// In the encoder, it might make sense to support encoding old PUA mappings to Big5 bytes seq-s.
// Official spec: http://www.ogcio.gov.hk/en/business/tech_promotion/ccli/terms/doc/2003cmp_2008.txt
// http://www.ogcio.gov.hk/tc/business/tech_promotion/ccli/terms/doc/hkscs-2008-big5-iso.txt
//
// Current understanding of how to deal with Big5(-HKSCS) is in the Encoding Standard, http://encoding.spec.whatwg.org/#big5-encoder
// Unicode mapping (http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT) is said to be wrong.
windows950: "cp950",
ms950: "cp950",
950: "cp950",
cp950: {
type: "_dbcs",
table: function () { return require("./tables/cp950.json") }
},
// == Big5/Taiwan/Hong Kong ================================================
// There are lots of tables for Big5 and cp950. Please see the following links for history:
// http://moztw.org/docs/big5/ http://www.haible.de/bruno/charsets/conversion-tables/Big5.html
// Variations, in roughly number of defined chars:
// * Windows CP 950: Microsoft variant of Big5. Canonical: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
// * Windows CP 951: Microsoft variant of Big5-HKSCS-2001. Seems to be never public. http://me.abelcheung.org/articles/research/what-is-cp951/
// * Big5-2003 (Taiwan standard) almost superset of cp950.
// * Unicode-at-on (UAO) / Mozilla 1.8. Falling out of use on the Web. Not supported by other browsers.
// * Big5-HKSCS (-2001, -2004, -2008). Hong Kong standard.
// many unicode code points moved from PUA to Supplementary plane (U+2XXXX) over the years.
// Plus, it has 4 combining sequences.
// Seems that Mozilla refused to support it for 10 yrs. https://bugzilla.mozilla.org/show_bug.cgi?id=162431 https://bugzilla.mozilla.org/show_bug.cgi?id=310299
// because big5-hkscs is the only encoding to include astral characters in non-algorithmic way.
// Implementations are not consistent within browsers; sometimes labeled as just big5.
// MS Internet Explorer switches from big5 to big5-hkscs when a patch applied.
// Great discussion & recap of what's going on https://bugzilla.mozilla.org/show_bug.cgi?id=912470#c31
// In the encoder, it might make sense to support encoding old PUA mappings to Big5 bytes seq-s.
// Official spec: http://www.ogcio.gov.hk/en/business/tech_promotion/ccli/terms/doc/2003cmp_2008.txt
// http://www.ogcio.gov.hk/tc/business/tech_promotion/ccli/terms/doc/hkscs-2008-big5-iso.txt
//
// Current understanding of how to deal with Big5(-HKSCS) is in the Encoding Standard, http://encoding.spec.whatwg.org/#big5-encoder
// Unicode mapping (http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT) is said to be wrong.
// Big5 has many variations and is an extension of cp950. We use Encoding Standard's as a consensus.
big5: "big5hkscs",
big5hkscs: {
type: "_dbcs",
table: function () { return require("./tables/cp950.json").concat(require("./tables/big5-added.json")) },
encodeSkipVals: [
// Although Encoding Standard says we should avoid encoding to HKSCS area (See Step 1 of
// https://encoding.spec.whatwg.org/#index-big5-pointer), we still do it to increase compatibility with ICU.
// But if a single unicode point can be encoded both as HKSCS and regular Big5, we prefer the latter.
0x8e69, 0x8e6f, 0x8e7e, 0x8eab, 0x8eb4, 0x8ecd, 0x8ed0, 0x8f57, 0x8f69, 0x8f6e, 0x8fcb, 0x8ffe,
0x906d, 0x907a, 0x90c4, 0x90dc, 0x90f1, 0x91bf, 0x92af, 0x92b0, 0x92b1, 0x92b2, 0x92d1, 0x9447, 0x94ca,
0x95d9, 0x96fc, 0x9975, 0x9b76, 0x9b78, 0x9b7b, 0x9bc6, 0x9bde, 0x9bec, 0x9bf6, 0x9c42, 0x9c53, 0x9c62,
0x9c68, 0x9c6b, 0x9c77, 0x9cbc, 0x9cbd, 0x9cd0, 0x9d57, 0x9d5a, 0x9dc4, 0x9def, 0x9dfb, 0x9ea9, 0x9eef,
0x9efd, 0x9f60, 0x9fcb, 0xa077, 0xa0dc, 0xa0df, 0x8fcc, 0x92c8, 0x9644, 0x96ed,
'windows950': 'cp950',
'ms950': 'cp950',
'950': 'cp950',
'cp950': {
type: '_dbcs',
table: function() { return require('./tables/cp950.json') },
},
// Step 2 of https://encoding.spec.whatwg.org/#index-big5-pointer: Use last pointer for U+2550, U+255E, U+2561, U+256A, U+5341, or U+5345
0xa2a4, 0xa2a5, 0xa2a7, 0xa2a6, 0xa2cc, 0xa2ce
]
},
// Big5 has many variations and is an extension of cp950. We use Encoding Standard's as a consensus.
'big5': 'big5hkscs',
'big5hkscs': {
type: '_dbcs',
table: function() { return require('./tables/cp950.json').concat(require('./tables/big5-added.json')) },
encodeSkipVals: [0xa2cc],
},
'cnbig5': 'big5hkscs',
'csbig5': 'big5hkscs',
'xxbig5': 'big5hkscs',
};
cnbig5: "big5hkscs",
csbig5: "big5hkscs",
xxbig5: "big5hkscs"
}

View File

@@ -1,22 +1,23 @@
"use strict";
"use strict"
var mergeModules = require("../lib/helpers/merge-exports")
// Update this array if you add/rename/remove files in this directory.
// We support Browserify by skipping automatic module discovery and requiring modules directly.
var modules = [
require("./internal"),
require("./utf16"),
require("./utf7"),
require("./sbcs-codec"),
require("./sbcs-data"),
require("./sbcs-data-generated"),
require("./dbcs-codec"),
require("./dbcs-data"),
];
require("./internal"),
require("./utf32"),
require("./utf16"),
require("./utf7"),
require("./sbcs-codec"),
require("./sbcs-data"),
require("./sbcs-data-generated"),
require("./dbcs-codec"),
require("./dbcs-data")
]
// Put all encoding/alias/codec definitions to single object and export it.
// Put all encoding/alias/codec definitions to single object and export it.
for (var i = 0; i < modules.length; i++) {
var module = modules[i];
for (var enc in module)
if (Object.prototype.hasOwnProperty.call(module, enc))
exports[enc] = module[enc];
var module = modules[i]
mergeModules(exports, module)
}

View File

@@ -1,188 +1,218 @@
"use strict";
var Buffer = require("safer-buffer").Buffer;
"use strict"
var Buffer = require("safer-buffer").Buffer
// Export Node.js internal encodings.
module.exports = {
// Encodings
utf8: { type: "_internal", bomAware: true},
cesu8: { type: "_internal", bomAware: true},
unicode11utf8: "utf8",
// Encodings
utf8: { type: "_internal", bomAware: true },
cesu8: { type: "_internal", bomAware: true },
unicode11utf8: "utf8",
ucs2: { type: "_internal", bomAware: true},
utf16le: "ucs2",
ucs2: { type: "_internal", bomAware: true },
utf16le: "ucs2",
binary: { type: "_internal" },
base64: { type: "_internal" },
hex: { type: "_internal" },
binary: { type: "_internal" },
base64: { type: "_internal" },
hex: { type: "_internal" },
// Codec.
_internal: InternalCodec,
};
//------------------------------------------------------------------------------
function InternalCodec(codecOptions, iconv) {
this.enc = codecOptions.encodingName;
this.bomAware = codecOptions.bomAware;
if (this.enc === "base64")
this.encoder = InternalEncoderBase64;
else if (this.enc === "cesu8") {
this.enc = "utf8"; // Use utf8 for decoding.
this.encoder = InternalEncoderCesu8;
// Add decoder for versions of Node not supporting CESU-8
if (Buffer.from('eda0bdedb2a9', 'hex').toString() !== '💩') {
this.decoder = InternalDecoderCesu8;
this.defaultCharUnicode = iconv.defaultCharUnicode;
}
}
// Codec.
_internal: InternalCodec
}
InternalCodec.prototype.encoder = InternalEncoder;
InternalCodec.prototype.decoder = InternalDecoder;
// ------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Codec wrapper around Node's built-in encodings.
// Stores the encoding name and BOM-awareness from codecOptions, and installs a
// specialised encoder (and, for cesu8, possibly a decoder) on the instance when
// the encoding needs one.
function InternalCodec (codecOptions, iconv) {
  var name = codecOptions.encodingName
  this.enc = name
  this.bomAware = codecOptions.bomAware

  switch (name) {
    case "base64":
      this.encoder = InternalEncoderBase64
      break

    case "utf8":
      this.encoder = InternalEncoderUtf8
      break

    case "cesu8":
      this.enc = "utf8" // Use utf8 for decoding.
      this.encoder = InternalEncoderCesu8

      // Probe the runtime: if its utf8 decoder does not turn the CESU-8
      // surrogate-pair bytes into the expected character, use our own decoder.
      var probe = Buffer.from("eda0bdedb2a9", "hex").toString()
      if (probe !== "💩") {
        this.decoder = InternalDecoderCesu8
        this.defaultCharUnicode = iconv.defaultCharUnicode
      }
      break
  }
}
InternalCodec.prototype.encoder = InternalEncoder
InternalCodec.prototype.decoder = InternalDecoder
// ------------------------------------------------------------------------------
// We use node.js internal decoder. Its signature is the same as ours.
var StringDecoder = require('string_decoder').StringDecoder;
var StringDecoder = require("string_decoder").StringDecoder
if (!StringDecoder.prototype.end) // Node v0.8 doesn't have this method.
StringDecoder.prototype.end = function() {};
function InternalDecoder(options, codec) {
StringDecoder.call(this, codec.enc);
function InternalDecoder (options, codec) {
this.decoder = new StringDecoder(codec.enc)
}
InternalDecoder.prototype = StringDecoder.prototype;
InternalDecoder.prototype.write = function (buf) {
if (!Buffer.isBuffer(buf)) {
buf = Buffer.from(buf)
}
return this.decoder.write(buf)
}
//------------------------------------------------------------------------------
InternalDecoder.prototype.end = function () {
return this.decoder.end()
}
// ------------------------------------------------------------------------------
// Encoder is mostly trivial
function InternalEncoder(options, codec) {
this.enc = codec.enc;
function InternalEncoder (options, codec) {
this.enc = codec.enc
}
InternalEncoder.prototype.write = function(str) {
return Buffer.from(str, this.enc);
InternalEncoder.prototype.write = function (str) {
return Buffer.from(str, this.enc)
}
InternalEncoder.prototype.end = function() {
InternalEncoder.prototype.end = function () {
}
//------------------------------------------------------------------------------
// ------------------------------------------------------------------------------
// Except base64 encoder, which must keep its state.
function InternalEncoderBase64(options, codec) {
this.prevStr = '';
function InternalEncoderBase64 (options, codec) {
this.prevStr = ""
}
InternalEncoderBase64.prototype.write = function(str) {
str = this.prevStr + str;
var completeQuads = str.length - (str.length % 4);
this.prevStr = str.slice(completeQuads);
str = str.slice(0, completeQuads);
InternalEncoderBase64.prototype.write = function (str) {
str = this.prevStr + str
var completeQuads = str.length - (str.length % 4)
this.prevStr = str.slice(completeQuads)
str = str.slice(0, completeQuads)
return Buffer.from(str, "base64");
return Buffer.from(str, "base64")
}
InternalEncoderBase64.prototype.end = function() {
return Buffer.from(this.prevStr, "base64");
InternalEncoderBase64.prototype.end = function () {
return Buffer.from(this.prevStr, "base64")
}
//------------------------------------------------------------------------------
// ------------------------------------------------------------------------------
// CESU-8 encoder is also special.
function InternalEncoderCesu8(options, codec) {
function InternalEncoderCesu8 (options, codec) {
}
InternalEncoderCesu8.prototype.write = function(str) {
var buf = Buffer.alloc(str.length * 3), bufIdx = 0;
for (var i = 0; i < str.length; i++) {
var charCode = str.charCodeAt(i);
// Naive implementation, but it works because CESU-8 is especially easy
// to convert from UTF-16 (which all JS strings are encoded in).
if (charCode < 0x80)
buf[bufIdx++] = charCode;
else if (charCode < 0x800) {
buf[bufIdx++] = 0xC0 + (charCode >>> 6);
buf[bufIdx++] = 0x80 + (charCode & 0x3f);
}
else { // charCode will always be < 0x10000 in javascript.
buf[bufIdx++] = 0xE0 + (charCode >>> 12);
buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f);
buf[bufIdx++] = 0x80 + (charCode & 0x3f);
}
InternalEncoderCesu8.prototype.write = function (str) {
var buf = Buffer.alloc(str.length * 3); var bufIdx = 0
for (var i = 0; i < str.length; i++) {
var charCode = str.charCodeAt(i)
// Naive implementation, but it works because CESU-8 is especially easy
// to convert from UTF-16 (which all JS strings are encoded in).
if (charCode < 0x80) { buf[bufIdx++] = charCode } else if (charCode < 0x800) {
buf[bufIdx++] = 0xC0 + (charCode >>> 6)
buf[bufIdx++] = 0x80 + (charCode & 0x3f)
} else { // charCode will always be < 0x10000 in javascript.
buf[bufIdx++] = 0xE0 + (charCode >>> 12)
buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f)
buf[bufIdx++] = 0x80 + (charCode & 0x3f)
}
return buf.slice(0, bufIdx);
}
return buf.slice(0, bufIdx)
}
InternalEncoderCesu8.prototype.end = function() {
InternalEncoderCesu8.prototype.end = function () {
}
//------------------------------------------------------------------------------
// ------------------------------------------------------------------------------
// CESU-8 decoder is not implemented in Node v4.0+
function InternalDecoderCesu8(options, codec) {
this.acc = 0;
this.contBytes = 0;
this.accBytes = 0;
this.defaultCharUnicode = codec.defaultCharUnicode;
function InternalDecoderCesu8 (options, codec) {
this.acc = 0
this.contBytes = 0
this.accBytes = 0
this.defaultCharUnicode = codec.defaultCharUnicode
}
InternalDecoderCesu8.prototype.write = function(buf) {
var acc = this.acc, contBytes = this.contBytes, accBytes = this.accBytes,
res = '';
for (var i = 0; i < buf.length; i++) {
var curByte = buf[i];
if ((curByte & 0xC0) !== 0x80) { // Leading byte
if (contBytes > 0) { // Previous code is invalid
res += this.defaultCharUnicode;
contBytes = 0;
}
InternalDecoderCesu8.prototype.write = function (buf) {
var acc = this.acc; var contBytes = this.contBytes; var accBytes = this.accBytes
var res = ""
for (var i = 0; i < buf.length; i++) {
var curByte = buf[i]
if ((curByte & 0xC0) !== 0x80) { // Leading byte
if (contBytes > 0) { // Previous code is invalid
res += this.defaultCharUnicode
contBytes = 0
}
if (curByte < 0x80) { // Single-byte code
res += String.fromCharCode(curByte);
} else if (curByte < 0xE0) { // Two-byte code
acc = curByte & 0x1F;
contBytes = 1; accBytes = 1;
} else if (curByte < 0xF0) { // Three-byte code
acc = curByte & 0x0F;
contBytes = 2; accBytes = 1;
} else { // Four or more are not supported for CESU-8.
res += this.defaultCharUnicode;
}
} else { // Continuation byte
if (contBytes > 0) { // We're waiting for it.
acc = (acc << 6) | (curByte & 0x3f);
contBytes--; accBytes++;
if (contBytes === 0) {
// Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
if (accBytes === 2 && acc < 0x80 && acc > 0)
res += this.defaultCharUnicode;
else if (accBytes === 3 && acc < 0x800)
res += this.defaultCharUnicode;
else
// Actually add character.
res += String.fromCharCode(acc);
}
} else { // Unexpected continuation byte
res += this.defaultCharUnicode;
}
if (curByte < 0x80) { // Single-byte code
res += String.fromCharCode(curByte)
} else if (curByte < 0xE0) { // Two-byte code
acc = curByte & 0x1F
contBytes = 1; accBytes = 1
} else if (curByte < 0xF0) { // Three-byte code
acc = curByte & 0x0F
contBytes = 2; accBytes = 1
} else { // Four or more are not supported for CESU-8.
res += this.defaultCharUnicode
}
} else { // Continuation byte
if (contBytes > 0) { // We're waiting for it.
acc = (acc << 6) | (curByte & 0x3f)
contBytes--; accBytes++
if (contBytes === 0) {
// Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
if (accBytes === 2 && acc < 0x80 && acc > 0) {
res += this.defaultCharUnicode
} else if (accBytes === 3 && acc < 0x800) {
res += this.defaultCharUnicode
} else {
// Actually add character.
res += String.fromCharCode(acc)
}
}
} else { // Unexpected continuation byte
res += this.defaultCharUnicode
}
}
this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes;
return res;
}
this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes
return res
}
InternalDecoderCesu8.prototype.end = function() {
var res = 0;
if (this.contBytes > 0)
res += this.defaultCharUnicode;
return res;
// Flush the decoder at end of input.
// Returns the replacement character if a multi-byte sequence was still waiting
// for continuation bytes, otherwise an empty string.
InternalDecoderCesu8.prototype.end = function () {
  // Accumulate into a string, not the number 0: with a numeric start,
  // `res += defaultCharUnicode` would yield "0" followed by the replacement char.
  var res = ""
  if (this.contBytes > 0) { res += this.defaultCharUnicode }
  return res
}
// ------------------------------------------------------------------------------
// check the chunk boundaries for surrogate pair
// Streaming UTF-8 encoder (JS string -> Buffer) that copes with a surrogate
// pair split across two chunks: a trailing high surrogate is held back and
// prepended to the next chunk instead of being encoded on its own.
function InternalEncoderUtf8 (options, codec) {
  // High surrogate carried over from the previous chunk ("" when none).
  this.highSurrogate = ""
}

// Encode one chunk. Returns a Buffer with the UTF-8 bytes of everything except
// a trailing high surrogate, which is kept for the next write()/end() call.
InternalEncoderUtf8.prototype.write = function (str) {
  if (this.highSurrogate) {
    str = this.highSurrogate + str
    this.highSurrogate = ""
  }

  if (str.length > 0) {
    var charCode = str.charCodeAt(str.length - 1)
    // 0xD800..0xDBFF is the high (leading) surrogate range.
    if (charCode >= 0xd800 && charCode < 0xdc00) {
      this.highSurrogate = str[str.length - 1]
      str = str.slice(0, str.length - 1)
    }
  }

  // The encoding is named explicitly; this encoder never stores an `enc` field.
  return Buffer.from(str, "utf8")
}

// Flush a held-back high surrogate, if any. Returns a Buffer with its encoding,
// or undefined when nothing is pending.
InternalEncoderUtf8.prototype.end = function () {
  if (this.highSurrogate) {
    var str = this.highSurrogate
    this.highSurrogate = ""
    return Buffer.from(str, "utf8")
  }
}

View File

@@ -1,72 +1,75 @@
"use strict";
var Buffer = require("safer-buffer").Buffer;
"use strict"
var Buffer = require("safer-buffer").Buffer
// Single-byte codec. Needs a 'chars' string parameter that contains 256 or 128 chars that
// correspond to encoded bytes (if 128 - then lower half is ASCII).
// correspond to encoded bytes (if 128 - then lower half is ASCII).
exports._sbcs = SBCSCodec;
function SBCSCodec(codecOptions, iconv) {
if (!codecOptions)
throw new Error("SBCS codec is called without the data.")
// Prepare char buffer for decoding.
if (!codecOptions.chars || (codecOptions.chars.length !== 128 && codecOptions.chars.length !== 256))
throw new Error("Encoding '"+codecOptions.type+"' has incorrect 'chars' (must be of len 128 or 256)");
if (codecOptions.chars.length === 128) {
var asciiString = "";
for (var i = 0; i < 128; i++)
asciiString += String.fromCharCode(i);
codecOptions.chars = asciiString + codecOptions.chars;
exports._sbcs = SBCSCodec
function SBCSCodec (codecOptions, iconv) {
if (!codecOptions) {
throw new Error("SBCS codec is called without the data.")
}
// Prepare char buffer for decoding.
if (!codecOptions.chars || (codecOptions.chars.length !== 128 && codecOptions.chars.length !== 256)) {
throw new Error("Encoding '" + codecOptions.type + "' has incorrect 'chars' (must be of len 128 or 256)")
}
if (codecOptions.chars.length === 128) {
var asciiString = ""
for (var i = 0; i < 128; i++) {
asciiString += String.fromCharCode(i)
}
codecOptions.chars = asciiString + codecOptions.chars
}
this.decodeBuf = Buffer.from(codecOptions.chars, 'ucs2');
// Encoding buffer.
var encodeBuf = Buffer.alloc(65536, iconv.defaultCharSingleByte.charCodeAt(0));
this.decodeBuf = Buffer.from(codecOptions.chars, "ucs2")
for (var i = 0; i < codecOptions.chars.length; i++)
encodeBuf[codecOptions.chars.charCodeAt(i)] = i;
// Encoding buffer.
var encodeBuf = Buffer.alloc(65536, iconv.defaultCharSingleByte.charCodeAt(0))
this.encodeBuf = encodeBuf;
for (var i = 0; i < codecOptions.chars.length; i++) {
encodeBuf[codecOptions.chars.charCodeAt(i)] = i
}
this.encodeBuf = encodeBuf
}
SBCSCodec.prototype.encoder = SBCSEncoder;
SBCSCodec.prototype.decoder = SBCSDecoder;
SBCSCodec.prototype.encoder = SBCSEncoder
SBCSCodec.prototype.decoder = SBCSDecoder
function SBCSEncoder(options, codec) {
this.encodeBuf = codec.encodeBuf;
function SBCSEncoder (options, codec) {
this.encodeBuf = codec.encodeBuf
}
SBCSEncoder.prototype.write = function(str) {
var buf = Buffer.alloc(str.length);
for (var i = 0; i < str.length; i++)
buf[i] = this.encodeBuf[str.charCodeAt(i)];
return buf;
// Encode a string chunk: each UTF-16 code unit is mapped to one output byte
// through the codec's lookup table.
SBCSEncoder.prototype.write = function (str) {
  var table = this.encodeBuf
  var len = str.length
  var out = Buffer.alloc(len)
  var pos = 0
  while (pos < len) {
    out[pos] = table[str.charCodeAt(pos)]
    pos++
  }
  return out
}
SBCSEncoder.prototype.end = function() {
SBCSEncoder.prototype.end = function () {
}
function SBCSDecoder(options, codec) {
this.decodeBuf = codec.decodeBuf;
function SBCSDecoder (options, codec) {
this.decodeBuf = codec.decodeBuf
}
SBCSDecoder.prototype.write = function(buf) {
// Strings are immutable in JS -> we use ucs2 buffer to speed up computations.
var decodeBuf = this.decodeBuf;
var newBuf = Buffer.alloc(buf.length*2);
var idx1 = 0, idx2 = 0;
for (var i = 0; i < buf.length; i++) {
idx1 = buf[i]*2; idx2 = i*2;
newBuf[idx2] = decodeBuf[idx1];
newBuf[idx2+1] = decodeBuf[idx1+1];
}
return newBuf.toString('ucs2');
// Decode a chunk of single-byte data into a string.
// Each input byte selects a two-byte (ucs2) entry in the decode table; the
// entries are copied into a scratch buffer that is converted to a string once.
SBCSDecoder.prototype.write = function (buf) {
  var table = this.decodeBuf
  var count = buf.length
  var out = Buffer.alloc(count * 2)
  for (var n = 0, dst = 0; n < count; n++, dst += 2) {
    var src = buf[n] * 2
    out[dst] = table[src]
    out[dst + 1] = table[src + 1]
  }
  return out.toString("ucs2")
}
SBCSDecoder.prototype.end = function() {
SBCSDecoder.prototype.end = function () {
}

View File

@@ -1,174 +1,178 @@
"use strict";
"use strict"
// Manually added data to be used by sbcs codec in addition to generated one.
module.exports = {
// Not supported by iconv, not sure why.
"10029": "maccenteuro",
"maccenteuro": {
"type": "_sbcs",
"chars": "ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ"
},
// Not supported by iconv, not sure why.
10029: "maccenteuro",
maccenteuro: {
type: "_sbcs",
chars: "ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ"
},
"808": "cp808",
"ibm808": "cp808",
"cp808": {
"type": "_sbcs",
"chars": "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№€■ "
},
808: "cp808",
ibm808: "cp808",
cp808: {
type: "_sbcs",
chars: "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№€■ "
},
"mik": {
"type": "_sbcs",
"chars": "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя└┴┬├─┼╣║╚╔╩╦╠═╬┐░▒▓│┤№§╗╝┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ "
},
mik: {
type: "_sbcs",
chars: "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя└┴┬├─┼╣║╚╔╩╦╠═╬┐░▒▓│┤№§╗╝┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ "
},
// Aliases of generated encodings.
"ascii8bit": "ascii",
"usascii": "ascii",
"ansix34": "ascii",
"ansix341968": "ascii",
"ansix341986": "ascii",
"csascii": "ascii",
"cp367": "ascii",
"ibm367": "ascii",
"isoir6": "ascii",
"iso646us": "ascii",
"iso646irv": "ascii",
"us": "ascii",
cp720: {
type: "_sbcs",
chars: "\x80\x81éâ\x84à\x86çêëèïî\x8d\x8e\x8f\x90\u0651\u0652ô¤ـûùءآأؤ£إئابةتثجحخدذرزسشص«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ضطظعغفµقكلمنهوىي≡\u064b\u064c\u064d\u064e\u064f\u0650≈°∙·√ⁿ²■\u00a0"
},
"latin1": "iso88591",
"latin2": "iso88592",
"latin3": "iso88593",
"latin4": "iso88594",
"latin5": "iso88599",
"latin6": "iso885910",
"latin7": "iso885913",
"latin8": "iso885914",
"latin9": "iso885915",
"latin10": "iso885916",
// Aliases of generated encodings.
ascii8bit: "ascii",
usascii: "ascii",
ansix34: "ascii",
ansix341968: "ascii",
ansix341986: "ascii",
csascii: "ascii",
cp367: "ascii",
ibm367: "ascii",
isoir6: "ascii",
iso646us: "ascii",
iso646irv: "ascii",
us: "ascii",
"csisolatin1": "iso88591",
"csisolatin2": "iso88592",
"csisolatin3": "iso88593",
"csisolatin4": "iso88594",
"csisolatincyrillic": "iso88595",
"csisolatinarabic": "iso88596",
"csisolatingreek" : "iso88597",
"csisolatinhebrew": "iso88598",
"csisolatin5": "iso88599",
"csisolatin6": "iso885910",
latin1: "iso88591",
latin2: "iso88592",
latin3: "iso88593",
latin4: "iso88594",
latin5: "iso88599",
latin6: "iso885910",
latin7: "iso885913",
latin8: "iso885914",
latin9: "iso885915",
latin10: "iso885916",
"l1": "iso88591",
"l2": "iso88592",
"l3": "iso88593",
"l4": "iso88594",
"l5": "iso88599",
"l6": "iso885910",
"l7": "iso885913",
"l8": "iso885914",
"l9": "iso885915",
"l10": "iso885916",
csisolatin1: "iso88591",
csisolatin2: "iso88592",
csisolatin3: "iso88593",
csisolatin4: "iso88594",
csisolatincyrillic: "iso88595",
csisolatinarabic: "iso88596",
csisolatingreek: "iso88597",
csisolatinhebrew: "iso88598",
csisolatin5: "iso88599",
csisolatin6: "iso885910",
"isoir14": "iso646jp",
"isoir57": "iso646cn",
"isoir100": "iso88591",
"isoir101": "iso88592",
"isoir109": "iso88593",
"isoir110": "iso88594",
"isoir144": "iso88595",
"isoir127": "iso88596",
"isoir126": "iso88597",
"isoir138": "iso88598",
"isoir148": "iso88599",
"isoir157": "iso885910",
"isoir166": "tis620",
"isoir179": "iso885913",
"isoir199": "iso885914",
"isoir203": "iso885915",
"isoir226": "iso885916",
l1: "iso88591",
l2: "iso88592",
l3: "iso88593",
l4: "iso88594",
l5: "iso88599",
l6: "iso885910",
l7: "iso885913",
l8: "iso885914",
l9: "iso885915",
l10: "iso885916",
"cp819": "iso88591",
"ibm819": "iso88591",
isoir14: "iso646jp",
isoir57: "iso646cn",
isoir100: "iso88591",
isoir101: "iso88592",
isoir109: "iso88593",
isoir110: "iso88594",
isoir144: "iso88595",
isoir127: "iso88596",
isoir126: "iso88597",
isoir138: "iso88598",
isoir148: "iso88599",
isoir157: "iso885910",
isoir166: "tis620",
isoir179: "iso885913",
isoir199: "iso885914",
isoir203: "iso885915",
isoir226: "iso885916",
"cyrillic": "iso88595",
cp819: "iso88591",
ibm819: "iso88591",
"arabic": "iso88596",
"arabic8": "iso88596",
"ecma114": "iso88596",
"asmo708": "iso88596",
cyrillic: "iso88595",
"greek" : "iso88597",
"greek8" : "iso88597",
"ecma118" : "iso88597",
"elot928" : "iso88597",
arabic: "iso88596",
arabic8: "iso88596",
ecma114: "iso88596",
asmo708: "iso88596",
"hebrew": "iso88598",
"hebrew8": "iso88598",
greek: "iso88597",
greek8: "iso88597",
ecma118: "iso88597",
elot928: "iso88597",
"turkish": "iso88599",
"turkish8": "iso88599",
hebrew: "iso88598",
hebrew8: "iso88598",
"thai": "iso885911",
"thai8": "iso885911",
turkish: "iso88599",
turkish8: "iso88599",
"celtic": "iso885914",
"celtic8": "iso885914",
"isoceltic": "iso885914",
thai: "iso885911",
thai8: "iso885911",
"tis6200": "tis620",
"tis62025291": "tis620",
"tis62025330": "tis620",
celtic: "iso885914",
celtic8: "iso885914",
isoceltic: "iso885914",
"10000": "macroman",
"10006": "macgreek",
"10007": "maccyrillic",
"10079": "maciceland",
"10081": "macturkish",
tis6200: "tis620",
tis62025291: "tis620",
tis62025330: "tis620",
"cspc8codepage437": "cp437",
"cspc775baltic": "cp775",
"cspc850multilingual": "cp850",
"cspcp852": "cp852",
"cspc862latinhebrew": "cp862",
"cpgr": "cp869",
10000: "macroman",
10006: "macgreek",
10007: "maccyrillic",
10079: "maciceland",
10081: "macturkish",
"msee": "cp1250",
"mscyrl": "cp1251",
"msansi": "cp1252",
"msgreek": "cp1253",
"msturk": "cp1254",
"mshebr": "cp1255",
"msarab": "cp1256",
"winbaltrim": "cp1257",
cspc8codepage437: "cp437",
cspc775baltic: "cp775",
cspc850multilingual: "cp850",
cspcp852: "cp852",
cspc862latinhebrew: "cp862",
cpgr: "cp869",
"cp20866": "koi8r",
"20866": "koi8r",
"ibm878": "koi8r",
"cskoi8r": "koi8r",
msee: "cp1250",
mscyrl: "cp1251",
msansi: "cp1252",
msgreek: "cp1253",
msturk: "cp1254",
mshebr: "cp1255",
msarab: "cp1256",
winbaltrim: "cp1257",
"cp21866": "koi8u",
"21866": "koi8u",
"ibm1168": "koi8u",
cp20866: "koi8r",
20866: "koi8r",
ibm878: "koi8r",
cskoi8r: "koi8r",
"strk10482002": "rk1048",
cp21866: "koi8u",
21866: "koi8u",
ibm1168: "koi8u",
"tcvn5712": "tcvn",
"tcvn57121": "tcvn",
strk10482002: "rk1048",
"gb198880": "iso646cn",
"cn": "iso646cn",
tcvn5712: "tcvn",
tcvn57121: "tcvn",
"csiso14jisc6220ro": "iso646jp",
"jisc62201969ro": "iso646jp",
"jp": "iso646jp",
gb198880: "iso646cn",
cn: "iso646cn",
"cshproman8": "hproman8",
"r8": "hproman8",
"roman8": "hproman8",
"xroman8": "hproman8",
"ibm1051": "hproman8",
csiso14jisc6220ro: "iso646jp",
jisc62201969ro: "iso646jp",
jp: "iso646jp",
"mac": "macintosh",
"csmacintosh": "macintosh",
};
cshproman8: "hproman8",
r8: "hproman8",
roman8: "hproman8",
xroman8: "hproman8",
ibm1051: "hproman8",
mac: "macintosh",
csmacintosh: "macintosh"
}

View File

@@ -27,7 +27,7 @@
["a7c2","",14],
["a7f2","",12],
["a896","",10],
["a8bc",""],
["a8bc","ḿ"],
["a8bf","ǹ"],
["a8c1",""],
["a8ea","",20],
@@ -51,5 +51,6 @@
["fca1","",93],
["fda1","",93],
["fe50","⺁⺄㑳㑇⺈⺋㖞㘚㘎⺌⺗㥮㤘㧏㧟㩳㧐㭎㱮㳠⺧⺪䁖䅟⺮䌷⺳⺶⺷䎱䎬⺻䏝䓖䙡䙌"],
["fe80","䜣䜩䝼䞍⻊䥇䥺䥽䦂䦃䦅䦆䦟䦛䦷䦶䲣䲟䲠䲡䱷䲢䴓",6,"䶮",93]
["fe80","䜣䜩䝼䞍⻊䥇䥺䥽䦂䦃䦅䦆䦟䦛䦷䦶䲣䲟䲠䲡䱷䲢䴓",6,"䶮",93],
["8135f437",""]
]

View File

@@ -1,69 +1,66 @@
"use strict";
var Buffer = require("safer-buffer").Buffer;
"use strict"
var Buffer = require("safer-buffer").Buffer
// Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
// == UTF16-BE codec. ==========================================================
exports.utf16be = Utf16BECodec;
function Utf16BECodec() {
exports.utf16be = Utf16BECodec
function Utf16BECodec () {
}
Utf16BECodec.prototype.encoder = Utf16BEEncoder;
Utf16BECodec.prototype.decoder = Utf16BEDecoder;
Utf16BECodec.prototype.bomAware = true;
Utf16BECodec.prototype.encoder = Utf16BEEncoder
Utf16BECodec.prototype.decoder = Utf16BEDecoder
Utf16BECodec.prototype.bomAware = true
// -- Encoding
function Utf16BEEncoder() {
function Utf16BEEncoder () {
}
Utf16BEEncoder.prototype.write = function(str) {
var buf = Buffer.from(str, 'ucs2');
for (var i = 0; i < buf.length; i += 2) {
var tmp = buf[i]; buf[i] = buf[i+1]; buf[i+1] = tmp;
}
return buf;
// Encode a string chunk as UTF-16BE: produce the ucs2 (little-endian) bytes,
// then swap the two bytes of every code unit in place.
Utf16BEEncoder.prototype.write = function (str) {
  var bytes = Buffer.from(str, "ucs2")
  var total = bytes.length
  for (var lo = 0, hi = 1; lo < total; lo += 2, hi += 2) {
    var first = bytes[lo]
    bytes[lo] = bytes[hi]
    bytes[hi] = first
  }
  return bytes
}
Utf16BEEncoder.prototype.end = function() {
Utf16BEEncoder.prototype.end = function () {
}
// -- Decoding
function Utf16BEDecoder() {
this.overflowByte = -1;
function Utf16BEDecoder () {
this.overflowByte = -1
}
Utf16BEDecoder.prototype.write = function(buf) {
if (buf.length == 0)
return '';
Utf16BEDecoder.prototype.write = function (buf) {
if (buf.length == 0) { return "" }
var buf2 = Buffer.alloc(buf.length + 1),
i = 0, j = 0;
var buf2 = Buffer.alloc(buf.length + 1)
var i = 0; var j = 0
if (this.overflowByte !== -1) {
buf2[0] = buf[0];
buf2[1] = this.overflowByte;
i = 1; j = 2;
}
if (this.overflowByte !== -1) {
buf2[0] = buf[0]
buf2[1] = this.overflowByte
i = 1; j = 2
}
for (; i < buf.length-1; i += 2, j+= 2) {
buf2[j] = buf[i+1];
buf2[j+1] = buf[i];
}
for (; i < buf.length - 1; i += 2, j += 2) {
buf2[j] = buf[i + 1]
buf2[j + 1] = buf[i]
}
this.overflowByte = (i == buf.length-1) ? buf[buf.length-1] : -1;
this.overflowByte = (i == buf.length - 1) ? buf[buf.length - 1] : -1
return buf2.slice(0, j).toString('ucs2');
return buf2.slice(0, j).toString("ucs2")
}
Utf16BEDecoder.prototype.end = function() {
Utf16BEDecoder.prototype.end = function () {
this.overflowByte = -1
}
// == UTF-16 codec =============================================================
// Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
// Defaults to UTF-16LE, as it's prevalent and default in Node.
@@ -72,106 +69,119 @@ Utf16BEDecoder.prototype.end = function() {
// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
exports.utf16 = Utf16Codec;
function Utf16Codec(codecOptions, iconv) {
this.iconv = iconv;
exports.utf16 = Utf16Codec
function Utf16Codec (codecOptions, iconv) {
this.iconv = iconv
}
Utf16Codec.prototype.encoder = Utf16Encoder;
Utf16Codec.prototype.decoder = Utf16Decoder;
Utf16Codec.prototype.encoder = Utf16Encoder
Utf16Codec.prototype.decoder = Utf16Decoder
// -- Encoding (pass-through)
function Utf16Encoder(options, codec) {
options = options || {};
if (options.addBOM === undefined)
options.addBOM = true;
this.encoder = codec.iconv.getEncoder('utf-16le', options);
// UTF-16 encoder: delegates to the "utf-16le" encoder obtained from
// codec.iconv, with addBOM defaulting to true when the caller did not set it.
function Utf16Encoder (options, codec) {
  // Work on a shallow copy so the addBOM default is not written into the
  // caller's options object, where it could leak into later, unrelated calls.
  var encoderOptions = {}
  for (var key in options) {
    if (Object.prototype.hasOwnProperty.call(options, key)) { encoderOptions[key] = options[key] }
  }
  if (encoderOptions.addBOM === undefined) { encoderOptions.addBOM = true }
  this.encoder = codec.iconv.getEncoder("utf-16le", encoderOptions)
}
Utf16Encoder.prototype.write = function(str) {
return this.encoder.write(str);
Utf16Encoder.prototype.write = function (str) {
return this.encoder.write(str)
}
Utf16Encoder.prototype.end = function() {
return this.encoder.end();
Utf16Encoder.prototype.end = function () {
return this.encoder.end()
}
// -- Decoding
function Utf16Decoder(options, codec) {
this.decoder = null;
this.initialBytes = [];
this.initialBytesLen = 0;
function Utf16Decoder (options, codec) {
this.decoder = null
this.initialBufs = []
this.initialBufsLen = 0
this.options = options || {};
this.iconv = codec.iconv;
this.options = options || {}
this.iconv = codec.iconv
}
Utf16Decoder.prototype.write = function(buf) {
if (!this.decoder) {
// Codec is not chosen yet. Accumulate initial bytes.
this.initialBytes.push(buf);
this.initialBytesLen += buf.length;
if (this.initialBytesLen < 16) // We need more bytes to use space heuristic (see below)
return '';
Utf16Decoder.prototype.write = function (buf) {
if (!this.decoder) {
// Codec is not chosen yet. Accumulate initial bytes.
this.initialBufs.push(buf)
this.initialBufsLen += buf.length
// We have enough bytes -> detect endianness.
var buf = Buffer.concat(this.initialBytes),
encoding = detectEncoding(buf, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
this.initialBytes.length = this.initialBytesLen = 0;
}
if (this.initialBufsLen < 16) // We need more bytes to use space heuristic (see below)
{ return "" }
return this.decoder.write(buf);
// We have enough bytes -> detect endianness.
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding)
this.decoder = this.iconv.getDecoder(encoding, this.options)
var resStr = ""
for (var i = 0; i < this.initialBufs.length; i++) { resStr += this.decoder.write(this.initialBufs[i]) }
this.initialBufs.length = this.initialBufsLen = 0
return resStr
}
return this.decoder.write(buf)
}
Utf16Decoder.prototype.end = function() {
if (!this.decoder) {
var buf = Buffer.concat(this.initialBytes),
encoding = detectEncoding(buf, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
Utf16Decoder.prototype.end = function () {
if (!this.decoder) {
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding)
this.decoder = this.iconv.getDecoder(encoding, this.options)
var res = this.decoder.write(buf),
trail = this.decoder.end();
var resStr = ""
for (var i = 0; i < this.initialBufs.length; i++) { resStr += this.decoder.write(this.initialBufs[i]) }
return trail ? (res + trail) : res;
}
return this.decoder.end();
var trail = this.decoder.end()
if (trail) { resStr += trail }
this.initialBufs.length = this.initialBufsLen = 0
return resStr
}
return this.decoder.end()
}
function detectEncoding(buf, defaultEncoding) {
var enc = defaultEncoding || 'utf-16le';
function detectEncoding (bufs, defaultEncoding) {
var b = []
var charsProcessed = 0
// Number of ASCII chars when decoded as LE or BE.
var asciiCharsLE = 0
var asciiCharsBE = 0
if (buf.length >= 2) {
// Check BOM.
if (buf[0] == 0xFE && buf[1] == 0xFF) // UTF-16BE BOM
enc = 'utf-16be';
else if (buf[0] == 0xFF && buf[1] == 0xFE) // UTF-16LE BOM
enc = 'utf-16le';
else {
// No BOM found. Try to deduce encoding from initial content.
// Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
// So, we count ASCII as if it was LE or BE, and decide from that.
var asciiCharsLE = 0, asciiCharsBE = 0, // Counts of chars in both positions
_len = Math.min(buf.length - (buf.length % 2), 64); // Len is always even.
for (var i = 0; i < _len; i += 2) {
if (buf[i] === 0 && buf[i+1] !== 0) asciiCharsBE++;
if (buf[i] !== 0 && buf[i+1] === 0) asciiCharsLE++;
}
if (asciiCharsBE > asciiCharsLE)
enc = 'utf-16be';
else if (asciiCharsBE < asciiCharsLE)
enc = 'utf-16le';
outerLoop:
for (var i = 0; i < bufs.length; i++) {
var buf = bufs[i]
for (var j = 0; j < buf.length; j++) {
b.push(buf[j])
if (b.length === 2) {
if (charsProcessed === 0) {
// Check BOM first.
if (b[0] === 0xFF && b[1] === 0xFE) return "utf-16le"
if (b[0] === 0xFE && b[1] === 0xFF) return "utf-16be"
}
if (b[0] === 0 && b[1] !== 0) asciiCharsBE++
if (b[0] !== 0 && b[1] === 0) asciiCharsLE++
b.length = 0
charsProcessed++
if (charsProcessed >= 100) {
break outerLoop
}
}
}
}
return enc;
// Make decisions.
// Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
// So, we count ASCII as if it was LE or BE, and decide from that.
if (asciiCharsBE > asciiCharsLE) return "utf-16be"
if (asciiCharsBE < asciiCharsLE) return "utf-16le"
// Couldn't decide (likely all zeros or not enough data).
return defaultEncoding || "utf-16le"
}

View File

@@ -1,122 +1,122 @@
"use strict";
var Buffer = require("safer-buffer").Buffer;
"use strict"
var Buffer = require("safer-buffer").Buffer
// UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
// See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
exports.utf7 = Utf7Codec;
exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
function Utf7Codec(codecOptions, iconv) {
this.iconv = iconv;
exports.utf7 = Utf7Codec
exports.unicode11utf7 = "utf7" // Alias UNICODE-1-1-UTF-7
function Utf7Codec (codecOptions, iconv) {
this.iconv = iconv
};
Utf7Codec.prototype.encoder = Utf7Encoder;
Utf7Codec.prototype.decoder = Utf7Decoder;
Utf7Codec.prototype.bomAware = true;
Utf7Codec.prototype.encoder = Utf7Encoder
Utf7Codec.prototype.decoder = Utf7Decoder
Utf7Codec.prototype.bomAware = true
// -- Encoding
var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g;
// Why scape ()?./?
// eslint-disable-next-line no-useless-escape
var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g
function Utf7Encoder(options, codec) {
this.iconv = codec.iconv;
function Utf7Encoder (options, codec) {
this.iconv = codec.iconv
}
Utf7Encoder.prototype.write = function(str) {
// Naive implementation.
// Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".
return Buffer.from(str.replace(nonDirectChars, function(chunk) {
return "+" + (chunk === '+' ? '' :
this.iconv.encode(chunk, 'utf16-be').toString('base64').replace(/=+$/, ''))
+ "-";
}.bind(this)));
Utf7Encoder.prototype.write = function (str) {
// Naive implementation.
// Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".
return Buffer.from(str.replace(nonDirectChars, function (chunk) {
return "+" + (chunk === "+"
? ""
: this.iconv.encode(chunk, "utf16-be").toString("base64").replace(/=+$/, "")) +
"-"
}.bind(this)))
}
Utf7Encoder.prototype.end = function() {
Utf7Encoder.prototype.end = function () {
}
// -- Decoding
function Utf7Decoder(options, codec) {
this.iconv = codec.iconv;
this.inBase64 = false;
this.base64Accum = '';
function Utf7Decoder (options, codec) {
this.iconv = codec.iconv
this.inBase64 = false
this.base64Accum = ""
}
var base64Regex = /[A-Za-z0-9\/+]/;
var base64Chars = [];
for (var i = 0; i < 256; i++)
base64Chars[i] = base64Regex.test(String.fromCharCode(i));
// Why scape /?
// eslint-disable-next-line no-useless-escape
var base64Regex = /[A-Za-z0-9\/+]/
var base64Chars = []
for (var i = 0; i < 256; i++) { base64Chars[i] = base64Regex.test(String.fromCharCode(i)) }
var plusChar = '+'.charCodeAt(0),
minusChar = '-'.charCodeAt(0),
andChar = '&'.charCodeAt(0);
var plusChar = "+".charCodeAt(0)
var minusChar = "-".charCodeAt(0)
var andChar = "&".charCodeAt(0)
Utf7Decoder.prototype.write = function(buf) {
var res = "", lastI = 0,
inBase64 = this.inBase64,
base64Accum = this.base64Accum;
Utf7Decoder.prototype.write = function (buf) {
var res = ""; var lastI = 0
var inBase64 = this.inBase64
var base64Accum = this.base64Accum
// The decoder is more involved as we must handle chunks in stream.
// The decoder is more involved as we must handle chunks in stream.
for (var i = 0; i < buf.length; i++) {
if (!inBase64) { // We're in direct mode.
// Write direct chars until '+'
if (buf[i] == plusChar) {
res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
lastI = i+1;
inBase64 = true;
}
} else { // We decode base64.
if (!base64Chars[buf[i]]) { // Base64 ended.
if (i == lastI && buf[i] == minusChar) {// "+-" -> "+"
res += "+";
} else {
var b64str = base64Accum + buf.slice(lastI, i).toString();
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
}
if (buf[i] != minusChar) // Minus is absorbed after base64.
i--;
lastI = i+1;
inBase64 = false;
base64Accum = '';
}
for (var i = 0; i < buf.length; i++) {
if (!inBase64) { // We're in direct mode.
// Write direct chars until '+'
if (buf[i] == plusChar) {
res += this.iconv.decode(buf.slice(lastI, i), "ascii") // Write direct chars.
lastI = i + 1
inBase64 = true
}
} else { // We decode base64.
if (!base64Chars[buf[i]]) { // Base64 ended.
if (i == lastI && buf[i] == minusChar) { // "+-" -> "+"
res += "+"
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI, i), "ascii")
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
}
if (buf[i] != minusChar) // Minus is absorbed after base64.
{ i-- }
lastI = i + 1
inBase64 = false
base64Accum = ""
}
}
}
if (!inBase64) {
res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
} else {
var b64str = base64Accum + buf.slice(lastI).toString();
if (!inBase64) {
res += this.iconv.decode(buf.slice(lastI), "ascii") // Write direct chars.
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI), "ascii")
var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
b64str = b64str.slice(0, canBeDecoded);
var canBeDecoded = b64str.length - (b64str.length % 8) // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
base64Accum = b64str.slice(canBeDecoded) // The rest will be decoded in future.
b64str = b64str.slice(0, canBeDecoded)
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
}
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
}
this.inBase64 = inBase64;
this.base64Accum = base64Accum;
this.inBase64 = inBase64
this.base64Accum = base64Accum
return res;
return res
}
Utf7Decoder.prototype.end = function() {
var res = "";
if (this.inBase64 && this.base64Accum.length > 0)
res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");
Utf7Decoder.prototype.end = function () {
var res = ""
if (this.inBase64 && this.base64Accum.length > 0) { res = this.iconv.decode(Buffer.from(this.base64Accum, "base64"), "utf16-be") }
this.inBase64 = false;
this.base64Accum = '';
return res;
this.inBase64 = false
this.base64Accum = ""
return res
}
// UTF-7-IMAP codec.
// RFC3501 Sec. 5.1.3 Modified UTF-7 (http://tools.ietf.org/html/rfc3501#section-5.1.3)
// Differences:
@@ -128,163 +128,156 @@ Utf7Decoder.prototype.end = function() {
// * String must end in non-shifted position.
// * "-&" while in base64 is not allowed.
exports.utf7imap = Utf7IMAPCodec;
function Utf7IMAPCodec(codecOptions, iconv) {
this.iconv = iconv;
exports.utf7imap = Utf7IMAPCodec
function Utf7IMAPCodec (codecOptions, iconv) {
this.iconv = iconv
};
Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder;
Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder;
Utf7IMAPCodec.prototype.bomAware = true;
Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder
Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder
Utf7IMAPCodec.prototype.bomAware = true
// -- Encoding
function Utf7IMAPEncoder(options, codec) {
this.iconv = codec.iconv;
this.inBase64 = false;
this.base64Accum = Buffer.alloc(6);
this.base64AccumIdx = 0;
function Utf7IMAPEncoder (options, codec) {
this.iconv = codec.iconv
this.inBase64 = false
this.base64Accum = Buffer.alloc(6)
this.base64AccumIdx = 0
}
Utf7IMAPEncoder.prototype.write = function(str) {
var inBase64 = this.inBase64,
base64Accum = this.base64Accum,
base64AccumIdx = this.base64AccumIdx,
buf = Buffer.alloc(str.length*5 + 10), bufIdx = 0;
Utf7IMAPEncoder.prototype.write = function (str) {
var inBase64 = this.inBase64
var base64Accum = this.base64Accum
var base64AccumIdx = this.base64AccumIdx
var buf = Buffer.alloc(str.length * 5 + 10); var bufIdx = 0
for (var i = 0; i < str.length; i++) {
var uChar = str.charCodeAt(i);
if (0x20 <= uChar && uChar <= 0x7E) { // Direct character or '&'.
if (inBase64) {
if (base64AccumIdx > 0) {
bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
base64AccumIdx = 0;
}
buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
inBase64 = false;
}
if (!inBase64) {
buf[bufIdx++] = uChar; // Write direct character
if (uChar === andChar) // Ampersand -> '&-'
buf[bufIdx++] = minusChar;
}
} else { // Non-direct character
if (!inBase64) {
buf[bufIdx++] = andChar; // Write '&', then go to base64 mode.
inBase64 = true;
}
if (inBase64) {
base64Accum[base64AccumIdx++] = uChar >> 8;
base64Accum[base64AccumIdx++] = uChar & 0xFF;
if (base64AccumIdx == base64Accum.length) {
bufIdx += buf.write(base64Accum.toString('base64').replace(/\//g, ','), bufIdx);
base64AccumIdx = 0;
}
}
}
}
this.inBase64 = inBase64;
this.base64AccumIdx = base64AccumIdx;
return buf.slice(0, bufIdx);
}
Utf7IMAPEncoder.prototype.end = function() {
var buf = Buffer.alloc(10), bufIdx = 0;
if (this.inBase64) {
if (this.base64AccumIdx > 0) {
bufIdx += buf.write(this.base64Accum.slice(0, this.base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
this.base64AccumIdx = 0;
for (var i = 0; i < str.length; i++) {
var uChar = str.charCodeAt(i)
if (uChar >= 0x20 && uChar <= 0x7E) { // Direct character or '&'.
if (inBase64) {
if (base64AccumIdx > 0) {
bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString("base64").replace(/\//g, ",").replace(/=+$/, ""), bufIdx)
base64AccumIdx = 0
}
buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
this.inBase64 = false;
}
buf[bufIdx++] = minusChar // Write '-', then go to direct mode.
inBase64 = false
}
return buf.slice(0, bufIdx);
if (!inBase64) {
buf[bufIdx++] = uChar // Write direct character
if (uChar === andChar) // Ampersand -> '&-'
{ buf[bufIdx++] = minusChar }
}
} else { // Non-direct character
if (!inBase64) {
buf[bufIdx++] = andChar // Write '&', then go to base64 mode.
inBase64 = true
}
if (inBase64) {
base64Accum[base64AccumIdx++] = uChar >> 8
base64Accum[base64AccumIdx++] = uChar & 0xFF
if (base64AccumIdx == base64Accum.length) {
bufIdx += buf.write(base64Accum.toString("base64").replace(/\//g, ","), bufIdx)
base64AccumIdx = 0
}
}
}
}
this.inBase64 = inBase64
this.base64AccumIdx = base64AccumIdx
return buf.slice(0, bufIdx)
}
Utf7IMAPEncoder.prototype.end = function () {
var buf = Buffer.alloc(10); var bufIdx = 0
if (this.inBase64) {
if (this.base64AccumIdx > 0) {
bufIdx += buf.write(this.base64Accum.slice(0, this.base64AccumIdx).toString("base64").replace(/\//g, ",").replace(/=+$/, ""), bufIdx)
this.base64AccumIdx = 0
}
buf[bufIdx++] = minusChar // Write '-', then go to direct mode.
this.inBase64 = false
}
return buf.slice(0, bufIdx)
}
// -- Decoding
function Utf7IMAPDecoder(options, codec) {
this.iconv = codec.iconv;
this.inBase64 = false;
this.base64Accum = '';
function Utf7IMAPDecoder (options, codec) {
this.iconv = codec.iconv
this.inBase64 = false
this.base64Accum = ""
}
var base64IMAPChars = base64Chars.slice();
base64IMAPChars[','.charCodeAt(0)] = true;
var base64IMAPChars = base64Chars.slice()
base64IMAPChars[",".charCodeAt(0)] = true
Utf7IMAPDecoder.prototype.write = function(buf) {
var res = "", lastI = 0,
inBase64 = this.inBase64,
base64Accum = this.base64Accum;
Utf7IMAPDecoder.prototype.write = function (buf) {
var res = ""; var lastI = 0
var inBase64 = this.inBase64
var base64Accum = this.base64Accum
// The decoder is more involved as we must handle chunks in stream.
// It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
// The decoder is more involved as we must handle chunks in stream.
// It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
for (var i = 0; i < buf.length; i++) {
if (!inBase64) { // We're in direct mode.
// Write direct chars until '&'
if (buf[i] == andChar) {
res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
lastI = i+1;
inBase64 = true;
}
} else { // We decode base64.
if (!base64IMAPChars[buf[i]]) { // Base64 ended.
if (i == lastI && buf[i] == minusChar) { // "&-" -> "&"
res += "&";
} else {
var b64str = base64Accum + buf.slice(lastI, i).toString().replace(/,/g, '/');
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
}
if (buf[i] != minusChar) // Minus may be absorbed after base64.
i--;
lastI = i+1;
inBase64 = false;
base64Accum = '';
}
for (var i = 0; i < buf.length; i++) {
if (!inBase64) { // We're in direct mode.
// Write direct chars until '&'
if (buf[i] == andChar) {
res += this.iconv.decode(buf.slice(lastI, i), "ascii") // Write direct chars.
lastI = i + 1
inBase64 = true
}
} else { // We decode base64.
if (!base64IMAPChars[buf[i]]) { // Base64 ended.
if (i == lastI && buf[i] == minusChar) { // "&-" -> "&"
res += "&"
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI, i), "ascii").replace(/,/g, "/")
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
}
if (buf[i] != minusChar) // Minus may be absorbed after base64.
{ i-- }
lastI = i + 1
inBase64 = false
base64Accum = ""
}
}
}
if (!inBase64) {
res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
} else {
var b64str = base64Accum + buf.slice(lastI).toString().replace(/,/g, '/');
if (!inBase64) {
res += this.iconv.decode(buf.slice(lastI), "ascii") // Write direct chars.
} else {
var b64str = base64Accum + this.iconv.decode(buf.slice(lastI), "ascii").replace(/,/g, "/")
var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
b64str = b64str.slice(0, canBeDecoded);
var canBeDecoded = b64str.length - (b64str.length % 8) // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
base64Accum = b64str.slice(canBeDecoded) // The rest will be decoded in future.
b64str = b64str.slice(0, canBeDecoded)
res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
}
res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
}
this.inBase64 = inBase64;
this.base64Accum = base64Accum;
this.inBase64 = inBase64
this.base64Accum = base64Accum
return res;
return res
}
Utf7IMAPDecoder.prototype.end = function() {
var res = "";
if (this.inBase64 && this.base64Accum.length > 0)
res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");
Utf7IMAPDecoder.prototype.end = function () {
var res = ""
if (this.inBase64 && this.base64Accum.length > 0) { res = this.iconv.decode(Buffer.from(this.base64Accum, "base64"), "utf16-be") }
this.inBase64 = false;
this.base64Accum = '';
return res;
this.inBase64 = false
this.base64Accum = ""
return res
}

View File

@@ -1,52 +1,48 @@
"use strict";
"use strict"
var BOMChar = '\uFEFF';
var BOMChar = "\uFEFF"
exports.PrependBOM = PrependBOMWrapper
function PrependBOMWrapper(encoder, options) {
this.encoder = encoder;
this.addBOM = true;
function PrependBOMWrapper (encoder, options) {
this.encoder = encoder
this.addBOM = true
}
PrependBOMWrapper.prototype.write = function(str) {
if (this.addBOM) {
str = BOMChar + str;
this.addBOM = false;
}
PrependBOMWrapper.prototype.write = function (str) {
if (this.addBOM) {
str = BOMChar + str
this.addBOM = false
}
return this.encoder.write(str);
return this.encoder.write(str)
}
PrependBOMWrapper.prototype.end = function() {
return this.encoder.end();
PrependBOMWrapper.prototype.end = function () {
return this.encoder.end()
}
// ------------------------------------------------------------------------------
//------------------------------------------------------------------------------
exports.StripBOM = StripBOMWrapper;
function StripBOMWrapper(decoder, options) {
this.decoder = decoder;
this.pass = false;
this.options = options || {};
exports.StripBOM = StripBOMWrapper
function StripBOMWrapper (decoder, options) {
this.decoder = decoder
this.pass = false
this.options = options || {}
}
StripBOMWrapper.prototype.write = function(buf) {
var res = this.decoder.write(buf);
if (this.pass || !res)
return res;
StripBOMWrapper.prototype.write = function (buf) {
var res = this.decoder.write(buf)
if (this.pass || !res) { return res }
if (res[0] === BOMChar) {
res = res.slice(1);
if (typeof this.options.stripBOM === 'function')
this.options.stripBOM();
}
if (res[0] === BOMChar) {
res = res.slice(1)
if (typeof this.options.stripBOM === "function") { this.options.stripBOM() }
}
this.pass = true;
return res;
this.pass = true
return res
}
StripBOMWrapper.prototype.end = function() {
return this.decoder.end();
StripBOMWrapper.prototype.end = function () {
return this.decoder.end()
}

View File

@@ -6,15 +6,22 @@
*--------------------------------------------------------------------------------------------*/
declare module 'iconv-lite' {
export function decode(buffer: Buffer, encoding: string, options?: Options): string;
// Basic API
export function decode(buffer: Buffer | Uint8Array, encoding: string, options?: Options): string;
export function encode(content: string, encoding: string, options?: Options): Buffer;
export function encodingExists(encoding: string): boolean;
// Stream API
export function decodeStream(encoding: string, options?: Options): NodeJS.ReadWriteStream;
export function encodeStream(encoding: string, options?: Options): NodeJS.ReadWriteStream;
// Low-level stream APIs
export function getEncoder(encoding: string, options?: Options): EncoderStream;
export function getDecoder(encoding: string, options?: Options): DecoderStream;
}
export interface Options {
@@ -22,3 +29,13 @@ export interface Options {
addBOM?: boolean;
defaultEncoding?: string;
}
export interface EncoderStream {
write(str: string): Buffer;
end(): Buffer | undefined;
}
export interface DecoderStream {
write(buf: Buffer): string;
end(): string | undefined;
}

232
node_modules/iconv-lite/lib/index.js generated vendored
View File

@@ -1,153 +1,183 @@
"use strict";
"use strict"
// Some environments don't have global Buffer (e.g. React Native).
// Solution would be installing npm modules "buffer" and "stream" explicitly.
var Buffer = require("safer-buffer").Buffer;
var Buffer = require("safer-buffer").Buffer
var bomHandling = require("./bom-handling"),
iconv = module.exports;
var bomHandling = require("./bom-handling")
var mergeModules = require("./helpers/merge-exports")
var iconv = module.exports
// All codecs and aliases are kept here, keyed by encoding name/alias.
// They are lazy loaded in `iconv.getCodec` from `encodings/index.js`.
iconv.encodings = null;
// Cannot initialize with { __proto__: null } because Boolean({ __proto__: null }) === true
iconv.encodings = null
// Characters emitted in case of error.
iconv.defaultCharUnicode = '<27>';
iconv.defaultCharSingleByte = '?';
iconv.defaultCharUnicode = "<22>"
iconv.defaultCharSingleByte = "?"
// Public API.
iconv.encode = function encode(str, encoding, options) {
str = "" + (str || ""); // Ensure string.
iconv.encode = function encode (str, encoding, options) {
str = "" + (str || "") // Ensure string.
var encoder = iconv.getEncoder(encoding, options);
var encoder = iconv.getEncoder(encoding, options)
var res = encoder.write(str);
var trail = encoder.end();
return (trail && trail.length > 0) ? Buffer.concat([res, trail]) : res;
var res = encoder.write(str)
var trail = encoder.end()
return (trail && trail.length > 0) ? Buffer.concat([res, trail]) : res
}
iconv.decode = function decode(buf, encoding, options) {
if (typeof buf === 'string') {
if (!iconv.skipDecodeWarning) {
console.error('Iconv-lite warning: decode()-ing strings is deprecated. Refer to https://github.com/ashtuchkin/iconv-lite/wiki/Use-Buffers-when-decoding');
iconv.skipDecodeWarning = true;
}
buf = Buffer.from("" + (buf || ""), "binary"); // Ensure buffer.
iconv.decode = function decode (buf, encoding, options) {
if (typeof buf === "string") {
if (!iconv.skipDecodeWarning) {
console.error("Iconv-lite warning: decode()-ing strings is deprecated. Refer to https://github.com/ashtuchkin/iconv-lite/wiki/Use-Buffers-when-decoding")
iconv.skipDecodeWarning = true
}
var decoder = iconv.getDecoder(encoding, options);
buf = Buffer.from("" + (buf || ""), "binary") // Ensure buffer.
}
var res = decoder.write(buf);
var trail = decoder.end();
var decoder = iconv.getDecoder(encoding, options)
return trail ? (res + trail) : res;
var res = decoder.write(buf)
var trail = decoder.end()
return trail ? (res + trail) : res
}
iconv.encodingExists = function encodingExists(enc) {
try {
iconv.getCodec(enc);
return true;
} catch (e) {
return false;
}
iconv.encodingExists = function encodingExists (enc) {
try {
iconv.getCodec(enc)
return true
} catch (e) {
return false
}
}
// Legacy aliases to convert functions
iconv.toEncoding = iconv.encode;
iconv.fromEncoding = iconv.decode;
iconv.toEncoding = iconv.encode
iconv.fromEncoding = iconv.decode
// Search for a codec in iconv.encodings. Cache codec data in iconv._codecDataCache.
iconv._codecDataCache = {};
iconv.getCodec = function getCodec(encoding) {
if (!iconv.encodings)
iconv.encodings = require("../encodings"); // Lazy load all encoding definitions.
// Canonicalize encoding name: strip all non-alphanumeric chars and appended year.
var enc = iconv._canonicalizeEncoding(encoding);
iconv._codecDataCache = { __proto__: null }
// Traverse iconv.encodings to find actual codec.
var codecOptions = {};
while (true) {
var codec = iconv._codecDataCache[enc];
if (codec)
return codec;
iconv.getCodec = function getCodec (encoding) {
if (!iconv.encodings) {
var raw = require("../encodings")
// TODO: In future versions when old nodejs support is removed can use object.assign
iconv.encodings = { __proto__: null } // Initialize as empty object.
mergeModules(iconv.encodings, raw)
}
var codecDef = iconv.encodings[enc];
// Canonicalize encoding name: strip all non-alphanumeric chars and appended year.
var enc = iconv._canonicalizeEncoding(encoding)
switch (typeof codecDef) {
case "string": // Direct alias to other encoding.
enc = codecDef;
break;
// Traverse iconv.encodings to find actual codec.
var codecOptions = {}
while (true) {
var codec = iconv._codecDataCache[enc]
case "object": // Alias with options. Can be layered.
for (var key in codecDef)
codecOptions[key] = codecDef[key];
if (codec) { return codec }
if (!codecOptions.encodingName)
codecOptions.encodingName = enc;
enc = codecDef.type;
break;
var codecDef = iconv.encodings[enc]
case "function": // Codec itself.
if (!codecOptions.encodingName)
codecOptions.encodingName = enc;
switch (typeof codecDef) {
case "string": // Direct alias to other encoding.
enc = codecDef
break
// The codec function must load all tables and return object with .encoder and .decoder methods.
// It'll be called only once (for each different options object).
codec = new codecDef(codecOptions, iconv);
case "object": // Alias with options. Can be layered.
for (var key in codecDef) { codecOptions[key] = codecDef[key] }
iconv._codecDataCache[codecOptions.encodingName] = codec; // Save it to be reused later.
return codec;
if (!codecOptions.encodingName) { codecOptions.encodingName = enc }
default:
throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '"+enc+"')");
}
enc = codecDef.type
break
case "function": // Codec itself.
if (!codecOptions.encodingName) { codecOptions.encodingName = enc }
// The codec function must load all tables and return object with .encoder and .decoder methods.
// It'll be called only once (for each different options object).
//
codec = new codecDef(codecOptions, iconv)
iconv._codecDataCache[codecOptions.encodingName] = codec // Save it to be reused later.
return codec
default:
throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '" + enc + "')")
}
}
}
iconv._canonicalizeEncoding = function(encoding) {
// Canonicalize encoding name: strip all non-alphanumeric chars and appended year.
return (''+encoding).toLowerCase().replace(/:\d{4}$|[^0-9a-z]/g, "");
iconv._canonicalizeEncoding = function (encoding) {
// Canonicalize encoding name: strip all non-alphanumeric chars and appended year.
return ("" + encoding).toLowerCase().replace(/:\d{4}$|[^0-9a-z]/g, "")
}
iconv.getEncoder = function getEncoder(encoding, options) {
var codec = iconv.getCodec(encoding),
encoder = new codec.encoder(options, codec);
iconv.getEncoder = function getEncoder (encoding, options) {
var codec = iconv.getCodec(encoding)
var encoder = new codec.encoder(options, codec)
if (codec.bomAware && options && options.addBOM)
encoder = new bomHandling.PrependBOM(encoder, options);
if (codec.bomAware && options && options.addBOM) { encoder = new bomHandling.PrependBOM(encoder, options) }
return encoder;
return encoder
}
iconv.getDecoder = function getDecoder(encoding, options) {
var codec = iconv.getCodec(encoding),
decoder = new codec.decoder(options, codec);
iconv.getDecoder = function getDecoder (encoding, options) {
var codec = iconv.getCodec(encoding)
var decoder = new codec.decoder(options, codec)
if (codec.bomAware && !(options && options.stripBOM === false))
decoder = new bomHandling.StripBOM(decoder, options);
if (codec.bomAware && !(options && options.stripBOM === false)) { decoder = new bomHandling.StripBOM(decoder, options) }
return decoder;
return decoder
}
// Streaming API
// NOTE: Streaming API naturally depends on 'stream' module from Node.js. Unfortunately in browser environments this module can add
// up to 100Kb to the output bundle. To avoid unnecessary code bloat, we don't enable Streaming API in browser by default.
// If you would like to enable it explicitly, please add the following code to your app:
// > iconv.enableStreamingAPI(require('stream'));
iconv.enableStreamingAPI = function enableStreamingAPI (streamModule) {
if (iconv.supportsStreams) { return }
// Load extensions in Node. All of them are omitted in Browserify build via 'browser' field in package.json.
var nodeVer = typeof process !== 'undefined' && process.versions && process.versions.node;
if (nodeVer) {
// Dependency-inject stream module to create IconvLite stream classes.
var streams = require("./streams")(streamModule)
// Load streaming support in Node v0.10+
var nodeVerArr = nodeVer.split(".").map(Number);
if (nodeVerArr[0] > 0 || nodeVerArr[1] >= 10) {
require("./streams")(iconv);
}
// Not public API yet, but expose the stream classes.
iconv.IconvLiteEncoderStream = streams.IconvLiteEncoderStream
iconv.IconvLiteDecoderStream = streams.IconvLiteDecoderStream
// Load Node primitive extensions.
require("./extend-node")(iconv);
// Streaming API.
iconv.encodeStream = function encodeStream (encoding, options) {
return new iconv.IconvLiteEncoderStream(iconv.getEncoder(encoding, options), options)
}
iconv.decodeStream = function decodeStream (encoding, options) {
return new iconv.IconvLiteDecoderStream(iconv.getDecoder(encoding, options), options)
}
iconv.supportsStreams = true
}
if ("Ā" != "\u0100") {
console.error("iconv-lite warning: javascript files use encoding different from utf-8. See https://github.com/ashtuchkin/iconv-lite/wiki/Javascript-source-file-encodings for more info.");
// Enable Streaming API automatically if 'stream' module is available and non-empty (the majority of environments).
var streamModule
try {
streamModule = require("stream")
} catch (e) {}
if (streamModule && streamModule.Transform) {
iconv.enableStreamingAPI(streamModule)
} else {
// In rare cases where 'stream' module is not available by default, throw a helpful exception.
iconv.encodeStream = iconv.decodeStream = function () {
throw new Error("iconv-lite Streaming API is not enabled. Use iconv.enableStreamingAPI(require('stream')); to enable it.")
}
}
// Some environments, such as browsers, may not load JavaScript files as UTF-8
// eslint-disable-next-line no-constant-condition
if ("Ā" !== "\u0100") {
console.error("iconv-lite warning: js files use non-utf8 encoding. See https://github.com/ashtuchkin/iconv-lite/wiki/Javascript-source-file-encodings for more info.")
}

View File

@@ -1,121 +1,105 @@
"use strict";
"use strict"
var Buffer = require("buffer").Buffer,
Transform = require("stream").Transform;
var Buffer = require("safer-buffer").Buffer
// NOTE: Due to 'stream' module being pretty large (~100Kb, significant in browser environments),
// we opt to dependency-inject it instead of creating a hard dependency.
module.exports = function (streamModule) {
var Transform = streamModule.Transform
// == Exports ==================================================================
module.exports = function(iconv) {
// Additional Public API.
iconv.encodeStream = function encodeStream(encoding, options) {
return new IconvLiteEncoderStream(iconv.getEncoder(encoding, options), options);
}
// == Encoder stream =======================================================
iconv.decodeStream = function decodeStream(encoding, options) {
return new IconvLiteDecoderStream(iconv.getDecoder(encoding, options), options);
}
function IconvLiteEncoderStream (conv, options) {
this.conv = conv
options = options || {}
options.decodeStrings = false // We accept only strings, so we don't need to decode them.
Transform.call(this, options)
}
iconv.supportsStreams = true;
// Not published yet.
iconv.IconvLiteEncoderStream = IconvLiteEncoderStream;
iconv.IconvLiteDecoderStream = IconvLiteDecoderStream;
iconv._collect = IconvLiteDecoderStream.prototype.collect;
};
// == Encoder stream =======================================================
function IconvLiteEncoderStream(conv, options) {
this.conv = conv;
options = options || {};
options.decodeStrings = false; // We accept only strings, so we don't need to decode them.
Transform.call(this, options);
}
IconvLiteEncoderStream.prototype = Object.create(Transform.prototype, {
IconvLiteEncoderStream.prototype = Object.create(Transform.prototype, {
constructor: { value: IconvLiteEncoderStream }
});
})
IconvLiteEncoderStream.prototype._transform = function (chunk, encoding, done) {
if (typeof chunk !== "string") {
return done(new Error("Iconv encoding stream needs strings as its input."))
}
IconvLiteEncoderStream.prototype._transform = function(chunk, encoding, done) {
if (typeof chunk != 'string')
return done(new Error("Iconv encoding stream needs strings as its input."));
try {
var res = this.conv.write(chunk);
if (res && res.length) this.push(res);
done();
var res = this.conv.write(chunk)
if (res && res.length) this.push(res)
done()
} catch (e) {
done(e)
}
catch (e) {
done(e);
}
}
}
IconvLiteEncoderStream.prototype._flush = function(done) {
IconvLiteEncoderStream.prototype._flush = function (done) {
try {
var res = this.conv.end();
if (res && res.length) this.push(res);
done();
var res = this.conv.end()
if (res && res.length) this.push(res)
done()
} catch (e) {
done(e)
}
catch (e) {
done(e);
}
}
}
IconvLiteEncoderStream.prototype.collect = function(cb) {
var chunks = [];
this.on('error', cb);
this.on('data', function(chunk) { chunks.push(chunk); });
this.on('end', function() {
cb(null, Buffer.concat(chunks));
});
return this;
}
IconvLiteEncoderStream.prototype.collect = function (cb) {
var chunks = []
this.on("error", cb)
this.on("data", function (chunk) { chunks.push(chunk) })
this.on("end", function () {
cb(null, Buffer.concat(chunks))
})
return this
}
// == Decoder stream =======================================================
// == Decoder stream =======================================================
function IconvLiteDecoderStream(conv, options) {
this.conv = conv;
options = options || {};
options.encoding = this.encoding = 'utf8'; // We output strings.
Transform.call(this, options);
}
function IconvLiteDecoderStream (conv, options) {
this.conv = conv
options = options || {}
options.encoding = this.encoding = "utf8" // We output strings.
Transform.call(this, options)
}
IconvLiteDecoderStream.prototype = Object.create(Transform.prototype, {
IconvLiteDecoderStream.prototype = Object.create(Transform.prototype, {
constructor: { value: IconvLiteDecoderStream }
});
})
IconvLiteDecoderStream.prototype._transform = function(chunk, encoding, done) {
if (!Buffer.isBuffer(chunk))
return done(new Error("Iconv decoding stream needs buffers as its input."));
IconvLiteDecoderStream.prototype._transform = function (chunk, encoding, done) {
if (!Buffer.isBuffer(chunk) && !(chunk instanceof Uint8Array)) { return done(new Error("Iconv decoding stream needs buffers as its input.")) }
try {
var res = this.conv.write(chunk);
if (res && res.length) this.push(res, this.encoding);
done();
var res = this.conv.write(chunk)
if (res && res.length) this.push(res, this.encoding)
done()
} catch (e) {
done(e)
}
catch (e) {
done(e);
}
}
}
IconvLiteDecoderStream.prototype._flush = function(done) {
IconvLiteDecoderStream.prototype._flush = function (done) {
try {
var res = this.conv.end();
if (res && res.length) this.push(res, this.encoding);
done();
var res = this.conv.end()
if (res && res.length) this.push(res, this.encoding)
done()
} catch (e) {
done(e)
}
catch (e) {
done(e);
}
}
}
IconvLiteDecoderStream.prototype.collect = function(cb) {
var res = '';
this.on('error', cb);
this.on('data', function(chunk) { res += chunk; });
this.on('end', function() {
cb(null, res);
});
return this;
}
IconvLiteDecoderStream.prototype.collect = function (cb) {
var res = ""
this.on("error", cb)
this.on("data", function (chunk) { res += chunk })
this.on("end", function () {
cb(null, res)
})
return this
}
return {
IconvLiteEncoderStream: IconvLiteEncoderStream,
IconvLiteDecoderStream: IconvLiteDecoderStream
}
}

49
node_modules/iconv-lite/package.json generated vendored
View File

@@ -1,7 +1,7 @@
{
"name": "iconv-lite",
"description": "Convert character encodings in pure javascript.",
"version": "0.4.24",
"version": "0.7.0",
"license": "MIT",
"keywords": [
"iconv",
@@ -12,35 +12,48 @@
"author": "Alexander Shtuchkin <ashtuchkin@gmail.com>",
"main": "./lib/index.js",
"typings": "./lib/index.d.ts",
"homepage": "https://github.com/ashtuchkin/iconv-lite",
"bugs": "https://github.com/ashtuchkin/iconv-lite/issues",
"homepage": "https://github.com/pillarjs/iconv-lite",
"bugs": "https://github.com/pillarjs/iconv-lite/issues",
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/express"
},
"repository": {
"type": "git",
"url": "git://github.com/ashtuchkin/iconv-lite.git"
"url": "https://github.com/pillarjs/iconv-lite.git"
},
"engines": {
"node": ">=0.10.0"
},
"scripts": {
"coverage": "istanbul cover _mocha -- --grep .",
"coverage-open": "open coverage/lcov-report/index.html",
"test": "mocha --reporter spec --grep ."
"lint": "eslint",
"lint:fix": "eslint --fix",
"test": "mocha --reporter spec --check-leaks --grep .",
"test:ci": "nyc --exclude test --reporter=lcovonly --reporter=text npm test",
"test:cov": "nyc --exclude test --reporter=html --reporter=text npm test",
"test:performance": "node --allow-natives-syntax performance/index.js",
"test:tap": "mocha --reporter tap --check-leaks --grep .",
"test:webpack": "npm pack && mv iconv-lite-*.tgz test/webpack/iconv-lite.tgz && cd test/webpack && npm install && npm run test && rm iconv-lite.tgz"
},
"browser": {
"./lib/extend-node": false,
"./lib/streams": false
"stream": false
},
"devDependencies": {
"mocha": "^3.1.0",
"request": "~2.87.0",
"unorm": "*",
"errto": "*",
"async": "*",
"istanbul": "*",
"semver": "*",
"iconv": "*"
"@stylistic/eslint-plugin": "^5.1.0",
"@stylistic/eslint-plugin-js": "^4.1.0",
"async": "^3.2.0",
"bench-node": "^0.10.0",
"eslint": "^9.0.0",
"errto": "^0.2.1",
"iconv": "^2.3.5",
"mocha": "^6.2.2",
"neostandard": "^0.12.0",
"nyc": "^14.1.1",
"request": "^2.88.2",
"semver": "^6.3.0",
"unorm": "^1.6.0"
},
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3"
"safer-buffer": ">= 2.1.2 < 3.0.0"
}
}