172 lines
5.5 KiB
JavaScript
172 lines
5.5 KiB
JavaScript
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||
|
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||
|
|
||
|
// The WebIDL binder places static methods on the prototype, rather than
|
||
|
// on the constructor, which is a bit clumsy, and is definitely not
|
||
|
// idiomatic.
|
||
|
LanguageInfo.detectLanguage = LanguageInfo.prototype.detectLanguage;
|
||
|
|
||
|
// Closure is overzealous in its function call optimization, and tries
|
||
|
// to turn these singleton methods into unbound function calls.
|
||
|
ensureCache.alloc = ensureCache.alloc.bind(ensureCache);
|
||
|
ensureCache.prepare = ensureCache.prepare.bind(ensureCache);
|
||
|
|
||
|
// From public/encodings.h. Unfortunately, the WebIDL binder doesn't
|
||
|
// allow us to define or automatically derive these in the IDL.
|
||
|
var Encodings = {
|
||
|
'ISO_8859_1' : 0,
|
||
|
'ISO_8859_2' : 1,
|
||
|
'ISO_8859_3' : 2,
|
||
|
'ISO_8859_4' : 3,
|
||
|
'ISO_8859_5' : 4,
|
||
|
'ISO_8859_6' : 5,
|
||
|
'ISO_8859_7' : 6,
|
||
|
'ISO_8859_8' : 7,
|
||
|
'ISO_8859_9' : 8,
|
||
|
'ISO_8859_10' : 9,
|
||
|
'JAPANESE_EUC_JP' : 10,
|
||
|
'EUC_JP' : 10,
|
||
|
'JAPANESE_SHIFT_JIS' : 11,
|
||
|
'SHIFT_JIS' : 11,
|
||
|
'JAPANESE_JIS' : 12,
|
||
|
'JIS' : 12,
|
||
|
'CHINESE_BIG5' : 13,
|
||
|
'BIG5' : 13,
|
||
|
'CHINESE_GB' : 14,
|
||
|
'CHINESE_EUC_CN' : 15,
|
||
|
'EUC_CN' : 15,
|
||
|
'KOREAN_EUC_KR' : 16,
|
||
|
'EUC_KR' : 16,
|
||
|
'UNICODE_UNUSED' : 17,
|
||
|
'CHINESE_EUC_DEC' : 18,
|
||
|
'EUC_DEC' : 18,
|
||
|
'CHINESE_CNS' : 19,
|
||
|
'CNS' : 19,
|
||
|
'CHINESE_BIG5_CP950' : 20,
|
||
|
'BIG5_CP950' : 20,
|
||
|
'JAPANESE_CP932' : 21,
|
||
|
'CP932' : 21,
|
||
|
'UTF8' : 22,
|
||
|
'UNKNOWN_ENCODING' : 23,
|
||
|
'ASCII_7BIT' : 24,
|
||
|
'RUSSIAN_KOI8_R' : 25,
|
||
|
'KOI8_R' : 25,
|
||
|
'RUSSIAN_CP1251' : 26,
|
||
|
'CP1251' : 26,
|
||
|
'MSFT_CP1252' : 27,
|
||
|
'CP1252' : 27,
|
||
|
'RUSSIAN_KOI8_RU' : 28,
|
||
|
'KOI8_RU' : 28,
|
||
|
'MSFT_CP1250' : 29,
|
||
|
'CP1250' : 29,
|
||
|
'ISO_8859_15' : 30,
|
||
|
'MSFT_CP1254' : 31,
|
||
|
'CP1254' : 31,
|
||
|
'MSFT_CP1257' : 32,
|
||
|
'CP1257' : 32,
|
||
|
'ISO_8859_11' : 33,
|
||
|
'MSFT_CP874' : 34,
|
||
|
'CP874' : 34,
|
||
|
'MSFT_CP1256' : 35,
|
||
|
'CP1256' : 35,
|
||
|
'MSFT_CP1255' : 36,
|
||
|
'CP1255' : 36,
|
||
|
'ISO_8859_8_I' : 37,
|
||
|
'HEBREW_VISUAL' : 38,
|
||
|
'CZECH_CP852' : 39,
|
||
|
'CP852' : 39,
|
||
|
'CZECH_CSN_369103' : 40,
|
||
|
'CSN_369103' : 40,
|
||
|
'MSFT_CP1253' : 41,
|
||
|
'CP1253' : 41,
|
||
|
'RUSSIAN_CP866' : 42,
|
||
|
'CP866' : 42,
|
||
|
'ISO_8859_13' : 43,
|
||
|
'ISO_2022_KR' : 44,
|
||
|
'GBK' : 45,
|
||
|
'GB18030' : 46,
|
||
|
'BIG5_HKSCS' : 47,
|
||
|
'ISO_2022_CN' : 48,
|
||
|
'TSCII' : 49,
|
||
|
'TAMIL_MONO' : 50,
|
||
|
'TAMIL_BI' : 51,
|
||
|
'JAGRAN' : 52,
|
||
|
'MACINTOSH_ROMAN' : 53,
|
||
|
'UTF7' : 54,
|
||
|
'BHASKAR' : 55,
|
||
|
'HTCHANAKYA' : 56,
|
||
|
'UTF16BE' : 57,
|
||
|
'UTF16LE' : 58,
|
||
|
'UTF32BE' : 59,
|
||
|
'UTF32LE' : 60,
|
||
|
'BINARYENC' : 61,
|
||
|
'HZ_GB_2312' : 62,
|
||
|
'UTF8UTF8' : 63,
|
||
|
'TAM_ELANGO' : 64,
|
||
|
'TAM_LTTMBARANI' : 65,
|
||
|
'TAM_SHREE' : 66,
|
||
|
'TAM_TBOOMIS' : 67,
|
||
|
'TAM_TMNEWS' : 68,
|
||
|
'TAM_WEBTAMIL' : 69,
|
||
|
'KDDI_SHIFT_JIS' : 70,
|
||
|
'DOCOMO_SHIFT_JIS' : 71,
|
||
|
'SOFTBANK_SHIFT_JIS' : 72,
|
||
|
'KDDI_ISO_2022_JP' : 73,
|
||
|
'ISO_2022_JP' : 73,
|
||
|
'SOFTBANK_ISO_2022_JP' : 74,
|
||
|
};
|
||
|
|
||
|
// Accept forms both with and without underscores/hypens.
|
||
|
for (let code of Object.keys(Encodings)) {
|
||
|
if (code['includes']("_"))
|
||
|
Encodings[code.replace(/_/g, "")] = Encodings[code];
|
||
|
}
|
||
|
|
||
|
addOnPreMain(function() {
|
||
|
|
||
|
onmessage = function(aMsg){
|
||
|
let data = aMsg['data'];
|
||
|
|
||
|
let langInfo;
|
||
|
if (data['tld'] == undefined && data['encoding'] == undefined && data['language'] == undefined) {
|
||
|
langInfo = LanguageInfo.detectLanguage(data['text'], !data['isHTML']);
|
||
|
} else {
|
||
|
// Do our best to find the given encoding in the encodings table.
|
||
|
// Otherwise, just fall back to unknown.
|
||
|
let enc = String(data['encoding']).toUpperCase().replace(/[_-]/g, "");
|
||
|
|
||
|
let encoding;
|
||
|
if (Encodings.hasOwnProperty(enc))
|
||
|
encoding = Encodings[enc];
|
||
|
else
|
||
|
encoding = Encodings['UNKNOWN_ENCODING'];
|
||
|
|
||
|
langInfo = LanguageInfo.detectLanguage(data['text'], !data['isHTML'],
|
||
|
data['tld'] || null,
|
||
|
encoding,
|
||
|
data['language'] || null);
|
||
|
}
|
||
|
|
||
|
postMessage({
|
||
|
'language': langInfo.getLanguageCode(),
|
||
|
'confident': langInfo.getIsReliable(),
|
||
|
|
||
|
'languages': new Array(3).fill(0).map((_, index) => {
|
||
|
let lang = langInfo.get_languages(index);
|
||
|
return {
|
||
|
'languageCode': lang.getLanguageCode(),
|
||
|
'percent': lang.getPercent(),
|
||
|
};
|
||
|
}).filter(lang => {
|
||
|
// Ignore empty results.
|
||
|
return lang['languageCode'] != "un" || lang['percent'] > 0;
|
||
|
}),
|
||
|
});
|
||
|
|
||
|
Module.destroy(langInfo);
|
||
|
};
|
||
|
|
||
|
postMessage("ready");
|
||
|
});
|