2018-11-01 00:08:29 +08:00
|
|
|
# coding=utf-8
|
2019-09-17 10:04:27 +08:00
|
|
|
from __future__ import absolute_import
|
2018-11-28 18:44:59 +08:00
|
|
|
import types
|
2019-10-06 04:55:35 +08:00
|
|
|
import re
|
2018-11-01 00:08:29 +08:00
|
|
|
|
2018-11-28 18:44:59 +08:00
|
|
|
from babelfish.exceptions import LanguageError
|
2019-12-24 07:25:02 +08:00
|
|
|
from babelfish import Language as Language_, basestr, LANGUAGE_MATRIX
|
2019-09-17 10:04:27 +08:00
|
|
|
from six.moves import zip
|
2018-11-01 00:08:29 +08:00
|
|
|
|
|
|
|
# Maps non-standard or bibliographic language codes (lower-case keys) to the
# two-letter code that should be used in their place.
repl_map = {
    # Ad-hoc aliases.
    "dk": "da",
    "nld": "nl",
    "english": "en",
    # Three-letter bibliographic codes.
    "alb": "sq",  # Albanian
    "arm": "hy",  # Armenian
    "baq": "eu",  # Basque
    "bur": "my",  # Burmese
    "chi": "zh",  # Chinese
    "cze": "cs",  # Czech
    "dut": "nl",  # Dutch
    "fre": "fr",  # French
    "geo": "ka",  # Georgian
    "ger": "de",  # German
    "gre": "el",  # Greek
    "ice": "is",  # Icelandic
    "mac": "mk",  # Macedonian
    "mao": "mi",  # Maori
    "may": "ms",  # Malay
    "per": "fa",  # Persian
    "rum": "ro",  # Romanian
    "slo": "sk",  # Slovak
    "tib": "bo",  # Tibetan
}
|
|
|
|
|
2021-06-06 21:57:29 +08:00
|
|
|
# Extra tags accepted in addition to the ISO codes; appended verbatim to
# FULL_LANGUAGE_LIST.
# NOTE(review): the groupings below are inferred from the tags themselves
# (simplified Chinese / traditional Chinese / other) - confirm with callers.
CUSTOM_LIST = [
    "chs", "sc", "zhs", "hans", "gb", u"简", u"双语",
    "cht", "tc", "zht", "hant", "big5", u"繁", u"雙語",
    "spl", "ea", "pob", "pb",
]
|
|
|
|
|
2019-12-24 10:01:18 +08:00
|
|
|
# Unique non-empty ``alpha2`` values from babelfish's matrix, followed by every
# replacement target in repl_map (so duplicates can occur in the tail).
ALPHA2_LIST = list({entry.alpha2 for entry in LANGUAGE_MATRIX if entry.alpha2})
ALPHA2_LIST += list(repl_map.values())

# Unique non-empty ``alpha3`` values from the matrix, followed by the unique
# three-character keys of repl_map.
ALPHA3b_LIST = list({entry.alpha3 for entry in LANGUAGE_MATRIX if entry.alpha3})
ALPHA3b_LIST += list({alias for alias in repl_map if len(alias) == 3})

# Everything recognised as a language tag: two-letter codes, three-letter
# codes, then the custom tags.
FULL_LANGUAGE_LIST = ALPHA2_LIST + ALPHA3b_LIST + CUSTOM_LIST
|
2019-12-24 07:25:02 +08:00
|
|
|
|
|
|
|
|
2018-11-01 00:08:29 +08:00
|
|
|
def language_from_stream(l):
    """Build a ``Language`` from a stream's language tag.

    The tag is tried as an IETF code, then as an alpha3t code, then as an
    alpha3b code; the first constructor that succeeds wins.

    :param l: language tag to parse
    :return: the parsed ``Language``
    :raises LanguageError: if ``l`` is falsy or no constructor accepts it
    """
    if l:
        for constructor_name in ("fromietf", "fromalpha3t", "fromalpha3b"):
            try:
                return getattr(Language, constructor_name)(l)
            except (LanguageError, ValueError):
                # This notation did not fit; fall through to the next one.
                continue

    raise LanguageError()
|
|
|
|
|
|
|
|
|
|
|
|
def wrap_forced(f):
    """Let a ``Language`` alternate constructor accept a ``:forced`` suffix.

    The returned function is meant to sit under ``@classmethod`` and is called
    as ``inner(cls, s, *args, **kwargs)``. When ``s`` is a ``str`` of the form
    ``"<base>:<modifier>"`` only ``<base>`` is passed on to ``f``. If ``f``
    returns a ``Language``, its ``forced`` attribute is set to ``True`` when
    the modifier is exactly ``"forced"`` and to ``False`` otherwise, and its
    ``hi`` attribute is reset to ``False``.

    :param f: callable invoked as ``f(cls, base, *args, **kwargs)``
    :return: the wrapping function, carrying ``f``'s name and docstring
    :raises ValueError: (from the wrapper) if ``s`` is a ``str`` containing
        more than one ``":"``
    :raises IndexError: (from the wrapper) if ``cls`` or ``s`` is not passed
        positionally
    """
    # Imported locally so this block does not depend on the module's imports.
    from functools import wraps

    @wraps(f)
    def inner(*args, **kwargs):
        cls, s, extra = args[0], args[1], args[2:]

        base = s
        forced = False
        if isinstance(s, str) and ":" in s:
            parts = s.split(":")
            if len(parts) != 2:
                # Same exception type the tuple-unpacking used to raise by
                # accident, but with a message that names the real problem.
                raise ValueError(
                    f"expected '<language>' or '<language>:<modifier>', got {s!r}"
                )
            base, modifier = parts
            forced = modifier == "forced"

        instance = f(cls, base, *extra, **kwargs)
        if isinstance(instance, Language):
            instance.forced = forced
            # NOTE(review): nothing parses a "hi" modifier out of ``s``, so the
            # flag is always cleared here - confirm whether ":hi" should be
            # supported.
            instance.hi = False
        return instance

    return inner
|
|
|
|
|
|
|
|
|
|
|
|
class Language(Language_):
    """babelfish ``Language`` that additionally carries ``forced`` and ``hi`` flags.

    ``forced`` takes part in ``str()``, hashing and equality; ``hi`` takes part
    in equality and pickling only.
    NOTE(review): ``hi`` presumably means "hearing impaired" - confirm.
    """

    # Class-level fallbacks: the flags resolve even on an instance whose
    # __init__ / __setstate__ has not run yet.
    forced = False
    hi = False

    def __init__(self, language, country=None, script=None, unknown=None, forced=False, hi=False):
        """Store the two flags, then defer to babelfish for the remaining arguments."""
        self.forced = forced
        self.hi = hi
        super().__init__(language, country=country, script=script, unknown=unknown)

    def __getstate__(self):
        """Return ``(alpha3, country, script, hi, forced)`` for pickling."""
        return (self.alpha3, self.country, self.script, self.hi, self.forced)

    def __setstate__(self, forced):
        """Restore from the 5-tuple produced by ``__getstate__``.

        :param forced: the state tuple (the parameter name is historical)
        """
        alpha3, country, script, hi_flag, forced_flag = forced
        self.alpha3 = alpha3
        self.country = country
        self.script = script
        self.hi = hi_flag
        self.forced = forced_flag

    def __hash__(self):
        """Hash the string form, which includes the ``:forced`` suffix."""
        return hash(str(self))

    def __eq__(self, other):
        """Compare with a string (by string form) or with another ``Language``.

        Two ``Language`` objects are equal when alpha3, country and script
        match and both flags have the same truthiness. Anything else compares
        unequal.
        """
        if isinstance(other, basestr):
            return str(self) == other
        if isinstance(other, Language):
            same_tag = (self.alpha3 == other.alpha3
                        and self.country == other.country
                        and self.script == other.script)
            if not same_tag:
                return False
            return (bool(self.forced) == bool(other.forced)
                    and bool(self.hi) == bool(other.hi))
        return False

    def __str__(self):
        """Return babelfish's string form, suffixed with ``:forced`` when forced."""
        text = super().__str__()
        if self.forced:
            text += ":forced"
        return text

    def __repr__(self):
        """Return ``<ClassName: key=value;...>`` listing the truthy instance attributes."""
        pairs = [f"{key}={value}" for key, value in vars(self).items() if value]
        info = ";".join(pairs)
        return f"<{self.__class__.__name__}: {info}>"

    @property
    def basename(self):
        """babelfish's string form, without any ``:forced`` suffix."""
        return super().__str__()

    def __getattr__(self, name):
        """Delegate to babelfish; copy both flags onto any ``Language`` it returns."""
        value = super().__getattr__(name)
        if not isinstance(value, Language):
            return value
        value.forced = self.forced
        value.hi = self.hi
        return value

    @classmethod
    def rebuild(cls, instance, **replkw):
        """Create a new ``cls`` from ``instance``'s state, overriding selected fields.

        :param instance: object whose ``__getstate__()`` supplies the fields
        :param replkw: replacements for ``country``, ``script``, ``hi``, ``forced``
        :return: a freshly constructed ``cls``
        """
        state = instance.__getstate__()
        # state[0] is the language; the rest line up with these keyword names.
        field_names = ("country", "script", "hi", "forced")
        init_kwargs = dict(zip(field_names, state[1:]))
        init_kwargs.update(replkw)
        return cls(state[0], **init_kwargs)

    @classmethod
    @wrap_forced
    def fromcode(cls, code, converter):
        """Build a ``Language`` from ``code`` using the named babelfish converter."""
        plain = Language_.fromcode(code, converter)
        return Language(*plain.__getstate__())

    @classmethod
    @wrap_forced
    def fromietf(cls, ietf):
        """Build a ``Language`` from an IETF tag.

        The lower-cased tag is first looked up in ``repl_map``; on a hit the
        replacement is parsed instead.
        """
        ietf = repl_map.get(ietf.lower(), ietf)
        plain = Language_.fromietf(ietf)
        return Language(*plain.__getstate__())

    @classmethod
    @wrap_forced
    def fromalpha3b(cls, s):
        """Build a ``Language`` from an alpha3b code.

        A code found in ``repl_map`` (exact match) is replaced and parsed as an
        IETF tag; any other code goes through babelfish's alpha3b converter.
        """
        if s in repl_map:
            plain = Language_.fromietf(repl_map[s])
        else:
            plain = Language_.fromalpha3b(s)
        return Language(*plain.__getstate__())
|
2019-10-06 04:55:35 +08:00
|
|
|
|
|
|
|
|
|
|
|
# Raw string literals: in a normal literal "\." is an invalid escape sequence
# that Python only passes through with a warning.

# Matches "<anything>.<tag>" with an optional "-<letters>" suffix; group 1 is
# the tag without that suffix.
IETF_MATCH = r".+\.([^-.]+)(?:-[A-Za-z]+)?$"

# Matches a trailing ".<2-3 characters>" with an optional "-<2+ letters>"
# suffix; group 1 is the 2-3 character code.
ENDSWITH_LANGUAGECODE_RE = re.compile(r"\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
|
|
|
|
|
|
|
|
|
|
|
|
def match_ietf_language(s, ietf=False):
    """Extract the language part that follows the last dot of ``s``.

    :param s: string to inspect, e.g. a file name without its extension
    :param ietf: when false, return everything after the last dot; when true,
        match against ``IETF_MATCH`` and return its first group
    :return: the captured text, or ``s`` unchanged when the pattern does not
        match
    """
    # Raw string: "\." in a normal literal is an invalid escape sequence.
    pattern = IETF_MATCH if ietf else r".+\.([^\.]+)$"
    language_match = re.match(pattern, s)
    if language_match:
        # Both patterns define exactly one capturing group.
        return language_match.group(1)
    return s
|