2021-02-03 05:05:09 +08:00
|
|
|
|
|
|
|
import requests
|
2021-11-30 03:28:30 +08:00
|
|
|
from .constants import BASE_URLS, QCRI_LANGUAGE_TO_CODE
|
|
|
|
from .exceptions import (ServerException, TranslationNotFound)
|
2021-02-03 05:05:09 +08:00
|
|
|
|
|
|
|
class QCRI:
    """
    Wrapper around the QCRI translation web API, used to translate word(s).

    An instance stores the API key and the source/target language codes, and
    issues HTTP GET requests (via ``requests``) against the QCRI base URL
    looked up in ``BASE_URLS``.
    """

    def __init__(self, api_key=None, source="en", target="en", **kwargs):
        """
        @param api_key: your qcri api key. Get one for free here https://mt.qcri.org/api/v1/ref
        @param source: source language code, used as the left side of the "langpair" parameter
        @param target: target language code, used as the right side of the "langpair" parameter
        @param kwargs: accepted for interface compatibility; not used here
        @raise ServerException: (401) when no api key is supplied
        """
        if not api_key:
            raise ServerException(401)

        # URL template; it is formatted with an ``endpoint`` field in ``_get``.
        self.__base_url = BASE_URLS.get("QCRI")
        self.source = source
        self.target = target
        self.api_key = api_key

        # Logical operation name -> endpoint name substituted into the base URL.
        self.api_endpoints = {
            "get_languages": "getLanguagePairs",
            "get_domains": "getDomains",
            "translate": "translate",
        }

        # Default query parameters, used whenever a call passes none of its own.
        self.params = {
            "key": self.api_key,
        }

    def _get(self, endpoint, params=None, return_text=True):
        """
        Send a GET request to one of the configured API endpoints.

        @param endpoint: key into ``self.api_endpoints`` (e.g. "translate")
        @param params: query parameters; when empty or None, ``self.params`` is used
        @param return_text: when True return the response body text,
            otherwise return the response object itself
        @return: response text or the response object, depending on ``return_text``
        """
        if not params:
            params = self.params

        url = self.__base_url.format(endpoint=self.api_endpoints[endpoint])
        # Errors raised by requests propagate unchanged to the caller.
        response = requests.get(url, params=params)
        return response.text if return_text else response

    @staticmethod
    def get_supported_languages(as_dict=False, **kwargs):
        """
        Return the languages known to this translator.

        @param as_dict: when True return the ``QCRI_LANGUAGE_TO_CODE`` mapping
            itself, otherwise return a list of its keys
        @return: list of keys, or the mapping
        """
        # The remote "get_languages" endpoint is not queried because the format
        # it returns is not what is needed here; the static table is used instead.
        if as_dict:
            return QCRI_LANGUAGE_TO_CODE
        return list(QCRI_LANGUAGE_TO_CODE.keys())

    @property
    def languages(self):
        """List of supported languages (see ``get_supported_languages``)."""
        return self.get_supported_languages()

    def get_domains(self):
        """
        Fetch the available domains from the API.

        @return: body text of the "get_domains" endpoint response
        """
        return self._get("get_domains")

    @property
    def domains(self):
        """Domains as returned by ``get_domains`` (issues a request on each access)."""
        return self.get_domains()

    def translate(self, text, domain, **kwargs):
        """
        Translate a single text.

        @param text: text to translate
        @param domain: domain value sent to the API as the "domain" parameter
        @param kwargs: accepted for interface compatibility; not used here
        @return: value of "translatedText" in the JSON response
        @raise ServerException: (503) on a connection error, or with the
            response status code when the API does not answer with 200
        @raise TranslationNotFound: when the response holds no translation
        """
        params = {
            "key": self.api_key,
            "langpair": "{}-{}".format(self.source, self.target),
            "domain": domain,
            "text": text,
        }

        try:
            response = self._get("translate", params=params, return_text=False)
        # requests' ConnectionError does not derive from the builtin one, so
        # both must be listed for network failures to be mapped to a 503.
        except (ConnectionError, requests.exceptions.ConnectionError):
            raise ServerException(503)

        if response.status_code != 200:
            # The exception has to be raised; merely constructing it would let
            # the failure pass silently and return None.
            raise ServerException(response.status_code)

        translation = response.json().get("translatedText")
        if not translation:
            raise TranslationNotFound(text)
        return translation

    def translate_batch(self, batch, domain, **kwargs):
        """
        translate a batch of texts

        @param batch: list of texts to translate
        @param domain: domain passed to ``translate`` for every text
        @param kwargs: forwarded unchanged to ``translate``
        @return: list of translations
        """
        # Keyword arguments keep text and domain from being swapped.
        return [self.translate(text=text, domain=domain, **kwargs) for text in batch]
|
2021-02-03 05:05:09 +08:00
|
|
|
|