2024-03-04 01:15:23 +08:00
|
|
|
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
|
|
|
|
|
|
|
|
import os
|
2022-05-01 20:00:20 +08:00
|
|
|
from typing import List, Optional
|
2021-02-03 05:05:09 +08:00
|
|
|
|
|
|
|
import requests
|
|
|
|
|
2022-05-01 20:00:20 +08:00
|
|
|
from deep_translator.base import BaseTranslator
|
2024-03-04 01:15:23 +08:00
|
|
|
from deep_translator.constants import (
|
|
|
|
BASE_URLS,
|
|
|
|
QCRI_ENV_VAR,
|
|
|
|
QCRI_LANGUAGE_TO_CODE,
|
|
|
|
)
|
|
|
|
from deep_translator.exceptions import (
|
|
|
|
ApiKeyException,
|
|
|
|
ServerException,
|
|
|
|
TranslationNotFound,
|
|
|
|
)
|
|
|
|
from deep_translator.validate import request_failed
|
2022-01-24 12:07:52 +08:00
|
|
|
|
2022-05-01 20:00:20 +08:00
|
|
|
|
|
|
|
class QcriTranslator(BaseTranslator):
    """
    class that wraps functions, which use the QCRI translator under the hood
    to translate word(s)
    """

    def __init__(
        self,
        source: str = "en",
        target: str = "en",
        api_key: Optional[str] = os.getenv(QCRI_ENV_VAR, None),
        **kwargs,
    ):
        """
        @param source: source language, forwarded to BaseTranslator
        @param target: target language, forwarded to BaseTranslator
        @param api_key: your qcri api key.
        Get one for free here https://mt.qcri.org/api/v1/ref
        When omitted, the key is read from the environment variable named
        by QCRI_ENV_VAR.
        @param kwargs: extra keyword arguments forwarded to BaseTranslator
        @raise ApiKeyException: if no api key was passed and none is found
        in the environment
        """
        # The signature default is evaluated only once, when the class body
        # is executed. Re-read the environment here so that a variable
        # exported after this module was imported is still honoured.
        if not api_key:
            api_key = os.getenv(QCRI_ENV_VAR)
        if not api_key:
            raise ApiKeyException(QCRI_ENV_VAR)

        self.api_key = api_key
        # Logical operation name -> path segment substituted into the base URL.
        self.api_endpoints = {
            "get_languages": "getLanguagePairs",
            "get_domains": "getDomains",
            "translate": "translate",
        }

        # Default query string used by _get when the caller supplies none.
        self.params = {"key": self.api_key}
        super().__init__(
            base_url=BASE_URLS.get("QCRI"),
            source=source,
            target=target,
            languages=QCRI_LANGUAGE_TO_CODE,
            **kwargs,
        )

    def _get(
        self,
        endpoint: str,
        params: Optional[dict] = None,
        return_text: bool = True,
    ):
        """
        send a GET request to one of the QCRI endpoints
        @param endpoint: a key of self.api_endpoints ("get_languages",
        "get_domains" or "translate"); an unknown key raises KeyError
        @param params: query parameters; falls back to self.params
        (the api key only) when empty or None
        @param return_text: return the response body as text when True,
        otherwise return the response object itself
        @return: response text or the response object
        """
        if not params:
            params = self.params
        # NOTE(review): self._base_url is presumably set by BaseTranslator
        # from the base_url passed in __init__ and is expected to contain an
        # "{endpoint}" placeholder -- confirm against the base class.
        url = self._base_url.format(endpoint=self.api_endpoints[endpoint])
        # Any exception raised by requests propagates unchanged to the caller.
        res = requests.get(url, params=params)
        return res.text if return_text else res

    @property
    def languages(self):
        """languages as returned by get_supported_languages()"""
        return self.get_supported_languages()

    def get_domains(self):
        """
        query the "get_domains" endpoint
        @return: the raw response text of the endpoint
        """
        domains = self._get("get_domains")
        return domains

    @property
    def domains(self):
        """property shortcut for get_domains()"""
        return self.get_domains()

    def translate(self, text: str, **kwargs) -> str:
        """
        translate a text using the QCRI api
        @param text: text to translate
        @param kwargs: must contain "domain", the translation domain sent to
        the api (a missing "domain" raises KeyError)
        @return: the translated text
        @raise ServerException: if the server cannot be reached (503) or
        answers with a failing status code
        @raise TranslationNotFound: if the response holds no translation
        """
        params = {
            "key": self.api_key,
            "langpair": f"{self._source}-{self._target}",
            "domain": kwargs["domain"],
            "text": text,
        }
        try:
            response = self._get("translate", params=params, return_text=False)
        except (ConnectionError, requests.exceptions.ConnectionError) as err:
            # requests raises its own ConnectionError, which does not derive
            # from the builtin one, so both have to be listed here.
            raise ServerException(503) from err

        if request_failed(status_code=response.status_code):
            # The exception must be raised; merely constructing it would let
            # the method fall through and return None on a failed request.
            raise ServerException(response.status_code)

        res = response.json()
        translation = res.get("translatedText")
        if not translation:
            raise TranslationNotFound(text)
        return translation

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate the content of a file by delegating to _translate_file
        @param path: path to the file to translate
        @param kwargs: forwarded unchanged to _translate_file
        @return: whatever _translate_file returns
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a batch of texts
        @param batch: list of texts to translate
        @param kwargs: forwarded unchanged to _translate_batch
        (e.g. domain)
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)
|