2021-11-30 03:28:30 +08:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2024-03-04 01:15:23 +08:00
|
|
|
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
|
|
|
|
|
2021-11-30 03:28:30 +08:00
|
|
|
import logging
|
2024-03-04 01:15:23 +08:00
|
|
|
import os
|
2021-11-30 03:28:30 +08:00
|
|
|
import sys
|
2022-05-01 20:00:20 +08:00
|
|
|
from typing import List, Optional
|
|
|
|
|
|
|
|
import requests
|
2021-11-30 03:28:30 +08:00
|
|
|
|
2022-05-01 20:00:20 +08:00
|
|
|
from deep_translator.base import BaseTranslator
|
2024-03-04 01:15:23 +08:00
|
|
|
from deep_translator.constants import BASE_URLS, MSFT_ENV_VAR
|
|
|
|
from deep_translator.exceptions import ApiKeyException, MicrosoftAPIerror
|
2022-05-01 20:00:20 +08:00
|
|
|
from deep_translator.validate import is_input_valid
|
2021-11-30 03:28:30 +08:00
|
|
|
|
|
|
|
|
2022-05-01 20:00:20 +08:00
|
|
|
class MicrosoftTranslator(BaseTranslator):
    """
    the class that wraps functions, which use the Microsoft translator under
    the hood to translate word(s)

    Requests are authenticated with an ``Ocp-Apim-Subscription-Key`` header and,
    when a region is supplied, an ``Ocp-Apim-Subscription-Region`` header.
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        api_key: Optional[str] = os.getenv(MSFT_ENV_VAR, None),
        region: Optional[str] = None,
        proxies: Optional[dict] = None,
        **kwargs,
    ):
        """
        @params api_key and target are the required params
        @param source: source language, forwarded to the base translator
        @param target: target language, forwarded to the base translator
        @param api_key: your Microsoft API key; when empty, the environment
            variable named by MSFT_ENV_VAR is consulted
        @param region: your Microsoft Location
        @param proxies: proxies mapping handed to ``requests``
        @param kwargs: extra keyword arguments forwarded to the base translator
        @raise ApiKeyException: when no api key could be resolved
        """
        # The signature default is evaluated once, at import time. Look the
        # variable up again here so a key exported after import is still found.
        if not api_key:
            api_key = os.getenv(MSFT_ENV_VAR, None)
        if not api_key:
            raise ApiKeyException(env_var=MSFT_ENV_VAR)

        self.api_key = api_key
        self.proxies = proxies
        # Always define the attribute so reading it never raises AttributeError.
        self.region = region
        self.headers = {
            "Ocp-Apim-Subscription-Key": self.api_key,
            "Content-type": "application/json",
        }
        # parameter region is not required but very common and goes to headers if passed
        if region:
            self.headers["Ocp-Apim-Subscription-Region"] = region

        # self.proxies must be set before this call: the supported-languages
        # lookup below reads it.
        super().__init__(
            base_url=BASE_URLS.get("MICROSOFT_TRANSLATE"),
            source=source,
            target=target,
            languages=self._get_supported_languages(),
            **kwargs,
        )

    # this function get the actual supported languages of the msft translator and store them in a dict, where
    # the keys are the lower-cased language names and the values are the lower-cased abbreviations
    # a common variable used in the other translators would be: MICROSOFT_CODES_TO_LANGUAGES
    def _get_supported_languages(self):
        """
        fetch the languages supported for translation from the Microsoft API
        @return: dict mapping lower-cased language name to lower-cased code
        """
        microsoft_languages_api_url = (
            "https://api.cognitive.microsofttranslator.com/languages"
            "?api-version=3.0&scope=translation"
        )
        microsoft_languages_response = requests.get(
            microsoft_languages_api_url,
            # honour the proxies configured on the instance (None when unset)
            proxies=getattr(self, "proxies", None),
        )
        translation_dict = microsoft_languages_response.json()["translation"]

        return {
            details["name"].lower(): code.lower()
            for code, details in translation_dict.items()
        }

    def translate(self, text: str, **kwargs) -> str:
        """
        function that uses microsoft translate to translate a text
        @param text: desired text to translate
        @return: str: translated text (None when the input is not valid or the
            response has an unexpected shape)
        @raise MicrosoftAPIerror: when the API answers with an error object
        @raise requests.exceptions.RequestException: when the HTTP request fails
        """
        # a body must be a list of dicts to process multiple texts;
        # I have not added multiple text processing here since it is covered by the translate_batch method
        if not is_input_valid(text):
            return None

        # NOTE(review): _url_params, _source, _target and _base_url are not
        # defined in this class; presumably provided by BaseTranslator.
        if self._source == "auto":
            # "auto" is not a Microsoft language code; leaving "from" out asks
            # the service to detect the source language itself.
            self._url_params.pop("from", None)
        else:
            self._url_params["from"] = self._source
        self._url_params["to"] = self._target

        valid_microsoft_json = [{"text": text}]
        try:
            response = requests.post(
                self._base_url,
                params=self._url_params,
                headers=self.headers,
                json=valid_microsoft_json,
                proxies=self.proxies,
            )
        except requests.exceptions.RequestException as exc:
            logging.warning(f"Returned error: {type(exc).__name__}")
            # Re-raise the real failure; continuing would only crash on a
            # missing response object.
            raise

        # Parse the body once and branch on its shape.
        payload = response.json()

        # Where Microsoft API responds with an api error, it returns a dict in response.json()
        if isinstance(payload, dict):
            raise MicrosoftAPIerror(payload.get("error", payload))

        # Where it responds with a translation, its response.json() is a list
        # e.g. [{'translations': [{'text':'Hello world!', 'to': 'en'}]}]
        if isinstance(payload, list):
            all_translations = [
                entry["text"] for entry in payload[0]["translations"]
            ]
            return "\n".join(all_translations)

        return None

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate from a file
        @param path: path to file
        @return: translated text
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a batch of texts
        @param batch: list of texts to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)
|