2021-02-03 05:05:09 +08:00
|
|
|
"""
|
|
|
|
Language detection API: helpers that query the Detect Language web service (detectlanguage.com).
|
|
|
|
"""
|
2022-05-01 20:00:20 +08:00
|
|
|
from typing import List, Optional, Union
|
|
|
|
|
2021-02-03 05:05:09 +08:00
|
|
|
import requests
|
|
|
|
from requests.exceptions import HTTPError
|
|
|
|
|
2021-11-30 03:28:30 +08:00
|
|
|
# Module-wide settings shared by every request to the Detect Language service.
# The "Authorization" header is a template: "{}" is the slot for the API key.
config = dict(
    url="https://ws.detectlanguage.com/0.2/detect",
    headers={
        "User-Agent": "Detect Language API Python Client 1.4.0",
        "Authorization": "Bearer {}",
    },
)
|
|
|
|
|
2021-02-03 05:05:09 +08:00
|
|
|
|
2022-05-01 20:00:20 +08:00
|
|
|
def get_request_body(text: Union[str, List[str]], api_key: str, *args, **kwargs):
    """
    send a request and return the response body parsed as dictionary

    @param text: target text (or list of texts) that you want to detect its language
    @type text: str or list of str
    @param api_key: your private API key
    @type api_key: str
    @param args: accepted for call compatibility, ignored
    @param kwargs: accepted for call compatibility, ignored
    @return: the value stored under the "data" key of the JSON response
    @raise Exception: if api_key or text is empty
    @raise HTTPError: if the server answers with an error status code
    """
    if not api_key:
        raise Exception(
            "you need to get an API_KEY for this to work. "
            "Get one for free here: https://detectlanguage.com/documentation"
        )
    if not text:
        raise Exception("Please provide an input text")

    # Build the headers on a copy: formatting the shared config in place would
    # overwrite the "Bearer {}" template, so every later call would keep sending
    # the first API key regardless of the one passed in.
    headers = dict(config["headers"])
    headers["Authorization"] = headers["Authorization"].format(api_key)

    try:
        response = requests.post(config["url"], json={"q": text}, headers=headers)
        # requests does not raise on 4xx/5xx by itself; without this call the
        # handler below could never run and error payloads would leak through.
        response.raise_for_status()
    except HTTPError as e:
        print("Error occured while requesting from server: ", e.args)
        raise

    return response.json().get("data")
|
|
|
|
|
|
|
|
|
2022-05-01 20:00:20 +08:00
|
|
|
def single_detection(
    text: str, api_key: Optional[str] = None, detailed: bool = False, *args, **kwargs
):
    """
    function responsible for detecting the language from a text

    @param text: target text that you want to detect its language
    @type text: str
    @param api_key: your private API key
    @type api_key: str
    @param detailed: set to True if you want to get detailed information about the detection process
    @type detailed: bool
    @param args: accepted for call compatibility, ignored
    @param kwargs: accepted for call compatibility, ignored
    @return: the first detection entry if detailed is True, otherwise its
        "language" value; None when there is no detection or no language
    """
    body = get_request_body(text, api_key)
    detections = body.get("detections")

    # An empty (or missing) detections list means nothing could be detected;
    # indexing it would raise IndexError, so report "no result" instead.
    if not detections:
        return None

    first = detections[0]
    if detailed:
        return first

    # A missing or empty language is reported as None
    return first.get("language") or None
|
|
|
|
|
|
|
|
|
2022-05-01 20:00:20 +08:00
|
|
|
def batch_detection(
    text_list: List[str], api_key: str, detailed: bool = False, *args, **kwargs
):
    """
    function responsible for detecting the language of each text in a batch

    @param text_list: target batch that you want to detect its language
    @type text_list: list of str
    @param api_key: your private API key
    @type api_key: str
    @param detailed: set to True if you want to get detailed information about the detection process
    @type detailed: bool
    @param args: accepted for call compatibility, ignored
    @param kwargs: accepted for call compatibility, ignored
    @return: one item per entry of the returned detections: the first detection
        entry if detailed is True, otherwise its "language" value; None for
        entries that have no detection
    """
    body = get_request_body(text_list, api_key)
    detections = body.get("detections") or []

    # Keep one slot per input even when a text yields no detection, so the
    # result stays aligned with text_list instead of raising IndexError.
    firsts = [candidates[0] if candidates else None for candidates in detections]

    if detailed:
        return firsts
    return [first["language"] if first else None for first in firsts]
|