2018-09-17 08:27:00 +08:00
|
|
|
from .core import encode, decode, alabel, ulabel, IDNAError
|
|
|
|
import codecs
|
|
|
|
import re
|
2022-01-24 12:07:52 +08:00
|
|
|
from typing import Tuple, Optional
|
2018-09-17 08:27:00 +08:00
|
|
|
|
2022-01-24 12:07:52 +08:00
|
|
|
# Characters treated as label separators when splitting a domain name:
# U+002E FULL STOP, U+3002 IDEOGRAPHIC FULL STOP, U+FF0E FULLWIDTH FULL STOP
# and U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP. Each match is a single character.
_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]')
|
2018-09-17 08:27:00 +08:00
|
|
|
|
|
|
|
class Codec(codecs.Codec):
    """Codec whose methods delegate to the module-level ``encode``/``decode``.

    Only the ``'strict'`` error handler is accepted; any other value raises
    ``IDNAError``.
    """

    def encode(self, data: str, errors: str = 'strict') -> Tuple[bytes, int]:
        """Encode ``data`` and report how many input characters were used.

        Returns ``(encoded, len(data))``. Empty input short-circuits to
        ``(b"", 0)`` without calling the underlying encoder.

        Raises:
            IDNAError: if ``errors`` is anything other than ``'strict'``.
        """
        strict = errors == 'strict'
        if not strict:
            raise IDNAError('Unsupported error handling \"{}\"'.format(errors))

        if data:
            # ``encode`` here is the module-level function, not this method.
            encoded = encode(data)
            return encoded, len(data)
        return b"", 0

    def decode(self, data: bytes, errors: str = 'strict') -> Tuple[str, int]:
        """Decode ``data`` and report how much of the input was used.

        Returns ``(decoded, len(data))``. Empty input short-circuits to
        ``('', 0)`` without calling the underlying decoder.

        Raises:
            IDNAError: if ``errors`` is anything other than ``'strict'``.
        """
        strict = errors == 'strict'
        if not strict:
            raise IDNAError('Unsupported error handling \"{}\"'.format(errors))

        if data:
            # ``decode`` here is the module-level function, not this method.
            decoded = decode(data)
            return decoded, len(data)
        return '', 0
|
|
|
|
|
|
|
|
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental encoder that converts a domain name label by label.

    Complete labels are encoded as soon as their terminating separator has
    been seen; a trailing, possibly unfinished label is left in the buffer
    until more input arrives or ``final`` is true.
    """

    def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]:
        """Encode as many complete labels of ``data`` as possible.

        Args:
            data: Buffered text to encode.
            errors: Error handler name; only ``'strict'`` is supported.
            final: True when no further input will follow, in which case the
                last label is encoded even without a trailing separator.

        Returns:
            ``(encoded_bytes, consumed)`` where ``consumed`` is the number of
            characters of ``data`` that were encoded; the base class keeps
            the remainder buffered for the next call.

        Raises:
            IDNAError: if ``errors`` is anything other than ``'strict'``.
        """
        if errors != 'strict':
            raise IDNAError('Unsupported error handling \"{}\"'.format(errors))

        # An encoder must produce bytes, including for empty input.
        if not data:
            return b'', 0

        # re.split() on a non-empty string always yields at least one item.
        labels = _unicode_dots_re.split(data)
        trailing_dot = b''
        if not labels[-1]:
            # Input ended with a separator: every remaining label is complete.
            trailing_dot = b'.'
            del labels[-1]
        elif not final:
            # Keep potentially unfinished label until the next call
            del labels[-1]
            if labels:
                trailing_dot = b'.'

        # NOTE(review): relies on alabel() returning bytes -- confirm in .core.
        encoded = [alabel(label) for label in labels]

        # Consumed input = label characters + one separator between each pair
        # of labels + the trailing separator (every separator is one char).
        size = sum(len(label) for label in labels)
        size += max(len(labels) - 1, 0)
        size += len(trailing_dot)

        # Join with U+002E
        return b'.'.join(encoded) + trailing_dot, size
|
2018-09-17 08:27:00 +08:00
|
|
|
|
|
|
|
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental decoder that converts a domain name label by label.

    Complete labels are decoded as soon as their terminating separator has
    been seen; a trailing, possibly unfinished label is left in the buffer
    until more input arrives or ``final`` is true.
    """

    def _buffer_decode(self, data: bytes, errors: str, final: bool) -> Tuple[str, int]:  # type: ignore
        """Decode as many complete labels of ``data`` as possible.

        Args:
            data: Buffered input. The base class supplies bytes; a ``str`` is
                also accepted and used as-is.
            errors: Error handler name; only ``'strict'`` is supported.
            final: True when no further input will follow, in which case the
                last label is decoded even without a trailing separator.

        Returns:
            ``(decoded_text, consumed)`` where ``consumed`` is the amount of
            input that was decoded; the base class keeps the remainder
            buffered for the next call.

        Raises:
            IDNAError: if ``errors`` is anything other than ``'strict'``.
            UnicodeDecodeError: if bytes input contains non-ASCII bytes.
        """
        if errors != 'strict':
            raise IDNAError('Unsupported error handling \"{}\"'.format(errors))

        if not data:
            return ('', 0)

        # The separator regex is a str pattern, so bytes-like input must be
        # turned into text first. ASCII keeps one character per input byte,
        # which keeps the consumed count valid for the byte buffer.
        text = data if isinstance(data, str) else str(data, 'ascii')

        # re.split() on a non-empty string always yields at least one item.
        labels = _unicode_dots_re.split(text)
        trailing_dot = ''
        if not labels[-1]:
            # Input ended with a separator: every remaining label is complete.
            trailing_dot = '.'
            del labels[-1]
        elif not final:
            # Keep potentially unfinished label until the next call
            del labels[-1]
            if labels:
                trailing_dot = '.'

        decoded = [ulabel(label) for label in labels]

        # Consumed input = label characters + one separator between each pair
        # of labels + the trailing separator (every separator is one char).
        size = sum(len(label) for label in labels)
        size += max(len(labels) - 1, 0)
        size += len(trailing_dot)

        return ('.'.join(decoded) + trailing_dot, size)
|
2018-09-17 08:27:00 +08:00
|
|
|
|
|
|
|
|
|
|
|
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining ``Codec`` with ``codecs.StreamWriter``.

    ``Codec`` is listed first, so its ``encode``/``decode`` take precedence
    over the ones inherited through ``codecs.StreamWriter``.
    """
|
|
|
|
|
2022-01-24 12:07:52 +08:00
|
|
|
|
2018-09-17 08:27:00 +08:00
|
|
|
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining ``Codec`` with ``codecs.StreamReader``.

    ``Codec`` is listed first, so its ``encode``/``decode`` take precedence
    over the ones inherited through ``codecs.StreamReader``.
    """
|
|
|
|
|
2022-01-24 12:07:52 +08:00
|
|
|
|
|
|
|
def getregentry() -> codecs.CodecInfo:
    """Build the ``codecs.CodecInfo`` record describing this codec.

    The record is registered under the name ``'idna'`` and wires together the
    stateless ``Codec`` methods, the incremental encoder/decoder classes and
    the stream writer/reader classes defined in this module.

    Compatibility as a search_function for codecs.register().
    NOTE(review): codecs.register() search functions are called with the
    encoding name, while this function takes no arguments -- confirm how
    callers are expected to register it.
    """
    # Each bound method comes from its own fresh Codec instance.
    stateless_encode = Codec().encode
    stateless_decode = Codec().decode

    return codecs.CodecInfo(
        name='idna',
        encode=stateless_encode,  # type: ignore
        decode=stateless_decode,  # type: ignore
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
|