Source code for apertium.translation

import re
from subprocess import Popen, PIPE, CalledProcessError

if False:
    from typing import List, Dict, Tuple, Union, Optional, NamedTuple  # noqa: F401

import apertium  # noqa: F401
from apertium.utils import to_alpha3_code, execute, parse_mode_file  # noqa: F401


class Translator:
    """Translate text for one language pair by piping it through external commands.

    The text is optionally passed through a deformatter program, then through
    the command pipeline parsed from the pair's mode file, and finally through
    a reformatter program.

    Attributes:
        translation_cmds (Dict[Tuple[str, str], List[List[str]]]): cache of
            parsed mode-file commands, keyed by ``(l1, l2)``
        l1 (str)
        l2 (str)
    """

    def __init__(self, l1, l2):
        # type: (Translator, str, str) -> None
        """
        Args:
            l1 (str)
            l2 (str)
        """
        self.translation_cmds = {}  # type: Dict[Tuple[str, str], List[List[str]]]
        self.l1 = l1
        self.l2 = l2

    def _get_commands(self, l1, l2):
        # type: (Translator, str, str) -> List[List[str]]
        """Return the commands for the ``l1-l2`` pair, parsing its mode file once.

        The mode file path is looked up in ``apertium.pairs`` under the key
        ``'<l1>-<l2>'``; the parsed result is cached in ``translation_cmds``.

        Args:
            l1 (str)
            l2 (str)

        Returns:
            List[List[str]]
        """
        key = (l1, l2)
        if key not in self.translation_cmds:
            mode_path = apertium.pairs['%s-%s' % key]
            self.translation_cmds[key] = parse_mode_file(mode_path)
        return self.translation_cmds[key]

    def _get_format(self, format, deformat, reformat):
        # type: (Translator, Optional[str], Optional[str], Optional[str]) -> Tuple[Optional[str], Optional[str]]
        """Expand short format names into deformatter/reformatter program names.

        When ``format`` is given it overrides both ``deformat`` and
        ``reformat``. Otherwise each name gets its ``apertium-des`` /
        ``apertium-re`` prefix unless it already contains it. Falsy names
        (``None``, ``''``, ``False``) are returned unchanged instead of
        raising ``TypeError``, so that ``_validate_formatters`` can decide
        what to do with them.

        Args:
            format (Optional[str])
            deformat (Optional[str])
            reformat (Optional[str])

        Returns:
            Tuple[Optional[str], Optional[str]]
        """
        if format:
            return 'apertium-des' + format, 'apertium-re' + format
        if deformat and 'apertium-des' not in deformat:
            deformat = 'apertium-des' + deformat
        if reformat and 'apertium-re' not in reformat:
            reformat = 'apertium-re' + reformat
        return deformat, reformat

    def _check_ret_code(self, proc):
        # type: (Translator, Popen) -> None
        """Raise ``CalledProcessError`` if the finished process exited non-zero.

        Args:
            proc (Popen)

        Raises:
            CalledProcessError: carrying the process's return code and args
        """
        if proc.returncode != 0:
            # CalledProcessError requires (returncode, cmd); calling it with no
            # arguments would raise TypeError instead of the intended error.
            raise CalledProcessError(proc.returncode, proc.args)

    def _validate_formatters(self, deformat, reformat):
        # type: (Translator, Optional[str], Optional[str]) -> Tuple[Union[str, object], Union[str, object]]
        """Restrict formatter names to the known program names.

        A value that is not in the corresponding list is replaced by the first
        entry of that list. ``False`` is itself a listed value and is returned
        as-is.

        Args:
            deformat (Optional[str])
            reformat (Optional[str])

        Returns:
            Tuple[Union[str, object], Union[str, object]]
        """
        def valid1(elt, lst):
            # type: (Optional[str], List[object]) -> Union[str, object]
            """Return ``elt`` if it is listed in ``lst``, else the first entry.

            Args:
                elt (Optional[str])
                lst (List[object])

            Returns:
                Union[str, object]
            """
            return elt if elt in lst else lst[0]

        # First is fallback:
        deformatters = [
            'apertium-deshtml',
            'apertium-destxt',
            'apertium-desrtf',
            False,
        ]
        reformatters = [
            'apertium-rehtml-noent',
            'apertium-rehtml',
            'apertium-retxt',
            'apertium-rertf',
            False,
        ]
        return valid1(deformat, deformatters), valid1(reformat, reformatters)

    def _get_deformat(self, deformat, text):
        # type: (Translator, Union[str, object], str) -> str
        """Run ``text`` through the deformatter program, if one is given.

        Args:
            deformat (str): program to run; a falsy value skips this step
            text (str)

        Returns:
            str: the program's UTF-8 decoded output, or ``text`` unchanged
                when no deformatter is given

        Raises:
            CalledProcessError: if the deformatter exits with a non-zero code
        """
        if not deformat:
            # Previously this returned str(bytes), i.e. the repr "b'...'".
            return text
        proc_deformat = Popen(deformat, stdin=PIPE, stdout=PIPE)
        # Pass the input via communicate() rather than stdin.write() to avoid
        # blocking when an OS pipe buffer fills up.
        raw_output = proc_deformat.communicate(text.encode('utf-8'))[0]
        self._check_ret_code(proc_deformat)
        return raw_output.decode('utf-8')

    def _get_reformat(self, reformat, text):
        # type: (Translator, Union[str, object], Union[str, bytes]) -> bytes
        """Run ``text`` through the reformatter program, if one is given.

        Args:
            reformat (str): program to run; a falsy value skips this step
            text (str): ``bytes`` input is also accepted and used as-is

        Returns:
            bytes: the program's raw output, or the UTF-8 encoded input with a
                trailing NUL removed when no reformatter is given

        Raises:
            CalledProcessError: if the reformatter exits with a non-zero code
        """
        payload = text if isinstance(text, bytes) else text.encode('utf-8')
        if not reformat:
            # The pattern is bytes, so it must be applied to bytes; applying it
            # to a str raises TypeError.
            return re.sub(rb'\0$', b'', payload)
        proc_reformat = Popen(reformat, stdin=PIPE, stdout=PIPE)
        # See _get_deformat: communicate() handles writing and closing stdin.
        result = proc_reformat.communicate(payload)[0]
        self._check_ret_code(proc_reformat)
        return result

    def translate(self, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'):
        # type: (Translator, str, bool, Optional[str], str, str) -> str
        """Translate ``text`` from ``self.l1`` to ``self.l2``.

        Args:
            text (str)
            mark_unknown (bool): accepted for interface compatibility; this
                method does not currently use it
            format (Optional[str]): when given, overrides both ``deformat``
                and ``reformat``
            deformat (str)
            reformat (str)

        Returns:
            str

        Raises:
            apertium.ModeNotInstalled: if the converted ``l1-l2`` pair is not
                present in ``apertium.pairs``
            CalledProcessError: if a formatter program exits non-zero
        """
        l1, l2 = to_alpha3_code(self.l1), to_alpha3_code(self.l2)
        if '%s-%s' % (l1, l2) not in apertium.pairs:
            raise apertium.ModeNotInstalled()

        cmds = list(self._get_commands(l1, l2))
        unsafe_deformat, unsafe_reformat = self._get_format(format, deformat, reformat)
        deformater, reformater = self._validate_formatters(unsafe_deformat, unsafe_reformat)
        # Pass the validated values through untouched: wrapping them in str()
        # would turn the "no formatter" sentinel False into the truthy 'False'.
        deformatted = self._get_deformat(deformater, text)
        output = execute(deformatted, cmds)
        result = self._get_reformat(reformater, output).strip()
        return result.decode()
def translate(l1, l2, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'):
    # type: (str, str, str, bool, Optional[str], str, str) -> str
    """Translate ``text`` using a freshly built ``apertium.Translator`` for ``l1``/``l2``.

    All remaining arguments are forwarded positionally to
    ``Translator.translate``.

    Args:
        l1 (str)
        l2 (str)
        text (str)
        mark_unknown (bool)
        format (Optional[str])
        deformat (str)
        reformat (str)

    Returns:
        str
    """
    return apertium.Translator(l1, l2).translate(text, mark_unknown, format, deformat, reformat)