Source code for apertium.analysis
from streamparser import parse, LexicalUnit # noqa: F401
import apertium
from apertium.utils import to_alpha3_code, execute, parse_mode_file
if False:
from typing import List, Union, Dict # noqa: F401
[docs]class Analyzer:
"""
Attributes:
analyzer_cmds (Dict[str, List[List[str]]])
lang (str)
"""
def __init__(self, lang): # type: (Analyzer, str) -> None
"""
Args:
lang (str)
"""
self.analyzer_cmds = {} # type: Dict[str, List[List[str]]]
self.lang = to_alpha3_code(lang) # type: str
if self.lang not in apertium.analyzers:
raise apertium.ModeNotInstalled(self.lang)
else:
self.path, self.mode = apertium.analyzers[self.lang]
def _get_commands(self): # type: (Analyzer) -> List[List[str]]
"""
Returns:
List[List[str]]
"""
if self.lang not in self.analyzer_cmds:
mode_path, mode = apertium.analyzers[self.lang]
self.analyzer_cmds[self.lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode')
return self.analyzer_cmds[self.lang]
def _postproc_text(self, result): # type: (Analyzer, str) -> List[LexicalUnit]
"""
Postprocesses the input
Args:
result (str)
Returns:
List[LexicalUnit]
"""
lexical_units = list(parse(result))
return lexical_units
[docs] def analyze(self, in_text, formatting='txt'): # type: (Analyzer, str, str) -> List[LexicalUnit]
"""
Runs apertium to analyze the input
Args:
in_text (str)
formatting (str)
Returns:
List[LexicalUnit]
"""
commands = [['apertium', '-d', self.path, '-f', formatting, self.mode]]
result = execute(in_text, commands)
return self._postproc_text(result)
[docs]def analyze(lang, in_text, formatting='txt'): # type: (str, str, str) -> List[LexicalUnit]
"""
Args:
lang (str)
in_text (str)
formatting (str)
Returns:
List[LexicalUnit]
"""
analyzer = Analyzer(lang)
return analyzer.analyze(in_text, formatting)