Source code for flexget.utils.parsers.generic
"""Collection of generic parser related utilities used by internal parser and with parsing plugins."""
import re
[docs]
class ParseWarning(Warning):
def __init__(self, parsed, value, **kwargs):
self.value = value
self.parsed = parsed
self.kwargs = kwargs
def __unicode__(self):
return self.value
def __str__(self):
return self.__unicode__().encode('utf-8')
def __repr__(self):
return f'ParseWarning({self}, **{self.kwargs!r})'
default_ignore_prefixes = [
r'(?:\[[^\[\]]*\])', # ignores group names before the name, eg [foobar] name
r'(?:HD.720p?:)',
r'(?:HD.1080p?:)',
r'(?:HD.2160p?:)',
]
[docs]
def name_to_re(name, ignore_prefixes=None, parser=None):
"""Convert 'foo bar' to '^[^...]*foo[^...]*bar[^...]+'."""
if not ignore_prefixes:
ignore_prefixes = default_ignore_prefixes
parenthetical = None
if name.endswith(')'):
p_start = name.rfind('(')
if p_start != -1:
parenthetical = re.escape(name[p_start + 1 : -1])
name = name[: p_start - 1]
# Blanks are any non word characters except & and _
blank = r'(?:[^\w&]|_)'
ignore = '(?:' + '|'.join(ignore_prefixes) + ')?'
res = re.sub(re.compile(blank + '+'), ' ', name)
res = res.strip()
# accept either '&' or 'and'
res = re.sub(' (&|and) ', ' (?:and|&) ', res)
# The replacement has a regex escape in it (\w) which needs to be escaped again in python 3.7+
res = re.sub(' +', blank.replace('\\', '\\\\') + '*', res)
if parenthetical:
res += '(?:' + blank + '+' + parenthetical + ')?'
# Turn on exact mode for series ending with a parenthetical,
# so that 'Show (US)' is not accepted as 'Show (UK)'
if parser:
parser.strict_name = True
return '^' + ignore + blank + '*' + '(' + res + ')(?:\\b|_)' + blank + '*'