Source code for flexget.plugins.modify.manipulate

import re

from loguru import logger

from flexget import plugin
from flexget.event import event

# Rebind the imported loguru logger with name='manipulate'; every log call in
# this module goes through this bound logger.
logger = logger.bind(name='manipulate')


class Manipulate:
    r"""The manipulate plugin.

    Usage::

      manipulate:
        - <destination field>:
            [find_all]: <boolean>
            [phase]: <phase>
            [from]: <source field>
            [extract]: <regexp>
            [separator]: <text>
            [replace]:
              regexp: <regexp>
              format: <regexp>
            [remove]: <boolean>
            [erase]: <list of regexps>

    Example:

    .. code:: yaml

      manipulate:
        - title:
            extract: \[\d\d\d\d\](.*)
        - title:
            erase:
              - "^unwanted.noise."
              - "^more.advertisement."
    """

    schema = {
        'type': 'array',
        'items': {
            'type': 'object',
            'additionalProperties': {
                'type': 'object',
                'properties': {
                    'phase': {'enum': ['metainfo', 'filter', 'modify']},
                    'from': {'type': 'string'},
                    'extract': {'type': 'string', 'format': 'regex'},
                    'separator': {'type': 'string'},
                    'remove': {'type': 'boolean'},
                    'erase': {
                        'type': 'array',
                        'items': {'type': 'string', 'format': 'regex'},
                    },
                    'find_all': {'type': 'boolean'},
                    'replace': {
                        'type': 'object',
                        'properties': {
                            'regexp': {'type': 'string', 'format': 'regex'},
                            'format': {'type': 'string'},
                        },
                        'required': ['regexp', 'format'],
                        'additionalProperties': False,
                    },
                },
                'additionalProperties': False,
            },
        },
    }

    def on_task_start(self, task, config):
        """Separate the config into a dict with a list of jobs per phase.

        Allow us to skip phases without any jobs in them.

        Each destination field is registered as its own single-field job under
        the phase it asks for (``metainfo`` when unspecified). Registering the
        whole config item once per field would queue a multi-field item
        several times, and under every field's phase, so its operations would
        run more than once on each entry.

        :param task: Task being started (unused here)
        :param config: List of ``{field: field_config}`` mappings
        """
        self.phase_jobs = {'filter': [], 'metainfo': [], 'modify': []}
        for item in config:
            for field, item_config in item.items():
                # Get the phase specified for this field, or use default of metainfo
                phase = item_config.get('phase', 'metainfo')
                self.phase_jobs[phase].append({field: item_config})

    def _run_phase(self, task, phase):
        """Run the jobs queued for ``phase`` on every entry of ``task``."""
        jobs = self.phase_jobs[phase]
        if not jobs:
            # return if no jobs for this phase
            return
        modified = sum(self.process(entry, jobs) for entry in task.entries)
        logger.verbose('Modified {} entries.', modified)

    @plugin.priority(plugin.PRIORITY_FIRST)
    def on_task_metainfo(self, task, config):
        """Apply the jobs configured for the ``metainfo`` phase."""
        self._run_phase(task, 'metainfo')

    @plugin.priority(plugin.PRIORITY_FIRST)
    def on_task_filter(self, task, config):
        """Apply the jobs configured for the ``filter`` phase."""
        self._run_phase(task, 'filter')

    @plugin.priority(plugin.PRIORITY_FIRST)
    def on_task_modify(self, task, config):
        """Apply the jobs configured for the ``modify`` phase."""
        self._run_phase(task, 'modify')

    @staticmethod
    def _flatten_matches(matches):
        """Turn a ``re.findall`` result into a flat list of strings.

        ``re.findall`` yields tuples when the pattern has several groups;
        those are expanded in order, leaving out groups that matched nothing.
        Plain string matches are passed through untouched.
        """
        flat = []
        for match in matches:
            if isinstance(match, tuple):
                flat.extend(group for group in match if group)
            else:
                flat.append(match)
        return flat

    def process(self, entry, jobs):
        """Process given jobs from config for an entry.

        For each destination field the configured operations run in a fixed
        order: ``remove``, then ``erase``, ``extract`` and ``replace``. The
        resulting value is written to the destination field.

        :param entry: Entry to modify
        :param jobs: Config items to run on this entry
        :return: True if any fields were modified
        """
        regex_flags = re.IGNORECASE | re.UNICODE
        modified = False
        for item in jobs:
            for field, config in item.items():
                from_field = config.get('from', field)
                separator = config.get('separator', ' ')
                field_value = entry.get(from_field)
                logger.debug(
                    'field: `{}` from_field: `{}` field_value: `{}`',
                    field,
                    from_field,
                    field_value,
                )

                if config.get('remove'):
                    # Remove entire field
                    if field in entry:
                        del entry[field]
                        modified = True
                    continue

                if config.get('erase'):
                    # Erase text matching regex patterns
                    if not field_value:
                        logger.warning(
                            'Cannot erase patterns, field `{}` is not present', from_field
                        )
                        continue
                    original_value = field_value
                    for pattern in config['erase']:
                        field_value = re.sub(pattern, '', field_value, flags=regex_flags)
                    field_value = field_value.strip()
                    if original_value != field_value:
                        logger.debug('field `{}` after erase patterns: `{}`', field, field_value)
                    # Fail entry if title field becomes empty after erase
                    if field == 'title' and not field_value:
                        entry.fail('Title became empty after erase operation')

                if 'extract' in config:
                    if not field_value:
                        logger.warning('Cannot extract, field `{}` is not present', from_field)
                        continue
                    if config.get('find_all'):
                        match = re.findall(config['extract'], field_value, regex_flags)
                        logger.debug('all matches: {}', match)
                        # Multi-group patterns produce tuples, which str.join rejects.
                        field_value = separator.join(self._flatten_matches(match)).strip()
                        logger.debug('field `{}` after extract: `{}`', field, field_value)
                    else:
                        match = re.search(config['extract'], field_value, regex_flags)
                        if match:
                            groups = [x for x in match.groups() if x is not None]
                            logger.debug('groups: {}', groups)
                            field_value = separator.join(groups).strip()
                            logger.debug('field `{}` after extract: `{}`', field, field_value)

                if 'replace' in config:
                    if not field_value:
                        logger.warning('Cannot replace, field `{}` is not present', from_field)
                        continue
                    replace_config = config['replace']
                    regexp = re.compile(replace_config['regexp'], flags=regex_flags)
                    field_value = regexp.sub(replace_config['format'], field_value).strip()
                    logger.debug('field `{}` after replace: `{}`', field, field_value)

                if from_field == field and field_value is None and field not in entry:
                    # Nothing was read and no operation produced a value; the
                    # comparison below would raise KeyError on the missing field.
                    continue

                if from_field != field or entry[field] != field_value:
                    logger.verbose('Field `{}` is now `{}`', field, field_value)
                    modified = True
                entry[field] = field_value
        return modified
@event('plugin.register')
def register_plugin():
    """Register the Manipulate class under the name ``manipulate`` (API version 2)."""
    plugin.register(Manipulate, 'manipulate', api_ver=2)