# Source code for flexget.plugins.modify.sort_by_weight

from datetime import datetime, timedelta

from loguru import logger

from flexget import plugin
from flexget.event import event
from flexget.utils.qualities import Quality
from flexget.utils.tools import parse_timedelta

__author__ = 'andy'

# Rebind the imported loguru logger so everything logged from this module is
# bound with name='sort_by_weight'.
logger = logger.bind(name='sort_by_weight')

# Entry field that accumulates the summed weight of all configured sort fields.
ENTRY_WEIGHT_FIELD_NAME = 'sort_by_weight_sum'

# Number of slots used when no stride can be derived from the configuration.
# It's a design choice to allow 'similar' values to be grouped under the same
# slot/weight.
DEFAULT_STRIDE = 10


class PluginSortByWeight:
    """Sort task entries based on multiple fields using a sort weight per field.

    Result per entry is stored in 'sort_by_weight_sum'.

    Basic Concept:
    For each field we calculate a weight based on given parameters and then sum
    the weights up and do a sort based on it.

    ===============  ==============================================================================================================
    Option           Description
    ===============  ==============================================================================================================
    field            Name of the sort field
    weight           The sort weight used, values between 10-200 are good starts
    inverse: yes     Use inverse weighting for the field, example: date, age fields that range in the past
                     This means the lowest entry/value will get the highest weight
    upper_limit      The upper value limit or upper cutoff value, that will be used for weighting.
                     This will change the slot distribution, which helps narrow down to more meaningfully weighting results.
                     Example: Entry1 is 100 days old, Entry2 is 7 days old, Entry3 is 1000 days old
                     Without an upper_limit the weights will be distributed between 0-1000 days,
                     with an upper_limit: 100 days weights will be distributed between 0-100 days
                     and any value larger than upper_limit is treated like the upper_limit itself.
                     So we can smoothly distribute the rest between 0-100 days.
    delta_distance   The distance, step used to split the value range into weight slots.
                     NOTE: If not given the value range will be distributed over 10 distinct steps
                     Example: Size1 = 4000 MB, Size2 = 3000 MB, Size3 = 700 MB
                     With a weight: 50 and delta_distance: 1200 the range 0-4000 MB is split into 3 slots,
                     Size1 gets the maximum weight of 50, Size2 gets 41 and Size3 gets 9.
    ===============  ==============================================================================================================

    Example::

        simple:
          sort_by_weight:
            - field: quality
              weight: 100            # quality is most important, use highest weight
            - field: content_size
              weight: 70             # size is still a good quality estimate so use a high weight
            - field: newznab_pubdate
              weight: 30             # age is somewhat important so use low weight
              inverse: yes

        advanced:
          sort_by_weight:
            - field: content_size
              weight: 80             # we want large files mainly = good quality
              delta_distance: 500    # step size of 500 MB per weight slot
              upper_limit: 8000      # anything over 8000 MB is fine and will get the max weight (80)
            - field: newznab_pubdate
              weight: 30             # we still like new releases
              upper_limit: 60 days   # anything older 60 days gets the lowest weight (because of inverse: yes)
              inverse: yes           # reverse weight order for date/age fields
            - field: newznab_grabs
              weight: 20             # we like releases that others already downloaded
              upper_limit: 100       # anything over 100 grabs is fine and gets maximum weight

        In this example the best result can have a 'sort_by_weight_sum' of sum = 80 + 30 + 20
    """

    schema = {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': {
                'field': {'type': 'string'},
                'weight': {'type': 'integer', 'minimum': 5},
                'inverse': {'type': 'boolean', 'default': False},
                'upper_limit': {
                    'oneOf': [
                        {'type': 'integer', 'minimum': 1},
                        {'type': 'string', 'format': 'interval'},
                    ]
                },
                'delta_distance': {
                    'oneOf': [
                        {'type': 'integer', 'minimum': 1},
                        {'type': 'string', 'format': 'interval'},
                    ]
                },
            },
            'required': ['field', 'weight'],
            'additionalProperties': False,
        },
        'minItems': 2,
    }

    def prepare_config(self, config):
        """Turn the list-style plugin config into a dict keyed by field name.

        String values of ``delta_distance`` and ``upper_limit`` are converted
        with ``parse_timedelta``; a missing ``inverse`` option defaults to
        ``False``. Items that are not dicts, or whose ``field`` is empty or
        whitespace only, are ignored.

        :param config: Iterable of per-field option dicts (see ``schema``).
        :return: ``{field_name: options}``; the option dicts are copies, the
            passed-in config is left untouched.
        """
        settings = {}
        for item in config:
            if not isinstance(item, dict):
                continue
            field = item.get('field')
            if not field or field.isspace():
                continue
            # Copy first: writing parsed timedeltas back into the task config
            # would silently alter the caller's data.
            options = dict(item)
            options.setdefault('inverse', False)
            for option in ('delta_distance', 'upper_limit'):
                raw = options.get(option)
                if raw and isinstance(raw, str):
                    options[option] = parse_timedelta(raw)
            settings[field] = options
        return settings

    @plugin.priority(127)  # run after default filters
    def on_task_filter(self, task, config):
        """Weight the accepted and undecided entries and sort ``task.all_entries``.

        Nothing happens when fewer than two accepted/undecided entries exist.
        Entries are sorted descending by the value stored under
        ``ENTRY_WEIGHT_FIELD_NAME`` (entries without it count as 0).
        """
        entries = list(task.accepted) + list(task.undecided)
        if len(entries) < 2:
            return
        config = self.prepare_config(config)
        logger.verbose(
            'Calculating weights for undecided, accepted entries and sorting by result field: {}',
            ENTRY_WEIGHT_FIELD_NAME,
        )
        self.calc_weights(entries, config)
        task.all_entries.sort(key=lambda e: e.get(ENTRY_WEIGHT_FIELD_NAME, 0), reverse=True)

    @staticmethod
    def _get_lower_limit(value):
        """Return the natural lower bound for the type of ``value``.

        ``Quality`` -> ``Quality()``, ``bool`` -> ``False``, ``datetime`` ->
        the current time, ``timedelta`` -> ``timedelta(0)``, anything else -> 0.
        """
        if isinstance(value, Quality):
            return Quality()
        if isinstance(value, bool):
            return False
        if isinstance(value, datetime):
            # Dates are compared against "now"; reuse the value's tzinfo so
            # timezone-aware values can be compared and subtracted as well.
            return datetime.now(tz=value.tzinfo)
        if isinstance(value, timedelta):
            return timedelta(0)
        return 0

    @staticmethod
    def _value_or_default(entry, key, default):
        """Return ``entry[key]``, or ``default`` when the key is missing or ``None``."""
        value = entry.get(key)
        return default if value is None else value

    @staticmethod
    def _limit_value(key, value, config):
        """Clamp ``value`` to the ``upper_limit`` configured for ``key``.

        For a ``datetime`` value combined with a ``timedelta`` limit the limit
        is interpreted relative to the current time: with ``inverse`` the value
        is not allowed to be older than ``now - limit``, otherwise not later
        than ``now + limit``. All other values are capped at the limit itself.

        :return: The (possibly clamped) value; unchanged if no limit is set.
        """
        limit = config[key].get('upper_limit')
        if not limit:
            return value
        if isinstance(value, datetime) and isinstance(limit, timedelta):
            now = datetime.now(tz=value.tzinfo)
            if config[key].get('inverse') is True:
                return max(value, now - limit)
            return min(value, now + limit)
        if value > limit:
            return limit
        return value

    def _calc_stride_delta(self, key, entries, config):
        """Derive ``(stride, delta)`` for ``key`` from the entries' value range.

        The range spans from the natural lower bound (or the smallest value, if
        that is lower) up to the largest value after applying ``upper_limit``.
        With a configured ``delta_distance`` the stride is ``range / delta``;
        without one, ``delta`` is ``range / DEFAULT_STRIDE`` and stride is None.
        Both are ``None`` when the range is empty.

        :raises Exception: Whatever the value type raises for unsupported
            comparison/arithmetic; ``calc_weights`` handles that.
        """
        lower_default = self._get_lower_limit(entries[0][key])
        # Entries lacking the field take part with the lower bound instead of
        # raising KeyError.
        values = [self._value_or_default(e, key, lower_default) for e in entries]
        max_value = self._limit_value(key, max(values), config)
        min_value = min(values)
        try:
            min_value = min(min_value, lower_default)  # try normalize to natural lower bound
        except Exception as ex:
            logger.debug('Incompatible min_value op: {}', ex)

        stride = None
        delta = None
        value_range = max_value - min_value
        if value_range:
            if 'delta_distance' in config[key]:
                delta = config[key]['delta_distance']
                stride = value_range / delta
                if isinstance(stride, timedelta):
                    stride = stride.days
            else:
                delta = value_range / DEFAULT_STRIDE
        return stride, delta

    def _proportional_weight(self, value, delta, weight_step):
        """Return ``(value / delta) * weight_step``.

        Values that cannot be divided directly (e.g. ``datetime``) are first
        converted to their absolute distance from the natural lower bound.
        """
        try:
            return (value / delta) * weight_step
        except Exception:
            # convert value to distance from minimum
            distance = abs(value - self._get_lower_limit(value))
            return (distance / delta) * weight_step

    def calc_weights(self, entries, config):
        """Add the weight of every configured field to each entry.

        The sum is accumulated in ``entry[ENTRY_WEIGHT_FIELD_NAME]``. Fields
        that the first entry does not have are ignored. For every other field:

        * if a value range can be computed, the weight is proportional to the
          value (stride = range / delta, step = max_weight / stride,
          weight = value / delta * step);
        * otherwise the entries are sorted descending by the field and the
          weight starts at the maximum and drops by one step each time the
          value decreases (note: this re-orders ``entries`` in place).

        With ``inverse`` the weight is mirrored (``max_weight - weight``). The
        per-field contribution is always within ``0..weight``. Entries whose
        value is missing or ``None`` get no contribution for that field.

        :param entries: List of entries (dict-like) to weight.
        :param config: Dict as returned by ``prepare_config``.
        """
        if not entries:
            return
        for key in config:
            if key not in entries[0]:
                continue
            stride = None
            delta = None
            try:
                stride, delta = self._calc_stride_delta(key, entries, config)
            except Exception as ex:
                delta = None
                logger.warning(
                    'Could not calculate stride for key: {}, type: {}, using fallback sort. Error: {}',
                    key,
                    type(entries[0][key]),
                    ex,
                )
                lower_default = self._get_lower_limit(entries[0][key])
                entries.sort(
                    key=lambda e, k=key, d=lower_default: self._value_or_default(e, k, d),
                    reverse=True,
                )
            if not stride:
                stride = DEFAULT_STRIDE

            max_weight = config[key]['weight']
            weight_step = max_weight / max(int(stride), 1)
            inverse = config[key].get('inverse') is True

            # State of the ordinal fallback. It is kept apart from the
            # per-entry result so that inverting/truncating one entry's weight
            # cannot leak into the next entry.
            slot_weight = max_weight
            current_value = None

            for entry in entries:
                if ENTRY_WEIGHT_FIELD_NAME not in entry:
                    entry[ENTRY_WEIGHT_FIELD_NAME] = 0
                value = entry.get(key)
                if value is None:
                    continue
                value = self._limit_value(key, value, config)

                if delta:
                    try:
                        weight = self._proportional_weight(value, delta, weight_step)
                    except Exception as ex:
                        logger.warning(
                            'Skipping entry: {}, could not calc weight for key: {}, error: {}',
                            entry,
                            key,
                            ex,
                        )
                        continue
                else:
                    if current_value is None:
                        current_value = value
                    elif value < current_value:
                        slot_weight -= weight_step
                        current_value = value
                    weight = slot_weight

                if inverse:
                    weight = max_weight - weight
                # int(stride) truncation can push the raw weight past the
                # configured maximum; keep the contribution inside 0..max_weight.
                weight = int(min(max(weight, 0), max_weight))
                entry[ENTRY_WEIGHT_FIELD_NAME] += weight
                # self._add_debug_info(key, entry, weight, entry[key], value)  # debug only

    def _add_debug_info(self, key, entry, weight, *args):
        """Store a readable ``'<weight> = [<args>]'`` string in ``entry['weights'][key]``.

        Debug helper: timedeltas are shortened to days, datetimes to
        ``year-month-day`` and qualities to their component names.
        """
        if 'weights' not in entry:
            entry['weights'] = {}
        short_args = []
        for arg in args:
            if isinstance(arg, timedelta):
                short_args.append(arg.days)
            elif isinstance(arg, datetime):
                date = arg.date()
                short_args.append(f'{date.year}-{date.month}-{date.day}')
            elif isinstance(arg, Quality):
                quality_string = f'[ {arg.resolution} ]-{arg.source}-{arg.codec}, [ {arg.audio} ]'
                if quality_string not in short_args:
                    short_args.append(quality_string)
            else:
                short_args.append(arg)
        entry['weights'][key] = f'{weight} = {short_args}'
@event('plugin.register')
def register_plugin():
    """Register ``PluginSortByWeight`` under the name 'sort_by_weight' (API version 2)."""
    plugin.register(PluginSortByWeight, 'sort_by_weight', api_ver=2)