Source code for flexget.utils.parsers.movie

import re
from datetime import datetime

from loguru import logger

from flexget.utils import qualities
from flexget.utils.parsers.parser import TitleParser
from flexget.utils.tools import str_to_int

# Rebind the module-level logger so it carries name='movieparser' as bound context.
logger = logger.bind(name='movieparser')


def diff_pos(string1, string2):
    """Return first position where string1 and string2 differ.

    The scan walks over ``string1`` only. An index is reported when
    ``string2`` has already run out at that index, or when the characters at
    that index are not equal.

    :param string1: Sequence that drives the comparison.
    :param string2: Sequence compared against ``string1``.
    :return: Index of the first difference, or ``None`` when every position
        of ``string1`` matches ``string2`` (including when ``string1`` is
        empty or is a prefix of ``string2``).
    """
    other_len = len(string2)
    mismatches = (
        index
        for index, char in enumerate(string1)
        if index >= other_len or string2[index] != char
    )
    return next(mismatches, None)
class MovieParser(TitleParser):
    """Title parser that extracts movie name, year, quality and proper count.

    Results of the last :meth:`parse` call are stored on the instance as
    ``name``, ``year``, ``year_pos``, ``quality`` and ``proper_count``.
    """

    def __init__(self):
        # Raw title used by parse() when no explicit data is passed in.
        self.data = None
        self.reset()
        TitleParser.__init__(self)

    def reset(self):
        """Clear all parsing results so the parser instance can be reused."""
        # parsing results
        self.name = None
        self.year = None
        self.year_pos = None
        self.quality = qualities.Quality()
        self.proper_count = 0

    def __str__(self):
        return f'<MovieParser(name={self.name},year={self.year},quality={self.quality})>'

    def parse(self, data=None):
        """Parse movie name. Populates name, year, quality and proper_count attributes.

        The title is split into space separated parts. The earliest part that
        looks like a year, a release group, a cutoff word or a proper marker
        becomes the cut point; everything before it (or before the first
        quality token, whichever comes first) is stored as the movie name.

        :param data: Raw title to parse. When ``None``, ``self.data`` is used.
        """
        # Reset before parsing, so the parser can be reused.
        self.reset()

        if data is None:
            data = self.data

        # Move anything in leading brackets to the end
        data = re.sub(r'^\[(.*?)\](.*)', r'\2 \1', data)

        for char in '[]()_,.':
            data = data.replace(char, ' ')

        # if there are no spaces
        if data.find(' ') == -1:
            data = data.replace('-', ' ')

        # remove unwanted words (imax, ..)
        # NOTE(review): the return value is discarded, and str is immutable, so
        # this call cannot change `data`. Presumably
        # `data = self.remove_words(...)` was intended - confirm against
        # TitleParser.remove_words before changing behavior here.
        self.remove_words(data, self.remove)

        data = self.strip_spaces(data)

        # split to parts
        parts = data.split(' ')
        # 256 acts as the "no cut point found" sentinel.
        cut_part = 256
        all_caps = True
        # Loop invariant: the upper bound for what can be considered a year.
        current_year = datetime.now().year
        for part_pos, part in enumerate(parts):
            cut = False
            # Don't let the first word be cutoff word
            if part_pos < 1:
                continue
            # check for year
            num = str_to_int(part)
            if num is not None and 1930 < num <= current_year:
                if self.year_pos == cut_part:
                    # Looks like a year, but we already set the cutpoint to a year,
                    # let's move it forward
                    cut_part = part_pos
                self.year = num
                self.year_pos = part_pos
                cut = True
            # Don't consider all caps words cut words if the whole title has been all caps
            if not part.isupper():
                all_caps = False
            # if length > 3 and whole word in uppers, consider as cut word
            # (most likely a group name)
            if len(part) > 3 and part.isupper() and part.isalpha() and not all_caps:
                cut = True
            lowered = part.lower()
            # check for cutoff words
            if lowered in self.cutoffs:
                cut = True
            # check for propers, 'real' and 'final' are too common in movie names,
            # only cut on those if they come after the year. The membership test
            # must guard the whole condition; otherwise every part following a
            # year would be counted as a proper.
            if lowered in self.propers and (lowered not in ('real', 'final') or self.year):
                self.proper_count += 1
                cut = True
            # update cut position; compare the current position (not the first
            # occurrence of an equal word) so a repeated word can never push an
            # already established cut point forward.
            if cut and part_pos < cut_part:
                cut_part = part_pos

        if cut_part != 256:
            logger.debug('parts: {}, cut is: {}', parts, parts[cut_part])

        # calculate cut position from cut_part
        abs_cut = len(' '.join(parts[:cut_part]))

        logger.debug(
            'after parts check, cut data would be: `{}` abs_cut: {}', data[:abs_cut], abs_cut
        )

        # parse quality
        quality = qualities.Quality(data)
        if quality:
            self.quality = quality
            # remaining string is same as data but quality information removed
            # find out position where there is first difference, this is earliest
            # quality bit, anything after that has no relevance to the movie name
            dp = diff_pos(data, quality.clean_text)
            if dp is not None:
                logger.debug('quality start: {}', dp)
                if dp < abs_cut:
                    logger.debug('quality cut is even shorter')
                    abs_cut = dp

        # make cut
        data = data[:abs_cut].strip()
        logger.debug('data cut to `{}` - this will be the name', data)

        # save results
        self.name = data