Sone-162-javhd-today-04192024-javhd-today02-23-... -
"raw_filename": "SONE-162-JAVHD-TODAY-04192024-JAVHD-TODAY02-23-...", "movie_id": "SONE-162", "source": "JAVHD", "release_date": "2024-04-19", "segment": "23", "is_duplicate_tag": True
return features filename = "SONE-162-JAVHD-TODAY-04192024-JAVHD-TODAY02-23-..." print(parse_jav_filename(filename)) SONE-162-JAVHD-TODAY-04192024-JAVHD-TODAY02-23-...
# Extract segment (e.g., 02, 23) seg_match = re.findall(r'\b(\d2)\b', filename) if len(seg_match) > 1: features["segment"] = seg_match[-1] # last 2-digit number 23) seg_match = re.findall(r'\b(\d2)\b'
# Extract movie ID (e.g., SONE-162) movie_match = re.search(r'([A-Z]+-\d+)', filename) if movie_match: features["movie_id"] = movie_match.group(1) filename) if len(seg_match) >
"filename_cleaned": "SONE-162 04192024", "has_date": true, "has_javhd": true, "word_count": 5, "digit_group_count": 3, "possible_quality": "unknown", "possible_part": 2