Over 1,000,000 CPUs Benchmarked
import re import json import unicodedata from typing import List, Dict, Any import requests from bs4 import BeautifulSoup from rapidfuzz import fuzz, process
results.append( "source": "Filmy4wap", "title": title, "year": year, "language": language, "quality": quality, "url": href, ) return results
@classmethod def search(cls, query: str) -> List[Dict[str, Any]]: url = cls.SEARCH_URL.format(query=query.replace(" ", "+")) soup = BeautifulSoup(cls._get(url).text, "html.parser") cards = soup.select("div.result-item") results = [] for c in cards: a = c.select_one("a.title") if not a: continue title = a.get_text(strip=True) href = cls._clean_link(a["href"])
# ----------------------------------------------------------------------
# 1️⃣ Helper – normalise user query
# ----------------------------------------------------------------------
def normalize(text: str) -> str:
    """Return a canonical, comparison-friendly form of *text*.

    The steps are applied in this order:

    1. NFKD-decompose, so accented letters split into base letter + mark.
    2. Drop every non-ASCII code point (this removes the split-off marks,
       and also any character with no ASCII decomposition).
    3. Delete punctuation, keeping word characters, whitespace and hyphens.
    4. Collapse whitespace runs to one space and trim both ends.
    5. Lower-case.

    Args:
        text: Raw title or user query.

    Returns:
        The normalised string; empty if nothing survives the ASCII filter.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    ascii_only = decomposed.encode("ascii", "ignore").decode()
    # Keep hyphens (some titles use them); drop all other punctuation.
    without_punct = re.sub(r"[^\w\s-]", "", ascii_only)
    return re.sub(r"\s+", " ", without_punct).strip().lower()
query_str = " ".join(args.title) data = search_movie(query_str)
# Sort by most‑popular (higher source_count) → higher quality quality_order = "4k": 4, "1080p": 3, "720p": 2, "480p": 1, None: 0 matches.sort( key=lambda x: ( -x["source_count"], -quality_order.get(x["quality"].lower() if x["quality"] else None, 0), ) )
# ---------------------------------------------------------------------- # 3️⃣ Matching logic (exact first, then fuzzy) # ---------------------------------------------------------------------- def match_results( results: List[Dict[str, Any]], query_norm: str, min_fuzzy: int = 85, ) -> List[Dict[str, Any]]: """Return a list of results that match the query.""" exact = [r for r in results if normalize(r["title"]) == query_norm] if exact: return exact process results.append( "source": "Filmy4wap"
@classmethod def search(cls, query: str) -> List[Dict[str, Any]]: url = cls.SEARCH_URL.format(query=query.replace(" ", "-")) soup = BeautifulSoup(cls._get(url).text, "html.parser") cards = soup.select("article.movie-item") results = [] for c in cards: a = c.select_one("h3 a") if not a: continue title = a.get_text(strip=True) href = cls._clean_link(a["href"])
@classmethod def search(cls, query: str) -> List[Dict[str, Any]]: url = cls.SEARCH_URL.format(query=query.replace(" ", "%20")) soup = BeautifulSoup(cls._get(url).text, "html.parser") cards = soup.select("div.movie-box") # CSS selector works for current layout results = [] for c in cards: title_tag = c.select_one("h2 a") if not title_tag: continue title = title_tag.get_text(strip=True) href = cls._clean_link(title_tag["href"])
@staticmethod def _clean_link(raw: str) -> str: """Turn relative URLs into absolute ones.""" return raw if raw.startswith("http") else f"https:raw" ) return results @classmethod def search(cls
# Collect raw results from each site raw = [] for scraper in (FilmyFlyScraper, Filmy4wapScraper, FilmywapScraper): try: raw.extend(scraper.search(query_norm)) except Exception as e: # We never want a single site failure to break the whole flow print(f"[⚠️] scraper.__name__ failed: e")
class FilmywapScraper(BaseScraper):
    # URL template for the site's search page. The scraper ``search``
    # classmethods in this file build the request URL with
    # ``cls.SEARCH_URL.format(query=...)``, so the ``{query}`` placeholder is
    # required; without it ``format`` returns the string unchanged and the
    # user's search term is silently discarded.
    SEARCH_URL = "https://www.filmywap.net/search/{query}"
return "query": query, "normalized_query": query_norm, "total_matches": len(matches), "results": matches,
@staticmethod
def _get(url: str) -> requests.Response:
    """GET *url* with a tiny retry loop.

    Makes up to three attempts, each sent with ``BaseScraper.HEADERS`` and a
    12-second timeout. A response with an error status counts as a failed
    attempt, because ``raise_for_status()`` is called on it.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The first successful response.

    Raises:
        RuntimeError: If all three attempts fail. The message names the URL
            and the last ``requests`` error is chained as the cause.
    """
    last_error = None
    for _ in range(3):
        try:
            response = requests.get(url, headers=BaseScraper.HEADERS, timeout=12)
            response.raise_for_status()
        except requests.RequestException as exc:
            # Remember the failure so the final error can explain what went
            # wrong, then try again.
            last_error = exc
        else:
            return response
    raise RuntimeError(f"Failed to fetch {url}") from last_error