
# ---------------------------------------------------------------------- # 2️⃣ Site‑specific search utilities # ---------------------------------------------------------------------- class BaseScraper: """Common helpers for all three sites."""
# Add a source-count field: for every match, the number of entries in
# `matches` that carry the same URL.
url_to_count = {}
for m in matches:
    link = m["url"]
    if link in url_to_count:
        url_to_count[link] += 1
    else:
        url_to_count[link] = 1
# Second pass: the totals are only final once every match has been tallied.
for m in matches:
    m["source_count"] = url_to_count[m["url"]]
@classmethod def search(cls, query: str) -> List[Dict[str, Any]]: url = cls.SEARCH_URL.format(query=query.replace(" ", "%20")) soup = BeautifulSoup(cls._get(url).text, "html.parser") cards = soup.select("div.movie-box") # CSS selector works for current layout results = [] for c in cards: title_tag = c.select_one("h2 a") if not title_tag: continue title = title_tag.get_text(strip=True) href = cls._clean_link(title_tag["href"])
# ----------------------------------------------------------------------
# 1. Helper - normalise user query
# ----------------------------------------------------------------------
def normalize(text: str) -> str:
    """Return a lower-cased, ASCII-only, whitespace-collapsed form of *text*.

    Steps, in order:

    1. NFKD-decompose so accented letters split into base letter + mark.
    2. Encode to ASCII ignoring errors, which drops the marks and any other
       character that has no ASCII form.
    3. Remove every character that is not a word character, whitespace or a
       hyphen (underscores count as word characters and are kept).
    4. Collapse whitespace runs to a single space and trim both ends.
    5. Lower-case the result.

    Args:
        text: Raw query string.

    Returns:
        The normalised string; may be empty.
    """
    text = unicodedata.normalize("NFKD", text)
    text = text.encode("ascii", "ignore").decode()
    # Keep hyphens: some titles use them.
    text = re.sub(r"[^\w\s-]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text.lower()
class FilmywapScraper(BaseScraper):
    """Scraper settings for filmywap.net."""

    # Search URL template. The `{query}` placeholder is required: without it
    # `SEARCH_URL.format(query=...)` returns the string unchanged and the
    # search term never reaches the site.
    SEARCH_URL = "https://www.filmywap.net/search/{query}"
# Rank matches: highest source_count first, ties broken by quality
# (4k > 1080p > 720p > 480p > unknown).
quality_order = {"4k": 4, "1080p": 3, "720p": 2, "480p": 1, None: 0}
matches.sort(
    key=lambda x: (
        # Negated so the ascending sort puts the largest values first.
        -x["source_count"],
        # A falsy quality looks up the None entry; an unrecognised label
        # falls back to rank 0 via the .get() default.
        -quality_order.get(x["quality"].lower() if x["quality"] else None, 0),
    )
)
@classmethod def search(cls, query: str) -> List[Dict[str, Any]]: url = cls.SEARCH_URL.format(query=query.replace(" ", "+")) soup = BeautifulSoup(cls._get(url).text, "html.parser") cards = soup.select("div.result-item") results = [] for c in cards: a = c.select_one("a.title") if not a: continue title = a.get_text(strip=True) href = cls._clean_link(a["href"])
# Join the entries of args.title with single spaces to form the query
# (presumably the title words from the command line - confirm).
query_str = " ".join(args.title)
# Run the search for that query; search_movie is defined outside this excerpt.
data = search_movie(query_str)
# Parse the optional metadata line of a result card.
# Example meta: "2022 Hindi 1080p"
meta = c.select_one("span.meta")
year, language, quality = None, None, None
if meta:
    txt = meta.get_text()
    # Four-digit year 2000-2099. The quantifier must be `{2}`; a bare
    # `\d2` would only accept years ending in the digit 2.
    m_year = re.search(r"\b(20\d{2})\b", txt)
    year = m_year.group(1) if m_year else None
    # Only Hindi is detected; any other language is left as None.
    language = "Hindi" if "hindi" in txt.lower() else None
    # Case-insensitive match; the label is kept as written in the page.
    qual_match = re.search(r"\b(720p|1080p|4k)\b", txt, re.I)
    quality = qual_match.group(0) if qual_match else None
