"""rel=me parser service for extracting email addresses from HTML.""" from bs4 import BeautifulSoup class RelMeParser: """Service for parsing rel=me links from HTML.""" def parse_relme_links(self, html: str) -> list[str]: """ Parse HTML for rel=me links. Args: html: HTML content to parse Returns: List of rel=me link URLs """ try: soup = BeautifulSoup(html, 'html.parser') links = [] # Find all tags with rel="me" attribute for link in soup.find_all('a', rel='me'): href = link.get('href') if href: links.append(href) # Also check for tags with rel="me" for link in soup.find_all('link', rel='me'): href = link.get('href') if href: links.append(href) return links except Exception: return [] def extract_mailto_email(self, relme_links: list[str]) -> str | None: """ Extract email address from mailto: links. Args: relme_links: List of rel=me link URLs Returns: Email address if found, None otherwise """ for link in relme_links: if link.startswith('mailto:'): # Extract email address from mailto: link email = link[7:] # Remove 'mailto:' prefix # Strip any query parameters (e.g., ?subject=...) if '?' in email: email = email.split('?')[0] # Basic validation if '@' in email and '.' in email: return email.strip() return None def find_email(self, html: str) -> str | None: """ Find email address from HTML by parsing rel=me links. Args: html: HTML content to parse Returns: Email address if found, None otherwise """ relme_links = self.parse_relme_links(html) return self.extract_mailto_email(relme_links)