"""
Shared test helper for verifying feed ordering across all formats

This module provides utilities to verify that feed items are in the correct
order (newest first) regardless of feed format (RSS, ATOM, JSON Feed).
"""

import json
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime


def assert_feed_newest_first(feed_content, format_type='rss', expected_count=None):
    """
    Verify feed items are in newest-first order

    Only items that carry a date are considered; an item without a date
    element/field is skipped by the extractors and therefore does not count
    towards ``expected_count``.

    Args:
        feed_content: Feed content as string (XML for RSS/ATOM, JSON string
            for JSON Feed)
        format_type: Feed format ('rss', 'atom', or 'json')
        expected_count: Optional expected number of dated items. When given,
            the count must match exactly (``0`` is allowed and means "the
            feed must be empty"). When omitted, the feed must contain at
            least one dated item.

    Returns:
        True when every check passes (failures raise instead).

    Raises:
        AssertionError: If items are not in newest-first order, the count
            does not match, or (without expected_count) the feed is empty
        ValueError: If format_type is not one of the supported formats

    Examples:
        >>> feed_xml = generate_rss_feed(notes)
        >>> assert_feed_newest_first(feed_xml, 'rss', expected_count=10)
        >>> feed_json = generate_json_feed(notes)
        >>> assert_feed_newest_first(feed_json, 'json')
    """
    if format_type == 'rss':
        dates = _extract_rss_dates(feed_content)
    elif format_type == 'atom':
        dates = _extract_atom_dates(feed_content)
    elif format_type == 'json':
        dates = _extract_json_feed_dates(feed_content)
    else:
        raise ValueError(f"Unsupported format type: {format_type}")

    if expected_count is not None:
        # An explicit expectation (including 0) fully determines the count.
        assert len(dates) == expected_count, \
            f"Expected {expected_count} items but found {len(dates)}"
    else:
        # Without an expectation, an empty feed would pass the ordering
        # check vacuously, so reject it explicitly.
        assert len(dates) > 0, "Feed contains no items"

    # Compare each item with its successor: dates must be descending
    # (newest first); equal timestamps are allowed.
    for i, (current, next_item) in enumerate(zip(dates, dates[1:])):
        assert current >= next_item, \
            f"Item {i} (date: {current}) should be newer than or equal to item {i+1} (date: {next_item}). " \
            f"Feed items are not in newest-first order!"

    return True


def _as_utc_aware(dt):
    """
    Return ``dt`` as a timezone-aware datetime

    Naive datetimes are assumed to be UTC. This keeps every extracted date
    mutually comparable: Python refuses to compare naive and aware datetimes
    (TypeError), and ``parsedate_to_datetime`` returns a naive value for a
    ``-0000`` zone.

    Args:
        dt: datetime object, naive or aware

    Returns:
        The same instant as an aware datetime (unchanged if already aware)
    """
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt


def _parse_rfc3339(text):
    """
    Parse an RFC 3339 (ISO 8601) timestamp into an aware datetime

    Args:
        text: Timestamp string, e.g. '2024-01-02T03:04:05Z'

    Returns:
        Timezone-aware datetime (offset-less input is treated as UTC)

    Raises:
        ValueError: If the text is not a valid ISO 8601 timestamp
    """
    text = text.strip()
    # Spell a trailing 'Z'/'z' (UTC designator) as an explicit offset so
    # fromisoformat() accepts it on every supported Python version.
    if text[-1:] in ('Z', 'z'):
        text = text[:-1] + '+00:00'
    return _as_utc_aware(datetime.fromisoformat(text))


def _extract_rss_dates(feed_xml):
    """
    Extract publication dates from RSS feed

    Items without a non-empty <pubDate> are skipped.

    Args:
        feed_xml: RSS feed XML string

    Returns:
        List of timezone-aware datetime objects in feed order
    """
    root = ET.fromstring(feed_xml)

    dates = []
    for item in root.findall('.//item'):
        pub_date_elem = item.find('pubDate')
        if pub_date_elem is not None and pub_date_elem.text:
            # RSS uses the RFC-822 date format
            dates.append(_as_utc_aware(parsedate_to_datetime(pub_date_elem.text)))

    return dates


def _extract_atom_dates(feed_xml):
    """
    Extract published/updated dates from ATOM feed

    For each entry <published> is preferred, falling back to <updated>.
    Entries with neither (or with empty text) are skipped.

    Args:
        feed_xml: ATOM feed XML string

    Returns:
        List of timezone-aware datetime objects in feed order
    """
    root = ET.fromstring(feed_xml)
    # ATOM elements live in this XML namespace
    ns = {'atom': 'http://www.w3.org/2005/Atom'}

    dates = []
    for entry in root.findall('.//atom:entry', ns):
        published = entry.find('atom:published', ns)
        updated = entry.find('atom:updated', ns)
        # Elements are tested with "is not None" because an Element without
        # children is falsy.
        date_elem = published if published is not None else updated

        if date_elem is not None and date_elem.text:
            dates.append(_parse_rfc3339(date_elem.text))

    return dates


def _extract_json_feed_dates(feed_json):
    """
    Extract publication dates from JSON Feed

    Items without a non-empty 'date_published' value are skipped.

    Args:
        feed_json: JSON Feed string

    Returns:
        List of timezone-aware datetime objects in feed order
    """
    feed_data = json.loads(feed_json)

    dates = []
    for item in feed_data.get('items', []):
        # JSON Feed uses date_published (RFC 3339)
        date_str = item.get('date_published')
        if date_str:
            dates.append(_parse_rfc3339(date_str))

    return dates