From eecee074e2dfd1ed3d012658264203875a2d011c Mon Sep 17 00:00:00 2001
From: Phil
Date: Tue, 8 Oct 2024 13:55:13 -0600
Subject: [PATCH] added Dockerfile for container build

---
 Dockerfile | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.py    |  6 ++++--
 2 files changed, 68 insertions(+), 2 deletions(-)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b884553
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,64 @@
+# Use the official Python 3.12.7 Slim image as the base
+FROM python:3.12.7-slim-bullseye
+
+# Set the working directory
+WORKDIR /app
+
+# Install system dependencies needed for Playwright and its browsers.
+# libayatana-appindicator3-1 stands in for libappindicator3-1, which is not
+# packaged for Debian bullseye (the release this base image is built on).
+RUN apt-get update && apt-get install -y \
+    bash \
+    build-essential \
+    libffi-dev \
+    git \
+    curl \
+    ca-certificates \
+    wget \
+    gnupg \
+    libnss3 \
+    libatk-bridge2.0-0 \
+    libx11-xcb1 \
+    libxcomposite1 \
+    libxcursor1 \
+    libxdamage1 \
+    libxi6 \
+    libxtst6 \
+    libayatana-appindicator3-1 \
+    libxrandr2 \
+    xdg-utils \
+    libgbm1 \
+    libpango-1.0-0 \
+    libasound2 \
+    libpangocairo-1.0-0 \
+    libxshmfence1 \
+    libx11-6 \
+    libatk1.0-0 \
+    libgtk-3-0 \
+    libdrm2 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Playwright and required Python dependencies
+RUN pip install --upgrade pip && \
+    pip install \
+    playwright \
+    beautifulsoup4 \
+    feedgen \
+    pytz
+
+# Set the browser cache location before installing, so the build-time
+# download and the runtime lookup are guaranteed to use the same directory
+ENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright
+
+# Install Playwright browser binaries
+RUN playwright install
+
+# Create the directory main.py writes its output files to, so the script
+# does not fail with FileNotFoundError when no volume is mounted there
+RUN mkdir -p /app/output
+
+# Copy the Python script to the container
+COPY main.py .
+
+# Command to run the Python script
+CMD ["python", "main.py"]
diff --git a/main.py b/main.py
index 739e0c9..c77696e 100644
--- a/main.py
+++ b/main.py
@@ -16,7 +16,7 @@ def scrape_and_generate_rss(url):
         page = browser.new_page()
 
         # Set a longer timeout for loading the page
-        page.set_default_navigation_timeout(60000)
+        page.set_default_navigation_timeout(120000)
 
         # Load the Warhammer Community page
         page.goto(url, wait_until="networkidle")
@@ -98,9 +98,11 @@ def scrape_and_generate_rss(url):
     rss_feed = fg.rss_str(pretty=True)
 
     # Save the RSS feed to a file
-    with open('warhammer_rss_feed.xml', 'wb') as f:
+    with open('/app/output/warhammer_rss_feed.xml', 'wb') as f:
         f.write(rss_feed)
 
+    with open('/app/output/page.html','w', encoding='utf-8') as f:
+        f.write(soup.prettify())
     print('RSS feed generated and saved as warhammer_rss_feed.xml')
 
 # Run the function