added Dockerfile for container build

This commit is contained in:
Phil 2024-10-08 13:55:13 -06:00
parent 3d44106f02
commit eecee074e2
2 changed files with 61 additions and 2 deletions

57
Dockerfile Normal file
View File

@ -0,0 +1,57 @@
# Container image for the RSS scraper (main.py), which drives a Playwright
# browser and writes its results under /app/output.

# Use the official Python 3.12.7 Slim image as the base
FROM python:3.12.7-slim-bullseye

# Set the working directory
WORKDIR /app

# Install system dependencies needed for Playwright and its browsers.
# - --no-install-recommends keeps optional packages out of the image.
# - The apt lists are removed in the same layer so they never reach the image.
# - libayatana-appindicator3-1 stands in for libappindicator3-1.
#   NOTE(review): libappindicator3-1 appears to have been dropped from Debian
#   after buster, which makes the install fail on bullseye; confirm and drop
#   the package entirely if the browsers do not need it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        bash \
        build-essential \
        ca-certificates \
        curl \
        git \
        gnupg \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libayatana-appindicator3-1 \
        libdrm2 \
        libffi-dev \
        libgbm1 \
        libgtk-3-0 \
        libnss3 \
        libpango-1.0-0 \
        libpangocairo-1.0-0 \
        libx11-6 \
        libx11-xcb1 \
        libxcomposite1 \
        libxcursor1 \
        libxdamage1 \
        libxi6 \
        libxrandr2 \
        libxshmfence1 \
        libxtst6 \
        wget \
        xdg-utils \
    && rm -rf /var/lib/apt/lists/*

# Install Playwright and required Python dependencies.
# --no-cache-dir stops pip from leaving its download cache in the layer.
# TODO(review): pin package versions (or use a requirements.txt) so rebuilds
# are reproducible.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        beautifulsoup4 \
        feedgen \
        playwright \
        pytz

# Fix the browser location before `playwright install` so that the download
# step and the running script look in the same directory.
ENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright

# Install Playwright browser binaries
RUN playwright install

# main.py writes warhammer_rss_feed.xml and page.html into /app/output.
# Create the directory so the script also works when no volume is mounted
# there (open() does not create missing parent directories).
RUN mkdir -p /app/output

# Copy the Python script to the container (last, because it changes most often
# and should not invalidate the dependency layers above)
COPY main.py .

# Command to run the Python script
CMD ["python", "main.py"]

View File

@ -16,7 +16,7 @@ def scrape_and_generate_rss(url):
page = browser.new_page()
# Set a longer timeout for loading the page
page.set_default_navigation_timeout(60000)
page.set_default_navigation_timeout(120000)
# Load the Warhammer Community page
page.goto(url, wait_until="networkidle")
@ -98,9 +98,11 @@ def scrape_and_generate_rss(url):
rss_feed = fg.rss_str(pretty=True)
# Save the RSS feed to a file
with open('warhammer_rss_feed.xml', 'wb') as f:
with open('/app/output/warhammer_rss_feed.xml', 'wb') as f:
f.write(rss_feed)
with open('/app/output/page.html','w', encoding='utf-8') as f:
f.write(soup.prettify())
print('RSS feed generated and saved as warhammer_rss_feed.xml')
# Run the function