added Dockerfile for container build
This commit is contained in:
parent
3d44106f02
commit
eecee074e2
57
Dockerfile
Normal file
57
Dockerfile
Normal file
@ -0,0 +1,57 @@
|
||||
# Base image: official Python 3.12.7 slim build on Debian bullseye.
FROM python:3.12.7-slim-bullseye

# All relative paths below (COPY, CMD) resolve against /app.
WORKDIR /app

# System packages: build tooling plus the shared libraries needed for
# Playwright and its browsers. The list is sorted alphabetically so future
# diffs stay small. --no-install-recommends keeps optional packages out of the
# image, and the apt lists are removed in the same layer so they never persist.
# NOTE(review): libappindicator3-1 may not be available in the bullseye
# repositories (libayatana-appindicator3-1 is the usual successor) -- confirm
# the build resolves it before relying on this list.
RUN apt-get update && apt-get install -y --no-install-recommends \
        bash \
        build-essential \
        ca-certificates \
        curl \
        git \
        gnupg \
        libappindicator3-1 \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libdrm2 \
        libffi-dev \
        libgbm1 \
        libgtk-3-0 \
        libnss3 \
        libpango-1.0-0 \
        libpangocairo-1.0-0 \
        libx11-6 \
        libx11-xcb1 \
        libxcomposite1 \
        libxcursor1 \
        libxdamage1 \
        libxi6 \
        libxrandr2 \
        libxshmfence1 \
        libxtst6 \
        wget \
        xdg-utils \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies of main.py. --no-cache-dir stops pip from leaving its
# download cache inside the image layer.
# NOTE(review): the packages are unpinned, so rebuilds can pick up newer
# releases; pin versions (or use a requirements file) once known-good versions
# are agreed on.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        playwright \
        beautifulsoup4 \
        feedgen \
        pytz

# Declare the browser location BEFORE downloading the browsers so that the
# install step below and the running script resolve the same directory.
ENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright

# Download the Playwright browser binaries into PLAYWRIGHT_BROWSERS_PATH.
RUN playwright install

# main.py writes warhammer_rss_feed.xml and page.html into /app/output; create
# the directory so the script also works when no volume is mounted there.
RUN mkdir -p /app/output

# Copy the application last: editing main.py then invalidates only this layer.
COPY main.py .

# Run the scraper (exec form, so the Python process receives signals directly).
CMD ["python", "main.py"]
|
6
main.py
6
main.py
@ -16,7 +16,7 @@ def scrape_and_generate_rss(url):
|
||||
page = browser.new_page()
|
||||
|
||||
# Set a longer timeout for loading the page
|
||||
page.set_default_navigation_timeout(60000)
|
||||
page.set_default_navigation_timeout(120000)
|
||||
|
||||
# Load the Warhammer Community page
|
||||
page.goto(url, wait_until="networkidle")
|
||||
@ -98,9 +98,11 @@ def scrape_and_generate_rss(url):
|
||||
rss_feed = fg.rss_str(pretty=True)
|
||||
|
||||
# Save the RSS feed to a file
|
||||
with open('warhammer_rss_feed.xml', 'wb') as f:
|
||||
with open('/app/output/warhammer_rss_feed.xml', 'wb') as f:
|
||||
f.write(rss_feed)
|
||||
|
||||
with open('/app/output/page.html','w', encoding='utf-8') as f:
|
||||
f.write(soup.prettify())
|
||||
print('RSS feed generated and saved as warhammer_rss_feed.xml')
|
||||
|
||||
# Run the function
|
||||
|
Loading…
Reference in New Issue
Block a user