added Dockerfile for container build
This commit is contained in:
parent
3d44106f02
commit
eecee074e2
57
Dockerfile
Normal file
57
Dockerfile
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# Use the official Python 3.12.7 Slim image as the base
|
||||||
|
# The tag pins an exact CPython patch release on the Debian bullseye "slim"
# variant; the apt package names installed further down are resolved against
# that Debian release.
FROM python:3.12.7-slim-bullseye
|
||||||
|
|
||||||
|
# Set the working directory
|
||||||
|
# /app is created if missing and becomes the base for relative paths in later
# instructions (the COPY destination ".") and the directory the CMD starts in.
WORKDIR /app
|
||||||
|
|
||||||
|
# Install system dependencies needed for Playwright and its browsers
|
||||||
|
# Index refresh, install and index cleanup run in a single layer so a stale
# package index is never cached and the lists never end up in the image.
# --no-install-recommends keeps optional "Recommends" packages out of the image;
# the list is sorted alphabetically so future additions diff cleanly.
# NOTE(review): libappindicator3-1 may not be published for Debian bullseye —
# confirm the build resolves it (libayatana-appindicator3-1 is the usual successor).
RUN apt-get update && apt-get install -y --no-install-recommends \
    bash \
    build-essential \
    ca-certificates \
    curl \
    git \
    gnupg \
    libappindicator3-1 \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libdrm2 \
    libffi-dev \
    libgbm1 \
    libgtk-3-0 \
    libnss3 \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    libx11-6 \
    libx11-xcb1 \
    libxcomposite1 \
    libxcursor1 \
    libxdamage1 \
    libxi6 \
    libxrandr2 \
    libxshmfence1 \
    libxtst6 \
    wget \
    xdg-utils \
    && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install Playwright and required Python dependencies
|
||||||
|
# --no-cache-dir stops pip from keeping its download/wheel cache inside this
# layer, where it would only add image size.
# NOTE(review): the packages are unpinned, so a rebuild can pick up newer
# releases — consider pinning versions or installing from a requirements file.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        beautifulsoup4 \
        feedgen \
        playwright \
        pytz
|
||||||
|
|
||||||
|
# Install Playwright browser binaries
|
||||||
|
# Downloads the browser builds for the installed playwright package. No browser
# name is given, so Playwright's default set is fetched.
# The target directory is set explicitly for this command so the download lands
# in the same path that the ENV PLAYWRIGHT_BROWSERS_PATH instruction later in
# this file points at, rather than relying on the default cache location.
RUN PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright playwright install
|
||||||
|
|
||||||
|
# Copy the Python script to the container
|
||||||
|
# main.py writes its results under /app/output. Create that directory in the
# image so the script does not fail on a missing directory when no volume is
# mounted at that path. It runs before the COPY so the layer stays cached when
# only main.py changes.
RUN mkdir -p /app/output
# The destination "." resolves to the WORKDIR (/app).
COPY main.py .
|
||||||
|
|
||||||
|
# Set the environment variable to ensure Playwright works in the container
|
||||||
|
# Runtime lookup directory for Playwright's downloaded browser binaries.
# NOTE(review): this is expected to be the directory `playwright install`
# populated earlier in the build when run as root — confirm if the install
# step or the user ever changes.
ENV PLAYWRIGHT_BROWSERS_PATH="/root/.cache/ms-playwright"
|
||||||
|
|
||||||
|
# Command to run the Python script
|
||||||
|
# Exec (JSON-array) form: the interpreter is started directly rather than
# through a shell wrapper. "main.py" is resolved relative to the WORKDIR, which
# is where the COPY instruction placed it.
CMD ["python", "main.py"]
|
6
main.py
6
main.py
@ -16,7 +16,7 @@ def scrape_and_generate_rss(url):
|
|||||||
page = browser.new_page()
|
page = browser.new_page()
|
||||||
|
|
||||||
# Set a longer timeout for loading the page
|
# Set a longer timeout for loading the page
|
||||||
page.set_default_navigation_timeout(60000)
|
page.set_default_navigation_timeout(120000)
|
||||||
|
|
||||||
# Load the Warhammer Community page
|
# Load the Warhammer Community page
|
||||||
page.goto(url, wait_until="networkidle")
|
page.goto(url, wait_until="networkidle")
|
||||||
@ -98,9 +98,11 @@ def scrape_and_generate_rss(url):
|
|||||||
rss_feed = fg.rss_str(pretty=True)
|
rss_feed = fg.rss_str(pretty=True)
|
||||||
|
|
||||||
# Save the RSS feed to a file
|
# Save the RSS feed to a file
|
||||||
with open('warhammer_rss_feed.xml', 'wb') as f:
|
with open('/app/output/warhammer_rss_feed.xml', 'wb') as f:
|
||||||
f.write(rss_feed)
|
f.write(rss_feed)
|
||||||
|
|
||||||
|
with open('/app/output/page.html','w', encoding='utf-8') as f:
|
||||||
|
f.write(soup.prettify())
|
||||||
print('RSS feed generated and saved as warhammer_rss_feed.xml')
|
print('RSS feed generated and saved as warhammer_rss_feed.xml')
|
||||||
|
|
||||||
# Run the function
|
# Run the function
|
||||||
|
Loading…
Reference in New Issue
Block a user