Improve Docker configuration and gitignore
- Enhanced Dockerfile with security improvements and cleaner dependency management
- Fixed requirements.txt to use correct package names
- Updated gitignore to properly exclude output directory and contents

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
70540bacf0
commit
e0647325ff
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,3 +1,4 @@
|
|||||||
*.xml
|
*.xml
|
||||||
.python-version
|
.python-version
|
||||||
output/
|
output/
|
||||||
|
output/*
|
24
Dockerfile
24
Dockerfile
@ -4,7 +4,7 @@ FROM python:3.12.7-slim-bullseye
|
|||||||
# Set the working directory
|
# Set the working directory
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install system dependencies needed for Playwright and its browsers
|
# Install system dependencies needed for Playwright and gosu
|
||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
bash \
|
bash \
|
||||||
build-essential \
|
build-essential \
|
||||||
@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y \
|
|||||||
ca-certificates \
|
ca-certificates \
|
||||||
wget \
|
wget \
|
||||||
gnupg \
|
gnupg \
|
||||||
|
gosu \
|
||||||
libnss3 \
|
libnss3 \
|
||||||
libatk-bridge2.0-0 \
|
libatk-bridge2.0-0 \
|
||||||
libx11-xcb1 \
|
libx11-xcb1 \
|
||||||
@ -36,16 +37,11 @@ RUN apt-get update && apt-get install -y \
|
|||||||
libdrm2 \
|
libdrm2 \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install Playwright and required Python dependencies
|
# Copy requirements and install Python dependencies
|
||||||
|
COPY requirements.txt .
|
||||||
RUN pip install --upgrade pip && \
|
RUN pip install --upgrade pip && \
|
||||||
pip install \
|
pip install -r requirements.txt
|
||||||
playwright \
|
|
||||||
beautifulsoup4 \
|
|
||||||
feedgen \
|
|
||||||
pytz
|
|
||||||
|
|
||||||
# Install only Chromium (faster than all browsers)
|
|
||||||
RUN playwright install chromium
|
|
||||||
|
|
||||||
# Create an entrypoint script to handle permissions (as root)
|
# Create an entrypoint script to handle permissions (as root)
|
||||||
RUN echo '#!/bin/bash\n\
|
RUN echo '#!/bin/bash\n\
|
||||||
@ -56,8 +52,6 @@ fi\n\
|
|||||||
# Run as scraper user\n\
|
# Run as scraper user\n\
|
||||||
exec gosu scraper "$@"' > /entrypoint.sh && chmod +x /entrypoint.sh
|
exec gosu scraper "$@"' > /entrypoint.sh && chmod +x /entrypoint.sh
|
||||||
|
|
||||||
# Install gosu for user switching
|
|
||||||
RUN apt-get update && apt-get install -y gosu && rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Create non-root user for security
|
# Create non-root user for security
|
||||||
RUN useradd -m -u 1001 scraper && \
|
RUN useradd -m -u 1001 scraper && \
|
||||||
@ -69,13 +63,15 @@ RUN useradd -m -u 1001 scraper && \
|
|||||||
COPY main.py .
|
COPY main.py .
|
||||||
RUN chown scraper:scraper main.py
|
RUN chown scraper:scraper main.py
|
||||||
|
|
||||||
# Set the environment variable to ensure Playwright works in the container
|
# Set environment variables
|
||||||
ENV PLAYWRIGHT_BROWSERS_PATH=/home/scraper/.cache/ms-playwright
|
ENV PYTHONUNBUFFERED=1 \
|
||||||
|
PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
PLAYWRIGHT_BROWSERS_PATH=/home/scraper/.cache/ms-playwright
|
||||||
|
|
||||||
# Don't switch user here - entrypoint will handle it
|
# Don't switch user here - entrypoint will handle it
|
||||||
# USER scraper
|
# USER scraper
|
||||||
|
|
||||||
# Install Chromium for the scraper user (only what we need)
|
# Install Chromium for the scraper user
|
||||||
USER scraper
|
USER scraper
|
||||||
RUN playwright install chromium
|
RUN playwright install chromium
|
||||||
USER root
|
USER root
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
requests
|
requests
|
||||||
bs4
|
beautifulsoup4
|
||||||
feedgen
|
feedgen
|
||||||
playwright
|
playwright
|
||||||
pytz
|
pytz
|
Loading…
x
Reference in New Issue
Block a user