- Enhanced Dockerfile with security improvements and cleaner dependency management - Fixed requirements.txt to use correct package names - Updated gitignore to properly exclude output directory and contents 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
81 lines
1.8 KiB
Docker
81 lines
1.8 KiB
Docker
# Base image: the official Python 3.12.7 image, slim variant on Debian bullseye.
FROM python:3.12.7-slim-bullseye

# Relative paths in the instructions that follow (COPY destinations, the
# default CMD) are resolved against /app.
WORKDIR /app

# Install OS-level packages in a single layer:
#   - general tooling: bash, build-essential, ca-certificates, curl, git,
#     gnupg, libffi-dev, wget (presumably needed to build/fetch Python
#     dependencies -- confirm and trim if unused)
#   - gosu: used by /entrypoint.sh to drop from root to the scraper user
#   - the remaining lib* packages and xdg-utils: shared libraries for the
#     Chromium build that Playwright downloads
#
# --no-install-recommends keeps optional "Recommends" packages out of the
# image; the apt lists are removed in the same layer so they never persist.
# Packages are listed alphabetically to keep diffs small.
#
# NOTE(review): the original list named libappindicator3-1, which to my
# knowledge is not packaged for Debian bullseye (it would make this step fail
# with "no installation candidate"). Its maintained successor,
# libayatana-appindicator3-1, is installed instead -- confirm, or drop it if
# Chromium does not need it.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        bash \
        build-essential \
        ca-certificates \
        curl \
        git \
        gnupg \
        gosu \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libayatana-appindicator3-1 \
        libdrm2 \
        libffi-dev \
        libgbm1 \
        libgtk-3-0 \
        libnss3 \
        libpango-1.0-0 \
        libpangocairo-1.0-0 \
        libx11-6 \
        libx11-xcb1 \
        libxcomposite1 \
        libxcursor1 \
        libxdamage1 \
        libxi6 \
        libxrandr2 \
        libxshmfence1 \
        libxtst6 \
        wget \
        xdg-utils \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest before the application source so this
# layer is reused from cache until requirements.txt itself changes.
COPY requirements.txt .

# Upgrade pip, then install the project's Python dependencies.
# --no-cache-dir stops pip from leaving its download/wheel cache in the layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Generate /entrypoint.sh (as root). At container start the script:
#   1. makes /app/output writable if that directory exists (it may be a
#      bind-mounted volume); failures are ignored on purpose;
#   2. replaces itself, via exec, with the requested command running as the
#      "scraper" user through gosu.
#
# printf '%s\n' writes one script line per argument. This avoids relying on
# the shell's echo expanding "\n" (which is shell-dependent), and because each
# script line is a quoted argument, lines that begin with '#' are not mistaken
# for Dockerfile comments and dropped from the generated script.
#
# NOTE(review): chmod 777 makes the output directory world-writable; consider
# chown-ing it to scraper instead if changing ownership of a host bind mount
# is acceptable.
RUN printf '%s\n' \
        '#!/bin/bash' \
        '# Fix permissions for mounted volumes' \
        'if [ -d "/app/output" ]; then' \
        '    chmod 777 /app/output 2>/dev/null || true' \
        'fi' \
        '# Run as scraper user' \
        'exec gosu scraper "$@"' \
        > /entrypoint.sh \
    && chmod +x /entrypoint.sh

# Add the unprivileged "scraper" account (UID 1001, with a home directory),
# make sure the output directory exists, hand ownership of everything under
# /app to that account, and set the output directory's mode to 755.
RUN useradd --create-home --uid 1001 scraper \
    && mkdir --parents /app/output \
    && chown --recursive scraper:scraper /app \
    && chmod 755 /app/output

# Add the application script, owned by the scraper user.
# --chown sets ownership during the copy itself, so no follow-up
# `RUN chown` (and the extra image layer it creates) is needed.
COPY --chown=scraper:scraper main.py .

# Runtime environment:
#   PLAYWRIGHT_BROWSERS_PATH  directory where Playwright installs and looks up
#                             browser binaries (inside the scraper user's home)
#   PYTHONDONTWRITEBYTECODE   stop Python from writing .pyc files
#   PYTHONUNBUFFERED          unbuffered stdout/stderr, so output appears in
#                             the container logs immediately
ENV PLAYWRIGHT_BROWSERS_PATH=/home/scraper/.cache/ms-playwright \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Download Chromium while running as the scraper user, so the browser files
# under PLAYWRIGHT_BROWSERS_PATH (in that user's home) are owned by scraper.
USER scraper
RUN playwright install chromium

# Switch back to root on purpose: /entrypoint.sh starts as root to adjust
# permissions on /app/output and then drops to scraper via gosu, so the image
# must not end with USER scraper.
USER root

# The entrypoint wrapper receives the command as its arguments and runs it as
# the scraper user; CMD supplies the default command and can be overridden at
# `docker run`.
ENTRYPOINT ["/entrypoint.sh"]
CMD ["python", "main.py"]