# Image for the Playwright-based scraper.
#
# Build outline:
#   1. Install the system libraries Chromium needs, plus gosu.
#   2. Install the Python dependencies listed in requirements.txt.
#   3. Generate /entrypoint.sh, which starts as root, makes a mounted
#      /app/output writable, then drops to the unprivileged "scraper" user.
#   4. Create the "scraper" user (UID 1001) and copy the application code.
#   5. Download Chromium into the scraper user's Playwright cache.

# Use the official Python 3.12.7 slim image (Debian bullseye) as the base.
FROM python:3.12.7-slim-bullseye

# All relative paths below resolve against /app.
WORKDIR /app

# System dependencies: build tooling, gosu for the privilege drop, and the
# shared libraries Chromium links against.
# libappindicator3-1 is not packaged for Debian bullseye; its drop-in
# successor libayatana-appindicator3-1 is installed instead.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
        bash \
        build-essential \
        libffi-dev \
        git \
        curl \
        ca-certificates \
        wget \
        gnupg \
        gosu \
        libnss3 \
        libatk-bridge2.0-0 \
        libx11-xcb1 \
        libxcomposite1 \
        libxcursor1 \
        libxdamage1 \
        libxi6 \
        libxtst6 \
        libayatana-appindicator3-1 \
        libxrandr2 \
        xdg-utils \
        libgbm1 \
        libpango-1.0-0 \
        libasound2 \
        libpangocairo-1.0-0 \
        libxshmfence1 \
        libx11-6 \
        libatk1.0-0 \
        libgtk-3-0 \
        libdrm2 \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is reused from cache when
# only application code changes. --no-cache-dir keeps pip's download cache
# out of the image.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Generate the entrypoint script (runs as root at container start).
# printf with one argument per script line is used instead of echo with "\n"
# escapes so the result does not depend on the build shell's echo behaviour.
# The chmod is deliberately best-effort (errors silenced): the mounted volume
# may not allow permission changes, and that must not stop the container.
RUN printf '%s\n' \
        '#!/bin/bash' \
        '# Fix permissions for mounted volumes' \
        'if [ -d "/app/output" ]; then' \
        '    chmod 777 /app/output 2>/dev/null || true' \
        'fi' \
        '# Run as scraper user' \
        'exec gosu scraper "$@"' \
        > /entrypoint.sh && \
    chmod +x /entrypoint.sh

# Create the non-root user the application runs as, and hand /app
# (including the output directory) over to it.
RUN useradd -m -u 1001 scraper && \
    mkdir -p /app/output && \
    chown -R scraper:scraper /app && \
    chmod 755 /app/output

# Copy the application code, owned by scraper from the start so no extra
# chown layer is needed.
COPY --chown=scraper:scraper main.py .
COPY --chown=scraper:scraper src/ src/

# Runtime environment:
#   PYTHONUNBUFFERED           - flush stdout/stderr immediately (live logs)
#   PYTHONDONTWRITEBYTECODE    - do not write .pyc files
#   PLAYWRIGHT_BROWSERS_PATH   - where Playwright stores/looks up browsers
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PLAYWRIGHT_BROWSERS_PATH=/home/scraper/.cache/ms-playwright

# Install Chromium as the scraper user so the browser files under
# PLAYWRIGHT_BROWSERS_PATH are owned by the account that will launch them.
USER scraper
RUN playwright install chromium

# Switch back to root: the entrypoint needs root to adjust volume permissions
# and then drops privileges itself via gosu.
USER root

ENTRYPOINT ["/entrypoint.sh"]
CMD ["python", "main.py"]