added Dockerfile for container build
This commit is contained in:
		
							
								
								
									
										57
									
								
								Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								Dockerfile
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,57 @@
 | 
			
		||||
# Use the official Python 3.12.7 Slim image as the base
 | 
			
		||||
# The tag pins the interpreter to a patch release and the OS to Debian 11
# ("bullseye"); the apt package names used later must exist in that release.
FROM python:3.12.7-slim-bullseye
 | 
			
		||||
 | 
			
		||||
# Set the working directory
 | 
			
		||||
# Relative paths in later instructions (the COPY destination `.` and the
# `main.py` argument of CMD) resolve against this directory.
WORKDIR /app
 | 
			
		||||
 | 
			
		||||
# Install system dependencies needed for Playwright and its browsers
 | 
			
		||||
# Package list is kept alphabetical so additions and removals diff cleanly.
# --no-install-recommends limits the layer to the packages named here plus
# their hard dependencies; the apt lists are removed in the same layer so they
# never reach the image.
# libayatana-appindicator3-1 replaces libappindicator3-1, which is not packaged
# for Debian 11 (bullseye), the release this base image is built on.
RUN apt-get update && apt-get install -y --no-install-recommends \
    bash \
    build-essential \
    ca-certificates \
    curl \
    git \
    gnupg \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libayatana-appindicator3-1 \
    libdrm2 \
    libffi-dev \
    libgbm1 \
    libgtk-3-0 \
    libnss3 \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    libx11-6 \
    libx11-xcb1 \
    libxcomposite1 \
    libxcursor1 \
    libxdamage1 \
    libxi6 \
    libxrandr2 \
    libxshmfence1 \
    libxtst6 \
    wget \
    xdg-utils \
    && rm -rf /var/lib/apt/lists/*
 | 
			
		||||
 | 
			
		||||
# Install Playwright and required Python dependencies
 | 
			
		||||
# --no-cache-dir stops pip from leaving its download/wheel cache in the layer.
# NOTE(review): the packages are unpinned, so a rebuild may pick up newer
# releases; pin versions once the known-good set is confirmed.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        beautifulsoup4 \
        feedgen \
        playwright \
        pytz
 | 
			
		||||
 | 
			
		||||
# Install Playwright browser binaries
 | 
			
		||||
# No browser name is passed, so Playwright downloads its full default set of
# browsers at build time.
# NOTE(review): if main.py drives only one browser, naming it here would
# shrink the image - confirm against the script before changing.
RUN playwright install
 | 
			
		||||
 | 
			
		||||
# Copy the Python script to the container
 | 
			
		||||
COPY main.py .

# main.py writes its results (RSS feed and page dump) under /app/output.
# Create the directory in the image so the script also works when no volume
# is mounted at that path.
RUN mkdir -p /app/output
 | 
			
		||||
 | 
			
		||||
# Set the environment variable to ensure Playwright works in the container
 | 
			
		||||
# This is the location Playwright uses by default for the root user on Linux,
# i.e. where the earlier `RUN playwright install` step placed the browsers;
# setting it explicitly makes the runtime lookup independent of $HOME.
# NOTE(review): assumes the container keeps running as root - confirm before
# adding a USER instruction.
ENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright
 | 
			
		||||
 | 
			
		||||
# Command to run the Python script
 | 
			
		||||
# Exec (JSON-array) form: python is started directly rather than through a
# shell; main.py is found via the working directory set by WORKDIR.
CMD ["python", "main.py"]
 | 
			
		||||
							
								
								
									
										6
									
								
								main.py
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								main.py
									
									
									
									
									
								
							@@ -16,7 +16,7 @@ def scrape_and_generate_rss(url):
 | 
			
		||||
        page = browser.new_page()
 | 
			
		||||
        
 | 
			
		||||
        # Set a longer timeout for loading the page
 | 
			
		||||
        page.set_default_navigation_timeout(60000)
 | 
			
		||||
        page.set_default_navigation_timeout(120000)
 | 
			
		||||
        
 | 
			
		||||
        # Load the Warhammer Community page
 | 
			
		||||
        page.goto(url, wait_until="networkidle")
 | 
			
		||||
@@ -98,9 +98,11 @@ def scrape_and_generate_rss(url):
 | 
			
		||||
    rss_feed = fg.rss_str(pretty=True)
 | 
			
		||||
 | 
			
		||||
    # Save the RSS feed to a file
 | 
			
		||||
    with open('warhammer_rss_feed.xml', 'wb') as f:
 | 
			
		||||
    with open('/app/output/warhammer_rss_feed.xml', 'wb') as f:
 | 
			
		||||
        f.write(rss_feed)
 | 
			
		||||
 | 
			
		||||
    with open('/app/output/page.html','w', encoding='utf-8') as f:
 | 
			
		||||
        f.write(soup.prettify())
 | 
			
		||||
    print('RSS feed generated and saved as warhammer_rss_feed.xml')
 | 
			
		||||
 | 
			
		||||
# Run the function
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user