added Dockerfile for container build
This commit is contained in:
		
							
								
								
									
										57
									
								
								Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								Dockerfile
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,57 @@
 | 
				
			|||||||
 | 
					# Base image: official Python 3.12.7 "slim" variant built on Debian 11 (bullseye).
					FROM python:3.12.7-slim-bullseye

					# Relative paths in later instructions (COPY, CMD) resolve against /app.
					WORKDIR /app

					# System packages: general build/fetch tooling plus the shared libraries that
					# Playwright-managed browsers link against.
					#   * --no-install-recommends keeps optional "Recommends" packages out of the image.
					#   * update + install + list cleanup stay in ONE layer so the apt index is never
					#     cached stale and never shipped in the image.
					#   * The list is sorted alphabetically so additions/removals diff cleanly.
					#   * libayatana-appindicator3-1 is used instead of libappindicator3-1: the latter
					#     has no install candidate on Debian bullseye, which would abort this step.
					RUN apt-get update && apt-get install -y --no-install-recommends \
					    bash \
					    build-essential \
					    ca-certificates \
					    curl \
					    git \
					    gnupg \
					    libasound2 \
					    libatk-bridge2.0-0 \
					    libatk1.0-0 \
					    libayatana-appindicator3-1 \
					    libdrm2 \
					    libffi-dev \
					    libgbm1 \
					    libgtk-3-0 \
					    libnss3 \
					    libpango-1.0-0 \
					    libpangocairo-1.0-0 \
					    libx11-6 \
					    libx11-xcb1 \
					    libxcomposite1 \
					    libxcursor1 \
					    libxdamage1 \
					    libxi6 \
					    libxrandr2 \
					    libxshmfence1 \
					    libxtst6 \
					    wget \
					    xdg-utils \
					    && rm -rf /var/lib/apt/lists/*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Location of the Playwright browser binaries. Declared BEFORE `playwright install`
					# so the download step and the running container are guaranteed to use the same
					# directory, instead of relying on it matching the root user's default cache path.
					ENV PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright

					# Python dependencies of main.py. --no-cache-dir stops pip from leaving its
					# download cache inside the image layer.
					# NOTE(review): versions are unpinned, so rebuilds may pick up newer releases;
					# consider pinning them (or using a requirements file) for reproducible builds.
					RUN pip install --no-cache-dir --upgrade pip && \
					    pip install --no-cache-dir \
					    beautifulsoup4 \
					    feedgen \
					    playwright \
					    pytz

					# Download the browser binaries Playwright drives (into PLAYWRIGHT_BROWSERS_PATH).
					RUN playwright install

					# main.py writes warhammer_rss_feed.xml and page.html into /app/output; create the
					# directory so the script also works when no volume is mounted at that path.
					RUN mkdir -p /app/output

					# Copy the application last: editing main.py then only invalidates this cheap layer.
					COPY main.py .

					# Exec-form CMD so the Python process runs as PID 1 and receives stop signals directly.
					CMD ["python", "main.py"]
 | 
				
			||||||
							
								
								
									
										6
									
								
								main.py
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								main.py
									
									
									
									
									
								
							@@ -16,7 +16,7 @@ def scrape_and_generate_rss(url):
 | 
				
			|||||||
        page = browser.new_page()
 | 
					        page = browser.new_page()
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        # Set a longer timeout for loading the page
 | 
					        # Set a longer timeout for loading the page
 | 
				
			||||||
        page.set_default_navigation_timeout(60000)
 | 
					        page.set_default_navigation_timeout(120000)
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        # Load the Warhammer Community page
 | 
					        # Load the Warhammer Community page
 | 
				
			||||||
        page.goto(url, wait_until="networkidle")
 | 
					        page.goto(url, wait_until="networkidle")
 | 
				
			||||||
@@ -98,9 +98,11 @@ def scrape_and_generate_rss(url):
 | 
				
			|||||||
    rss_feed = fg.rss_str(pretty=True)
 | 
					    rss_feed = fg.rss_str(pretty=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Save the RSS feed to a file
 | 
					    # Save the RSS feed to a file
 | 
				
			||||||
    with open('warhammer_rss_feed.xml', 'wb') as f:
 | 
					    with open('/app/output/warhammer_rss_feed.xml', 'wb') as f:
 | 
				
			||||||
        f.write(rss_feed)
 | 
					        f.write(rss_feed)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    with open('/app/output/page.html','w', encoding='utf-8') as f:
 | 
				
			||||||
 | 
					        f.write(soup.prettify())
 | 
				
			||||||
    print('RSS feed generated and saved as warhammer_rss_feed.xml')
 | 
					    print('RSS feed generated and saved as warhammer_rss_feed.xml')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Run the function
 | 
					# Run the function
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user