A really basic Python-based web scraper.
You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
import json
import re

import requests
# Load the JSON file containing the list of websites.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding.
with open('websites.json', 'r', encoding='utf-8') as file:
    websites = json.load(file)
# Function to scrape a website and save its HTML content to a file
def scrape_website(url, timeout=10):
    """Download *url* and save the response body as an HTML file.

    A bare host such as ``example.com`` is fetched over HTTPS; values that
    already carry an ``http://`` or ``https://`` scheme are used unchanged.
    On a 200 response the body is written, UTF-8 encoded, to
    ``<sanitized url>.html`` relative to the current working directory,
    where every character other than a word character or ``-`` (dots
    included) is replaced by ``_``. Progress and failures are reported with
    ``print``; no exception propagates to the caller.

    Args:
        url: Host name or full URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None.
    """
    try:
        # Test for a real scheme rather than the bare "http" prefix, so hosts
        # like "httpbin.org" still get "https://" prepended.
        has_scheme = url.lower().startswith(("http://", "https://"))
        full_url = url if has_scheme else f"https://{url}"

        # Without a timeout a single unresponsive server stalls the whole run.
        response = requests.get(full_url, timeout=timeout)
        if response.status_code != 200:
            print(f'Failed to scrape {url}. Status code: {response.status_code}')
            return

        # "/" and ":" from a full URL are not usable in a file name, so map
        # every character that is not a word character or "-" to "_".
        filename = re.sub(r"[^\w-]", "_", url) + ".html"
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(response.text)
        print(f'Successfully scraped {url}')
    except Exception as e:
        # Deliberately broad: one bad entry must not abort the remaining sites.
        print(f'Error scraping {url}: {e}')
# Run the scraper once for every entry loaded from the website list.
for website in websites:
    scrape_website(website)
|