A really basic Python-based web scraper.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

25 lines
879 B

import requests
import json
# Load the JSON file containing the list of websites.
# The encoding is passed explicitly: JSON text is UTF-8, whereas open()
# would otherwise decode with the platform's locale encoding.
with open('websites.json', 'r', encoding='utf-8') as file:
    websites = json.load(file)
def _html_filename_for(full_url):
    """Return a filesystem-safe ``<name>.html`` file name for *full_url*."""
    # Drop the scheme and any trailing slashes so "https://example.com/" and
    # the bare host "example.com" are saved under the same name.
    target = full_url.split("://", 1)[-1].rstrip("/")
    # Replace every character other than letters, digits, "-" and "_" with
    # "_" (one for one). Path separators and ":" can then never reach the
    # file name, and a bare host still maps to e.g. "example_com.html".
    safe = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in target)
    return f"{safe}.html"


# Function to scrape website and save HTML content to a file
def scrape_website(url, *, timeout=10):
    """Download *url* and save the response body as an HTML file.

    The page is written to the current working directory under a name
    derived from the URL (see ``_html_filename_for``). The outcome is
    reported with ``print``; nothing is returned.

    Args:
        url: Host name or full URL. ``https://`` is prepended unless the
            value already starts with ``http://`` or ``https://``.
        timeout: Value forwarded to ``requests.get(..., timeout=...)`` so an
            unresponsive server cannot block the run indefinitely.

    This is a best-effort operation: any ``Exception`` raised while
    fetching or writing is caught and reported rather than propagated, so
    one bad entry does not abort the remaining ones.
    """
    try:
        # Match the full scheme prefix; a plain startswith("http") would
        # also match hosts such as "httpbin.org" and skip the scheme.
        has_scheme = url.lower().startswith(("http://", "https://"))
        full_url = url if has_scheme else f"https://{url}"

        response = requests.get(full_url, timeout=timeout)
        if response.status_code != 200:
            print(f'Failed to scrape {url}. Status code: {response.status_code}')
            return

        with open(_html_filename_for(full_url), 'w', encoding='utf-8') as f:
            f.write(response.text)
        print(f'Successfully scraped {url}')
    except Exception as e:
        # Deliberately broad: report the failure and let the caller move on.
        print(f'Error scraping {url}: {str(e)}')
# Fetch and save every entry from the loaded list, in order.
for website in websites:
    scrape_website(url=website)