A really basic Python-based web scraper.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

25 lines
879 B

import json
import re

import requests
  3. # Load the JSON file containing the list of websites
  4. with open('websites.json', 'r') as file:
  5. websites = json.load(file)
  6. # Function to scrape website and save HTML content to a file
  7. def scrape_website(url):
  8. try:
  9. full_url = f"https://{url}" if not url.startswith("http") else url
  10. response = requests.get(full_url)
  11. if response.status_code == 200:
  12. html_content = response.text
  13. with open(f'{url.replace(".", "_")}.html', 'w', encoding='utf-8') as f:
  14. f.write(html_content)
  15. print(f'Successfully scraped {url}')
  16. else:
  17. print(f'Failed to scrape {url}. Status code: {response.status_code}')
  18. except Exception as e:
  19. print(f'Error scraping {url}: {str(e)}')
  20. # Scrape each website in the list
  21. for website in websites:
  22. scrape_website(website)