You've already forked Curso-lenguaje-python
Add exercises 21 - Solved 2
Signed-off-by: Manuel Vergara <manuel@vergaracarmona.es>
This commit is contained in:
34
30-days-of-python/22_Web_scraping/02_web_scraping.py
Normal file
34
30-days-of-python/22_Web_scraping/02_web_scraping.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""
|
||||
01_web_scraping.py
|
||||
"""
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
|
||||
# 2. Extrae la tabla de esta URL
|
||||
# (https://archive.ics.uci.edu/ml/datasets.php)
|
||||
# y conviértela en un archivo JSON.
|
||||
|
||||
url = 'https://archive.ics.uci.edu/'
|
||||
response = requests.get(url)
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
data = {}
|
||||
|
||||
for section in soup.find_all('section', class_='rounded-box'):
|
||||
|
||||
section_name = section.h1.get_text().strip()
|
||||
section_data = {}
|
||||
|
||||
for div in section.find_all('div', class_='rounded-box'):
|
||||
key = div.find('a', {'class': 'link-hover'}).get_text().strip()
|
||||
value = div.find('p', {'class': 'truncate'}).get_text().strip()
|
||||
section_data[key] = value
|
||||
|
||||
data[section_name] = section_data
|
||||
|
||||
|
||||
with open('uci_datasets.json', 'w') as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
print("Datos guardados en uci_datasets.json")
|
||||
Reference in New Issue
Block a user