Programa dia 11 finalizado

2023-04-14 08:07:58 +02:00
parent c47a45839e
commit d64aeec0fe
2 changed files with 51 additions and 41 deletions
--- a/dia_11/03_web_scraping_03.py
+++ b/dia_11/03_web_scraping_03.py
@@ -1,41 +0,0 @@
 """
 Web scraping básico
 Con las librerias beautifulsoup4, lxml y requests
 """
 import bs4
 import requests
 # Script state.
 # Paginated catalogue URL; '{}' is replaced by the page number.
 url_base = 'http://books.toscrape.com/catalogue/page-{}.html'
 fin_url = True
 page = 0
 lista_titulos = []
 # Walk the catalogue page by page, collecting book titles, until a request
 # comes back with an error status.
 while fin_url:
     page += 1
     # str.format() accepts the int directly; no str()/int() round trip needed.
     enlace = url_base.format(page)
     # requests applies no timeout by default, so set one explicitly to avoid
     # blocking forever on a stalled connection.
     resultado = requests.get(enlace, timeout=10)
     # A Response is truthy when its status code signals success (< 400).
     if resultado:
         # Parse only successful responses; an error page is never used.
         sopa = bs4.BeautifulSoup(resultado.text, 'lxml')
         # Anchors without a 'title' attribute are skipped.
         for enlace_libro in sopa.select('.product_pod a'):
             titulo = enlace_libro.get('title')
             if titulo is not None:
                 lista_titulos.append(titulo)
     else:
         # First failing page ends the crawl.
         fin_url = False
 print(lista_titulos)
--- a/dia_11/programa_web_scraping.py
+++ b/dia_11/programa_web_scraping.py
@@ -0,0 +1,51 @@
 """
 Web scraping 
 nivel intermedio
 Con las librerias beautifulsoup4, lxml y requests
 """
 import bs4
 import requests
 # Script state.
 # Paginated catalogue URL; '{}' is replaced by the page number.
 URL_BASE = 'http://books.toscrape.com/catalogue/page-{}.html'
 FIN_URL = True
 PAGE = 0
 # NOTE(review): lista_titulos is never read or written below; kept only so
 # the module-level name still exists. Confirm whether it can be removed.
 lista_titulos = []
 print('\nLIBROS CON 4 O 5 ESTRELLAS\n')
 # Walk the catalogue page by page, printing the books rated four or five
 # stars, until a request comes back with an error status.
 while FIN_URL:
     PAGE += 1
     ENLACE = URL_BASE.format(PAGE)
     # requests applies no timeout by default, so set one explicitly to avoid
     # blocking forever on a stalled connection.
     resultado = requests.get(ENLACE, timeout=10)
     # A Response is truthy when its status code signals success (< 400).
     if resultado:
         # Parse only successful responses; an error page is never used.
         sopa = bs4.BeautifulSoup(resultado.text, 'lxml')
         # One element per book card on the page.
         libros = sopa.select('.product_pod')
         # Show the page number and its URL before listing its books.
         print(f'\nNÚMERO DE PÁGINA: {PAGE}\nENLACE: {ENLACE}\nLIBROS:')
         for libro in libros:
             # A single selector list matches either rating class.
             if libro.select('.star-rating.Four, .star-rating.Five'):
                 # Assumes the second anchor of the card carries the 'title'
                 # attribute; raises IndexError/KeyError otherwise.
                 titulo_libro = libro.select('a')[1]['title']
                 print(f'\t- "{titulo_libro}"')
     else:
         # First failing page ends the crawl; emit a trailing blank line.
         print("")
         FIN_URL = False