Ejercicio web scraping webs correlativas

2023-04-12 22:36:48 +02:00
parent 57bf0e8777
commit c681de5c61
1 changed file with 20 additions and 28 deletions
--- a/dia_11/03_web_scraping_03.py
+++ b/dia_11/03_web_scraping_03.py
@@ -8,40 +8,32 @@ Con las librerias beautifulsoup4, lxml y requests
 import bs4
 import requests
 # Variables
 raiz = 'http://books.toscrape.com/catalogue/page-'
 extension = '.html'
 fin_url = True
 page = 0
 lista_titulos = []
-
+# Bucle para formar url y añadir a una lista
-def comprobar_enlaces(http, ext):
+while fin_url:
    """ Comprobar si el enlace existe """
    # Variables
    enlaces = []
    fin_url = True
    page = 0
    # Bucle para formar url y añadir a una lista
    while fin_url:
    page += 1
    page = str(page)
-        enlace = http + page + ext
+    enlace = raiz + page + extension
    resultado = requests.get(enlace)
    sopa = bs4.BeautifulSoup(resultado.text, 'lxml')
    if resultado:
            enlaces.append(enlace)
        page = int(page)
-            # BORRAR CUANDO ESTE LISTO EL RESTO DEL DOCUMENTO
+
-            break
+        # Todos los títulos
        titulos = sopa.select('.product_pod a')
        for title in titulos:
            if title.get('title') != None:
                lista_titulos.append(title.get('title'))
    else:
        fin_url = False
    return enlaces
 print(comprobar_enlaces(raiz, extension))
 # sopa = bs4.BeautifulSoup(resultado.text, 'lxml')