Ejercicio web scraping webs correlativas

2023-04-12 22:36:48 +02:00
parent 57bf0e8777
commit c681de5c61
1 changed file with 20 additions and 28 deletions
--- a/dia_11/03_web_scraping_03.py
+++ b/dia_11/03_web_scraping_03.py
@@ -8,40 +8,32 @@ Con las librerias beautifulsoup4, lxml y requests
 import bs4
 import requests
 # Variables
 raiz = 'http://books.toscrape.com/catalogue/page-'
 extension = '.html'
 fin_url = True
 page = 0
 lista_titulos = []
 # Bucle para formar url y añadir a una lista
 while fin_url:
-def comprobar_enlaces(http, ext):
+    page += 1
-    """ Comprobar si el enlace existe """
+    page = str(page)
-    # Variables
+    enlace = raiz + page + extension
    enlaces = []
    fin_url = True
    page = 0
-    # Bucle para formar url y añadir a una lista
+    resultado = requests.get(enlace)
-    while fin_url:
+    sopa = bs4.BeautifulSoup(resultado.text, 'lxml')
-        page += 1
+    if resultado:
-        page = str(page)
+        page = int(page)
-        enlace = http + page + ext
+        # Todos los títulos
        titulos = sopa.select('.product_pod a')
        for title in titulos:
            if title.get('title') != None:
                lista_titulos.append(title.get('title'))
-        resultado = requests.get(enlace)
+    else:
-
+        fin_url = False
        if resultado:
            enlaces.append(enlace)
            page = int(page)
            # BORRAR CUANDO ESTE LISTO EL RESTO DEL DOCUMENTO
            break
        else:
            fin_url = False
    return enlaces
 print(comprobar_enlaces(raiz, extension))
 # sopa = bs4.BeautifulSoup(resultado.text, 'lxml')