You've already forked Curso-lenguaje-python
Update elastic stack test
This commit is contained in:
15
catch-all/05_infra_test/04_elastic_stack/app/Dockerfile
Normal file
15
catch-all/05_infra_test/04_elastic_stack/app/Dockerfile
Normal file
@@ -0,0 +1,15 @@
|
||||
# Base image: slim variant of Python 3.9
FROM python:3.9-slim

# Working directory for every instruction that follows
WORKDIR /app

# Copy requirements.txt on its own and install the dependencies it lists
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Copy the application source code into /app
COPY . ./

# Command that runs the application
CMD ["python", "main.py"]
|
||||
114
catch-all/05_infra_test/04_elastic_stack/app/main.py
Normal file
114
catch-all/05_infra_test/04_elastic_stack/app/main.py
Normal file
@@ -0,0 +1,114 @@
|
||||
import pandas as pd
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch.helpers import bulk
|
||||
|
||||
# Module-level Elasticsearch client, pointed at host "elasticsearch" on
# port 9200 and shared by the functions in this module.
es = Elasticsearch("http://elasticsearch:9200")
|
||||
|
||||
|
||||
def create_index():
    """
    Ensure the 'movies' index exists in Elasticsearch.

    When no index named 'movies' is present, it is created with an explicit
    mapping for the movie document fields. When the index is already there,
    only a notice is printed and nothing is modified.
    """

    index_name = "movies"

    # Guard clause: nothing to create when the index already exists.
    if es.indices.exists(index=index_name):
        print("\n[!] El índice 'movies' ya existe.")
        return

    def text_field(analyzer):
        # Mapping entry for a full-text field using the given analyzer.
        return {"type": "text", "analyzer": analyzer}

    field_mappings = {
        "properties": {
            # Free-text fields in English use the "english" analyzer;
            # the remaining text fields use the "standard" analyzer.
            "title": text_field("english"),
            "ethnicity": text_field("standard"),
            "director": text_field("standard"),
            "cast": text_field("standard"),
            "genre": text_field("standard"),
            "plot": text_field("english"),
            # Release year is stored as an integer.
            "year": {"type": "integer"},
            # Wikipedia page URL is stored as a keyword field.
            "wiki_page": {"type": "keyword"},
        }
    }

    es.indices.create(index=index_name, mappings=field_mappings)
    print("\n[+] Índice 'movies' creado.")
|
||||
|
||||
|
||||
def load_data():
    """
    Load a sample of movie records from the CSV file into Elasticsearch.

    Reads ``/app/wiki_movie_plots_deduped.csv``, drops every row that
    contains a null value, takes a reproducible random sample of at most
    5000 of the remaining rows and bulk-indexes them into the 'movies'
    index, using the row position in the sample as the document ID.

    Returns:
        None. Failures while reading/sampling the CSV or while indexing are
        reported on stdout and the function returns without raising.
    """

    try:
        # quoting=1 is the value of csv.QUOTE_ALL.
        df = pd.read_csv("/app/wiki_movie_plots_deduped.csv", quoting=1)

        # Drop incomplete rows BEFORE sizing the sample. Sizing it from the
        # raw row count could request more rows than survive dropna(), and
        # DataFrame.sample() raises ValueError in that case.
        df = df.dropna()
        sample_size = min(5000, len(df))

        # Fixed random_state keeps the sample reproducible; the index is
        # reset so it can be used as consecutive document IDs.
        df = df.sample(sample_size, random_state=42).reset_index(drop=True)

    except Exception as e:
        print(f"\n[!] Error al leer el archivo CSV: {e}")
        return

    # Build one bulk action per row, mapping CSV columns to index fields.
    bulk_data = [
        {
            "_index": "movies",  # Target index name
            "_id": i,  # Document ID: row position in the sample
            "_source": {
                "title": row["Title"],
                "ethnicity": row["Origin/Ethnicity"],
                "director": row["Director"],
                "cast": row["Cast"],
                "genre": row["Genre"],
                "plot": row["Plot"],
                "year": row["Release Year"],
                "wiki_page": row["Wiki Page"],
            },
        }
        for i, row in df.iterrows()
    ]

    try:
        # Send all actions through the bulk helper.
        bulk(es, bulk_data)
        print("\n[+] Datos cargados en Elasticsearch.")

    except Exception as e:
        print(f"\n[!] Error al cargar datos en Elasticsearch: {e}")
|
||||
|
||||
|
||||
def main():
    """
    Entry point: create the index, then load the data into it.
    """

    # Run the steps in order: the index is created before the data load.
    for step in (create_index, load_data):
        step()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the main function only when this file is executed as a script.
    main()
|
||||
@@ -0,0 +1,3 @@
|
||||
pandas==2.0.1
|
||||
numpy==1.24.2
|
||||
elasticsearch==8.8.0
|
||||
@@ -0,0 +1,7 @@
|
||||
Title,Origin/Ethnicity,Director,Cast,Genre,Plot,Release Year,Wiki Page
|
||||
The Shawshank Redemption,American,Frank Darabont,"Tim Robbins, Morgan Freeman",Drama,"Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.",1994,https://en.wikipedia.org/wiki/The_Shawshank_Redemption
|
||||
The Godfather,American,Francis Ford Coppola,"Marlon Brando, Al Pacino",Crime,"The aging patriarch of an organized crime dynasty transfers control of his clandestine empire to his reluctant son.",1972,https://en.wikipedia.org/wiki/The_Godfather
|
||||
The Dark Knight,American,Christopher Nolan,"Christian Bale, Heath Ledger",Action,"When the menace known as the Joker emerges from his mysterious past, he wreaks havoc and chaos on the people of Gotham.",2008,https://en.wikipedia.org/wiki/The_Dark_Knight
|
||||
Pulp Fiction,American,Quentin Tarantino,"John Travolta, Uma Thurman",Crime,"The lives of two mob hitmen, a boxer, a gangster's wife, and a pair of diner bandits intertwine in four tales of violence and redemption.",1994,https://en.wikipedia.org/wiki/Pulp_Fiction
|
||||
The Lord of the Rings: The Return of the King,American,Peter Jackson,"Elijah Wood, Viggo Mortensen",Fantasy,"The final battle for Middle-earth begins. The forces of good and evil are drawn into a confrontation and the outcome will determine the fate of the world.",2003,https://en.wikipedia.org/wiki/The_Lord_of_the_Rings:_The_Return_of_the_King
|
||||
Inception,American,Christopher Nolan,"Leonardo DiCaprio, Joseph Gordon-Levitt",Sci-Fi,"A thief who enters the dreams of others to steal secrets from their subconscious is given the inverse task of planting an idea into the mind of a CEO.",2010,https://en.wikipedia.org/wiki/Inception
|
||||
|
Reference in New Issue
Block a user