Update elastic stack test

This commit is contained in:
2024-08-07 19:56:05 +02:00
parent 89959d29ee
commit 4756d756ac
8 changed files with 423 additions and 7 deletions

View File

@@ -0,0 +1,15 @@
# Use the python:3.9-slim image as the base
FROM python:3.9-slim
# Set the working directory
WORKDIR /app
# Copy requirements.txt on its own and install the dependencies
# (done before copying the rest of the source)
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt
# Copy the source code into /app
COPY . /app/
# Command that runs the application
CMD ["python", "main.py"]

View File

@@ -0,0 +1,114 @@
import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
# Configure the Elasticsearch connection (module-level client used by the functions in this script)
es = Elasticsearch("http://elasticsearch:9200")
def create_index():
    """
    Create the 'movies' index in Elasticsearch unless it already exists.

    The mapping declares full-text fields ('title' and 'plot' with the
    'english' analyzer; 'ethnicity', 'director', 'cast' and 'genre' with the
    'standard' analyzer), an integer 'year' and a keyword 'wiki_page'.
    A status line is printed whether or not the index had to be created.
    """
    index_name = "movies"

    # Guard clause: leave an existing index untouched.
    if es.indices.exists(index=index_name):
        print("\n[!] El índice 'movies' ya existe.")
        return

    def text_field(analyzer):
        # Build a fresh full-text field definition for the given analyzer.
        return {"type": "text", "analyzer": analyzer}

    field_definitions = {
        "title": text_field("english"),        # movie title
        "ethnicity": text_field("standard"),   # origin / ethnicity
        "director": text_field("standard"),    # director
        "cast": text_field("standard"),        # cast
        "genre": text_field("standard"),       # genre
        "plot": text_field("english"),         # plot summary
        "year": {"type": "integer"},           # release year
        "wiki_page": {"type": "keyword"},      # Wikipedia page
    }

    es.indices.create(
        index=index_name,
        mappings={"properties": field_definitions},
    )
    print("\n[+] Índice 'movies' creado.")
def load_data(csv_path="/app/wiki_movie_plots_deduped.csv", sample_size=5000):
    """
    Load a random sample of movies from a CSV file into the 'movies' index.

    Rows containing any null value are dropped first; the sample size is then
    capped at the number of rows that remain. Capping before the drop (as an
    earlier version did) makes ``sample`` fail on small files that contain
    nulls, because more rows are requested than are left.

    Args:
        csv_path: Path of the CSV file to read. It must provide the columns
            'Title', 'Origin/Ethnicity', 'Director', 'Cast', 'Genre', 'Plot',
            'Release Year' and 'Wiki Page'.
        sample_size: Maximum number of rows to index.

    Returns:
        None. Success and failure are reported on stdout; read errors and
        bulk-indexing errors are caught and printed rather than raised.
    """
    try:
        # quoting=1 corresponds to csv.QUOTE_ALL.
        frame = pd.read_csv(csv_path, quoting=1)
        # Drop incomplete rows BEFORE sizing the sample so the request can
        # never exceed the remaining population.
        frame = frame.dropna()
        frame = frame.sample(
            min(sample_size, len(frame)), random_state=42
        ).reset_index(drop=True)
    except Exception as e:
        print(f"\n[!] Error al leer el archivo CSV: {e}")
        return

    # One bulk action per row; the position in the sampled frame is the
    # document id (the index was reset above, so it runs 0..n-1).
    bulk_data = [
        {
            "_index": "movies",
            "_id": doc_id,
            "_source": {
                "title": record["Title"],
                "ethnicity": record["Origin/Ethnicity"],
                "director": record["Director"],
                "cast": record["Cast"],
                "genre": record["Genre"],
                "plot": record["Plot"],
                "year": record["Release Year"],
                "wiki_page": record["Wiki Page"],
            },
        }
        for doc_id, record in enumerate(frame.to_dict("records"))
    ]

    try:
        # Send the documents to Elasticsearch through the bulk helper.
        bulk(es, bulk_data)
        print("\n[+] Datos cargados en Elasticsearch.")
    except Exception as e:
        print(f"\n[!] Error al cargar datos en Elasticsearch: {e}")
def main():
    """
    Entry point: create the index, then load the data into it.
    """
    # Run the steps in order so the explicit mapping is in place before
    # any document is indexed.
    for step in (create_index, load_data):
        step()
if __name__ == "__main__":
    # Run the main function only when the script is executed directly
    main()

View File

@@ -0,0 +1,3 @@
pandas==2.0.1
numpy==1.24.2
elasticsearch==8.8.0

View File

@@ -0,0 +1,7 @@
Title,Origin/Ethnicity,Director,Cast,Genre,Plot,Release Year,Wiki Page
The Shawshank Redemption,American,Frank Darabont,"Tim Robbins, Morgan Freeman",Drama,"Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.",1994,https://en.wikipedia.org/wiki/The_Shawshank_Redemption
The Godfather,American,Francis Ford Coppola,"Marlon Brando, Al Pacino",Crime,"The aging patriarch of an organized crime dynasty transfers control of his clandestine empire to his reluctant son.",1972,https://en.wikipedia.org/wiki/The_Godfather
The Dark Knight,American,Christopher Nolan,"Christian Bale, Heath Ledger",Action,"When the menace known as the Joker emerges from his mysterious past, he wreaks havoc and chaos on the people of Gotham.",2008,https://en.wikipedia.org/wiki/The_Dark_Knight
Pulp Fiction,American,Quentin Tarantino,"John Travolta, Uma Thurman",Crime,"The lives of two mob hitmen, a boxer, a gangster's wife, and a pair of diner bandits intertwine in four tales of violence and redemption.",1994,https://en.wikipedia.org/wiki/Pulp_Fiction
The Lord of the Rings: The Return of the King,American,Peter Jackson,"Elijah Wood, Viggo Mortensen",Fantasy,"The final battle for Middle-earth begins. The forces of good and evil are drawn into a confrontation and the outcome will determine the fate of the world.",2003,https://en.wikipedia.org/wiki/The_Lord_of_the_Rings:_The_Return_of_the_King
Inception,American,Christopher Nolan,"Leonardo DiCaprio, Joseph Gordon-Levitt",Sci-Fi,"A thief who enters the dreams of others to steal secrets from their subconscious is given the inverse task of planting an idea into the mind of a CEO.",2010,https://en.wikipedia.org/wiki/Inception
1 Title Origin/Ethnicity Director Cast Genre Plot Release Year Wiki Page
2 The Shawshank Redemption American Frank Darabont Tim Robbins, Morgan Freeman Drama Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency. 1994 https://en.wikipedia.org/wiki/The_Shawshank_Redemption
3 The Godfather American Francis Ford Coppola Marlon Brando, Al Pacino Crime The aging patriarch of an organized crime dynasty transfers control of his clandestine empire to his reluctant son. 1972 https://en.wikipedia.org/wiki/The_Godfather
4 The Dark Knight American Christopher Nolan Christian Bale, Heath Ledger Action When the menace known as the Joker emerges from his mysterious past, he wreaks havoc and chaos on the people of Gotham. 2008 https://en.wikipedia.org/wiki/The_Dark_Knight
5 Pulp Fiction American Quentin Tarantino John Travolta, Uma Thurman Crime The lives of two mob hitmen, a boxer, a gangster's wife, and a pair of diner bandits intertwine in four tales of violence and redemption. 1994 https://en.wikipedia.org/wiki/Pulp_Fiction
6 The Lord of the Rings: The Return of the King American Peter Jackson Elijah Wood, Viggo Mortensen Fantasy The final battle for Middle-earth begins. The forces of good and evil are drawn into a confrontation and the outcome will determine the fate of the world. 2003 https://en.wikipedia.org/wiki/The_Lord_of_the_Rings:_The_Return_of_the_King
7 Inception American Christopher Nolan Leonardo DiCaprio, Joseph Gordon-Levitt Sci-Fi A thief who enters the dreams of others to steal secrets from their subconscious is given the inverse task of planting an idea into the mind of a CEO. 2010 https://en.wikipedia.org/wiki/Inception