Machine learning + Elastic Search Get me Similar Movie Title¶
- Let's build an architecture that, given a movie title, returns similar movie titles using ELK (Elasticsearch) and a Google pre-trained model
Download Data Set https://www.kaggle.com/shivamb/netflix-shows
pre trained ML model
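- use one of the following models (the `dims` value in the index mapping below must equal the output size of the model you load)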
- https://tfhub.dev/google/tf2-preview/nnlm-es-dim50-with-normalization/1
- https://tfhub.dev/google/universal-sentence-encoder-lite/2
About Myself¶
Hello! I’m Soumil Nitin Shah, a Software and Hardware Developer based in New York City. I have completed my Bachelor's in Electronic Engineering and my double Master’s in Computer and Electrical Engineering. I develop Python-based cross-platform desktop applications, webpages, software, REST APIs, databases and much more. I have more than 2 years of experience in Python.
Website : http://soumilshah.herokuapp.com/
Youtube : https://www.youtube.com/channel/UC_eOodxvwS_H7x2uLQa-svw Currently I work as a Software Engineer at JobTarget
Step 1:¶
- define Imports
# Import everything the notebook needs in one place so that a missing
# dependency is reported once, with a readable message.
try:
    import elasticsearch
    from elasticsearch import Elasticsearch
    import pandas as pd
    import json
    from ast import literal_eval
    from tqdm import tqdm
    import datetime
    import os
    import sys
    import tensorflow as tf
    import tensorflow_hub as hub
    import numpy as np
    from elasticsearch import helpers
    print("Loaded .. . . . . . . .")
except ImportError as e:
    # The handler used to bind the exception as ``E`` while formatting ``e``,
    # which turned every missing module into a NameError.
    print("Some Modules are Missing {} ".format(e))
Step 2:¶
- read the Dataset
# Show the contents of the working directory (the CSV read below is
# opened by relative path, so it has to be here).
os.listdir()

# Load the Netflix titles CSV into a DataFrame and preview its first row.
df = pd.read_csv("netflix_titles.csv")
df.head(n=1)
We shall Perform ML on title¶
- Convert all titles into vectors
# Load the ML model.
# The current working directory is handed to hub.KerasLayer as the model
# location, so the downloaded TF-Hub model is presumably unpacked there.
model_location = os.getcwd()
embed = hub.KerasLayer(model_location)
def apply_transform(x):
    """Embed a single value with the module-level ``embed`` layer.

    ``x`` is converted with ``str`` and sent through ``embed`` as a batch of
    one; the first (only) row of the result is returned as a plain Python
    list via ``tolist()``.
    """
    batch = tf.constant([str(x)])
    vectors = np.asarray(embed(batch))
    return vectors[0].tolist()
# Embed every title and keep the vector alongside its row.
title_column = df["title"]
df["ml_vector"] = title_column.apply(apply_transform)

# Length of the first stored vector; this is the value the index mapping's
# "dims" setting has to match.
first_vector = df["ml_vector"].iloc[0]
len(first_vector)
df.head(n=2)
We Converted all Title into Vector¶
Define ELK Mappings¶
# Index definition: a single primary shard with no replicas, plus a
# dense_vector field ("ml_vector") declared with 20 dimensions.
_index_settings = {"number_of_shards": 1, "number_of_replicas": 0}
_vector_field = {"type": "dense_vector", "dims": 20}

Settings = {
    "settings": _index_settings,
    "mappings": {
        "properties": {
            "ml_vector": _vector_field,
        },
    },
}
# Connect to the local Elasticsearch node with a 600-second client timeout
# and check that it answers.
ENDPOINT = "http://localhost:9200/"
es = Elasticsearch(hosts=ENDPOINT, timeout=600)
es.ping()

# Create the index with the mapping defined in ``Settings``.
# ignore=[400, 404] asks the client not to raise on those HTTP statuses;
# the response is kept in ``my`` and displayed so the outcome is visible.
IndexName = 'netflix_ml'
my = es.indices.create(body=Settings, index=IndexName, ignore=[400, 404])
my
Transform Data¶
# Display the DataFrame's column labels.
df.columns
def _field(record, key):
    """Return ``record[key]``, or ``""`` when the key is absent or holds NaN."""
    value = record.get(key, "")
    # pandas represents empty CSV cells as float NaN, which is not valid
    # JSON; send an empty string instead, like a missing key.
    if isinstance(value, float) and value != value:
        return ""
    return value


def generator(df2, index_name='netflix_ml'):
    """Yield one Elasticsearch bulk action per record in ``df2``.

    Args:
        df2: iterable of dict-like records (anything with ``.get``), e.g.
            the result of ``DataFrame.to_dict('records')``.
        index_name: target index for every action; defaults to the index
            name used throughout this notebook.

    Yields:
        dict: a bulk action whose ``_id`` is the record's position in
        ``df2`` and whose ``_source`` carries ``title``, ``director``,
        ``description`` and ``ml_vector`` (``""`` when absent or NaN).
    """
    for c, line in enumerate(df2):
        yield {
            '_index': index_name,
            '_type': '_doc',
            '_id': c,
            '_source': {
                "title": _field(line, "title"),
                'director': _field(line, 'director'),
                'description': _field(line, 'description'),
                'ml_vector': _field(line, 'ml_vector'),
            },
        }
    # No explicit ``raise StopIteration``: since PEP 479 that surfaces as a
    # RuntimeError in the consumer. Falling off the end finishes cleanly.
How Single Record Looks Like¶
# Turn the DataFrame into a list of per-row dicts, which is what
# ``generator`` iterates over, then display the first bulk action.
df22 = df.to_dict(orient='records')
preview = generator(df22)
next(preview)
Upload¶
# Send every generated action to Elasticsearch in bulk.
try:
    res = helpers.bulk(es, generator(df22))
    print("Working")
except Exception as e:
    # Keep the cell best-effort (no re-raise) but report the failure;
    # silently passing made a failed upload look like an empty index.
    print("Bulk upload failed: {!r}".format(e))
Test¶
title = "Krish Trish and Baltiboy: Best Friends Forever"

# Embed the query title with the same helper used for the indexed titles,
# so both vectors come from one code path. The previous version embedded
# ``str(x)`` rather than ``title``, so the vector did not come from the
# title at all.
x = apply_transform(title)
x
Query¶
# Title used for the full-text ``match`` part of the query.
_query_title = "Krish Trish and Baltiboy: Best Friends Forever"

# 20-element query vector compared against the stored ``ml_vector`` field
# (presumably the embedding of the title above; confirm before reuse).
_query_vector = [
    -4.9311370849609375,
    -0.3049483299255371,
    -3.552788734436035,
    -0.737078070640564,
    1.7232768535614014,
    0.8952591419219971,
    -3.95497465133667,
    -2.081494092941284,
    -3.7464582920074463,
    3.05448317527771,
    2.427945137023926,
    2.4168589115142822,
    3.5033276081085205,
    -2.7748589515686035,
    4.356207847595215,
    -2.048246383666992,
    -4.424686908721924,
    3.495077610015869,
    4.518932819366455,
    -0.9115778207778931,
]

# script_score query: documents selected by the ``match`` clause are scored
# by the script, cosine similarity to the query vector plus 1.0. Only the
# title is returned, for at most 100 hits.
_similarity_script = {
    "source": "cosineSimilarity(params.query_vector, 'ml_vector') + 1.0",
    "params": {"query_vector": _query_vector},
}

Query = {
    "_source": ["title"],
    "size": 100,
    "query": {
        "script_score": {
            "query": {"match": {"title": _query_title}},
            "script": _similarity_script,
        },
    },
}
Similar Movies title with Cosine Sim¶
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 11,
"relation" : "eq"
},
"max_score" : 1.6070579,
"hits" : [
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "441",
"_score" : 1.6070579,
"_source" : {
"title" : "The Death and Life of Marsha P. Johnson"
}
},
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "14",
"_score" : 1.4465705,
"_source" : {
"title" : "Krish Trish and Baltiboy: Best Friends Forever"
}
},
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "341",
"_score" : 1.4409094,
"_source" : {
"title" : "Love and Shukla"
}
},
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "18",
"_score" : 1.4052283,
"_source" : {
"title" : "Krish Trish and Baltiboy: The Greatest Trick"
}
},
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "13",
"_score" : 1.2392325,
"_source" : {
"title" : "Krish Trish and Baltiboy: Battle of Wits"
}
},
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "40",
"_score" : 1.2374816,
"_source" : {
"title" : "Hell and Back"
}
},
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "15",
"_score" : 1.1443582,
"_source" : {
"title" : "Krish Trish and Baltiboy: Comics of India"
}
},
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "86",
"_score" : 1.1186142,
"_source" : {
"title" : "Cultivating the Seas: History and Future of the Full-Cycle Cultured Kindai Tuna"
}
},
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "189",
"_score" : 1.09444,
"_source" : {
"title" : "Come and Find Me"
}
},
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "306",
"_score" : 1.0660063,
"_source" : {
"title" : "Just Friends"
}
},
{
"_index" : "netflix_ml",
"_type" : "_doc",
"_id" : "16",
"_score" : 0.9835859,
"_source" : {
"title" : "Krish Trish and Baltiboy: Oversmartness Never Pays"
}
}
]
}
}
No comments:
Post a Comment