Comparing Different Google Pre-trained Models for Cosine Similarity¶
- As you know, word embeddings play a very important role, so we should always select a model only after doing some research. I took a few popular pre-trained models from Google's TensorFlow Hub and was curious to see whether the similarity score changes when different models are used.
- word1 = "senior Software developer"
- word2 = "Software engineer"
Model 1:¶
universal-sentence-encoder-lite 20 DIM¶
In [14]:
try:
    import elasticsearch
    from elasticsearch import Elasticsearch
    import pandas as pd
    import json
    from ast import literal_eval
    from tqdm import tqdm
    import datetime
    import os
    import sys
    import tensorflow as tf
    import tensorflow_hub as hub
    import numpy as np
    from math import sqrt
    print("Loaded . . . . . . . .")
except Exception as e:
    print("Some modules are missing: {}".format(e))

# Load universal-sentence-encoder-lite from the current working directory
# (downloaded in advance); it can also be loaded straight from TF Hub:
embed = hub.KerasLayer(os.getcwd())
#url = "https://tfhub.dev/google/universal-sentence-encoder-lite/2"
#embed = hub.KerasLayer(url)

# Embed the first phrase
tem = "Software engineer"
x = tf.constant([tem])
embeddings = embed(x)
x = np.asarray(embeddings)
x1 = x[0].tolist()

# Embed the second phrase
tem = "senior Software developer"
x = tf.constant([tem])
embeddings = embed(x)
x = np.asarray(embeddings)
x2 = x[0].tolist()

def cosineSim(a1, a2):
    # cosine similarity = dot(a1, a2) / (||a1|| * ||a2||)
    dot = 0
    suma1 = 0
    sumb1 = 0
    for i, j in zip(a1, a2):
        suma1 += i * i
        sumb1 += j * j
        dot += i * j
    return dot / (sqrt(suma1) * sqrt(sumb1))

print(cosineSim(x1, x2))
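- As a quick sanity check (not part of the original cell), the same cosine similarity can be recomputed with NumPy's vector operations, assuming x1 and x2 from the cell above are still in memory.

import numpy as np

a = np.asarray(x1)
b = np.asarray(x2)
# cosine similarity = dot product divided by the product of the L2 norms
print(float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))))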
tf2-preview/nnlm-en-dim50¶
- Collection of feed-forward neural network language token embeddings in SavedModel 2.0 format.
- We shall use the same code as above; I had already converted the phrases into vectors to save time, so this cell only computes the similarity (a small caching sketch is shown below).
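- A minimal caching sketch for saving embeddings once and reloading them instead of re-running the model; the file name vectors.json and the helper names are illustrative, not part of the original notebook.

import json

def save_vectors(path, vectors):
    # vectors: dict mapping phrase -> embedding as a list of floats
    with open(path, "w") as f:
        json.dump(vectors, f)

def load_vectors(path):
    with open(path) as f:
        return json.load(f)

# e.g. after embedding once:
# save_vectors("vectors.json", {"Software engineer": x1, "senior Software developer": x2})
# cached = load_vectors("vectors.json")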
In [17]:
try:
    import elasticsearch
    from elasticsearch import Elasticsearch
    import pandas as pd
    import json
    from ast import literal_eval
    from tqdm import tqdm
    import datetime
    import os
    import sys
    import tensorflow as tf
    import tensorflow_hub as hub
    import numpy as np
    from math import sqrt
    print("Loaded . . . . . . . .")
except Exception as e:
    print("Some modules are missing: {}".format(e))

# Load nnlm-en-dim50 directly from TF Hub
#embed = hub.KerasLayer(os.getcwd())
url = "https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1"
embed = hub.KerasLayer(url)

# Embed the first phrase
tem = "Software engineer"
x = tf.constant([tem])
embeddings = embed(x)
x = np.asarray(embeddings)
x1 = x[0].tolist()

# Embed the second phrase
tem = "senior Software developer"
x = tf.constant([tem])
embeddings = embed(x)
x = np.asarray(embeddings)
x2 = x[0].tolist()

def cosineSim(a1, a2):
    # cosine similarity = dot(a1, a2) / (||a1|| * ||a2||)
    dot = 0
    suma1 = 0
    sumb1 = 0
    for i, j in zip(a1, a2):
        suma1 += i * i
        sumb1 += j * j
        dot += i * j
    return dot / (sqrt(suma1) * sqrt(sumb1))

print(cosineSim(x1, x2))
ELMo¶
- Embeddings from a language model trained on the 1 Billion Word Benchmark.
In [20]:
try:
    import elasticsearch
    from elasticsearch import Elasticsearch
    import pandas as pd
    import json
    from ast import literal_eval
    from tqdm import tqdm
    import datetime
    import os
    import sys
    import tensorflow as tf
    import tensorflow_hub as hub
    import numpy as np
    from math import sqrt
    print("Loaded . . . . . . . .")
except Exception as e:
    print("Some modules are missing: {}".format(e))

# Load ELMo from the current working directory (downloaded in advance);
# it can also be loaded straight from TF Hub:
embed = hub.KerasLayer(os.getcwd())
#url = "https://tfhub.dev/google/elmo/3"
#embed = hub.KerasLayer(url)

# Embed the first phrase
tem = "Software engineer"
x = tf.constant([tem])
embeddings = embed(x)
x = np.asarray(embeddings)
x1 = x[0].tolist()

# Embed the second phrase
tem = "senior Software developer"
x = tf.constant([tem])
embeddings = embed(x)
x = np.asarray(embeddings)
x2 = x[0].tolist()

def cosineSim(a1, a2):
    # cosine similarity = dot(a1, a2) / (||a1|| * ||a2||)
    dot = 0
    suma1 = 0
    sumb1 = 0
    for i, j in zip(a1, a2):
        suma1 += i * i
        sumb1 += j * j
        dot += i * j
    return dot / (sqrt(suma1) * sqrt(sumb1))

print(cosineSim(x1, x2))
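- To compare the models side by side, the embedding and similarity steps can be wrapped in one helper. This is a sketch assuming each model has already been loaded as a hub.KerasLayer as in the cells above; the layer names in the usage comment are illustrative, not from the original notebook.

import numpy as np
import tensorflow as tf

def pair_similarity(embed_layer, text_a, text_b):
    # Embed both phrases with the given TF Hub layer and return their cosine similarity
    vecs = np.asarray(embed_layer(tf.constant([text_a, text_b])))
    a, b = vecs[0], vecs[1]
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# e.g. with layers loaded as shown earlier (names are hypothetical):
# for name, layer in {"use-lite": embed_use, "nnlm-en-dim50": embed_nnlm, "elmo": embed_elmo}.items():
#     print(name, pair_similarity(layer, "senior Software developer", "Software engineer"))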