Spaces:
Running
Running
| from openai import OpenAI | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
class Embed_Eval:
    """Embedding-based precision / recall / F1 between two sets of sentences.

    Every prediction and reference sentence is embedded with the selected
    backend. Each sentence is then matched to its most similar sentence on the
    other side (cosine similarity) and the best-match scores are averaged.

    The backend is chosen by substring match on ``model``: a name containing
    ``"gpt"`` uses the OpenAI embeddings API, a name containing
    ``"pubmedbert"`` uses a local SentenceTransformer model. If both substrings
    are present the OpenAI backend takes precedence.
    """

    OPENAI_EMBED_MODEL = "text-embedding-3-large"
    PUBMEDBERT_EMBED_MODEL = "neuml/pubmedbert-base-embeddings"

    def __init__(self, model="gpt"):
        """Create an evaluator and load only the backend that ``model`` needs.

        Args:
            model: Backend selector; must contain ``"gpt"`` or ``"pubmedbert"``
                for ``get_embedding`` to work.
        """
        self.model = model
        # Build the OpenAI client only for the GPT backend so that a
        # PubMedBERT-only evaluator does not require OpenAI credentials.
        self.client = OpenAI() if "gpt" in model else None
        # Loading the SentenceTransformer is expensive; do it only on demand.
        self.BERT_embed_model = (
            SentenceTransformer(self.PUBMEDBERT_EMBED_MODEL)
            if "pubmedbert" in model
            else None
        )

    @staticmethod
    def _as_sentences(texts):
        """Return ``texts`` as a list, treating a bare string as one sentence."""
        if isinstance(texts, str):
            # Iterating a str would yield single characters, not sentences.
            return [texts]
        return list(texts)

    def get_embedding(self, text):
        """Return the embedding vector of ``text`` from the configured backend.

        Newlines are replaced by spaces before embedding.

        Args:
            text: The sentence to embed.

        Returns:
            The embedding as returned by the backend (the OpenAI response's
            ``embedding`` field, or the result of ``SentenceTransformer.encode``).

        Raises:
            ValueError: If ``self.model`` selects no known backend.
        """
        text = text.replace("\n", " ")
        if "gpt" in self.model:
            response = self.client.embeddings.create(
                input=[text], model=self.OPENAI_EMBED_MODEL
            )
            return response.data[0].embedding
        if "pubmedbert" in self.model:
            return self.BERT_embed_model.encode(text)
        # Previously this fell through and returned None, which only failed
        # later inside cosine_similarity with an unrelated error message.
        raise ValueError(
            f"Unsupported embedding model {self.model!r}: "
            "expected a name containing 'gpt' or 'pubmedbert'"
        )

    def compute(self, predictions, references):
        """Score ``predictions`` against ``references`` by embedding similarity.

        Args:
            predictions: Iterable of predicted sentences, or a single string
                (treated as one sentence).
            references: Iterable of reference sentences, or a single string
                (treated as one sentence).

        Returns:
            A dict with plain-float ``'precision'``, ``'recall'`` and
            ``'f1_score'`` entries.

        Raises:
            ValueError: If either side contains no sentences, or if the
                configured model selects no known backend.
        """
        predictions = self._as_sentences(predictions)
        references = self._as_sentences(references)
        if not predictions or not references:
            raise ValueError(
                "predictions and references must each contain at least one sentence"
            )

        ref_embeddings = [self.get_embedding(sent) for sent in references]
        pred_embeddings = [self.get_embedding(sent) for sent in predictions]

        # Rows index reference sentences, columns index predicted sentences.
        similarity_matrix = cosine_similarity(ref_embeddings, pred_embeddings)

        # Recall: how well each reference sentence is covered by its best prediction.
        recall = float(np.mean(np.max(similarity_matrix, axis=1)))
        # Precision: how well each predicted sentence is supported by its best reference.
        precision = float(np.mean(np.max(similarity_matrix, axis=0)))

        # The small epsilon guards against division by zero when both are 0.
        f1_score = 2 * (precision * recall) / (precision + recall + 1e-8)
        return {'precision': precision, 'recall': recall, 'f1_score': f1_score}
if __name__ == "__main__":
    # Smoke test for both backends. compute() iterates over its arguments, so
    # sentences are passed as lists; a bare string would be walked character
    # by character.
    gpt = Embed_Eval(model="gpt")
    pubmedbert = Embed_Eval(model="pubmedbert")
    print(gpt.compute(predictions=["hello"], references=["hi"]))
    print(pubmedbert.compute(predictions=["hello"], references=["hi"]))