Skip to content

Commit

Permalink
Evaluating several Spanish embedding models
Browse files Browse the repository at this point in the history
  • Loading branch information
bukosabino committed Feb 8, 2024
1 parent 6bbe0a4 commit 910caca
Show file tree
Hide file tree
Showing 44 changed files with 1,036 additions and 0 deletions.
21 changes: 21 additions & 0 deletions evaluation/embeddings_model/mteb_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from mteb import MTEB
from sentence_transformers import SentenceTransformer


# Benchmark a sentence-transformers model on the Spanish tasks of MTEB.
# https://github.com/embeddings-benchmark/mteb
#
# Usage:
#     python mteb_benchmark.py [model_name]
#
# With no argument the default `model_name` below is benchmarked.


# TODO: write the results on the Hugging Face model cards
# Sentence-transformers model names compared in this evaluation. Any of them
# (or any other model name) can be passed as the first command-line argument.
CANDIDATE_MODEL_NAMES = (
    "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn",
    "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es",
    "PlanTL-GOB-ES/roberta-base-bne",
    "hiiamsid/sentence_similarity_spanish_es",
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    "intfloat/multilingual-e5-large",
)

# Model benchmarked when none is given on the command line.
model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"


def select_model_name(argv):
    """Return the model name to benchmark for the given argument vector.

    Args:
        argv: Command-line arguments in ``sys.argv`` layout (program name
            first).

    Returns:
        ``argv[1]`` when it is present, otherwise the module-level default
        ``model_name``.
    """
    return argv[1] if len(argv) > 1 else model_name


def run_benchmark(name=model_name):
    """Run the MTEB tasks selected by ``task_langs=["es"]`` on one model.

    Args:
        name: Model name handed to ``SentenceTransformer``. It is also used
            as the sub-path of the output folder, so a name containing "/"
            produces nested directories under ``results/``.

    Returns:
        Whatever ``MTEB.run`` returns for the evaluated model.
    """
    model = SentenceTransformer(name)
    evaluation = MTEB(task_langs=["es"])
    return evaluation.run(model, output_folder=f"results/{name}")


# Guarded so that importing this module does not load a model and start a
# long benchmark run as a side effect.
if __name__ == "__main__":
    import sys

    results = run_benchmark(select_model_name(sys.argv))
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d",
"mteb_dataset_name": "AmazonReviewsClassification",
"mteb_version": "1.1.1",
"test": {
"es": {
"accuracy": 0.34144,
"accuracy_stderr": 0.016988419585117386,
"f1": 0.33636678117905106,
"f1_stderr": 0.013519205351623978,
"main_score": 0.34144
},
"evaluation_time": 100.22
},
"validation": {
"es": {
"accuracy": 0.33520000000000005,
"accuracy_stderr": 0.013966817819388924,
"f1": 0.330814637640804,
"f1_stderr": 0.010710885426829122,
"main_score": 0.33520000000000005
},
"evaluation_time": 100.33
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf",
"mteb_dataset_name": "MTOPDomainClassification",
"mteb_version": "1.1.1",
"test": {
"es": {
"accuracy": 0.7890593729152768,
"accuracy_stderr": 0.018794435810413904,
"f1": 0.7876848449116528,
"f1_stderr": 0.017456518212651397,
"main_score": 0.7890593729152768
},
"evaluation_time": 28.83
},
"validation": {
"es": {
"accuracy": 0.7865749836280289,
"accuracy_stderr": 0.02095295080769619,
"f1": 0.7891430481808399,
"f1_stderr": 0.019727057188543985,
"main_score": 0.7865749836280289
},
"evaluation_time": 19.34
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba",
"mteb_dataset_name": "MTOPIntentClassification",
"mteb_version": "1.1.1",
"test": {
"es": {
"accuracy": 0.6316877918612408,
"accuracy_stderr": 0.01618058056575705,
"f1": 0.40606514595674226,
"f1_stderr": 0.008349574016439362,
"main_score": 0.6316877918612408
},
"evaluation_time": 89.32
},
"validation": {
"es": {
"accuracy": 0.644204322200393,
"accuracy_stderr": 0.018306294524708992,
"f1": 0.4008335277417391,
"f1_stderr": 0.015864741206238393,
"main_score": 0.644204322200393
},
"evaluation_time": 80.09
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7",
"mteb_dataset_name": "MassiveIntentClassification",
"mteb_version": "1.1.1",
"test": {
"es": {
"accuracy": 0.5029926025554808,
"accuracy_stderr": 0.0127299316323427,
"f1": 0.4743998239333547,
"f1_stderr": 0.014234461765030411,
"main_score": 0.5029926025554808
},
"evaluation_time": 63.7
},
"validation": {
"es": {
"accuracy": 0.5098868666994589,
"accuracy_stderr": 0.01210714080560374,
"f1": 0.4804865677543891,
"f1_stderr": 0.013410987834272919,
"main_score": 0.5098868666994589
},
"evaluation_time": 55.74
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634",
"mteb_dataset_name": "MassiveScenarioClassification",
"mteb_version": "1.1.1",
"test": {
"es": {
"accuracy": 0.5482178883658372,
"accuracy_stderr": 0.019817617388854396,
"f1": 0.5326008604217796,
"f1_stderr": 0.01830565953868972,
"main_score": 0.5482178883658372
},
"evaluation_time": 28.95
},
"validation": {
"es": {
"accuracy": 0.536694540088539,
"accuracy_stderr": 0.028431954052779713,
"f1": 0.526995563025894,
"f1_stderr": 0.027563486339162634,
"main_score": 0.536694540088539
},
"evaluation_time": 23.03
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d",
"mteb_dataset_name": "STS17",
"mteb_version": "1.1.1",
"test": {
"es-es": {
"cos_sim": {
"pearson": 0.6571868805745334,
"spearman": 0.7074876873146115
},
"euclidean": {
"pearson": 0.6784231601234936,
"spearman": 0.7060150120314596
},
"manhattan": {
"pearson": 0.6838790384087841,
"spearman": 0.712420763445709
}
},
"evaluation_time": 3.11
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80",
"mteb_dataset_name": "STS22",
"mteb_version": "1.1.1",
"test": {
"es": {
"cos_sim": {
"pearson": 0.14297354292597228,
"spearman": 0.48236753079813854
},
"euclidean": {
"pearson": 0.28919439095575666,
"spearman": 0.4845776660733576
},
"manhattan": {
"pearson": 0.29897180519935346,
"spearman": 0.48815426704531506
}
},
"evaluation_time": 74.71
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d",
"mteb_dataset_name": "AmazonReviewsClassification",
"mteb_version": "1.1.1",
"test": {
"es": {
"accuracy": 0.28186,
"accuracy_stderr": 0.017661041871871543,
"f1": 0.2806067493281096,
"f1_stderr": 0.01775108480910862,
"main_score": 0.28186
},
"evaluation_time": 93.01
},
"validation": {
"es": {
"accuracy": 0.28406,
"accuracy_stderr": 0.014107884320478386,
"f1": 0.28246198081890056,
"f1_stderr": 0.014232525443229387,
"main_score": 0.28406
},
"evaluation_time": 91.96
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf",
"mteb_dataset_name": "MTOPDomainClassification",
"mteb_version": "1.1.1",
"test": {
"es": {
"accuracy": 0.7927284856571047,
"accuracy_stderr": 0.014499662991338199,
"f1": 0.7831712189404422,
"f1_stderr": 0.011730917631352086,
"main_score": 0.7927284856571047
},
"evaluation_time": 26.76
},
"validation": {
"es": {
"accuracy": 0.8023575638506877,
"accuracy_stderr": 0.014858102808007794,
"f1": 0.8024051617881145,
"f1_stderr": 0.01452340761132967,
"main_score": 0.8023575638506877
},
"evaluation_time": 17.76
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba",
"mteb_dataset_name": "MTOPIntentClassification",
"mteb_version": "1.1.1",
"test": {
"es": {
"accuracy": 0.5307538358905938,
"accuracy_stderr": 0.017212120729854646,
"f1": 0.3579310659827192,
"f1_stderr": 0.011572431982612487,
"main_score": 0.5307538358905938
},
"evaluation_time": 82.73
},
"validation": {
"es": {
"accuracy": 0.5364767518009168,
"accuracy_stderr": 0.0202500569378784,
"f1": 0.31238300055355966,
"f1_stderr": 0.012765541417361052,
"main_score": 0.5364767518009168
},
"evaluation_time": 74.18
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7",
"mteb_dataset_name": "MassiveIntentClassification",
"mteb_version": "1.1.1",
"test": {
"es": {
"accuracy": 0.5474781439139207,
"accuracy_stderr": 0.014695053508966507,
"f1": 0.5388416786451913,
"f1_stderr": 0.011391899035244148,
"main_score": 0.5474781439139207
},
"evaluation_time": 58.9
},
"validation": {
"es": {
"accuracy": 0.5647319232661092,
"accuracy_stderr": 0.01970785559946992,
"f1": 0.5528968514715037,
"f1_stderr": 0.011608052194429121,
"main_score": 0.5647319232661092
},
"evaluation_time": 56.98
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634",
"mteb_dataset_name": "MassiveScenarioClassification",
"mteb_version": "1.1.1",
"test": {
"es": {
"accuracy": 0.6311365164761265,
"accuracy_stderr": 0.019017462978698926,
"f1": 0.6335648828399776,
"f1_stderr": 0.01624440039840478,
"main_score": 0.6311365164761265
},
"evaluation_time": 27.45
},
"validation": {
"es": {
"accuracy": 0.6377274963108706,
"accuracy_stderr": 0.02220489522288182,
"f1": 0.6427239311554347,
"f1_stderr": 0.021055866780710124,
"main_score": 0.6377274963108706
},
"evaluation_time": 21.66
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d",
"mteb_dataset_name": "STS17",
"mteb_version": "1.1.1",
"test": {
"es-es": {
"cos_sim": {
"pearson": 0.7935023671448772,
"spearman": 0.8138426859678183
},
"euclidean": {
"pearson": 0.798022745583715,
"spearman": 0.7969494579586819
},
"manhattan": {
"pearson": 0.7984973437956621,
"spearman": 0.798509079048555
}
},
"evaluation_time": 2.68
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80",
"mteb_dataset_name": "STS22",
"mteb_version": "1.1.1",
"test": {
"es": {
"cos_sim": {
"pearson": 0.6113034593400181,
"spearman": 0.5948046751699049
},
"euclidean": {
"pearson": 0.6224913312842192,
"spearman": 0.5999849091787655
},
"manhattan": {
"pearson": 0.6249518846355359,
"spearman": 0.5997154342133152
}
},
"evaluation_time": 64.23
}
}
Loading

0 comments on commit 910caca

Please sign in to comment.