查看可用的评价指标
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
from datasets import list_metrics

# NOTE(review): `list_metrics` appears to be deprecated in newer `datasets`
# releases in favour of the separate `evaluate` library -- confirm against
# the installed version before relying on this snippet.

# List the evaluation metrics that are available.
metrics_list = list_metrics()

# Bare expression: shown as the cell result when run in a notebook/REPL.
len(metrics_list), metrics_list
# (41,
#  ['accuracy',
#   'bertscore',
#   'bleu',
#   'bleurt',
#   'cer',
#   'chrf',
#   'code_eval',
#   'comet',
#   'competition_math',
#   'coval',
#   'cuad',
#   'f1',
#   'frugalscore',
#   'gleu',
#   'glue',
#   'google_bleu',
#   'indic_glue',
#   'mae',
#   'mahalanobis',
#   'matthews_correlation',
#   'mauve',
#   'mean_iou',
#   'meteor',
#   'mse',
#   'pearsonr',
#   'perplexity',
#   'precision',
#   'recall',
#   'rouge',
#   'sacrebleu',
#   'sari',
#   'seqeval',
#   'spearmanr',
#   'squad',
#   'squad_v2',
#   'super_glue',
#   'ter',
#   'wer',
#   'wiki_split',
#   'xnli',
#   'xtreme_s'])
使用评价指标
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
from datasets import load_metric

# NOTE(review): `load_metric` appears to be deprecated in newer `datasets`
# releases in favour of `evaluate.load` -- confirm against the installed
# version before relying on this snippet.

# Load an evaluation metric: the 'glue' metric with the 'mrpc' configuration.
metric = load_metric('glue', 'mrpc')
print(metric.inputs_description)  # descriptive documentation for the metric

# Compute the metric on a small example.
predictions = [0, 1, 0]  # predicted labels
references = [0, 1, 1]  # actual (ground-truth) labels
final_score = metric.compute(predictions=predictions, references=references)

# Bare expression: shown as the cell result when run in a notebook/REPL.
final_score
# {'accuracy': 0.6666666666666666, 'f1': 0.6666666666666666}