奇趣5分彩

Python实现计算AUC的三种方法总结

 更新时间:2022年07月12日 11:29:38   作者:strive_1106  
AUC(Area under curve)是机器学习中经常使用的二分类评测手段,直接含义是ROC曲线下的面积。本文总结了Python语言实现计算AUC的三种方法,感兴趣的可以学习一下

介绍

AUC(Area Under Curve)被定义为ROC曲线下与坐标轴围成的面积,显然这个面积的数值不会大于1。又由于ROC曲线一般都处于y=x这条直线的上方,所以AUC的取值范围在0.5和1之间。AUC越接近1.0,检测方法真实性越高;等于0.5时,则真实性最低,无应用价值。

auc计算方法:参考文章:Python实现计算AUC的示例代码

实现代码

import numpy as np
from sklearn.metrics import roc_auc_score
# Sample labels (1 = positive, 0 = negative) and predicted scores used by the
# print calls in this script.
y_true = [1,1,0,0,1,1,0]
y_pred = [0.8,0.7,0.5,0.5,0.5,0.5,0.3]
# Reference AUC from scikit-learn, for comparison with the hand-written versions.
print(roc_auc_score(y_true, y_pred))
# Method 1 is implemented below
# http://blog.csdn.net/lieyingkub99/article/details/81266664?utm_medium=distribute.pc_relevant.none-task-blog-title-1&spm=1001.2101.3001.4242
def cal_auc1(y_true, y_pred, n_bins=10):
    """Approximate AUC by counting correctly ordered (positive, negative) pairs.

    Scores are bucketed into ``n_bins`` equal-width bins over [0, 1]. Each
    positive sample earns one credit for every negative sample in a lower
    bin and half a credit for every negative sample sharing its bin. The
    total credit is divided by the number of (positive, negative) pairs.

    Args:
        y_true: Sequence of labels, 1 for positive and 0 for negative.
        y_pred: Sequence of scores aligned with ``y_true``; expected in [0, 1].
        n_bins: Number of equal-width score bins. Defaults to 10.

    Returns:
        The binned AUC estimate as a float.

    Raises:
        ZeroDivisionError: If ``y_true`` has no positive or no negative sample.
    """
    positive_len = sum(y_true)  # M: number of positive samples
    negative_len = len(y_true) - positive_len  # N: number of negative samples
    total_case = positive_len * negative_len  # M * N sample pairs
    pos_histogram = [0] * n_bins  # positives per score bin
    neg_histogram = [0] * n_bins  # negatives per score bin
    bin_width = 1.0 / n_bins
    last_bin = n_bins - 1

    for label, score in zip(y_true, y_pred):
        # Clamp the index: a score of exactly 1.0 would otherwise map to
        # index n_bins (IndexError), and a score below -bin_width would map
        # to a negative index that silently wraps to the last bin.
        nth_bin = min(max(int(score / bin_width), 0), last_bin)
        if label == 1:
            pos_histogram[nth_bin] += 1
        else:
            neg_histogram[nth_bin] += 1

    accumulated_neg = 0  # negatives seen in strictly lower bins
    satisfied_pair = 0
    for pos_count, neg_count in zip(pos_histogram, neg_histogram):
        # Full credit for negatives in lower bins, half credit for same-bin ties.
        satisfied_pair += pos_count * accumulated_neg + pos_count * neg_count * 0.5
        accumulated_neg += neg_count

    return satisfied_pair / float(total_case)
# Print the AUC estimate that cal_auc1 produces for the sample data.
print(cal_auc1(y_true, y_pred))
# Method 2 is implemented below
# http://blog.csdn.net/lieyingkub99/article/details/81266664?utm_medium=distribute.pc_relevant.none-task-blog-title-1&spm=1001.2101.3001.4242
def cal_auc2(y_true, y_pred, n_bins=10):
    """Approximate AUC with the rank-sum formula over binned scores.

    Samples are sorted by score in ascending order and given 1-based ranks.
    Scores are bucketed into ``n_bins`` equal-width bins over [0, 1], and
    every sample in a bin is assigned that bin's average rank, so samples
    sharing a bin are treated as ties. The AUC is then
    ``(sum of positive ranks - M * (M + 1) / 2) / (M * N)``.

    Args:
        y_true: Sequence of labels, 1 for positive and 0 for negative.
        y_pred: Sequence of scores aligned with ``y_true``; expected in [0, 1].
        n_bins: Number of equal-width score bins. Defaults to 10.

    Returns:
        The binned AUC estimate as a float.

    Raises:
        ZeroDivisionError: If ``y_true`` has no positive or no negative sample.
    """
    positive_len = sum(y_true)  # M: number of positive samples
    negative_len = len(y_true) - positive_len  # N: number of negative samples
    total_case = positive_len * negative_len  # M * N sample pairs
    bin_width = 1.0 / n_bins
    last_bin = n_bins - 1

    # Sort (score, label) pairs by score only, lowest first.
    ordered = sorted(zip(y_pred, y_true), key=lambda d: d[0])

    # Clamp the index: a score of exactly 1.0 would otherwise map to index
    # n_bins (IndexError), and a score below -bin_width would wrap around.
    bin_of = [min(max(int(score / bin_width), 0), last_bin) for score, _ in ordered]

    rank_total = [0] * n_bins  # sum of 1-based ranks falling in each bin
    rank_count = [0] * n_bins  # number of samples falling in each bin
    for rank, nth_bin in enumerate(bin_of, start=1):
        rank_total[nth_bin] += rank
        rank_count[nth_bin] += 1

    satisfied_pair = 0
    for nth_bin, (_, label) in zip(bin_of, ordered):
        if label == 1:
            # Each positive contributes the average rank of its bin.
            satisfied_pair += rank_total[nth_bin] / rank_count[nth_bin]

    return (satisfied_pair - positive_len * (positive_len + 1) / 2) / total_case
 
 
 
# Print the AUC estimate that cal_auc2 produces for the sample data.
print(cal_auc2(y_true, y_pred))
 
# Method 3: trace the ROC curve and integrate the area under it.
def cal_auc3(y_true, y_pred):
    """Compute AUC as the trapezoidal area under the ROC curve.

    Samples are sorted by score in descending order. Each distinct score is
    used as a threshold: all samples sharing that score are counted
    together, which yields one ROC point (FPR, TPR) per distinct score.
    Consecutive points, starting from (0, 0), are joined by straight lines
    and the area beneath them is summed.

    Args:
        y_true: Sequence of labels, 1 for positive; anything else is
            counted as negative.
        y_pred: Sequence of scores aligned with ``y_true``.

    Returns:
        The area under the ROC curve as a float.

    Raises:
        ZeroDivisionError: If ``y_true`` has no positive or no negative sample.
    """
    ranked = sorted(zip(y_pred, y_true), key=lambda d: d[0], reverse=True)
    pos = sum(1 for _, label in ranked if label == 1)
    neg = len(ranked) - pos
    if pos == 0 or neg == 0:
        # Same exception type that the rate divisions would raise.
        raise ZeroDivisionError("AUC is undefined without both classes")

    auc = 0.
    tp = fp = 0
    prev_fpr = prev_tpr = 0.
    idx = 0
    total = len(ranked)
    while idx < total:
        score = ranked[idx][0]
        # Consume every sample tied at this score before emitting a point,
        # so the result does not depend on the order of tied samples.
        while idx < total and ranked[idx][0] == score:
            if ranked[idx][1] == 1:
                tp += 1
            else:
                fp += 1
            idx += 1
        fpr = fp / neg
        tpr = tp / pos
        # Trapezoid between the previous and current ROC points; purely
        # vertical moves add zero area but still advance prev_tpr.
        auc += (fpr - prev_fpr) * (tpr + prev_tpr) / 2.
        prev_fpr, prev_tpr = fpr, tpr
    return auc

方法补充

下面是小编为大家找到的另外三个计算AUC的代码,会输出三种方法各自的auc,以及通过面积计算AUC时的ROC曲线。

在通过面积计算AUC的方法中,没有遍历数据的预测概率作为分类阈值,而是对[0,1]区间等分得到一系列阈值。

# AUC computation
import numpy as np
import matplotlib.pyplot as plt

# Run three independent rounds. Each round draws random scores and random
# labels, then computes the AUC with three different methods.
for e in range(3):
    print("\nRound: ", e+1)

    num = 1000
    auc1 = auc2 = auc3 = 0.

    # Prepare data: scores and labels come from separate uniform draws.
    pred_prob = list(np.random.uniform(low=0,high=1, size=[num]))
    labels = [int(prob>0.5) for prob in list(np.random.uniform(low=0,high=1, size=[num]))]
    pos_total = sum(labels)      # number of positive samples
    neg_total = num - pos_total  # number of negative samples

    # Inspect data
    # print("pred_prob:\n", pred_prob)
    # print("labels:\n", labels)

    # Method 1: sum the area under the ROC curve
    roc_point = []
    for threshold in pred_prob:
        TP = 0  # true positives at this threshold
        FP = 0  # false positives at this threshold

        # A sample is predicted positive when its score exceeds the threshold.
        for prob, label in zip(pred_prob, labels):
            if prob > threshold:
                if label > 0.5:
                    TP += 1
                else:
                    FP += 1
        roc_point.append([FP / neg_total, TP / pos_total])  # record the ROC point
    # No sample score is a threshold below every score, so add the curve's
    # end point (everything predicted positive) explicitly.
    roc_point.append([1.0, 1.0])

    # Draw the ROC curve. Sorting by (FPR, TPR) makes the first point at each
    # FPR carry the lowest TPR, which is the height of the step reaching it.
    roc_point.sort()
    roc_arr = np.array(roc_point)
    plt.plot(roc_arr[1:, 0], roc_arr[1:, 1])
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.show()

    # Sum the area of each small rectangle; the total is the AUC.
    lastx = 0.
    for x, y in roc_point:
        auc1 += (x-lastx)*y  # width times height
        lastx = x

    print("体例一 auc:", auc1)

    # Method 2: use the pairwise-ranking probability definition of AUC
    P_ind = []  # indices of positive samples
    F_ind = []  # indices of negative samples
    P_F = 0  # pairs where the positive scores higher than the negative

    # Split the sample indices by label.
    for ind, val in enumerate(labels):
        if val > 0.5:
            P_ind.append(ind)
        else:
            F_ind.append(ind)
    for Pi in P_ind:
        for Fi in F_ind:
            if pred_prob[Pi] > pred_prob[Fi]:
                P_F += 1
    auc2 = P_F/(len(P_ind)*len(F_ind))
    print("体例二 auc:", auc2)

    # Method 3: a refinement of method 2 that lowers the time complexity
    new_data = [[p, l] for p, l in zip(pred_prob, labels)]
    new_data.sort(key=lambda x:x[0])

    # Sum the ranks of the positive samples. Ranks must be 1-based because
    # the formula below subtracts M*(M+1)/2.
    rank_sum = 0
    for rank, (prob, label) in enumerate(new_data, start=1):
        if label > 0.5:
            rank_sum += rank
    auc3 = (rank_sum - len(P_ind)*(1+len(P_ind))/2) / (len(P_ind)*len(F_ind))
    print("体例三 auc:", auc3)

运行结果

到此这篇关于Python实现计算AUC的三种方法总结的文章就介绍到这了,更多相关Python计算AUC内容请搜索脚本之家以前的文章或继续浏览下面的相关文章,希望大家以后多多支持脚本之家!

相关文章

最新评论