
import json
import re

import torch
from torch import nn
from tqdm import tqdm


def softmax_with_temperature(logits, temperature):
    """Return the softmax of ``logits`` after dividing them by ``temperature``.

    The normalisation is taken over the last dimension of ``logits``.
    """
    return nn.functional.softmax(logits / temperature, dim=-1)


def block_cosine_similarity(tensor1, tensor2, block_size=100):
    """Fill a pairwise cosine-similarity matrix tile by tile.

    Entry ``[i, j]`` of the result is the cosine similarity, taken over the
    last dimension, between ``tensor1[i]`` and ``tensor2[j]``. Both inputs are
    cut along their first dimension into chunks of at most ``block_size``
    items so that only one tile is computed at a time.

    The result is allocated with ``torch.zeros`` defaults (no explicit device
    or dtype), gradients are disabled for the whole computation, and a tqdm
    progress bar tracks the chunks of ``tensor1``.

    NOTE(review): presumably both inputs are 2-D ``(count, dim)`` tensors --
    confirm against callers.
    """
    with torch.no_grad():
        first_shape, second_shape = tensor1.size(), tensor2.size()
        n_rows, n_cols = first_shape[0], second_shape[0]
        similarity = torch.zeros(n_rows, n_cols)

        for row_start in tqdm(range(0, n_rows, block_size)):
            row_stop = row_start + block_size
            # Extra axis at position 1 so the row chunk broadcasts against the column chunk.
            row_chunk = tensor1[row_start:row_stop].unsqueeze(1)
            for col_start in range(0, n_cols, block_size):
                col_stop = col_start + block_size
                col_chunk = tensor2[col_start:col_stop].unsqueeze(0)
                similarity[row_start:row_stop, col_start:col_stop] = torch.cosine_similarity(
                    row_chunk, col_chunk, dim=-1)

        # Release cached CUDA memory once every tile has been written.
        torch.cuda.empty_cache()
        return similarity




def get_history_count(xquad_result_file_path):
    """Return the number of lines already present in a result file.

    The count is printed before it is returned.

    Args:
        xquad_result_file_path: Path of the UTF-8 text file to inspect.

    Returns:
        The number of lines in the file, or 0 when the file cannot be opened
        or read (for example because it does not exist yet) or cannot be
        decoded as UTF-8.
    """
    try:
        # The context manager closes the handle even if reading fails midway.
        with open(xquad_result_file_path, 'r', encoding='utf-8') as xquad_result_file:
            # Count while streaming instead of loading every line into a list.
            done_count = sum(1 for _ in xquad_result_file)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError) or a malformed path string. Unlike a bare
        # ``except``, this lets KeyboardInterrupt and SystemExit propagate.
        done_count = 0
    print(done_count)
    return done_count


def kl_divergence(p_log_probs, q_log_probs):
    """Return the KL divergence KL(q || p) between two probability tensors.

    Despite the parameter names, both arguments must hold plain probabilities,
    not log-probabilities: the logarithm of ``p_log_probs`` is taken here, and
    ``q_log_probs`` is handed to ``kl_div`` as a target with the default
    ``log_target=False``.

    Args:
        p_log_probs: Probabilities of distribution ``p``. Zeros here lead to a
            non-finite result because their logarithm is ``-inf``.
        q_log_probs: Probabilities of distribution ``q``; same shape as
            ``p_log_probs``.

    Returns:
        A scalar tensor equal to ``sum(q * (log q - log p))`` divided by the
        size of the first dimension (``reduction='batchmean'``). It is
        non-negative when both inputs are normalised distributions.

    Raises:
        AssertionError: If the two tensors differ in shape.
    """
    # Both tensors must have the same shape.
    assert p_log_probs.shape == q_log_probs.shape, "张量的形状必须相同"

    # F.kl_div(input, target) takes ``input`` in log-space and already returns
    # the divergence itself, so the result must not be negated.
    return torch.nn.functional.kl_div(torch.log(p_log_probs), q_log_probs, reduction='batchmean')


