Spaces:

giridhar99
/

giridhar_rgb

Sleeping

File size: 4,749 Bytes

7fc5ec5

import requests


def check(question, answer, url, apikey):
    prompt = '''I will give you a question and an answer generated through document retrieval. Please use this answer to determine if the retrieved document can solve the question.

Demonstrations:

Question: 2023年澳网女单冠军是谁

Answer:文档信息不足，因此我无法基于提供的文档回答该问题。

No, the question is not addressed by the documents.



Question: Who is the champion of Australian Open 2023 Women's Singles?

Answer: Serena Williams

Yes, the question is addressed by the documents.



Question: Where is ACL2023 held?

Answer: Location of ACL2023 has not been confirmed.

No, the question is not addressed by the documents.



Question:  2023年中国GDP是多少?

Answer: I can not answer this question。

No, the question is not addressed by the documents.



Begin to generate:

Question: {question}

Answer: {answer}

    '''
    text2 = prompt.format(question=question,answer=answer)
    return getdata(text2,url,apikey)


def getdata(text,url,API_KEY):
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": text}]
    }
    headers={"Authorization": f"Bearer {API_KEY}"}
    completion = requests.post(url, json=data, headers=headers)
    completion = completion.json()['choices'][0]['message']['content']
    return completion

import json
import tqdm, os

import argparse

if __name__ == '__main__':

    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--modelname', type=str, default='chatgpt',
        help='model name'
    )
    parser.add_argument(
        '--dataset', type=str, default='en',
        help='evaluetion dataset',
        choices=['en','zh','en_int','zh_int','en_fact','zh_fact']
    )
    parser.add_argument(
        '--api_key', type=str, default='api_key',
        help='api key of chatgpt'
    )
    parser.add_argument(
        '--url', type=str, default='https://api.openai.com/v1/completions',
        help='url of chatgpt'
    )
    parser.add_argument(
        '--temp', type=float, default=0.7,
        help='corpus id'
    )
    parser.add_argument(
        '--passage_num', type=int, default=5,
        help='number of external passages'
    )

    args = parser.parse_args()

    if 'en' in args.dataset:
        resultpath = 'result-en'
    elif 'zh' in args.dataset:
        resultpath = 'result-zh'

    evaluefile = f'{resultpath}/prediction_{args.dataset}_{args.modelname}_temp{args.temp}_noise{1.0}_passage{args.passage_num}_correct{0.0}.json'

    outputfile = f'{resultpath}/prediction_{args.dataset}_{args.modelname}_temp{args.temp}_noise{1.0}_passage{args.passage_num}_correct{0.0}_chatgpt.json'

    resultfile = f'{resultpath}/prediction_{args.dataset}_{args.modelname}_temp{args.temp}_noise{1.0}_passage{args.passage_num}_correct{0.0}_chatgptresult.json'



    results = []
    useddata = {}
    if os.path.exists(outputfile):
        with open(outputfile) as f:
            for line in f:
                data = json.loads(line)
                useddata[data['id']] = data
    


    with open(outputfile,'w',encoding='utf-8') as f:
        with open(evaluefile, 'r', encoding='utf-8') as f2:
            for line in tqdm.tqdm(f2):
                data = json.loads(line)
                if data['id'] in useddata and data['query'] == useddata[data['id']]['query'] and data['ans']  == useddata[data['id']]['ans'] :
                    results.append(useddata[data['id']])
                    f.write(json.dumps(useddata[data['id']],ensure_ascii=False)+'\n')
                    continue
                try:
                    question = data['query']
                    answer = data['prediction']
                    
                    evaluation = check(question, answer, args.url, args.api_key)
                    data['evaluation'] = evaluation
                    results.append(data)
                    f.write(json.dumps(data,ensure_ascii=False)+'\n')
                except Exception as e:
                    print(e)
                    print(question,answer)
                    continue
    
    rejecttt = 0
    tt = 0
    for i in results:
        if "not addressed" in i['evaluation']:
            rejecttt += 1
        if 0 not in i['label'] and 1 in i['label']:
            tt += 1
    print(tt/len(results))
    scores = {
        'reject_rate': rejecttt/len(results),
        'all_rate': (tt)/len(results),
        'tt':tt,
        'rejecttt':rejecttt,
        'nums': len(results),
    }
    json.dump(scores, open(resultfile, 'w', encoding='utf-8'), ensure_ascii=False, indent=4)