File size: 4,749 Bytes
7fc5ec5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import requests


def check(question, answer, url, apikey):
    """Ask the chat model whether ``answer`` shows that ``question`` was addressed.

    The question/answer pair is substituted into the few-shot template
    below and the filled prompt is forwarded to ``getdata``.

    Args:
        question: Text substituted for ``{question}`` in the template.
        answer: Text substituted for ``{answer}`` in the template.
        url: Endpoint passed through to ``getdata``.
        apikey: API key passed through to ``getdata``.

    Returns:
        Whatever ``getdata`` returns for the filled prompt.
    """
    template = '''I will give you a question and an answer generated through document retrieval. Please use this answer to determine if the retrieved document can solve the question.

Demonstrations:

Question: 2023年澳网女单冠军是谁

Answer:文档信息不足,因此我无法基于提供的文档回答该问题。

No, the question is not addressed by the documents.



Question: Who is the champion of Australian Open 2023 Women's Singles?

Answer: Serena Williams

Yes, the question is addressed by the documents.



Question: Where is ACL2023 held?

Answer: Location of ACL2023 has not been confirmed.

No, the question is not addressed by the documents.



Question:  2023年中国GDP是多少?

Answer: I can not answer this question。

No, the question is not addressed by the documents.



Begin to generate:

Question: {question}

Answer: {answer}

    '''
    # Only the two named placeholders exist in the template, so str.format
    # leaves every other character untouched.
    filled_prompt = template.format(answer=answer, question=question)
    verdict = getdata(filled_prompt, url, apikey)
    return verdict


def getdata(text, url, API_KEY, model="gpt-3.5-turbo", timeout=60):
    """POST ``text`` as a single user message and return the model's reply.

    Args:
        text: Prompt placed in the ``content`` of the one user message.
        url: Endpoint that receives the POST request.
        API_KEY: Token sent in the ``Authorization: Bearer`` header.
        model: Value of the ``model`` field in the request body.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        KeyError, IndexError: If the JSON response does not contain
            ``choices[0].message.content``.
    """
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": text}],
    }
    headers = {"Authorization": f"Bearer {API_KEY}"}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    # Report HTTP failures (bad key, rate limit, wrong endpoint) as such
    # instead of as an opaque KeyError on 'choices' further down.
    response.raise_for_status()
    return response.json()['choices'][0]['message']['content']

import json
import tqdm, os

import argparse

def _parse_args():
    """Define this script's command-line options and parse them."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--modelname', type=str, default='chatgpt',
        help='model name'
    )
    parser.add_argument(
        '--dataset', type=str, default='en',
        help='evaluetion dataset',
        choices=['en', 'zh', 'en_int', 'zh_int', 'en_fact', 'zh_fact']
    )
    parser.add_argument(
        '--api_key', type=str, default='api_key',
        help='api key of chatgpt'
    )
    # getdata() sends a chat-style ``messages`` body, so the default must be
    # the chat-completions endpoint rather than the legacy completions one.
    parser.add_argument(
        '--url', type=str, default='https://api.openai.com/v1/chat/completions',
        help='url of chatgpt'
    )
    parser.add_argument(
        '--temp', type=float, default=0.7,
        help='temperature value that appears in the prediction file name'
    )
    parser.add_argument(
        '--passage_num', type=int, default=5,
        help='number of external passages'
    )

    return parser.parse_args()


def _load_cache(path):
    """Return ``{id: record}`` for every JSON line in ``path`` ({} if absent)."""
    cache = {}
    if os.path.exists(path):
        # The file is written as UTF-8 with ensure_ascii=False, so it must be
        # read back as UTF-8 regardless of the platform default encoding.
        with open(path, encoding='utf-8') as f:
            for line in f:
                record = json.loads(line)
                cache[record['id']] = record
    return cache


def _compute_scores(results):
    """Summarise evaluated records into the score dictionary.

    A record counts as rejected when its ``evaluation`` text contains
    "not addressed", and towards ``tt`` when its ``label`` contains 1 and
    no 0. Both rates are 0.0 for an empty list instead of dividing by zero.
    """
    total = len(results)
    rejected = sum(1 for r in results if "not addressed" in r['evaluation'])
    all_correct = sum(
        1 for r in results if 0 not in r['label'] and 1 in r['label']
    )
    return {
        'reject_rate': rejected / total if total else 0.0,
        'all_rate': all_correct / total if total else 0.0,
        'tt': all_correct,
        'rejecttt': rejected,
        'nums': total,
    }


def main():
    """Evaluate a prediction file with ``check`` and write the summary.

    Reads the JSON-lines prediction file selected by the command-line
    options, reuses evaluations already stored in the ``_chatgpt.json``
    output when ``id``, ``query`` and ``ans`` match, calls ``check`` for the
    rest, rewrites the output file, and dumps aggregate scores to the
    ``_chatgptresult.json`` file.
    """
    args = _parse_args()

    resultpath = 'result-en' if 'en' in args.dataset else 'result-zh'

    # All three files share the same stem and differ only in their suffix.
    stem = (
        f'{resultpath}/prediction_{args.dataset}_{args.modelname}'
        f'_temp{args.temp}_noise{1.0}_passage{args.passage_num}_correct{0.0}'
    )
    evaluefile = stem + '.json'
    outputfile = stem + '_chatgpt.json'
    resultfile = stem + '_chatgptresult.json'

    useddata = _load_cache(outputfile)
    results = []

    # Open the input first: if it is missing we fail before the output file
    # (which holds the cached evaluations) gets truncated by mode 'w'.
    with open(evaluefile, 'r', encoding='utf-8') as fin, \
            open(outputfile, 'w', encoding='utf-8') as fout:
        for line in tqdm.tqdm(fin):
            data = json.loads(line)
            cached = useddata.get(data['id'])
            if (cached is not None
                    and data['query'] == cached['query']
                    and data['ans'] == cached['ans']):
                results.append(cached)
                fout.write(json.dumps(cached, ensure_ascii=False) + '\n')
                continue
            try:
                data['evaluation'] = check(
                    data['query'], data['prediction'], args.url, args.api_key
                )
                results.append(data)
                fout.write(json.dumps(data, ensure_ascii=False) + '\n')
            except Exception as e:
                # Best effort: report the failure and move on to the next
                # record. Read the fields via .get() so the handler itself
                # cannot fail when one of them is what was missing.
                print(e)
                print(data.get('query'), data.get('prediction'))
                continue

    scores = _compute_scores(results)
    print(scores['all_rate'])
    with open(resultfile, 'w', encoding='utf-8') as f:
        json.dump(scores, f, ensure_ascii=False, indent=4)


if __name__ == '__main__':
    main()