File size: 783 Bytes
207cddf
 
 
 
 
 
 
 
 
 
 
 
 
e9d1a5a
207cddf
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27

from idiomify.fetchers import fetch_epie


def main():
    """Print every fetched (idiom, context) pair, then each distinct idiom.

    Exploratory script entry point. It calls ``fetch_epie()``, prints the
    idiom and context of every record it yields, and finally prints each
    distinct idiom next to a running index.
    """
    # NOTE(review): epie is iterated twice below, so this assumes
    # fetch_epie() returns a re-iterable collection (e.g. a list) rather
    # than a one-shot iterator -- confirm against idiomify.fetchers.
    epie = fetch_epie()

    # Every record unpacks into exactly three fields; only the first one
    # (the idiom) is needed to collect the distinct idioms. A set
    # comprehension avoids building an intermediate list first.
    idioms = {idiom for idiom, _, _ in epie}

    # So, what do you want? Do you want to build idiom-masked language
    # modeling?
    for idiom, context, _tag in epie:
        print(idiom, context)

    # A set has no guaranteed iteration order, so the indices printed here
    # are only a running count, not stable identifiers for the idioms.
    for idx, idiom in enumerate(idioms):
        print(idx, idiom)

    # Open design questions:
    # - Isn't it better to just leave the idiom there, and have the model
    #   guess what meaning it has?
    # - In that case, it may be better to use a generative model?
    # - But what would happen if you let it... just guess it?
    # - The problem with non-masking is that you give the model the answer.
    # - What you should rather do is something like finding similar words.


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()