File size: 8,433 Bytes
7934b29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
@article{devlin2018bert,
  title   = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  author  = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  journal = {arXiv preprint arXiv:1810.04805},
  year    = {2018}
}

@article{shoeybi2019megatron,
  title   = {{Megatron-LM}: Training Multi-Billion Parameter Language Models Using Model Parallelism},
  author  = {Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan},
  journal = {arXiv preprint arXiv:1909.08053},
  year    = {2019}
}

@inproceedings{maas2011,
  author    = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},
  title     = {Learning Word Vectors for Sentiment Analysis},
  booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
  month     = jun,
  year      = {2011},
  address   = {Portland, Oregon, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {142--150},
  url       = {http://www.aclweb.org/anthology/P11-1015}
}

@inproceedings{socher2013,
  author    = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning, Christopher D. and Ng, Andrew and Potts, Christopher},
  title     = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
  booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
  month     = oct,
  year      = {2013},
  address   = {Seattle, Washington, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1631--1642},
  url       = {https://www.aclweb.org/anthology/D13-1170}
}

@article{lim2018chemical,
  author    = {Lim, Sangrak and Kang, Jaewoo},
  title     = {Chemical--gene relation extraction using recursive neural network},
  journal   = {Database},
  volume    = {2018},
  publisher = {Oxford Academic},
  year      = {2018}
}

@inproceedings{li2007scalable,
  author    = {Li, Jingyang and Sun, Maosong},
  title     = {Scalable term selection for text categorization},
  booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)},
  year      = {2007},
  pages     = {774--782}
}

@misc{lee2019biobert,
  title         = {{BioBERT}: A Pre-trained Biomedical Language Representation Model for Biomedical Text Mining},
  author        = {Lee, Jinhyuk and Yoon, Wonjin and Kim, Sungdong and Kim, Donghyeon and Kim, Sunkyu and So, Chan Ho and Kang, Jaewoo},
  year          = {2019},
  eprint        = {1901.08746},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}

@misc{shin2020biomegatron,
  title         = {{BioMegatron}: Larger Biomedical Domain Language Model},
  author        = {Shin, Hoo-Chang and Zhang, Yang and Bakhturina, Evelina and Puri, Raul and Patwary, Mostofa and Shoeybi, Mohammad and Mani, Raghav},
  year          = {2020},
  eprint        = {2010.06060},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}

@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is all you need},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2017},
  pages     = {6000--6010}
}

@article{sennrich2015neural,
  author  = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  title   = {Neural machine translation of rare words with subword units},
  journal = {arXiv preprint arXiv:1508.07909},
  year    = {2015}
}

@article{provilkov2019bpe,
  title   = {{BPE-Dropout}: Simple and Effective Subword Regularization},
  author  = {Provilkov, Ivan and Emelianenko, Dmitrii and Voita, Elena},
  journal = {arXiv preprint arXiv:1910.13267},
  year    = {2019}
}

@article{post2018call,
  title   = {A Call for Clarity in Reporting {BLEU} Scores},
  author  = {Post, Matt},
  journal = {arXiv preprint arXiv:1804.08771},
  year    = {2018}
}

@misc{zhang2021sgdqa,
  title         = {{SGD-QA}: Fast Schema-Guided Dialogue State Tracking for Unseen Services},
  author        = {Zhang, Yang and Noroozi, Vahid and Bakhturina, Evelina and Ginsburg, Boris},
  year          = {2021},
  eprint        = {2105.08049},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}

@article{zhang2019neural,
  title   = {Neural Models of Text Normalization for Speech Applications},
  author  = {Zhang, Hao and Sproat, R. and Ng, Axel H. and Stahlberg, Felix and Peng, Xiaochang and Gorman, Kyle and Roark, B.},
  journal = {Computational Linguistics},
  year    = {2019},
  pages   = {293--338}
}

@misc{liu2021selfalignment,
  author        = {Liu, Fangyu and Shareghi, Ehsan and Meng, Zaiqiao and Basaldella, Marco and Collier, Nigel},
  title         = {Self-Alignment Pretraining for Biomedical Entity Representations},
  year          = {2021},
  eprint        = {2010.11784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}

@article{gulcehre2015using,
  author  = {Gulcehre, Caglar and Firat, Orhan and Xu, Kelvin and Cho, Kyunghyun and Barrault, Loic and Lin, Huei-Chi and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
  title   = {On using monolingual corpora in neural machine translation},
  journal = {arXiv preprint arXiv:1503.03535},
  year    = {2015}
}

@article{yee2019simple,
  author  = {Yee, Kyra and Ng, Nathan and Dauphin, Yann N and Auli, Michael},
  title   = {Simple and effective noisy channel modeling for neural machine translation},
  journal = {arXiv preprint arXiv:1908.05731},
  year    = {2019}
}

@inproceedings{koehnetal2007moses,
  title     = {{Moses}: Open Source Toolkit for Statistical Machine Translation},
  author    = {Koehn, Philipp and Hoang, Hieu and Birch, Alexandra and Callison-Burch, Chris and Federico, Marcello and Bertoldi, Nicola and Cowan, Brooke and Shen, Wade and Moran, Christine and Zens, Richard and Dyer, Chris and Bojar, Ond{\v{r}}ej and Constantin, Alexandra and Herbst, Evan},
  booktitle = {Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics Companion Volume Proceedings of the Demo and Poster Sessions},
  month     = jun,
  year      = {2007},
  address   = {Prague, Czech Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/P07-2045},
  pages     = {177--180}
}

@inproceedings{sunkara20_interspeech,
  author    = {Sunkara, Monica and Ronanki, Srikanth and Bekal, Dhanush and Bodapati, Sravan and Kirchhoff, Katrin},
  title     = {Multimodal Semi-Supervised Learning Framework for Punctuation Prediction in Conversational Speech},
  year      = {2020},
  booktitle = {Proc. Interspeech 2020},
  pages     = {4911--4915},
  doi       = {10.21437/Interspeech.2020-3074}
}

@article{chen2019bert,
  title   = {{BERT} for Joint Intent Classification and Slot Filling},
  author  = {Chen, Qian and Zhuo, Zhu and Wang, Wen},
  journal = {arXiv preprint arXiv:1902.10909},
  year    = {2019}
}

@article{borgeaud2021improving,
  title   = {Improving language models by retrieving from trillions of tokens},
  author  = {Borgeaud, Sebastian and Mensch, Arthur and Hoffmann, Jordan and Cai, Trevor and Rutherford, Eliza and Millican, Katie and van den Driessche, George and Lespiau, Jean-Baptiste and Damoc, Bogdan and Clark, Aidan and others},
  journal = {arXiv preprint arXiv:2112.04426},
  year    = {2021}
}

@article{su2021roformer,
  title   = {{RoFormer}: Enhanced Transformer with Rotary Position Embedding},
  author  = {Su, Jianlin and Lu, Yu and Pan, Shengfeng and Wen, Bo and Liu, Yunfeng},
  journal = {arXiv preprint arXiv:2104.09864},
  year    = {2021}
}

@article{reimers2019sentence,
  title   = {{Sentence-BERT}: Sentence Embeddings Using {Siamese} {BERT}-Networks},
  author  = {Reimers, Nils and Gurevych, Iryna},
  journal = {arXiv preprint arXiv:1908.10084},
  year    = {2019}
}

@article{yang2022tensor,
  title   = {{Tensor Programs V}: Tuning Large Neural Networks via Zero-Shot Hyperparameter Transfer},
  author  = {Yang, Greg and Hu, Edward J and Babuschkin, Igor and Sidor, Szymon and Liu, Xiaodong and Farhi, David and Ryder, Nick and Pachocki, Jakub and Chen, Weizhu and Gao, Jianfeng},
  journal = {arXiv preprint arXiv:2203.03466},
  year    = {2022}
}

@article{jegou2022faiss,
  title   = {{Faiss}: Similarity Search and Clustering of Dense Vectors Library},
  author  = {J{\'e}gou, Herv{\'e} and Douze, Matthijs and Johnson, Jeff and Hosseini, Lucas and Deng, Chengqi},
  journal = {Astrophysics Source Code Library},
  pages   = {ascl--2210},
  year    = {2022}
}