prithivida committed on
Commit
f0d4aa2
·
verified ·
1 Parent(s): a32312f

sentence_transformers_support (#2)

Browse files

- Add support for Sentence Transformer (fd892c44ece765cd2eea98f34a84a52cda185d3c)
- Update README.md (2a36dcec5365886f4487325e059223f9b5c65e0b)

1_SpladePooling/config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "pooling_strategy": "max",
3
+ "activation_function": "relu",
4
+ "word_embedding_dimension": null
5
+ }
README.md CHANGED
@@ -12,9 +12,12 @@ tags:
12
  - passage-retrieval
13
  - knowledge-distillation
14
  - document encoder
 
 
 
15
  pretty_name: Independent Implementation of SPLADE++ Model with some efficiency tweaks for Industry setting.
16
- library_name: transformers
17
- pipeline_tag: fill-mask
18
  ---
19
 
20
  <center>
@@ -198,9 +201,46 @@ sparse_rep = expander.expand(
198
  ["The Manhattan Project and its atomic bomb helped bring an end to World War II. Its legacy of peaceful uses of atomic energy continues to have an impact on history and science."])
199
  ```
200
 
201
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
- ## 6c. With HuggingFace
204
 
205
  **NOTEBOOK user? Login first**
206
 
 
12
  - passage-retrieval
13
  - knowledge-distillation
14
  - document encoder
15
+ - sparse-encoder
16
+ - sparse
17
+ - splade
18
  pretty_name: Independent Implementation of SPLADE++ Model with some efficiency tweaks for Industry setting.
19
+ library_name: sentence-transformers
20
+ pipeline_tag: feature-extraction
21
  ---
22
 
23
  <center>
 
201
  ["The Manhattan Project and its atomic bomb helped bring an end to World War II. Its legacy of peaceful uses of atomic energy continues to have an impact on history and science."])
202
  ```
203
 
204
+ ## 6c. With Sentence Transformers
205
+
206
+ First install the Sentence Transformers library:
207
+
208
+ ```bash
209
+ pip install -U sentence-transformers
210
+ ```
211
+
212
+ Then you can load this model and run inference.
213
+ ```python
214
+ from sentence_transformers import SparseEncoder
215
+
216
+ # Download from the 🤗 Hub
217
+ model = SparseEncoder("prithivida/Splade_PP_en_v2")
218
+ # Run inference
219
+ sentence = [
220
+ "The Manhattan Project and its atomic bomb helped bring an end to World War II. Its legacy of peaceful uses of atomic energy continues to have an impact on history and science."
221
+ ]
222
+ embeddings = model.encode(sentence)
223
+ print(embeddings.shape)
224
+ # [1, 30522]
225
+
226
+ decoded_sentence = model.decode(embeddings[0])
227
+ print(f"Number of actual dimensions: {len(decoded_sentence)}")
228
+ decoded_sentence_rounded = [(token, round(score, 2)) for token, score in decoded_sentence]
229
+ print("SPLADE BOW rep:\n", decoded_sentence_rounded)
230
+
231
+ # Number of actual dimensions: 103
232
+ # SPLADE BOW rep:
233
+ # [('manhattan', 2.59), ('project', 2.1), ('atomic', 1.65), ('legacy', 1.62), ('bomb', 1.5), ('peaceful', 1.47), ('end', 1.42), ('helped', 1.37), ('wwii', 1.36), ('energy', 1.36), ('war', 1.29), ('1942', 1.29), ('bring', 1.21), ('impact', 1.14),
234
+ # ('help', 1.09), ('bombs', 1.05), ('ny', 0.93), ('scientist', 0.91), ('nuclear', 0.89), ('history', 0.87), ('projects', 0.87), ('mission', 0.83), ('stop', 0.77), ('wars', 0.76), ('peace', 0.76), ('ii', 0.76), ('affect', 0.76), ('power', 0.73),
235
+ # ('science', 0.72), ('bombing', 0.72), ('atom', 0.72), ('use', 0.7), ('did', 0.69), ('brought', 0.67), ('still', 0.66), ('purpose', 0.65), ('was', 0.65), ('effect', 0.59), ('scientists', 0.59), ('uses', 0.57), ('because', 0.53), ('historical', 0.48),
236
+ # ('experiment', 0.47), ('scientific', 0.47), ('safe', 0.46), ('w', 0.45), ('message', 0.44), ('##w', 0.42), ('ended', 0.41), ('hudson', 0.39), ('roosevelt', 0.38), ('were', 0.36), ('##nik', 0.35), ('continue', 0.34), ('hiroshima', 0.33), ('important', 0.33),
237
+ # ('benefit', 0.32), ('destruction', 0.31), ('used', 0.3), ('nazi', 0.3), ('destroyed', 0.29), ('story', 0.29), ('assisted', 0.27), ('close', 0.27), ('influenced', 0.25), ('world', 0.25), ('invented', 0.24), ('contribution', 0.24), ('military', 0.24), ('conflict', 0.22),
238
+ # ('1939', 0.22), ('success', 0.22), ('1940s', 0.21), ('nasa', 0.2), ('harry', 0.2), ('revolution', 0.2), ('today', 0.18), ('rescue', 0.17), ('radiation', 0.16), ('destiny', 0.16), ('last', 0.15), ('allies', 0.14), ('the', 0.14), ('created', 0.13), ('hess', 0.13), ('weapon', 0.13),
239
+ # ('started', 0.11), ('us', 0.1), ('secret', 0.1), ('campaign', 0.09), ('2', 0.08), ('cause', 0.08), ('and', 0.07), ('propaganda', 0.06), ('noah', 0.05), ('theory', 0.05), ('significance', 0.02), ('berlin', 0.01), ('fuel', 0.01), ('columbia', 0.01), ('strategy', 0.01), ('usage', 0.01), ('symbol', 0.0)]
240
+
241
+ ```
242
 
243
+ ## 6d. With HuggingFace
244
 
245
  **NOTEBOOK user? Login first**
246
 
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SparseEncoder",
3
+ "__version__": {
4
+ "sentence_transformers": "5.0.0",
5
+ "transformers": "4.50.3",
6
+ "pytorch": "2.6.0+cu124"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "dot"
14
+ }
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.sparse_encoder.models.MLMTransformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_SpladePooling",
12
+ "type": "sentence_transformers.sparse_encoder.models.SpladePooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }