kovacsvi committed on
Commit
99646de
·
1 Parent(s): 8d3cc6e

use max length of 64 with padding

Browse files
interfaces/cap_media2.py CHANGED
@@ -47,9 +47,9 @@ def predict(text, model_id, tokenizer_id):
47
  # Tokenize input
48
  inputs = tokenizer(
49
  text,
50
- max_length=256,
51
  truncation=True,
52
- padding="do_not_pad",
53
  return_tensors="pt"
54
  )
55
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
47
  # Tokenize input
48
  inputs = tokenizer(
49
  text,
50
+ max_length=64,
51
  truncation=True,
52
+ padding=True,
53
  return_tensors="pt"
54
  )
55
  inputs = {k: v.to(device) for k, v in inputs.items()}
interfaces/cap_media_demo.py CHANGED
@@ -47,9 +47,9 @@ def predict(text, model_id, tokenizer_id):
47
  # Tokenize input
48
  inputs = tokenizer(
49
  text,
50
- max_length=256,
51
  truncation=True,
52
- padding="do_not_pad",
53
  return_tensors="pt"
54
  )
55
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
47
  # Tokenize input
48
  inputs = tokenizer(
49
  text,
50
+ max_length=64,
51
  truncation=True,
52
+ padding=True,
53
  return_tensors="pt"
54
  )
55
  inputs = {k: v.to(device) for k, v in inputs.items()}
interfaces/cap_minor.py CHANGED
@@ -79,9 +79,9 @@ def predict(text, model_id, tokenizer_id):
79
  # Tokenize input
80
  inputs = tokenizer(
81
  text,
82
- max_length=256,
83
  truncation=True,
84
- padding="do_not_pad",
85
  return_tensors="pt"
86
  )
87
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
79
  # Tokenize input
80
  inputs = tokenizer(
81
  text,
82
+ max_length=64,
83
  truncation=True,
84
+ padding=True,
85
  return_tensors="pt"
86
  )
87
  inputs = {k: v.to(device) for k, v in inputs.items()}
interfaces/cap_minor_media.py CHANGED
@@ -85,7 +85,7 @@ def predict(text, major_model_id, minor_model_id, tokenizer_id, HF_TOKEN=None):
85
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
86
 
87
  # Tokenize input
88
- inputs = tokenizer(text, max_length=256, truncation=True, padding="do_not_pad", return_tensors="pt").to(device)
89
 
90
  # Predict major topic
91
  major_model.eval()
@@ -162,9 +162,9 @@ def predict_flat(text, model_id, tokenizer_id, HF_TOKEN=None):
162
  # Tokenize input
163
  inputs = tokenizer(
164
  text,
165
- max_length=256,
166
  truncation=True,
167
- padding="do_not_pad",
168
  return_tensors="pt"
169
  )
170
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
85
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
86
 
87
  # Tokenize input
88
+ inputs = tokenizer(text, max_length=64, truncation=True, padding=True, return_tensors="pt").to(device)
89
 
90
  # Predict major topic
91
  major_model.eval()
 
162
  # Tokenize input
163
  inputs = tokenizer(
164
  text,
165
+ max_length=64,
166
  truncation=True,
167
+ padding=True,
168
  return_tensors="pt"
169
  )
170
  inputs = {k: v.to(device) for k, v in inputs.items()}
interfaces/emotion.py CHANGED
@@ -39,9 +39,9 @@ def predict(text, model_id, tokenizer_id):
39
  # Tokenize input
40
  inputs = tokenizer(
41
  text,
42
- max_length=256,
43
  truncation=True,
44
- padding="do_not_pad",
45
  return_tensors="pt"
46
  )
47
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
39
  # Tokenize input
40
  inputs = tokenizer(
41
  text,
42
+ max_length=64,
43
  truncation=True,
44
+ padding=True,
45
  return_tensors="pt"
46
  )
47
  inputs = {k: v.to(device) for k, v in inputs.items()}
interfaces/emotion9.py CHANGED
@@ -38,9 +38,9 @@ def predict(text, model_id, tokenizer_id):
38
  # Tokenize input
39
  inputs = tokenizer(
40
  text,
41
- max_length=256,
42
  truncation=True,
43
- padding="do_not_pad",
44
  return_tensors="pt"
45
  )
46
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
38
  # Tokenize input
39
  inputs = tokenizer(
40
  text,
41
+ max_length=64,
42
  truncation=True,
43
+ padding=True,
44
  return_tensors="pt"
45
  )
46
  inputs = {k: v.to(device) for k, v in inputs.items()}
interfaces/illframes.py CHANGED
@@ -70,9 +70,9 @@ def predict(text, model_id, tokenizer_id, label_names):
70
  # Tokenize input
71
  inputs = tokenizer(
72
  text,
73
- max_length=256,
74
  truncation=True,
75
- padding="do_not_pad",
76
  return_tensors="pt"
77
  )
78
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
70
  # Tokenize input
71
  inputs = tokenizer(
72
  text,
73
+ max_length=64,
74
  truncation=True,
75
+ padding=True,
76
  return_tensors="pt"
77
  )
78
  inputs = {k: v.to(device) for k, v in inputs.items()}
interfaces/manifesto.py CHANGED
@@ -38,9 +38,9 @@ def predict(text, model_id, tokenizer_id):
38
  # Tokenize input
39
  inputs = tokenizer(
40
  text,
41
- max_length=256,
42
  truncation=True,
43
- padding="do_not_pad",
44
  return_tensors="pt"
45
  )
46
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
38
  # Tokenize input
39
  inputs = tokenizer(
40
  text,
41
+ max_length=64,
42
  truncation=True,
43
+ padding=True,
44
  return_tensors="pt"
45
  )
46
  inputs = {k: v.to(device) for k, v in inputs.items()}
interfaces/ontolisst.py CHANGED
@@ -56,9 +56,9 @@ def predict(text, model_id, tokenizer_id):
56
  # Tokenize input
57
  inputs = tokenizer(
58
  text,
59
- max_length=256,
60
  truncation=True,
61
- padding="do_not_pad",
62
  return_tensors="pt"
63
  )
64
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
56
  # Tokenize input
57
  inputs = tokenizer(
58
  text,
59
+ max_length=64,
60
  truncation=True,
61
+ padding=True,
62
  return_tensors="pt"
63
  )
64
  inputs = {k: v.to(device) for k, v in inputs.items()}
interfaces/sentiment.py CHANGED
@@ -42,9 +42,9 @@ def predict(text, model_id, tokenizer_id):
42
  # Tokenize input
43
  inputs = tokenizer(
44
  text,
45
- max_length=256,
46
  truncation=True,
47
- padding="do_not_pad",
48
  return_tensors="pt"
49
  )
50
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
42
  # Tokenize input
43
  inputs = tokenizer(
44
  text,
45
+ max_length=64,
46
  truncation=True,
47
+ padding=True,
48
  return_tensors="pt"
49
  )
50
  inputs = {k: v.to(device) for k, v in inputs.items()}