Commit 76b8b87
Parent(s): 90de499
Update llm.py
llm.py CHANGED
@@ -31,7 +31,7 @@ import argparse
 model_path = "wizardLM-7B.ggml.q5_0.bin"
 
 
-def query_llm(index, prompt, service_context, retriever_mode='embedding', response_mode='
+def query_llm(index, prompt, service_context, retriever_mode='embedding', response_mode='compact'):
     response_synthesizer = ResponseSynthesizer.from_args(
         service_context=service_context,
         node_postprocessors=[
@@ -75,7 +75,7 @@ def construct_index(
     index_name,
     index_type,
     max_input_size=2048,
-    num_outputs=
+    num_outputs=2048,
     max_chunk_overlap=20,
     chunk_size_limit=None,
     embedding_limit=None,
@@ -87,16 +87,17 @@ def construct_index(
     embedding_limit = None if embedding_limit == 0 else embedding_limit
     separator = " " if separator == "" else separator
 
-    llm = LlamaCpp(
-
+    llm = LlamaCpp(
+        model_path=model_path,
+        n_ctx=4096,
         use_mlock=True,
         n_parts=-1,
         temperature=0.7,
         top_p=0.40,
-        last_n_tokens_size=
-        n_threads=
+        last_n_tokens_size=100,
+        n_threads=8,
         f16_kv=True,
-        max_tokens=
+        max_tokens=150
     )
     llm_predictor = LLMPredictor(
         llm=llm
@@ -181,12 +182,12 @@ def ask_ai(
 ):
     logging.debug("Querying index...")
     prompt_helper = PromptHelper(
-
-
+        4096,
+        150,
         -20000
     )
     llm = LlamaCpp(model_path=model_path,
-                   n_ctx=
+                   n_ctx=4096,
                    use_mlock=True,
                    n_parts=-1,
                    temperature=temprature,
@@ -216,11 +217,6 @@ def ask_ai(
     if response is not None:
         logging.info(f"Response: {response}")
         ret_text = response.response
-        ret_text += "\n----------\n"
-        nodes = []
-        for index, node in enumerate(response.source_nodes):
-            nodes.append(f"[{index+1}] {node.source_text}")
-        ret_text += "\n\n".join(nodes)
         return ret_text
     else:
         logging.debug("No response found, returning None")
@@ -230,15 +226,15 @@ def ask_ai(
 def search_construct(question, search_mode, index_select):
     print(f"You asked: {question}")
     llm = LlamaCpp(model_path=model_path,
-                   n_ctx=
+                   n_ctx=400,
                    use_mlock=True,
                    n_parts=-1,
-                   temperature=
+                   temperature=1,
                    top_p=0.40,
-                   last_n_tokens_size=
-                   n_threads=
+                   last_n_tokens_size=100,
+                   n_threads=6,
                    f16_kv=True,
-                   max_tokens=
+                   max_tokens=100
     )
     chat = llm
     search_terms = (
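For readers tracing the change, the sketch below assembles the pieces this commit touches the way llm.py now configures them. It assumes the LangChain LlamaCpp wrapper and a llama_index 0.6-era API (LLMPredictor, PromptHelper, ServiceContext), which the ResponseSynthesizer call visible in the diff suggests; the ServiceContext wiring at the end is an assumption added for self-containedness and is not part of the diff.

# Sketch only: mirrors the post-commit configuration in construct_index
# and ask_ai. The ServiceContext hookup is assumed, not shown in the diff.
from langchain.llms import LlamaCpp
from llama_index import LLMPredictor, PromptHelper, ServiceContext

model_path = "wizardLM-7B.ggml.q5_0.bin"

llm = LlamaCpp(
    model_path=model_path,   # now passed explicitly
    n_ctx=4096,              # context window, also set explicitly now
    use_mlock=True,          # keep model weights pinned in RAM
    n_parts=-1,              # let llama.cpp choose the partitioning
    temperature=0.7,
    top_p=0.40,
    last_n_tokens_size=100,  # token window consulted for repetition penalty
    n_threads=8,
    f16_kv=True,             # half-precision key/value cache
    max_tokens=150,
)
llm_predictor = LLMPredictor(llm=llm)

# PromptHelper's first three positional arguments in this llama_index
# generation are (max_input_size, num_output, max_chunk_overlap); the
# commit fills in 4096 and 150 and leaves the pre-existing -20000 as-is.
prompt_helper = PromptHelper(4096, 150, -20000)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
)

Note that the two call sites end up with different budgets: ask_ai queries the index with n_ctx=4096 and max_tokens=150, while search_construct, which only generates search terms, runs a much smaller n_ctx=400, max_tokens=100 configuration at temperature=1.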