matthoffner committed
Commit 76b8b87 · 1 Parent(s): 90de499

Update llm.py

Files changed (1)
  1. llm.py +16 -20
llm.py CHANGED
@@ -31,7 +31,7 @@ import argparse
 model_path = "wizardLM-7B.ggml.q5_0.bin"
 
 
-def query_llm(index, prompt, service_context, retriever_mode='embedding', response_mode='tree_summarize'):
+def query_llm(index, prompt, service_context, retriever_mode='embedding', response_mode='compact'):
     response_synthesizer = ResponseSynthesizer.from_args(
         service_context=service_context,
         node_postprocessors=[
@@ -75,7 +75,7 @@ def construct_index(
     index_name,
     index_type,
     max_input_size=2048,
-    num_outputs=256,
+    num_outputs=2048,
     max_chunk_overlap=20,
     chunk_size_limit=None,
     embedding_limit=None,
@@ -87,16 +87,17 @@ def construct_index(
     embedding_limit = None if embedding_limit == 0 else embedding_limit
     separator = " " if separator == "" else separator
 
-    llm = LlamaCpp(model_path=model_path,
-        n_ctx=2048,
+    llm = LlamaCpp(
+        model_path=model_path,
+        n_ctx=4096,
         use_mlock=True,
         n_parts=-1,
         temperature=0.7,
         top_p=0.40,
-        last_n_tokens_size=200,
-        n_threads=4,
+        last_n_tokens_size=100,
+        n_threads=8,
         f16_kv=True,
-        max_tokens=400
+        max_tokens=150
     )
     llm_predictor = LLMPredictor(
         llm=llm
@@ -181,12 +182,12 @@ def ask_ai(
 ):
     logging.debug("Querying index...")
     prompt_helper = PromptHelper(
-        300,
-        200,
+        4096,
+        150,
         -20000
     )
     llm = LlamaCpp(model_path=model_path,
-        n_ctx=512,
+        n_ctx=4096,
         use_mlock=True,
         n_parts=-1,
         temperature=temprature,
@@ -216,11 +217,6 @@ def ask_ai(
     if response is not None:
         logging.info(f"Response: {response}")
         ret_text = response.response
-        ret_text += "\n----------\n"
-        nodes = []
-        for index, node in enumerate(response.source_nodes):
-            nodes.append(f"[{index+1}] {node.source_text}")
-        ret_text += "\n\n".join(nodes)
         return ret_text
     else:
         logging.debug("No response found, returning None")
@@ -230,15 +226,15 @@ def ask_ai(
 def search_construct(question, search_mode, index_select):
     print(f"You asked: {question}")
     llm = LlamaCpp(model_path=model_path,
-        n_ctx=500,
+        n_ctx=400,
         use_mlock=True,
         n_parts=-1,
-        temperature=0.5,
+        temperature=1,
         top_p=0.40,
-        last_n_tokens_size=400,
-        n_threads=4,
+        last_n_tokens_size=100,
+        n_threads=6,
         f16_kv=True,
-        max_tokens=400
+        max_tokens=100
     )
     chat = llm
     search_terms = (
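
For context, a minimal sketch of how the parameters touched by this commit fit together, assuming the langchain LlamaCpp wrapper and the llama_index LLMPredictor/PromptHelper/ServiceContext classes that llm.py appears to use; the import paths and the ServiceContext wiring below are assumptions, since the diff only shows the changed hunks:

# Sketch only (not part of this commit): wiring the post-commit values from
# construct_index() and ask_ai() together. Import paths are assumptions based
# on the classes referenced in the diff.
from langchain.llms import LlamaCpp
from llama_index import LLMPredictor, PromptHelper, ServiceContext

model_path = "wizardLM-7B.ggml.q5_0.bin"

# LlamaCpp with the values construct_index() uses after this commit:
# larger context window (4096), more threads (8), shorter generations (150).
llm = LlamaCpp(
    model_path=model_path,
    n_ctx=4096,
    use_mlock=True,
    n_parts=-1,
    temperature=0.7,
    top_p=0.40,
    last_n_tokens_size=100,
    n_threads=8,
    f16_kv=True,
    max_tokens=150,
)
llm_predictor = LLMPredictor(llm=llm)

# PromptHelper as ask_ai() now calls it, positionally:
# (max_input_size, num_output, max_chunk_overlap) -> (4096, 150, -20000).
prompt_helper = PromptHelper(4096, 150, -20000)

# A service_context like the one query_llm() receives could then be built as:
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
)

The overall shift in this commit is toward a larger context window with shorter completions, plus dropping the source-node listing that ask_ai() previously appended to the response text.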