suryadev1 committed on
Commit
0eaac58
·
1 Parent(s): 3bb3d26

changes as comments

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -18,7 +18,7 @@ import csv
18
  def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
19
  # progress = gr.Progress(track_tqdm=True)
20
 
21
- progress(0, desc="Starting the processing")
22
  # with open(file.name, 'r') as f:
23
  # content = f.read()
24
  # saved_test_dataset = "train.txt"
@@ -78,14 +78,15 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
78
  # Load the test file and select rows based on indices
79
  test = pd.read_csv(test_location, sep=',', header=None, engine='python')
80
  selected_rows_df2 = test.loc[indices]
81
- # label=pd.read_csv(label_location, header=None, engine='python')
82
- # test_label=label.loc[indices]
83
- # test_label.to_csv(parent_location+'highGRschool10/test_label.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
84
  # Save the selected rows to a file
85
  selected_rows_df2.to_csv('fileHandler/selected_rows.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
86
  # ✅ Get the first 20% and last 20% of instances for each student ID within selected schools
87
 
88
  selected_test_info = test_info.loc[indices]
 
89
 
90
  # # First 20%
91
  # first_20_percent_indices = selected_test_info.groupby(3).apply(
@@ -113,7 +114,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
113
  ]
114
  # Group data by opt_task1 and opt_task2 based on test_info[6]
115
  opt_task_groups = ['opt_task1' if test_info.loc[idx, 6] == 0 else 'opt_task2' for idx in selected_rows_df2.index]
116
- progress(0.2, desc="Files create and saved!! Now Executing models")
117
  print("finetuned task: ",finetune_task)
118
  subprocess.run([
119
  "python", "new_test_saved_finetuned_model.py",
@@ -126,7 +127,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
126
  "-e",str(1),
127
  "-b",str(1000)
128
  ])
129
- progress(0.5,desc="Model execution completed!! Now performing analysis on the results")
130
 
131
  # Load tlb and plb
132
  with open("fileHandler/tlabels_plabels.pkl", "rb") as f:
@@ -288,7 +289,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
288
  # Read data from test_info.txt
289
  with open(test_info_location, "r") as file:
290
  data = file.readlines()
291
-
292
  # Assuming test_info[7] is a list with ideal tasks for each instance
293
  ideal_tasks = test_info[6] # A list where each element is either 1 or 2
294
 
@@ -304,7 +305,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
304
  opt2_ratios = []
305
  final_total=[]
306
  opt1_total=[]
307
- for i, row in enumerate(data):
308
  row = row.strip()
309
  if not row:
310
  continue
@@ -664,9 +665,9 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
664
  Model: {model_name}
665
  ---------------------------\n
666
  Time Taken: {result['time_taken_from_start']:.2f} seconds
667
- Total Schools in test: {len(unique_schools):.4f}
668
- Total number of instances having Schools with HGR : {len(high_indices):.4f}
669
- Total number of instances having Schools with LGR: {len(low_indices):.4f}
670
 
671
  ROC score of HGR: {high_roc_auc:.4f}
672
  ROC score of LGR: {low_roc_auc:.4f}
@@ -675,7 +676,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
675
  ROC-AUC for problems of type ER: {opt_task1_roc_auc:.4f}
676
  ROC-AUC for problems of type ME: {opt_task2_roc_auc:.4f}
677
  """
678
- progress(0.5,desc="first k '%' sampling")
679
  # subprocess.run([
680
  # "python", "new_test_saved_finetuned_model.py",
681
  # "-workspace_name", "ratio_proportion_change3_2223/sch_largest_100-coded",
@@ -694,7 +695,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
694
  # print(roc_auc_first_k)
695
 
696
 
697
- progress(0.5,desc="last '%' sampling")
698
  # subprocess.run([
699
  # "python", "new_test_saved_finetuned_model.py",
700
  # "-workspace_name", "ratio_proportion_change3_2223/sch_largest_100-coded",
 
18
  def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
19
  # progress = gr.Progress(track_tqdm=True)
20
 
21
+ progress(0, desc="Pre-Processing...")
22
  # with open(file.name, 'r') as f:
23
  # content = f.read()
24
  # saved_test_dataset = "train.txt"
 
78
  # Load the test file and select rows based on indices
79
  test = pd.read_csv(test_location, sep=',', header=None, engine='python')
80
  selected_rows_df2 = test.loc[indices]
81
+ label=pd.read_csv(label_location, header=None, engine='python')
82
+ test_label=label.loc[indices]
83
+ test_label.to_csv(parent_location+'highGRschool10/test_label.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
84
  # Save the selected rows to a file
85
  selected_rows_df2.to_csv('fileHandler/selected_rows.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
86
  # ✅ Get the first 20% and last 20% of instances for each student ID within selected schools
87
 
88
  selected_test_info = test_info.loc[indices]
89
+
90
 
91
  # # First 20%
92
  # first_20_percent_indices = selected_test_info.groupby(3).apply(
 
114
  ]
115
  # Group data by opt_task1 and opt_task2 based on test_info[6]
116
  opt_task_groups = ['opt_task1' if test_info.loc[idx, 6] == 0 else 'opt_task2' for idx in selected_rows_df2.index]
117
+ progress(0.2, desc="Running fine-tuned models...")
118
  print("finetuned task: ",finetune_task)
119
  subprocess.run([
120
  "python", "new_test_saved_finetuned_model.py",
 
127
  "-e",str(1),
128
  "-b",str(1000)
129
  ])
130
+ progress(0.5,desc="Saving output files..")
131
 
132
  # Load tlb and plb
133
  with open("fileHandler/tlabels_plabels.pkl", "rb") as f:
 
289
  # Read data from test_info.txt
290
  with open(test_info_location, "r") as file:
291
  data = file.readlines()
292
+ selected_data = [data[i] for i in indices if i < len(data)]
293
  # Assuming test_info[7] is a list with ideal tasks for each instance
294
  ideal_tasks = test_info[6] # A list where each element is either 1 or 2
295
 
 
305
  opt2_ratios = []
306
  final_total=[]
307
  opt1_total=[]
308
+ for i, row in enumerate(selected_data):
309
  row = row.strip()
310
  if not row:
311
  continue
 
665
  Model: {model_name}
666
  ---------------------------\n
667
  Time Taken: {result['time_taken_from_start']:.2f} seconds
668
+ Number of schools sampled: {len(unique_schools)}
669
+ Total number of instances from HGR schools : {len(high_indices)}
670
+ Total number of instances from LGR schools: {len(low_indices)}
671
 
672
  ROC score of HGR: {high_roc_auc:.4f}
673
  ROC score of LGR: {low_roc_auc:.4f}
 
676
  ROC-AUC for problems of type ER: {opt_task1_roc_auc:.4f}
677
  ROC-AUC for problems of type ME: {opt_task2_roc_auc:.4f}
678
  """
679
+ # progress(0.5,desc="first k '%' sampling")
680
  # subprocess.run([
681
  # "python", "new_test_saved_finetuned_model.py",
682
  # "-workspace_name", "ratio_proportion_change3_2223/sch_largest_100-coded",
 
695
  # print(roc_auc_first_k)
696
 
697
 
698
+ # progress(0.5,desc="last '%' sampling")
699
  # subprocess.run([
700
  # "python", "new_test_saved_finetuned_model.py",
701
  # "-workspace_name", "ratio_proportion_change3_2223/sch_largest_100-coded",
assests/__pycache__/metrics.cpython-312.pyc CHANGED
Binary files a/assests/__pycache__/metrics.cpython-312.pyc and b/assests/__pycache__/metrics.cpython-312.pyc differ
 
assests/__pycache__/recalibration.cpython-312.pyc CHANGED
Binary files a/assests/__pycache__/recalibration.cpython-312.pyc and b/assests/__pycache__/recalibration.cpython-312.pyc differ
 
assests/__pycache__/visualization.cpython-312.pyc CHANGED
Binary files a/assests/__pycache__/visualization.cpython-312.pyc and b/assests/__pycache__/visualization.cpython-312.pyc differ
 
fileHandler/ER-successful-strategies.csv CHANGED
The diff for this file is too large to render. See raw diff
 
fileHandler/ER-unsuccessful-strategies.csv CHANGED
The diff for this file is too large to render. See raw diff
 
fileHandler/ME-successful-strategies.csv CHANGED
The diff for this file is too large to render. See raw diff
 
fileHandler/ME-unsuccessful-strategies.csv CHANGED
The diff for this file is too large to render. See raw diff
 
fileHandler/result.txt CHANGED
@@ -1,7 +1,7 @@
1
- avg_loss: 0.5841353535652161
2
- total_acc: 69.00702106318957
3
- precisions: 0.7236623191454734
4
- recalls: 0.6900702106318957
5
- f1_scores: 0.6802420656474512
6
- time_taken_from_start: 2.079533815383911
7
- auc_score: 0.7457100293916334
 
1
+ avg_loss: 0.5839772423108419
2
+ total_acc: 69.6584058941728
3
+ precisions: 0.7224570437809088
4
+ recalls: 0.696584058941728
5
+ f1_scores: 0.6872024231270459
6
+ time_taken_from_start: 4.869930028915405
7
+ auc_score: 0.7470103507185207
fileHandler/roc_data.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2483f35aa06ef8983623602f690eb3fe006654c79d448f4f82a913b4862e34e9
3
- size 9437
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2550042df149bd1483f0ddb2f0af449d7b679115a6f2bcc7c2af6600c20bfead
3
+ size 27485
fileHandler/roc_data2.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0578bbc08b428a1f54707fc3aca6aa1063e045033cb007f8ba3361f1aace43df
3
- size 28023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:169a215fb6971ef69cb8a703b24f2afe9aae9383becccdbe2aaf9ad4c9abf005
3
+ size 83732
fileHandler/selected_rows.txt CHANGED
The diff for this file is too large to render. See raw diff
 
fileHandler/tlabels_plabels.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1aabcfeb64b7645738d0507dd755822b92f2a256a2f0bdee28b2916268078eb
3
- size 37993
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71046b1d75a84d0f02d04aa725087ba339eb954fc9cf158d4d17331e73bfd48b
3
+ size 113592
ratio_proportion_change3_2223/.DS_Store ADDED
Binary file (6.15 kB). View file
 
ratio_proportion_change3_2223/sch_largest_100-coded/.DS_Store ADDED
Binary file (6.15 kB). View file
 
ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/highGRschool10/test_label.txt CHANGED
The diff for this file is too large to render. See raw diff
 
src/__pycache__/attention.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/attention.cpython-312.pyc and b/src/__pycache__/attention.cpython-312.pyc differ
 
src/__pycache__/bert.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/bert.cpython-312.pyc and b/src/__pycache__/bert.cpython-312.pyc differ
 
src/__pycache__/classifier_model.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/classifier_model.cpython-312.pyc and b/src/__pycache__/classifier_model.cpython-312.pyc differ
 
src/__pycache__/dataset.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/dataset.cpython-312.pyc and b/src/__pycache__/dataset.cpython-312.pyc differ
 
src/__pycache__/embedding.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/embedding.cpython-312.pyc and b/src/__pycache__/embedding.cpython-312.pyc differ
 
src/__pycache__/seq_model.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/seq_model.cpython-312.pyc and b/src/__pycache__/seq_model.cpython-312.pyc differ
 
src/__pycache__/transformer.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/transformer.cpython-312.pyc and b/src/__pycache__/transformer.cpython-312.pyc differ
 
src/__pycache__/transformer_component.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/transformer_component.cpython-312.pyc and b/src/__pycache__/transformer_component.cpython-312.pyc differ
 
src/__pycache__/vocab.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/vocab.cpython-312.pyc and b/src/__pycache__/vocab.cpython-312.pyc differ