Canstralian committed on
Commit
bbbca4f
·
verified ·
1 Parent(s): e511bc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -11
app.py CHANGED
@@ -1,9 +1,11 @@
1
  import streamlit as st
2
  import requests
 
3
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
  import torch
5
  import pandas as pd
6
  from datasets import Dataset
 
7
 
8
  # Title and description
9
  st.title("OSINT Tool 🏢")
@@ -11,7 +13,7 @@ st.markdown("""
11
  This tool performs **Open Source Intelligence (OSINT)** analysis on GitHub repositories and fetches titles from URLs.
12
  It also allows uploading datasets (CSV format) for fine-tuning models like **DistilBERT**.
13
  """)
14
-
15
  # Sidebar for navigation
16
  st.sidebar.title("Navigation")
17
  app_mode = st.sidebar.radio("Choose the mode", ["GitHub Repository Analysis", "URL Title Fetcher", "Dataset Upload & Fine-Tuning"])
@@ -98,18 +100,30 @@ elif app_mode == "Dataset Upload & Fine-Tuning":
98
 
99
  tokenized_datasets = dataset.map(preprocess_function, batched=True)
100
 
101
- # Training loop (example)
102
- train_args = {
103
- "output_dir": "./results",
104
- "num_train_epochs": 3,
105
- "per_device_train_batch_size": 16,
106
- "logging_dir": "./logs",
107
- }
 
 
 
 
 
108
 
109
- # Fine-tuning logic (for demonstration purposes, actual fine-tuning will need Hugging Face Trainer)
110
- # model.train()
 
 
 
 
111
 
112
- st.success("Fine-tuning started (demo)!")
 
 
 
113
  except Exception as e:
114
  st.error(f"Error during fine-tuning: {e}")
115
  else:
 
1
  import streamlit as st
2
  import requests
3
+ import re
4
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
5
  import torch
6
  import pandas as pd
7
  from datasets import Dataset
8
+ from huggingface_hub import hf_api
9
 
10
  # Title and description
11
  st.title("OSINT Tool 🏢")
 
13
  This tool performs **Open Source Intelligence (OSINT)** analysis on GitHub repositories and fetches titles from URLs.
14
  It also allows uploading datasets (CSV format) for fine-tuning models like **DistilBERT**.
15
  """)
16
+
17
  # Sidebar for navigation
18
  st.sidebar.title("Navigation")
19
  app_mode = st.sidebar.radio("Choose the mode", ["GitHub Repository Analysis", "URL Title Fetcher", "Dataset Upload & Fine-Tuning"])
 
100
 
101
  tokenized_datasets = dataset.map(preprocess_function, batched=True)
102
 
103
+ # Fine-tuning setup (using Hugging Face Trainer for a complete setup)
104
+ from transformers import Trainer, TrainingArguments
105
+
106
+ training_args = TrainingArguments(
107
+ output_dir="./results",
108
+ evaluation_strategy="epoch",
109
+ learning_rate=2e-5,
110
+ per_device_train_batch_size=16,
111
+ per_device_eval_batch_size=16,
112
+ num_train_epochs=3,
113
+ weight_decay=0.01,
114
+ )
115
 
116
+ trainer = Trainer(
117
+ model=model,
118
+ args=training_args,
119
+ train_dataset=tokenized_datasets,
120
+ eval_dataset=tokenized_datasets,
121
+ )
122
 
123
+ # Train the model
124
+ trainer.train()
125
+
126
+ st.success("Fine-tuning completed successfully!")
127
  except Exception as e:
128
  st.error(f"Error during fine-tuning: {e}")
129
  else: