File size: 5,534 Bytes
c954503
e511bc5
bbbca4f
e511bc5
c954503
e511bc5
 
bbbca4f
e511bc5
 
 
 
 
 
 
bbbca4f
e511bc5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a46dba
e511bc5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbbca4f
 
 
 
 
 
 
 
 
 
 
 
e511bc5
bbbca4f
 
 
 
 
 
e511bc5
bbbca4f
 
 
 
e511bc5
 
6a09dd7
e511bc5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import streamlit as st
import requests
import re
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import pandas as pd
from datasets import Dataset
from huggingface_hub import hf_api

# Page header: the tool's name followed by a short summary of its features
st.title("OSINT Tool 🏢")
intro_text = """
    This tool performs **Open Source Intelligence (OSINT)** analysis on GitHub repositories and fetches titles from URLs.
    It also allows uploading datasets (CSV format) for fine-tuning models like **DistilBERT**.
    """
st.markdown(intro_text)

# Sidebar: the entry picked here decides which section of the page is rendered
mode_choices = [
    "GitHub Repository Analysis",
    "URL Title Fetcher",
    "Dataset Upload & Fine-Tuning",
]
st.sidebar.title("Navigation")
app_mode = st.sidebar.radio("Choose the mode", mode_choices)

# GitHub Repository Analysis
# Looks up a repository through the GitHub REST API and shows a handful of
# fields from the JSON reply (name, owner, stars, forks, language, description).
if app_mode == "GitHub Repository Analysis":
    # Imported here because only this mode needs it.
    from urllib.parse import quote

    st.header("GitHub Repository Analysis")
    # Strip surrounding whitespace (e.g. from copy/paste) so it never ends up in the URL.
    repo_owner = st.text_input("Enter GitHub Repository Owner", "huggingface").strip()
    repo_name = st.text_input("Enter GitHub Repository Name", "transformers").strip()

    if st.button("Analyze Repository"):
        if repo_owner and repo_name:
            try:
                # Percent-encode both path segments so reserved characters typed
                # into the inputs ("/", "?", "#", ...) are sent as part of the
                # name instead of altering the request path or query string.
                api_url = (
                    "https://api.github.com/repos/"
                    f"{quote(repo_owner, safe='')}/{quote(repo_name, safe='')}"
                )
                # Without a timeout an unresponsive server would block this
                # script run indefinitely.
                response = requests.get(api_url, timeout=10)

                if response.status_code == 200:
                    data = response.json()
                    st.subheader("Repository Details")
                    st.write(f"**Name**: {data['name']}")
                    st.write(f"**Owner**: {data['owner']['login']}")
                    st.write(f"**Stars**: {data['stargazers_count']}")
                    st.write(f"**Forks**: {data['forks_count']}")
                    # These two fields can be null in the API reply; show a
                    # placeholder rather than the literal text "None".
                    st.write(f"**Language**: {data.get('language') or 'N/A'}")
                    st.write(f"**Description**: {data.get('description') or 'N/A'}")
                else:
                    # Error replies normally carry a JSON "message", but a proxy
                    # or gateway may answer with a non-JSON body; fall back to a
                    # generic message instead of failing on the parse.
                    message = "Something went wrong with the request"
                    try:
                        payload = response.json()
                    except ValueError:
                        payload = None
                    if isinstance(payload, dict):
                        message = payload.get("message", message)
                    st.error(f"Error: {message}")
            except Exception as e:
                # UI boundary: report any failure (network error, unexpected
                # payload shape) to the user instead of showing a traceback.
                st.error(f"Error occurred: {e}")
        else:
            st.warning("Please enter both repository owner and name.")

# URL Title Fetcher
elif app_mode == "URL Title Fetcher":
    st.header("URL Title Fetcher")
    url = st.text_input("Enter URL", "https://www.huggingface.co")
    
    if st.button("Fetch Title"):
        if url:
            try:
                response = requests.get(url)
                if response.status_code == 200:
                    # Try to extract the title from the HTML
                    match = re.search('<title>(.*?)</title>', response.text)
                    if match:
                        title = match.group(1)
                        st.write(f"**Page Title**: {title}")
                    else:
                        st.warning("Title tag not found in the page")
                else:
                    st.error(f"Failed to retrieve the page. Status code: {response.status_code}")
            except Exception as e:
                st.error(f"Error occurred: {e}")
        else:
            st.warning("Please enter a valid URL.")

# Dataset Upload & Fine-Tuning
elif app_mode == "Dataset Upload & Fine-Tuning":
    st.header("Dataset Upload & Fine-Tuning")
    
    uploaded_file = st.file_uploader("Upload a CSV file for fine-tuning", type="csv")
    
    if uploaded_file is not None:
        # Load the CSV into a pandas DataFrame
        df = pd.read_csv(uploaded_file)
        
        # Display dataset preview
        st.subheader("Dataset Preview")
        st.write(df.head())

        # Convert CSV to Hugging Face dataset format
        dataset = Dataset.from_pandas(df)
        
        model_name = st.selectbox("Select model for fine-tuning", ["distilbert-base-uncased"])
        
        if st.button("Fine-tune Model"):
            if model_name:
                try:
                    model = AutoModelForSequenceClassification.from_pretrained(model_name)
                    tokenizer = AutoTokenizer.from_pretrained(model_name)
                    
                    # Prepare the dataset
                    def preprocess_function(examples):
                        return tokenizer(examples['text'], truncation=True, padding=True)
                    
                    tokenized_datasets = dataset.map(preprocess_function, batched=True)
                    
                    # Fine-tuning setup (using Hugging Face Trainer for a complete setup)
                    from transformers import Trainer, TrainingArguments

                    training_args = TrainingArguments(
                        output_dir="./results",
                        evaluation_strategy="epoch",
                        learning_rate=2e-5,
                        per_device_train_batch_size=16,
                        per_device_eval_batch_size=16,
                        num_train_epochs=3,
                        weight_decay=0.01,
                    )
                    
                    trainer = Trainer(
                        model=model,
                        args=training_args,
                        train_dataset=tokenized_datasets,
                        eval_dataset=tokenized_datasets,
                    )
                    
                    # Train the model
                    trainer.train()

                    st.success("Fine-tuning completed successfully!")
                except Exception as e:
                    st.error(f"Error during fine-tuning: {e}")
            else:
                st.warning("Please select a model for fine-tuning.")
    
    else:
        st.warning("Please upload a dataset.")