imseldrith committed on
Commit
7609276
·
1 Parent(s): 15040ef
Files changed (1) hide show
  1. app.py +38 -22
app.py CHANGED
@@ -1,27 +1,43 @@
1
- from flask import Flask, request
2
- from transformers import GPT2Tokenizer, GPT2LMHeadModel
3
-
 
 
 
4
 
5
  app = Flask(__name__)
6
 
7
- # Load the GPT-2 model and tokenizer
8
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
9
- model = GPT2LMHeadModel.from_pretrained('gpt2')
10
-
11
- app.route('/paraphrase', methods=['POST'])
12
- def paraphrase():
13
- # Get the input text from the request
14
- input_text = request.form.get('input_text')
15
-
16
- # Encode the input text using the tokenizer
17
- input_ids = tokenizer.encode(input_text, return_tensors='pt')
18
-
19
- # Generate the paraphrased text using the model
20
- output = model.generate(input_ids, max_length=1024, top_k=5, top_p=0.95, num_return_sequences=1)
21
- output_text = tokenizer.decode(output[0], skip_special_tokens=True)
22
-
23
- # Return the paraphrased text
24
- return output_text
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  if __name__ == '__main__':
27
- app.run(host="0.0.0.0", port=7860)
 
1
+ import nltk
2
+ import re
3
+ from nltk.tokenize import sent_tokenize, word_tokenize
4
+ from nltk.corpus import stopwords
5
+ from nltk.probability import FreqDist
6
+ from flask import Flask, request, render_template
7
 
8
  app = Flask(__name__)
9
 
10
# Fetch the NLTK data this module needs before serving requests.
# 'stopwords' backs the stop-word set below. sent_tokenize/word_tokenize
# additionally need the Punkt sentence tokenizer data; without it the first
# call raises LookupError. Older NLTK releases load the 'punkt' resource while
# newer ones look for 'punkt_tab', so both are requested.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

# English stop words as a set for O(1) membership tests during filtering.
stop_words = set(stopwords.words("english"))
12
+
13
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    return render_template('index.html')
16
+
17
@app.route('/detect', methods=['POST'])
def detect_ai():
    """Classify the submitted text using a few simple heuristics.

    Reads the ``content`` field of the POSTed form and applies, in order:
    a sentence-count check, an average word-frequency check, and a set of
    regex checks. The first check that fires decides the result.

    Returns:
        A plain-text verdict: either "This content is likely generated by AI"
        or "This content is likely not generated by AI".
    """
    content = request.form['content']

    # Fewer than three sentences is treated as a sign of generated text.
    sentences = sent_tokenize(content)
    if len(sentences) < 3:
        return "This content is likely generated by AI"

    # Keep lower-cased, purely alphabetic tokens that are not stop words.
    tokens = [
        word.lower()
        for sentence in sentences
        for word in word_tokenize(sentence)
        if word.isalpha() and word.lower() not in stop_words
    ]
    fdist = FreqDist(tokens)

    # Average number of occurrences per distinct word. If no token survived
    # the filter (e.g. the text is only stop words or numbers) the
    # distribution is empty; use 0 instead of dividing by zero, which sends
    # the text down the same low-frequency branch.
    avg_freq = sum(fdist.values()) / len(fdist) if fdist else 0
    if avg_freq < 2:
        return "This content is likely generated by AI"

    # NOTE(review): the first pattern matches any word of five or more
    # characters, so this check fires for almost all real text and the third
    # pattern is subsumed by it. Behavior is kept as written; confirm intent.
    regex_patterns = [r'\b\w{5,}\b', r'\b\d{1,}\b', r'\b\w{5,}\b \b\w{5,}\b']
    if any(re.search(pattern, content) for pattern in regex_patterns):
        return "This content is likely generated by AI"

    return "This content is likely not generated by AI"
41
 
42
  if __name__ == '__main__':
43
+ app.run(host='0.0.0.0',port=7860)