Spaces:
Running
Running
Commit
·
7609276
1
Parent(s):
15040ef
app.py
CHANGED
@@ -1,27 +1,43 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
4 |
|
5 |
app = Flask(__name__)
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
#
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
if __name__ == '__main__':
|
27 |
-
app.run(host=
|
|
|
1 |
+
import nltk
|
2 |
+
import re
|
3 |
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
4 |
+
from nltk.corpus import stopwords
|
5 |
+
from nltk.probability import FreqDist
|
6 |
+
from flask import Flask, request, render_template
|
7 |
|
8 |
app = Flask(__name__)

# Fetch the NLTK resources this module relies on at import time.
# 'stopwords' backs the stop_words set below. sent_tokenize/word_tokenize
# additionally need the Punkt sentence models; without them the /detect route
# fails with LookupError on a machine that has no NLTK data installed.
# Recent NLTK releases look the models up as 'punkt_tab', older ones as
# 'punkt', so both are requested.
for _resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(_resource)

# English stop words as a set for O(1) membership tests.
stop_words = set(stopwords.words("english"))
|
12 |
+
|
13 |
+
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
16 |
+
|
17 |
+
@app.route('/detect', methods=['POST'])
def detect_ai():
    """Classify the submitted text as AI-generated or not using simple heuristics.

    Reads the ``content`` field of the POSTed form and applies three checks in
    order; the first one that fires decides the verdict:

    1. the text has fewer than three sentences;
    2. the average occurrence count per distinct non-stop-word is below 2
       (text that contains no such words at all is treated the same way);
    3. the raw text matches any of the hard-coded regex patterns.

    Returns:
        A plain-text verdict string.
    """
    likely_ai = "This content is likely generated by AI"
    likely_human = "This content is likely not generated by AI"

    content = request.form['content']

    # Very short submissions are flagged outright.
    sentences = sent_tokenize(content)
    if len(sentences) < 3:
        return likely_ai

    # Lower-cased alphabetic tokens with stop words removed, across all
    # sentences (the per-sentence grouping is not needed for the counts).
    content_words = [
        token.lower()
        for sentence in sentences
        for token in word_tokenize(sentence)
        if token.isalpha() and token.lower() not in stop_words
    ]
    fdist = FreqDist(content_words)

    # With no countable words the average below would divide by zero
    # (e.g. "1. 2. 3."); such input is handled like a low average.
    if not fdist:
        return likely_ai

    # Average number of occurrences per distinct word.
    avg_freq = sum(fdist.values()) / len(fdist)
    if avg_freq < 2:
        return likely_ai

    # NOTE(review): the first pattern matches any word of five or more
    # characters, so nearly all text reaching this point is flagged, and the
    # third pattern can never match unless the first does. Confirm that this
    # is the intended heuristic.
    regex_patterns = [r'\b\w{5,}\b', r'\b\d{1,}\b', r'\b\w{5,}\b \b\w{5,}\b']
    if any(re.search(pattern, content) for pattern in regex_patterns):
        return likely_ai

    return likely_human
|
41 |
|
42 |
if __name__ == '__main__':
    # Started as a script: serve on all network interfaces, port 7860.
    app.run(host='0.0.0.0', port=7860)
|