# NOTE(review): the three lines below were hosting-page status text
# ("Spaces:" / "Running" badges) captured when this file was scraped;
# kept here as a comment so the module parses as Python.
import re

import nltk
from flask import Flask, render_template, request
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.tokenize import sent_tokenize, word_tokenize

app = Flask(__name__)

# sent_tokenize/word_tokenize need the 'punkt' tokenizer models in addition
# to the stopwords corpus; without this download the first request raises
# nltk.LookupError.
nltk.download('punkt')
nltk.download('stopwords')

# English stopword set used to filter tokens in detect_ai().
stop_words = set(stopwords.words("english"))
@app.route('/')
def index():
    """Serve the landing page with the content-submission form.

    NOTE(review): the scraped source had no route decorator, leaving this
    view unregistered; '/' is the conventional mapping for an index view —
    confirm against the original deployment.
    """
    return render_template('index.html')
@app.route('/detect', methods=['POST'])
def detect_ai():
    """Heuristically label POSTed form field 'content' as AI- or human-written.

    Heuristics applied in order (first match wins):
      1. fewer than 3 sentences -> "AI";
      2. average token frequency below 2 -> "AI";
      3. any regex pattern match -> "AI";
      otherwise -> "not AI".

    Returns:
        str: a one-sentence verdict rendered directly as the response body.

    Raises:
        werkzeug.exceptions.BadRequest: if the form has no 'content' field
        (Flask raises 400 on a missing request.form key).
    """
    content = request.form['content']

    # Tokenize the content into sentences.
    sentences = sent_tokenize(content)

    # Very short content is treated as AI-generated.
    if len(sentences) < 3:
        return "This content is likely generated by AI"

    # Tokenize each sentence into words.
    words = [word_tokenize(sentence) for sentence in sentences]

    # Keep alphabetic, non-stopword tokens, lowercased.
    words = [
        [word.lower() for word in sentence
         if word.isalpha() and word.lower() not in stop_words]
        for sentence in words
    ]

    # Frequency distribution over all surviving tokens.
    tokens = [word for sentence in words for word in sentence]
    fdist = FreqDist(tokens)

    # Guard: content made entirely of stopwords/punctuation leaves fdist
    # empty and the original code divided by zero here.
    if not fdist:
        return "This content is likely generated by AI"

    # Average occurrences per distinct word; low repetition is treated as AI.
    avg_freq = sum(fdist.values()) / len(fdist)
    if avg_freq < 2:
        return "This content is likely generated by AI"

    # NOTE(review): these patterns are extremely broad — r'\b\w{5,}\b'
    # matches ANY word of 5+ characters, so virtually all real text is
    # flagged as AI by this step. Kept as-is to preserve behavior; the
    # heuristic itself should be revisited.
    regex_patterns = [r'\b\w{5,}\b', r'\b\d{1,}\b', r'\b\w{5,}\b \b\w{5,}\b']
    for pattern in regex_patterns:
        if re.search(pattern, content):
            return "This content is likely generated by AI"

    return "This content is likely not generated by AI"
if __name__ == '__main__':
    # Listen on all interfaces; 7860 is the standard Hugging Face Spaces port.
    app.run(host='0.0.0.0', port=7860)