import re

import nltk
from flask import Flask, render_template, request
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.tokenize import sent_tokenize, word_tokenize

app = Flask(__name__)

# Download the NLTK resources the tokenizers and stopword list depend on.
nltk.download('punkt')
nltk.download('stopwords')

stop_words = set(stopwords.words("english"))


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/detect', methods=['POST'])
def detect_ai():
    content = request.form['content']

    # Tokenize the content into sentences.
    sentences = sent_tokenize(content)

    # Heuristic 1: flag submissions with fewer than three sentences.
    if len(sentences) < 3:
        return "This content is likely generated by AI"

    # Tokenize each sentence into words.
    words = [word_tokenize(sentence) for sentence in sentences]

    # Keep only alphabetic tokens, lowercased, with stop words removed.
    words = [[word.lower() for word in sentence
              if word.isalpha() and word.lower() not in stop_words]
             for sentence in words]

    # Build a frequency distribution over the remaining words.
    fdist = FreqDist(word for sentence in words for word in sentence)

    # Heuristic 2: flag content whose average word frequency is low.
    # Guard against division by zero when no words survive the filtering.
    if len(fdist) == 0:
        return "This content is likely generated by AI"
    avg_freq = sum(fdist.values()) / len(fdist)
    if avg_freq < 2:
        return "This content is likely generated by AI"

    # Heuristic 3: flag content matching any of these regex patterns
    # (a word of 5+ characters, a digit sequence, or two adjacent
    # 5+ character words separated by a space).
    regex_patterns = [r'\b\w{5,}\b', r'\b\d{1,}\b', r'\b\w{5,}\b \b\w{5,}\b']
    for pattern in regex_patterns:
        if re.search(pattern, content):
            return "This content is likely generated by AI"

    return "This content is likely not generated by AI"


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
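
# --- Usage sketch (an assumption for illustration, not part of the app) ---
# With the server running, the /detect endpoint can be exercised via curl:
#
#   curl -X POST -F "content=Paste the text to check here." \
#        http://localhost:7860/detect
#
# Or, without starting the server, via Flask's built-in test client
# (assuming this file is saved as app.py):
#
#   from app import app
#   client = app.test_client()
#   resp = client.post('/detect', data={'content': 'Paste the text here.'})
#   print(resp.data.decode())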