AI-Rephraser / app.py
imseldrith's picture
a
7609276
raw
history blame
1.63 kB
import nltk
import re
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from flask import Flask, request, render_template
app = Flask(__name__)

# Fetch the NLTK data this module relies on at import time:
#   - 'stopwords' backs the stop_words set built below;
#   - 'punkt' / 'punkt_tab' hold the sentence-tokenizer data loaded by
#     sent_tokenize() and word_tokenize(); which of the two names is looked up
#     depends on the installed NLTK release, so both are requested.
# With its default arguments nltk.download() signals failure through its
# return value instead of raising, so a package that cannot be fetched does
# not abort start-up here.
for _nltk_package in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(_nltk_package)

# English stop words, kept as a set for fast membership tests when filtering.
stop_words = set(stopwords.words("english"))
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/detect', methods=['POST'])
def detect_ai():
    """Classify the submitted text as likely AI-generated or not.

    Reads the ``content`` field of the POSTed form and applies three
    heuristics in order, returning the "likely generated by AI" message as
    soon as one of them fires:

    1. the text has fewer than three sentences;
    2. after dropping stop words and non-alphabetic tokens, the average
       number of occurrences per distinct word is below 2 (a text with no
       remaining words counts as an average of 0);
    3. any of the regex patterns listed below matches the raw text.

    Returns:
        A plain-text verdict string used as the response body.

    Note:
        A request without a ``content`` field fails on the ``request.form``
        lookup; Flask turns that lookup error into a 400 response.
    """
    content = request.form['content']

    # Heuristic 1: very short submissions are flagged outright.
    sentences = sent_tokenize(content)
    if len(sentences) < 3:
        return "This content is likely generated by AI"

    # Lower-cased alphabetic tokens from every sentence, minus stop words.
    content_words = [
        token.lower()
        for sentence in sentences
        for token in word_tokenize(sentence)
        if token.isalpha() and token.lower() not in stop_words
    ]
    fdist = FreqDist(content_words)

    # Heuristic 2: average occurrences per distinct word.
    # When every token was filtered out (only numbers, punctuation or stop
    # words) fdist is empty and the division would raise ZeroDivisionError,
    # so that case is treated as an average of 0.
    avg_freq = sum(fdist.values()) / len(fdist) if fdist else 0.0
    if avg_freq < 2:
        return "This content is likely generated by AI"

    # Heuristic 3: pattern matches against the raw text.
    # NOTE(review): the first pattern matches any run of 5+ word characters
    # and the second any run of digits, so most ordinary prose that reaches
    # this point is flagged; the third pattern can only match where the first
    # already does. Behaviour is kept as-is -- confirm this is intended.
    regex_patterns = [r'\b\w{5,}\b', r'\b\d{1,}\b', r'\b\w{5,}\b \b\w{5,}\b']
    if any(re.search(pattern, content) for pattern in regex_patterns):
        return "This content is likely generated by AI"

    return "This content is likely not generated by AI"
if __name__ == '__main__':
    # Executed as a script: start Flask's built-in server, listening on all
    # network interfaces on port 7860.
    app.run(
        host='0.0.0.0',
        port=7860,
    )