Spaces:

imseldrith
/

AI-Rephraser

Running

App Files Files Community

imseldrith committed on Feb 8, 2023

Commit

7609276

1 Parent(s): 15040ef

a

Browse files

Files changed (1) hide show

app.py +38 -22

app.py CHANGED Viewed

@@ -1,27 +1,43 @@
-from flask import Flask, request
-from transformers import GPT2Tokenizer, GPT2LMHeadModel
 app = Flask(__name__)
-# Load the pretrained 'gpt2' tokenizer and language-model head once at import
-# time; the /paraphrase handler below reuses these module-level objects.
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-model = GPT2LMHeadModel.from_pretrained('gpt2')
-@app.route('/paraphrase', methods=['POST'])
-def paraphrase():
-    """Generate GPT-2 text from the posted ``input_text`` form field.
-
-    Returns the decoded text of the single generated sequence, or a
-    400 response when ``input_text`` is missing or empty.
-    """
-    # Get the input text from the request
-    input_text = request.form.get('input_text')
-    if not input_text:
-        # form.get() yields None for an absent field; encoding None would raise
-        return "Missing 'input_text' form field", 400
-    # Encode the input text using the tokenizer
-    input_ids = tokenizer.encode(input_text, return_tensors='pt')
-    # Generate the paraphrased text using the model
-    # NOTE(review): top_k/top_p look like sampling-only options; presumably they
-    # have no effect unless do_sample=True is passed - confirm against the docs.
-    output = model.generate(input_ids, max_length=1024, top_k=5, top_p=0.95, num_return_sequences=1)
-    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
-    # Return the paraphrased text
-    return output_text
 if __name__ == '__main__':
-    # When executed as a script, serve the app on every interface at port 7860
-    app.run(host="0.0.0.0", port=7860)

+import nltk
+import re
+from nltk.tokenize import sent_tokenize, word_tokenize
+from nltk.corpus import stopwords
+from nltk.probability import FreqDist
+from flask import Flask, request, render_template
 app = Flask(__name__)
+# Fetch the NLTK data this module relies on. The stop-word list alone is not
+# enough: sent_tokenize/word_tokenize also need the Punkt sentence models
+# ('punkt' on older NLTK releases, 'punkt_tab' on newer ones). nltk.download
+# reports an unknown package instead of raising, so requesting both is safe.
+for resource in ('stopwords', 'punkt', 'punkt_tab'):
+    nltk.download(resource)
+stop_words = set(stopwords.words("english"))
+@app.route('/')
+def index():
+    """Serve the landing page by rendering the ``index.html`` template."""
+    page = render_template('index.html')
+    return page
+@app.route('/detect', methods=['POST'])
+def detect_ai():
+    """Apply simple text heuristics to the posted ``content`` form field.
+
+    The checks run in order and the first one that fires decides the answer:
+    fewer than three sentences, an average content-word frequency below 2,
+    or a match for any of the regex patterns.
+
+    Returns:
+        One of two fixed verdict strings saying whether the content is
+        likely generated by AI.
+    """
+    content = request.form['content']
+    # Tokenize the content into sentences
+    sentences = sent_tokenize(content)
+    # Check for the number of sentences
+    if len(sentences) < 3:
+        return "This content is likely generated by AI"
+    # Tokenize each sentence into words, keeping only lower-cased alphabetic
+    # tokens that are not stop words (drops digits and punctuation as well)
+    tokens = [
+        word.lower()
+        for sentence in sentences
+        for word in word_tokenize(sentence)
+        if word.isalpha() and word.lower() not in stop_words
+    ]
+    # Create a frequency distribution of the words
+    fdist = FreqDist(tokens)
+    # Check the average frequency of words in the content. Text made only of
+    # stop words, digits or punctuation leaves fdist empty; treat that as an
+    # average of 0 instead of dividing by zero.
+    avg_freq = sum(fdist.values()) / len(fdist) if fdist else 0
+    if avg_freq < 2:
+        return "This content is likely generated by AI"
+    # Check for the use of common regex patterns
+    # NOTE(review): the first pattern matches any run of 5+ word characters and
+    # the third is a special case of it, so nearly all input that reaches this
+    # point is reported as AI-generated - confirm this is the intended heuristic.
+    regex_patterns = [r'\b\w{5,}\b', r'\b\d{1,}\b', r'\b\w{5,}\b \b\w{5,}\b']
+    if any(re.search(pattern, content) for pattern in regex_patterns):
+        return "This content is likely generated by AI"
+    return "This content is likely not generated by AI"
 if __name__ == '__main__':
+    # When executed as a script, serve the app on every interface at port 7860
+    app.run(host='0.0.0.0',port=7860)