import warnings

import torchvision.transforms as transforms
from google_img_source_search import ReverseImageSearcher

# from src.images.CNN_model_classifier import predict_cnn
# from src.images.diffusion_model_classifier import (
#     ImageClassifier,
#     predict_single_image,
# )

warnings.simplefilter(
    action="ignore",
    category=FutureWarning,
)  # disable FutureWarning

import gradio as gr  # noqa: E402
from transformers import (  # noqa: E402
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)

from src.texts.MAGE.deployment import (  # noqa: E402
    detect,
    preprocess,
)
from src.texts.PASTED.pasted_lexicon import Detector  # noqa: E402
from src.texts.Search_Text.search import (  # noqa: E402
    get_important_sentences,
    get_keywords,
    is_human_written,
)
from src.images.Search_Image.search import (  # noqa: E402
    compare_images,
    get_image_from_path,
    get_image_from_url,
)


def convert_score_range(score):
    """
    Converts a score from the range [0, 1] to [-1, 1].

    Args:
        score: The original score in the range [0, 1].

    Returns:
        The converted score in the range [-1, 1].
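
    Example:
        >>> convert_score_range(0.75)
        0.5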
| """ | |
| return 2 * score - 1 | |


def generate_highlighted_text(text_scores):
    """
    Generates a highlighted text string based on the given text and scores.

    Args:
        text_scores: A list of tuples, where each tuple contains a text
            segment and its score.

    Returns:
        A string of HTML code with highlighted text.
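
    Example:
        >>> generate_highlighted_text([("Hello", 0.5)])
        "<span style='background-color: rgba(255, 0, 0, 0.5)'>Hello</span>"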
| """ | |
| highlighted_text = "" | |
| for text, score in text_scores: | |
| # Map score to a color using a gradient | |
| color = f"rgba(255, 0, 0, {1 - score})" # Red to green gradient | |
| highlighted_text += ( | |
| f"<span style='background-color: {color}'>{text}</span>" # noqa | |
| ) | |
| return highlighted_text | |


def separate_characters_with_mask(text, mask):
    """Separates characters in a string and pairs them with a mask sign.

    Args:
        text: The input string.
        mask: The mask value paired with every character.

    Returns:
        A list of tuples, where each tuple contains a character and a mask.
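
    Example:
        >>> separate_characters_with_mask("ab", "machine-generated")
        [('a', 'machine-generated'), ('b', 'machine-generated')]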
| """ | |
| return [(char, mask) for char in text] | |


def detect_ai_text(model_name, search_engine, text):
    if search_engine:
        keywords = get_keywords(text)
        important_sentences = get_important_sentences(text, keywords)
        predictions = is_human_written(important_sentences[0])
        print("keywords: ", keywords)
        print("important_sentences: ", important_sentences)
        print("predictions: ", predictions)
        if predictions == -1:
            caption = "[Found exact match] "
            text_scores = list(zip([caption, text], [0, predictions]))
            print("text_scores: ", text_scores)
            return text_scores

    if model_name == "SimLLM":
        tokenize_input = SimLLM_tokenizer(text, return_tensors="pt")
        outputs = SimLLM_model(**tokenize_input)
        predictions = outputs.logits.argmax(dim=-1).item()
        if predictions == 0:
            predictions = "human-written"
        else:
            predictions = "machine-generated"
    elif model_name == "MAGE":
        processed_text = preprocess(text)
        predictions = detect(
            processed_text,
            MAGE_tokenizer,
            MAGE_model,
            device,
        )
    elif model_name == "chatgpt-detector-roberta":
        predictions = roberta_pipeline_en(text)[0]["label"]
        if predictions == "Human":
            predictions = "human-written"
        else:  # ChatGPT
            predictions = "machine-generated"
    elif model_name == "PASTED-Lexical":
        predictions = detector(text)

    if model_name != "PASTED-Lexical":
        text_scores = list(zip([text], [predictions]))
    else:
        text_scores = []
        for segment, score in predictions:
            new_score = convert_score_range(score)  # map [0, 1] to [-1, 1]
            text_scores.append((segment, new_score))
    return text_scores
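

# Illustrative usage (assumes the tokenizers and models initialised further
# below have already been loaded):
#   detect_ai_text("SimLLM", False, "Some input text")
#   -> [("Some input text", "human-written")]  # or "machine-generated"
# With "PASTED-Lexical", each segment is paired with a float score in
# [-1, 1] instead of a label.
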
diffusion_model_path = (
    "src/images/Diffusion/model_checkpoints/"
    "image-classifier-step=7007-val_loss=0.09.ckpt"
)
cnn_model_path = "src/images/CNN/model_checkpoints/blur_jpg_prob0.5.pth"
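# NOTE: the two checkpoint paths above are only referenced by the
# commented-out local classifiers (diffusion / CNN) further below.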


def detect_ai_image(input_image_path, search_engine=False):
    # if search_engine is True:
    # Search image
    rev_img_searcher = ReverseImageSearcher()
    search_items = rev_img_searcher.search_by_file(input_image_path)
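    # Track the closest match seen so far; 5000 acts as a "no match yet"
    # sentinel, and a lower difference score means a closer match.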
    min_result_difference = 5000
    result_image_url = ""
    result_page_url = ""
    input_image = get_image_from_path(input_image_path)
    for search_item in search_items:
        # print(f'Title: {search_item.page_title}')
        # print(f'Site: {search_item.page_url}')
        # print(f'Img: {search_item.image_url}\n')
        # Compare each search result image with the input image
        result_image = get_image_from_url(search_item.image_url)
        # input_image = get_image_from_url(search_item.image_url)
        result_difference = compare_images(result_image, input_image)
        print(f"Difference with search result: {result_difference}")
        print(f"Result image url: {search_item.page_url}\n")
        if min_result_difference > result_difference:
            min_result_difference = result_difference
            result_image_url = search_item.image_url
            result_page_url = search_item.page_url
        if result_difference == 0:
            break
| result = f"<h1>Input image is LIKELY SIMILAR to image from:</h1>"\ | |
| f"<ul>"\ | |
| f'<li>\nPage URL: <a href="url">{result_page_url}</a></li>'\ | |
| f'<li>\nImage URL: <a href="url">{result_image_url}</a></li>'\ | |
| f"<li>\nDifference score: {min_result_difference}</li>"\ | |
| f"</ul>" | |
| elif 10 > min_result_difference > 0: | |
| result = f"<h1>Input image is potentially a VARIATRION from:</h1>"\ | |
| f"<ul>"\ | |
| f'<li>\nPage URL: <a href="url">{result_page_url}</a></li>'\ | |
| f'<li>\nImage URL: <a href="url">{result_image_url}</a></li>'\ | |
| f"<li>\nDifference score: {min_result_difference}</li>"\ | |
| f"</ul>" | |
| elif min_result_difference < 5000: | |
| result = f"<h1>Input image is not similar to any search results.</h1>"\ | |
| f"<ul>"\ | |
| f'<li>\nPage URL: <a href="url">{result_page_url}</a></li>'\ | |
| f'<li>\nImage URL: <a href="url">{result_image_url}</a></li>'\ | |
| f"<li>\nDifference score: {min_result_difference}</li>"\ | |
| f"</ul>" | |
| else: | |
| result = f"<h1>No search result found.</h1>"\ | |
| return result | |
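

# Illustrative usage (requires network access for the reverse image search):
#   detect_ai_image("src/images/samples/real.png")
#   -> an HTML string summarising the closest match found, or a
#      "No search result found." message if the search returns nothing.
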
# def get_prediction_diffusion(image):
#     model = ImageClassifier.load_from_checkpoint(diffusion_model_path)
#     prediction = predict_single_image(image, model)
#     return (prediction >= 0.5, prediction)


# def get_prediction_cnn(image):
#     prediction = predict_cnn(image, cnn_model_path)
#     return (prediction >= 0.5, prediction)


# # Define the transformations for the image
# transform = transforms.Compose(
#     [
#         transforms.Resize((224, 224)),  # Image size expected by ResNet50
#         transforms.ToTensor(),
#         transforms.Normalize(
#             mean=[0.485, 0.456, 0.406],
#             std=[0.229, 0.224, 0.225],
#         ),
#     ],
# )
# image_tensor = transform(inp)
# pred_diff, prob_diff = get_prediction_diffusion(image_tensor)
# pred_cnn, prob_cnn = get_prediction_cnn(image_tensor)
# verdict = (
#     "AI Generated" if (pred_diff or pred_cnn) else "No GenAI detected"
# )
# return (
#     f"<h1>{verdict}</h1>"
#     f"<ul>"
#     f"<li>Diffusion detection score: {prob_diff:.1%} "
#     f"{'(MATCH)' if pred_diff else ''}</li>"
#     f"<li>CNN detection score: {prob_cnn:.1%} "
#     f"{'(MATCH)' if pred_cnn else ''}</li>"
#     f"</ul>"
# )


# Define GPUs
device = "cpu"  # use 'cuda:0' if GPU is available

# init MAGE
model_dir = "yaful/MAGE"  # model in huggingface
MAGE_tokenizer = AutoTokenizer.from_pretrained(model_dir)
MAGE_model = AutoModelForSequenceClassification.from_pretrained(model_dir).to(
    device,
)

# init chatgpt-detector-roberta
model_dir = "Hello-SimpleAI/chatgpt-detector-roberta"  # model in huggingface
roberta_pipeline_en = pipeline(task="text-classification", model=model_dir)

# init PASTED
model_dir = "linzw/PASTED-Lexical"
detector = Detector(model_dir, device)

# init SimLLM
model_path = "./models/single_model_detector"
SimLLM_tokenizer = AutoTokenizer.from_pretrained(model_path)
SimLLM_model = AutoModelForSequenceClassification.from_pretrained(model_path)
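# NOTE: this assumes a fine-tuned SimLLM checkpoint has been placed locally
# at ./models/single_model_detector; it is not fetched from the Hugging Face
# Hub automatically.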


# Init variables for UI
title = """
<center>
<h1> AI-generated content detection </h1>
<b> Demo by NICT & Tokyo Techies </b>
</center>
"""
examples = [
    [
        "SimLLM",
        False,
        """\
The BBC's long-running consumer rights series Watchdog is to end as a \
standalone programme, instead becoming part of The One Show. Watchdog \
began in 1980 as a strand of Nationwide, but proved so popular it \
became a separate programme in 1985. Co-host Steph McGovern has moved \
to Channel 4, but Matt Allwright and Nikki Fox will stay to front the \
new strand. The BBC said they would investigate viewer complaints all \
year round rather than for two series a year.
""",
    ],
    [
        "chatgpt-detector-roberta",
        False,
        """\
Artificial intelligence (AI) is the science of making machines \
intelligent. It enables computers to learn from data, recognize \
patterns, and make decisions. AI powers many technologies we use \
daily, from voice assistants to self-driving cars. It's rapidly \
evolving, promising to revolutionize various industries and reshape \
the future.""",
    ],
]
model_remark = """<left>
Model sources:
<a href="https://github.com/Tokyo-Techies/prj-nict-ai-content-detection">SimLLM</a>,
<a href="https://github.com/yafuly/MAGE">MAGE</a>,
<a href="https://huggingface.co/Hello-SimpleAI/chatgpt-detector-roberta">chatgpt-detector-roberta</a>,
<a href="https://github.com/Linzwcs/PASTED">PASTED-Lexical</a>.
</left>
"""  # noqa: E501
image_samples = [
    ["src/images/samples/fake_dalle.jpg", "Generated (Dall-E)"],
    ["src/images/samples/fake_midjourney.png", "Generated (MidJourney)"],
    ["src/images/samples/fake_stable.jpg", "Generated (Stable Diffusion)"],
    ["src/images/samples/fake_cnn.png", "Generated (GAN)"],
    ["src/images/samples/real.png", "Organic"],
    [
        "https://p.potaufeu.asahi.com/1831-p/picture/27695628/89644a996fdd0cfc9e06398c64320fbe.jpg",  # noqa: E501
        "Internet GenAI",
    ],
]
image_samples_path = [i[0] for i in image_samples]
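# These local sample paths are passed to launch(allowed_paths=...) below so
# that Gradio is permitted to serve the files.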


# UI
with gr.Blocks() as demo:
    with gr.Row():
        gr.HTML(title)
    with gr.Row():
        with gr.Tab("Text"):
            with gr.Row():
                with gr.Column():
                    model = gr.Dropdown(
                        [
                            "SimLLM",
                            "MAGE",
                            "chatgpt-detector-roberta",
                            "PASTED-Lexical",
                        ],
                        label="Detection model",
                    )
                    search_engine = gr.Checkbox(label="Use search engine")
                    gr.HTML(model_remark)
                with gr.Column():
                    text_input = gr.Textbox(
                        label="Input text",
                        placeholder="Enter text here...",
                        lines=5,
                    )
                    output = gr.HighlightedText(
                        label="Detection results",
                        combine_adjacent=True,
                        show_legend=True,
                        color_map={
                            "human-written": "#7d58cf",
                            "machine-generated": "#e34242",
                        },
                    )
            gr.Examples(
                examples=examples,
                inputs=[model, search_engine, text_input],
            )
            model.change(
                detect_ai_text,
                inputs=[model, search_engine, text_input],
                outputs=output,
            )
            search_engine.change(
                detect_ai_text,
                inputs=[model, search_engine, text_input],
                outputs=output,
            )
            text_input.change(
                detect_ai_text,
                inputs=[model, search_engine, text_input],
                outputs=output,
            )
        with gr.Tab("Images"):
            with gr.Row():
                input_image = gr.Image(type="filepath")
                with gr.Column():
                    output_image = gr.Markdown(height=400)
            gr.Examples(
                examples=image_samples,
                inputs=input_image,
            )
            input_image.change(
                detect_ai_image,
                inputs=input_image,
                outputs=output_image,
            )

# demo.launch(share=True)
demo.launch(allowed_paths=image_samples_path, share=True)