Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- app.py +180 -0
- best.pt +3 -0
- requirements.txt +29 -0
- utils/__init__.py +0 -0
- utils/detector.py +95 -0
- utils/medical_agent.py +126 -0
- utils/sign_generator.py +10 -0
- utils/speech.py +29 -0
- utils/translator.py +33 -0
app.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify
|
| 2 |
+
from flask_cors import CORS
|
| 3 |
+
import base64
|
| 4 |
+
import io
|
| 5 |
+
import cv2
|
| 6 |
+
import numpy as np
|
| 7 |
+
import tempfile
|
| 8 |
+
import os
|
| 9 |
+
from PIL import Image
|
| 10 |
+
import logging
|
| 11 |
+
|
| 12 |
+
from utils.detector import ArabicSignDetector
|
| 13 |
+
from utils.translator import MedicalTranslator
|
| 14 |
+
from utils.medical_agent import MedicalAgent
|
| 15 |
+
from utils.speech import SpeechProcessor
|
| 16 |
+
from utils.sign_generator import SignGenerator
|
| 17 |
+
|
| 18 |
+
# Send log records of INFO level and above to the root handler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Flask application object; CORS is applied with flask_cors defaults.
app = Flask(__name__)
CORS(app)

# Global instances -- all None until initialize_models() assigns them.
detector = translator = medical_agent = speech_processor = sign_generator = None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def initialize_models():
    """Construct every model wrapper and publish them as module globals.

    The wrappers are built into local variables first and only assigned to the
    globals once all constructors have succeeded, so a failure part-way
    through never leaves the module with a half-initialized set of models.

    Raises:
        Exception: whatever a constructor raised; it is logged together with
            its traceback and then re-raised unchanged.
    """
    global detector, translator, medical_agent, speech_processor, sign_generator

    logger.info("🔄 Initializing models...")

    try:
        new_detector = ArabicSignDetector()
        new_translator = MedicalTranslator()
        new_medical_agent = MedicalAgent()
        new_speech_processor = SpeechProcessor()
        new_sign_generator = SignGenerator()
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("❌ Initialization failed: %s", e)
        raise

    detector = new_detector
    translator = new_translator
    medical_agent = new_medical_agent
    speech_processor = new_speech_processor
    sign_generator = new_sign_generator

    logger.info("🎉 All models initialized!")
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@app.route('/')
def index():
    """Root endpoint: return a fixed plain-text banner."""
    banner = "Medical Agent API is running!"
    return banner
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@app.route('/health')
def health_check():
    """Health endpoint: always answers with a static ``healthy`` JSON status."""
    payload = {"status": "healthy"}
    return jsonify(payload)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@app.route('/api/process-sign', methods=['POST'])
def process_sign_language():
    """Handle one patient turn: sign image -> Arabic -> English -> agent reply.

    Request body (JSON object):
        image: base64-encoded image, optionally prefixed as a ``data:image``
            URI.
        session_id: optional; forwarded to the medical agent
            (defaults to ``'default'``).

    Responses:
        200 with the detected letters, both translations, the agent reply in
            English and Arabic, sign data and conversation state; or, when
            nothing is detected, a 200 body carrying ``error`` and empty
            texts (kept as in the original contract).
        400 when the body is not a JSON object, ``image`` is missing, or the
            payload cannot be decoded as an image.
        503 when the models have not been initialized.
        500 for any other failure (message included in ``error``).
    """
    try:
        # silent=True yields None instead of raising on a missing/invalid
        # JSON body, so bad requests become a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        image_data = data.get('image')
        if not isinstance(image_data, str) or not image_data:
            return jsonify({'error': "Missing 'image' field"}), 400

        # The globals are only populated by initialize_models(); fail clearly
        # instead of with an AttributeError on None.
        required = (detector, translator, medical_agent, sign_generator)
        if any(model is None for model in required):
            return jsonify({'error': 'Models are not initialized'}), 503

        if image_data.startswith('data:image'):
            # Drop the "data:image/...;base64," header of a data URI.
            image_data = image_data.split(',', 1)[-1]

        try:
            image_bytes = base64.b64decode(image_data)
            # Normalize RGBA / palette / grayscale inputs to 3-channel RGB so
            # the colour conversion below always applies.
            image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
        except (ValueError, OSError):
            # binascii.Error is a ValueError; PIL's unidentified-image error
            # is an OSError.
            return jsonify({'error': 'Invalid image data'}), 400

        image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # Detect Arabic letters (NO medical mapping)
        detection_result = detector.detect_letters(image_np)

        if not detection_result['success']:
            return jsonify({
                'error': 'No letters detected',
                'arabic_text': '',
                'english_text': ''
            })

        # Get the actual Arabic text from letters
        arabic_text = detection_result['arabic_text']

        # Translate to English for medical agent
        english_text = translator.ar_to_en(arabic_text)

        # Process with medical agent (agent understands actual text)
        agent_response = medical_agent.process_input(
            english_text,
            session_id=data.get('session_id', 'default')
        )

        # Translate response back to Arabic
        arabic_response = translator.en_to_ar(agent_response['response'])

        # Generate sign animation
        sign_data = sign_generator.text_to_sign(arabic_response)

        return jsonify({
            'success': True,
            'detected_letters': detection_result['letters'],
            'arabic_text': arabic_text,
            'english_translation': english_text,
            'agent_response_english': agent_response['response'],
            'agent_response_arabic': arabic_response,
            'sign_data': sign_data,
            'question_count': agent_response.get('question_count', 0),
            'conversation_state': agent_response.get('state', 'questioning')
        })

    except Exception as e:
        logger.exception("Error in process-sign: %s", e)
        return jsonify({'error': str(e)}), 500
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
@app.route('/api/process-audio', methods=['POST'])
def process_audio():
    """Handle one doctor turn: audio -> text -> simplified question -> sign data.

    Request body (JSON object):
        audio: base64-encoded audio, optionally prefixed as a ``data:audio``
            URI. The bytes are written to a temporary ``.wav`` file for the
            speech processor.

    Responses:
        200 with the transcript, the rephrased question in English and
            Arabic, and sign data.
        400 when the body is not a JSON object, ``audio`` is missing, or the
            payload is not valid base64.
        503 when the models have not been initialized.
        500 for any other failure (message included in ``error``).
    """
    audio_path = None
    try:
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        audio_data = data.get('audio')
        if not isinstance(audio_data, str) or not audio_data:
            return jsonify({'error': "Missing 'audio' field"}), 400

        required = (speech_processor, medical_agent, translator, sign_generator)
        if any(model is None for model in required):
            return jsonify({'error': 'Models are not initialized'}), 503

        if audio_data.startswith('data:audio'):
            # Drop the "data:audio/...;base64," header of a data URI.
            audio_data = audio_data.split(',', 1)[-1]

        try:
            audio_bytes = base64.b64decode(audio_data)
        except ValueError:  # binascii.Error is a ValueError
            return jsonify({'error': 'Invalid audio data'}), 400

        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as f:
            # Record the path before writing so the finally-block can remove
            # the file even if the write itself fails.
            audio_path = f.name
            f.write(audio_bytes)

        doctor_text = speech_processor.speech_to_text(audio_path)
        patient_question = medical_agent.process_doctor_input(doctor_text)
        arabic_question = translator.en_to_ar(patient_question)
        sign_data = sign_generator.text_to_sign(arabic_question)

        return jsonify({
            'success': True,
            'doctor_text': doctor_text,
            'patient_question_english': patient_question,
            'patient_question_arabic': arabic_question,
            'sign_data': sign_data
        })

    except Exception as e:
        logger.exception("Error in process-audio: %s", e)
        return jsonify({'error': str(e)}), 500

    finally:
        # Always remove the temp file; previously it leaked whenever any step
        # after its creation raised.
        if audio_path is not None:
            try:
                os.unlink(audio_path)
            except OSError:
                logger.warning("Could not remove temp audio file %s", audio_path)
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
@app.route('/api/text-to-speech', methods=['POST'])
def text_to_speech():
    """Synthesize speech for ``text`` and return it as a base64 WAV data URI.

    Request body (JSON object):
        text: the text to speak.

    Responses:
        200 with ``audio`` set to ``data:audio/wav;base64,...``.
        400 when the body is not a JSON object or ``text`` is missing/blank.
        503 when the speech processor has not been initialized.
        500 for any other failure (message included in ``error``), including
            the audio file reported by the speech processor being unreadable.
    """
    try:
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        text = data.get('text')
        if not isinstance(text, str) or not text.strip():
            return jsonify({'error': "Missing 'text' field"}), 400

        if speech_processor is None:
            return jsonify({'error': 'Models are not initialized'}), 503

        # NOTE(review): the fixed name "summary" means concurrent requests
        # share one output file -- confirm whether that is acceptable.
        audio_path = speech_processor.text_to_speech(text, "summary")

        with open(audio_path, 'rb') as f:
            audio_bytes = f.read()

        audio_b64 = base64.b64encode(audio_bytes).decode('utf-8')

        return jsonify({
            'success': True,
            'audio': f"data:audio/wav;base64,{audio_b64}"
        })

    except Exception as e:
        logger.exception("Error in text-to-speech: %s", e)
        return jsonify({'error': str(e)}), 500
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
if __name__ == '__main__':
    initialize_models()
    # The server binds to every interface, so the Werkzeug interactive
    # debugger (which allows arbitrary code execution) must not be on by
    # default. Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)
|
best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1126bf72b6b69eb9e608ad6132a9a9411c37854e1b08f5bb6ccbe8f6f0418c0
|
| 3 |
+
size 52045963
|
requirements.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core
|
| 2 |
+
torch>=2.0.0
|
| 3 |
+
torchvision>=0.15.0
|
| 4 |
+
ultralytics>=8.0.0
|
| 5 |
+
transformers>=4.30.0
|
| 6 |
+
accelerate>=0.20.0
|
| 7 |
+
|
| 8 |
+
# Audio
|
| 9 |
+
speechbrain>=0.5.15
|
| 10 |
+
librosa>=0.10.0
|
| 11 |
+
pydub>=0.25.1
|
| 12 |
+
soundfile>=0.12.0
|
| 13 |
+
|
| 14 |
+
# Vision
|
| 15 |
+
opencv-python>=4.8.0
|
| 16 |
+
mediapipe>=0.10.0
|
| 17 |
+
Pillow>=10.0.0
|
| 18 |
+
|
| 19 |
+
# Web & API
|
| 20 |
+
flask>=2.3.0
|
| 21 |
+
flask-cors>=4.0.0
|
| 22 |
+
flask-socketio>=5.3.0
|
| 23 |
+
gradio>=3.40.0
|
| 24 |
+
requests>=2.31.0
|
| 25 |
+
|
| 26 |
+
# Utilities
|
| 27 |
+
numpy>=1.24.0
|
| 28 |
+
pydantic>=2.0.0
|
| 29 |
+
python-multipart>=0.0.6
|
utils/__init__.py
ADDED
|
File without changes
|
utils/detector.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
from ultralytics import YOLO
|
| 4 |
+
import torch
|
| 5 |
+
from typing import Dict, List, Any
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class ArabicSignDetector:
    """Detect Arabic sign-language letters in an image with a YOLO model."""

    def __init__(self, model_path: str = 'best.pt'):
        """Load the YOLO weights at ``model_path`` and set the score cut-off."""
        self.model = YOLO(model_path)
        # Detections at or below this confidence are discarded.
        self.confidence_threshold = 0.5

    def detect_letters(self, image: np.ndarray) -> Dict[str, Any]:
        """Detect Arabic letters and form text (NO medical mapping).

        Args:
            image: image array handed straight to the model.

        Returns:
            A dict with ``success`` (True when at least one letter passed the
            threshold), ``arabic_text``, ``letters`` (detection order),
            ``sorted_letters`` (right-to-left), ``confidences``, ``bboxes``
            and ``total_detections``. If anything raises, ``success`` is
            False, ``error`` holds the message and the remaining keys are
            empty / zero, so both result shapes expose the same keys.
        """
        try:
            results = self.model(image, verbose=False)

            detected_letters = []
            confidences = []
            bboxes = []

            for result in results:
                boxes = getattr(result, 'boxes', None)
                if boxes is None:
                    continue

                has_xyxy = hasattr(boxes, 'xyxy')
                for i, (cls_value, conf_value) in enumerate(zip(boxes.cls, boxes.conf)):
                    confidence = float(conf_value)
                    if confidence <= self.confidence_threshold:
                        continue

                    detected_letters.append(self.model.names.get(int(cls_value), ""))
                    confidences.append(confidence)

                    # Get bounding box coordinates
                    if has_xyxy:
                        bboxes.append(boxes.xyxy[i].cpu().numpy().tolist())

            # Sort letters from right to left for Arabic
            sorted_letters = self._sort_arabic_letters(detected_letters, bboxes)

            # Form Arabic text from detected letters
            arabic_text = self._form_arabic_text(sorted_letters)

            return {
                'success': len(detected_letters) > 0,
                'arabic_text': arabic_text,
                'letters': detected_letters,
                'sorted_letters': sorted_letters,
                'confidences': confidences,
                'bboxes': bboxes,
                'total_detections': len(detected_letters)
            }

        except Exception as e:
            return {
                'success': False,
                'error': str(e),
                'arabic_text': '',
                'letters': [],
                'sorted_letters': [],
                'confidences': [],
                'bboxes': [],
                # Present on the success path too; keep the shapes aligned.
                'total_detections': 0
            }

    def _sort_arabic_letters(self, letters: List[str], bboxes: List) -> List[str]:
        """Sort Arabic letters from right to left based on x-coordinate.

        Uses each box's ``x2`` (index 2, the right edge) as the sort key.
        When there are no boxes, or their count does not match the letters,
        the letters are returned in their original order.
        """
        if not bboxes or len(letters) != len(bboxes):
            return letters

        # Descending right edge == right-to-left reading order. sorted() is
        # stable, so letters with equal edges keep their detection order.
        ordered = sorted(zip(bboxes, letters), key=lambda pair: pair[0][2], reverse=True)
        return [letter for _, letter in ordered]

    def _form_arabic_text(self, letters: List[str]) -> str:
        """Form Arabic text by concatenating the letters (empty list -> "")."""
        # Simply join the letters - the agent will understand the actual words
        return "".join(letters)
|
utils/medical_agent.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from typing import Dict, Any
|
| 3 |
+
from transformers import pipeline
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class MedicalAgent:
    """Question-asking intake agent backed by an optional text-generation LLM.

    When the LLM cannot be loaded (``self.llm`` is None) every method falls
    back to canned questions and a template summary.

    NOTE(review): conversation state lives on the instance, and the
    ``session_id`` passed to ``process_input`` is not used, so every caller
    shares one history and one question counter -- confirm whether per-session
    state is required.
    """

    def __init__(self):
        self.llm = self._load_llm()
        self.conversation_history = []
        self.question_count = 0
        self.max_questions = 3

    def _load_llm(self):
        """Return the text-generation pipeline, or None if it cannot be built."""
        try:
            return pipeline(
                "text-generation",
                model="microsoft/DialoGPT-medium",
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto"
            )
        except Exception:
            # Best-effort: run without an LLM, but leave a trace of why.
            import logging
            logging.getLogger(__name__).exception(
                "Could not load LLM; falling back to canned responses"
            )
            return None

    def _generate(self, prompt: str, max_new_tokens: int, **generate_kwargs) -> str:
        """Run the LLM on ``prompt`` and return only the newly generated text.

        ``max_new_tokens`` bounds the continuation alone, so a long prompt no
        longer consumes the whole generation budget. The prompt is also
        stripped defensively in case the pipeline still echoes it.
        """
        outputs = self.llm(
            prompt,
            max_new_tokens=max_new_tokens,
            return_full_text=False,
            **generate_kwargs
        )
        generated = outputs[0]['generated_text']
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        return generated.strip()

    @staticmethod
    def _extract_json_object(text: str):
        """Return the first JSON object embedded in ``text``, or None."""
        decoder = json.JSONDecoder()
        position = text.find('{')
        while position != -1:
            try:
                parsed, _ = decoder.raw_decode(text, position)
            except ValueError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
            position = text.find('{', position + 1)
        return None

    def process_input(self, english_text: str, session_id: str) -> Dict[str, Any]:
        """Record a patient utterance and answer with a question or a summary.

        Args:
            english_text: the patient's message, already translated.
            session_id: accepted for interface compatibility; currently unused.

        Returns:
            A dict with ``response``, ``question_count`` and ``state``
            (``'questioning'`` or ``'summary'``).
        """
        self.conversation_history.append(f"Patient: {english_text}")

        if self.question_count >= self.max_questions:
            return self._generate_summary()

        # Agent analyzes the ACTUAL English text from translation
        analysis = self._analyze_input(english_text)

        # .get(): an analysis without the key means "no follow-up", not a crash.
        if analysis.get("needs_follow_up"):
            question = self._generate_question(analysis, english_text)
            self.question_count += 1

            return {
                'response': question,
                'question_count': self.question_count,
                'state': 'questioning'
            }

        return self._generate_summary()

    def _analyze_input(self, text: str) -> Dict[str, Any]:
        """Analyze the actual translated text.

        Asks the LLM for a JSON verdict and returns it when it parses to an
        object containing ``needs_follow_up``. Otherwise (no LLM, generation
        failure, unusable output) returns a fallback that requests a
        follow-up while fewer than two questions have been asked.
        """
        prompt = f"""
Patient says: "{text}"

As a medical assistant, analyze this and decide:
1. Do we need follow-up questions? (true/false)
2. What specific information is missing?
3. What would be good follow-up questions?

Respond in JSON format only:
{{
"needs_follow_up": true/false,
"missing_info": ["item1", "item2"],
"possible_questions": ["question1", "question2"]
}}
"""

        if self.llm:
            try:
                response = self._generate(prompt, max_new_tokens=300, do_sample=True)
            except Exception:
                import logging
                logging.getLogger(__name__).exception(
                    "LLM analysis failed; using fallback analysis"
                )
            else:
                parsed = self._extract_json_object(response)
                if parsed is not None and "needs_follow_up" in parsed:
                    return parsed

        # Fallback: always ask follow-up unless we have enough info
        return {
            "needs_follow_up": self.question_count < 2,
            "missing_info": ["more details", "duration", "severity"],
            "possible_questions": ["Can you describe more?", "How long?", "How severe?"]
        }

    def _generate_question(self, analysis: Dict, original_text: str) -> str:
        """Return the first suggested question, truncated to five words.

        Falls back to ``"Can you tell me more?"`` when the analysis offers no
        usable question (missing key, empty list, blank entry).
        """
        default_question = "Can you tell me more?"
        questions = analysis.get("possible_questions") or [default_question]
        if isinstance(questions, str):
            # A bare string would otherwise be indexed character by character.
            questions = [questions]

        # Limit to 5 words
        words = str(questions[0]).split()[:5]
        return " ".join(words) or default_question

    def _generate_summary(self) -> Dict[str, Any]:
        """Summarize the last three history entries for the doctor.

        Uses the LLM when available; if there is no LLM, or it produces no
        text, a template summary built from the raw conversation is returned.
        """
        conversation = "\n".join(self.conversation_history[-3:])

        prompt = f"""
Patient conversation summary:
{conversation}

Create a brief medical summary for a doctor in 2-3 sentences.
Focus on main symptoms and key information.
"""

        summary = ""
        if self.llm:
            summary = self._generate(prompt, max_new_tokens=150)
        if not summary:
            summary = f"Summary: {conversation}. Patient needs medical consultation."

        return {
            'response': summary,
            'question_count': self.question_count,
            'state': 'summary'
        }

    def process_doctor_input(self, doctor_text: str) -> str:
        """Rephrase the doctor's question for the patient.

        Returns the LLM's continuation only (never the prompt itself), or
        ``"Can you describe more?"`` when there is no LLM or it returns
        nothing.
        """
        fallback = "Can you describe more?"
        prompt = f"""
Doctor asks: "{doctor_text}"

Rephrase this as a simple, clear question for the patient.
Use maximum 5 words and easy language.
"""

        if not self.llm:
            return fallback

        return self._generate(prompt, max_new_tokens=50) or fallback
|
utils/sign_generator.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class SignGenerator:
    """Placeholder sign generator: wraps text in a fixed-shape dict."""

    def __init__(self):
        """Nothing to set up."""

    def text_to_sign(self, text: str) -> dict:
        """Return placeholder sign data describing ``text``."""
        placeholder = dict(
            animation_data=f"Sign for: {text}",
            duration=3.0,
            type="placeholder",
        )
        return placeholder
|
utils/speech.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline
|
| 2 |
+
import torch
|
| 3 |
+
import tempfile
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class SpeechProcessor:
    """Speech-to-text via a wav2vec2 pipeline, plus a placeholder TTS."""

    def __init__(self):
        self.stt = pipeline(
            "automatic-speech-recognition",
            model="facebook/wav2vec2-base-960h",
            device=0 if torch.cuda.is_available() else -1
        )

        # For demo, we'll use a simple TTS approach
        self.tts_available = False

    def speech_to_text(self, audio_path: str) -> str:
        """Transcribe the audio file at ``audio_path``.

        Returns the pipeline's ``text`` field (``''`` when absent). If
        transcription raises, the failure is logged and the placeholder
        string ``"Audio processed"`` is returned, as before.
        """
        try:
            result = self.stt(audio_path)
            return result.get('text', '')
        except Exception:
            import logging
            logging.getLogger(__name__).exception(
                "Speech-to-text failed for %s", audio_path
            )
            return "Audio processed"

    def text_to_speech(self, text: str, filename: str) -> str:
        """Create a placeholder WAV file and return its path.

        The file is written to the system temp directory as
        ``<filename>.wav`` and holds half a second of 16 kHz mono silence, so
        callers that open the returned path get a valid WAV instead of a
        FileNotFoundError. ``text`` is not synthesized yet.

        Args:
            text: text to speak (unused by the placeholder).
            filename: base name for the output file; any directory components
                are stripped so the file cannot be written elsewhere.

        Returns:
            Path of the WAV file that was written.
        """
        import wave

        safe_name = os.path.basename(filename) or "speech"
        output_path = os.path.join(tempfile.gettempdir(), f"{safe_name}.wav")

        # For demo, create a placeholder audio file
        # In production, integrate with real TTS
        with wave.open(output_path, 'wb') as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)       # 16-bit samples
            wav_file.setframerate(16000)
            wav_file.writeframes(b'\x00\x00' * 8000)  # 0.5 s of silence

        return output_path
|
utils/translator.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import MarianMTModel, MarianTokenizer
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class MedicalTranslator:
    """Arabic <-> English translation using two Helsinki-NLP MarianMT models."""

    def __init__(self):
        self.ar_en_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ar-en")
        self.ar_en_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-ar-en")

        self.en_ar_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
        self.en_ar_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-ar")

    @staticmethod
    def _translate(tokenizer, model, text: str) -> str:
        """Translate ``text`` with one tokenizer/model pair.

        ``None``, empty and whitespace-only input short-circuits to ``""``
        without touching the model. Input is truncated to 512 tokens.
        """
        if not text or not text.strip():
            return ""

        inputs = tokenizer(text, return_tensors="pt", padding=True, max_length=512, truncation=True)
        with torch.no_grad():  # inference only; no gradients needed
            outputs = model.generate(**inputs)

        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    def ar_to_en(self, text: str) -> str:
        """Translate Arabic to English"""
        return self._translate(self.ar_en_tokenizer, self.ar_en_model, text)

    def en_to_ar(self, text: str) -> str:
        """Translate English to Arabic"""
        return self._translate(self.en_ar_tokenizer, self.en_ar_model, text)
|