diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index a38f5148..ed9a5e5a 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -26,8 +26,12 @@ + + + + android:exported="true" + android:enabled="true"> diff --git a/app/src/main/kotlin/com/google/ai/sample/PhotoReasoningApplication.kt b/app/src/main/kotlin/com/google/ai/sample/PhotoReasoningApplication.kt new file mode 100644 index 00000000..444e2069 --- /dev/null +++ b/app/src/main/kotlin/com/google/ai/sample/PhotoReasoningApplication.kt @@ -0,0 +1,41 @@ +package com.google.ai.sample + +import kotlinx.coroutines.CoroutineExceptionHandler +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.SupervisorJob +import android.app.Application +import android.util.Log + +/** + * Application class for maintaining application-wide state and resources + */ +class PhotoReasoningApplication : Application() { + + companion object { + private const val TAG = "PhotoReasoningApp" + + // Application-wide CoroutineScope that is not tied to any lifecycle + // This scope will continue to run even when the app is in the background + val applicationScope = CoroutineScope( + SupervisorJob() + + Dispatchers.Default + + CoroutineExceptionHandler { _, throwable -> + Log.e(TAG, "Uncaught exception in application scope: ${throwable.message}", throwable) + } + ) + + // Instance of the application for global access + private lateinit var instance: PhotoReasoningApplication + + fun getInstance(): PhotoReasoningApplication { + return instance + } + } + + override fun onCreate() { + super.onCreate() + instance = this + Log.d(TAG, "Application created") + } +} diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt index f6b7d367..4e55e1ea 100644 --- a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt +++ b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt @@ -604,12 +604,15 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { showToast("Nehme Screenshot auf durch Simulation der Hardware-Tasten...", false) try { + // Capture screen information before taking the screenshot + val screenInfo = captureScreenInformation() + // Simulate pressing Power + Volume Down buttons to take a screenshot simulateScreenshotButtonCombination() // Wait a moment for the screenshot to be saved, then retrieve it handler.postDelayed({ - retrieveLatestScreenshot() + retrieveLatestScreenshot(screenInfo) }, 1000) // Wait 1 second for the screenshot to be saved } catch (e: Exception) { Log.e(TAG, "Error taking screenshot: ${e.message}") @@ -617,6 +620,265 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { } } + /** + * Capture information about all interactive elements on the screen + */ + private fun captureScreenInformation(): String { + Log.d(TAG, "Capturing screen information") + + // Refresh the root node to ensure we have the latest information + refreshRootNode() + + // Check if root node is available + if (rootNode == null) { + Log.e(TAG, "Root node is null, cannot capture screen information") + return "Keine Bildschirminformationen verfügbar (Root-Knoten ist null)" + } + + // Build a string with information about all interactive elements + val screenInfo = StringBuilder() + screenInfo.append("Bildschirmelemente:\n") + + // Capture information about all interactive elements + val interactiveElements = findAllInteractiveElements(rootNode!!) + + if (interactiveElements.isEmpty()) { + screenInfo.append("Keine interaktiven Elemente gefunden.") + } else { + screenInfo.append("Gefundene interaktive Elemente (${interactiveElements.size}):\n\n") + + interactiveElements.forEachIndexed { index, element -> + screenInfo.append("${index + 1}. ") + + // Get element ID if available + val elementId = getNodeId(element) + if (elementId.isNotEmpty()) { + screenInfo.append("ID: \"$elementId\" ") + } + + // Add element text if available + if (!element.text.isNullOrEmpty()) { + screenInfo.append("Text: \"${element.text}\" ") + } + + // Add element content description if available + if (!element.contentDescription.isNullOrEmpty()) { + screenInfo.append("Beschreibung: \"${element.contentDescription}\" ") + } + + // Try to get the button name from the view hierarchy + val buttonName = getButtonName(element) + if (buttonName.isNotEmpty()) { + screenInfo.append("Name: \"$buttonName\" ") + } + + // Add element class name + screenInfo.append("Klasse: ${element.className} ") + + // Add element bounds + val rect = Rect() + element.getBoundsInScreen(rect) + screenInfo.append("Position: (${rect.centerX()}, ${rect.centerY()}) ") + + // Add element clickable status + screenInfo.append("Klickbar: ${if (element.isClickable) "Ja" else "Nein"}") + + screenInfo.append("\n") + + // Recycle the element to avoid memory leaks + element.recycle() + } + } + + Log.d(TAG, "Screen information captured: ${screenInfo.length} characters") + return screenInfo.toString() + } + + /** + * Get the ID of a node if available + */ + private fun getNodeId(node: AccessibilityNodeInfo): String { + try { + val viewIdResourceName = node.viewIdResourceName + if (!viewIdResourceName.isNullOrEmpty()) { + // Extract the ID name from the resource name (package:id/name) + val parts = viewIdResourceName.split("/") + if (parts.size > 1) { + return parts[1] + } + return viewIdResourceName + } + } catch (e: Exception) { + Log.e(TAG, "Error getting node ID: ${e.message}") + } + return "" + } + + /** + * Try to get the button name from various properties + */ + private fun getButtonName(node: AccessibilityNodeInfo): String { + try { + // First check if the node has text + if (!node.text.isNullOrEmpty()) { + return node.text.toString() + } + + // Then check content description + if (!node.contentDescription.isNullOrEmpty()) { + return node.contentDescription.toString() + } + + // Get the node ID which might contain a name + val nodeId = getNodeId(node) + if (nodeId.isNotEmpty() && !nodeId.startsWith("android:")) { + // Convert camelCase or snake_case to readable format + val readableName = nodeId + .replace("_", " ") + .replace(Regex("([a-z])([A-Z])"), "$1 $2") + .lowercase(Locale.getDefault()) + .capitalize(Locale.getDefault()) + + // If it contains common button names like "new", "add", etc., return it + val commonButtonNames = listOf("new", "add", "edit", "delete", "save", "cancel", "ok", "send") + for (buttonName in commonButtonNames) { + if (readableName.contains(buttonName, ignoreCase = true)) { + return readableName + } + } + + // Return the readable ID name + return readableName + } + + // Check if it's a known button type by class name + val className = node.className?.toString() ?: "" + if (className.contains("Button", ignoreCase = true) || + className.contains("ImageButton", ignoreCase = true) || + className.contains("FloatingActionButton", ignoreCase = true)) { + + // For buttons without text, try to infer name from siblings or parent + val parent = node.parent + if (parent != null) { + // Check if parent has text that might describe this button + if (!parent.text.isNullOrEmpty()) { + val parentText = parent.text.toString() + parent.recycle() + return parentText + } + + // Check siblings for text that might be related + for (i in 0 until parent.childCount) { + val sibling = parent.getChild(i) ?: continue + if (sibling != node && !sibling.text.isNullOrEmpty()) { + val siblingText = sibling.text.toString() + sibling.recycle() + parent.recycle() + return siblingText + } + sibling.recycle() + } + + // Check if this is a FAB (Floating Action Button) which is often used as "New" or "Add" + if (className.contains("FloatingActionButton", ignoreCase = true)) { + parent.recycle() + return "New" + } + + parent.recycle() + } + + // Special case for circular buttons at the bottom of the screen (likely navigation or action buttons) + val rect = Rect() + node.getBoundsInScreen(rect) + val displayMetrics = resources.displayMetrics + val screenHeight = displayMetrics.heightPixels + + // If it's a circular button near the bottom of the screen + if (rect.height() == rect.width() && rect.height() < displayMetrics.densityDpi / 4 && + rect.bottom > screenHeight * 0.8) { + + // Check if it's in the bottom left corner (often "New" or "Add") + if (rect.centerX() < displayMetrics.widthPixels * 0.3) { + return "New" + } + } + + // If it's a button but we couldn't find a name, use a generic name + return "Button" + } + + // For EditText fields, try to get hint text + if (className.contains("EditText", ignoreCase = true)) { + // Try to get hint text using reflection (not always available) + try { + val hintTextMethod = node.javaClass.getMethod("getHintText") + val hintText = hintTextMethod.invoke(node)?.toString() + if (!hintText.isNullOrEmpty()) { + return "Textfeld: $hintText" + } + } catch (e: Exception) { + // Reflection failed, ignore + } + + return "Textfeld" + } + + // For specific view types that are commonly used as buttons + if (className == "android.view.View" || className == "android.widget.ImageView") { + // Check if it's in a position commonly used for specific buttons + val rect = Rect() + node.getBoundsInScreen(rect) + val displayMetrics = resources.displayMetrics + val screenHeight = displayMetrics.heightPixels + val screenWidth = displayMetrics.widthPixels + + // Check if it's a small circular element at the bottom of the screen + if (rect.width() == rect.height() && rect.width() < displayMetrics.densityDpi / 3 && + rect.bottom > screenHeight * 0.9) { + + // Bottom left is often "New" or "Add" + if (rect.centerX() < screenWidth * 0.2) { + return "New" + } + + // Bottom right is often "Send" or "Next" + if (rect.centerX() > screenWidth * 0.8) { + return "Send" + } + } + } + } catch (e: Exception) { + Log.e(TAG, "Error getting button name: ${e.message}") + } + return "" + } + + /** + * Find all interactive elements on the screen + */ + private fun findAllInteractiveElements(node: AccessibilityNodeInfo): List { + val elements = mutableListOf() + + try { + // Check if this node is interactive (clickable, long clickable, or focusable) + if (node.isClickable || node.isLongClickable || node.isFocusable) { + elements.add(AccessibilityNodeInfo.obtain(node)) + } + + // Check all child nodes + for (i in 0 until node.childCount) { + val child = node.getChild(i) ?: continue + elements.addAll(findAllInteractiveElements(child)) + child.recycle() + } + } catch (e: Exception) { + Log.e(TAG, "Error finding interactive elements: ${e.message}") + } + + return elements + } + /** * Simulate pressing Power + Volume Down buttons to take a screenshot */ @@ -686,7 +948,7 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { /** * Retrieve the latest screenshot from the standard screenshot folder */ - private fun retrieveLatestScreenshot() { + private fun retrieveLatestScreenshot(screenInfo: String) { try { Log.d(TAG, "Retrieving latest screenshot") showToast("Suche nach dem aufgenommenen Screenshot...", false) @@ -701,8 +963,8 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { // Convert file to URI val screenshotUri = Uri.fromFile(screenshotFile) - // Add the screenshot to the conversation - addScreenshotToConversation(screenshotUri) + // Add the screenshot to the conversation with screen information + addScreenshotToConversation(screenshotUri, screenInfo) } else { Log.e(TAG, "No screenshot file found") showToast("Kein Screenshot gefunden. Bitte prüfen Sie die Berechtigungen.", true) @@ -834,11 +1096,11 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { } /** - * Add the screenshot to the conversation + * Add the screenshot to the conversation with screen information */ - private fun addScreenshotToConversation(screenshotUri: Uri) { + private fun addScreenshotToConversation(screenshotUri: Uri, screenInfo: String) { try { - Log.d(TAG, "Adding screenshot to conversation: $screenshotUri") + Log.d(TAG, "Adding screenshot to conversation with screen information: $screenshotUri") // Get the MainActivity instance val mainActivity = MainActivity.getInstance() @@ -856,11 +1118,11 @@ class ScreenOperatorAccessibilityService : AccessibilityService() { return } - // Add the screenshot to the conversation - photoReasoningViewModel.addScreenshotToConversation(screenshotUri, applicationContext) + // Add the screenshot to the conversation with screen information + photoReasoningViewModel.addScreenshotToConversation(screenshotUri, applicationContext, screenInfo) - Log.d(TAG, "Screenshot added to conversation") - showToast("Screenshot zur Konversation hinzugefügt", false) + Log.d(TAG, "Screenshot added to conversation with screen information") + showToast("Screenshot mit Bildschirminformationen zur Konversation hinzugefügt", false) } catch (e: Exception) { Log.e(TAG, "Error adding screenshot to conversation: ${e.message}") showToast("Fehler beim Hinzufügen des Screenshots zur Konversation: ${e.message}", true) diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt index b5a087d2..84e0c1a1 100644 --- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt +++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt @@ -14,16 +14,19 @@ import coil.size.Precision import com.google.ai.client.generativeai.GenerativeModel import com.google.ai.client.generativeai.type.content import com.google.ai.sample.MainActivity +import com.google.ai.sample.PhotoReasoningApplication import com.google.ai.sample.ScreenOperatorAccessibilityService import com.google.ai.sample.util.ChatHistoryPreferences import com.google.ai.sample.util.Command import com.google.ai.sample.util.CommandParser import com.google.ai.sample.util.SystemMessagePreferences import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.Job import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.StateFlow import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext class PhotoReasoningViewModel( private val generativeModel: GenerativeModel @@ -68,6 +71,9 @@ class PhotoReasoningViewModel( // ImageLoader and ImageRequestBuilder for processing images private var imageLoader: ImageLoader? = null private var imageRequestBuilder: ImageRequest.Builder? = null + + // Keep track of active jobs to prevent cancellation + private val activeJobs = mutableListOf() fun reason( userInput: String, @@ -111,7 +117,8 @@ class PhotoReasoningViewModel( _chatState.addMessage(pendingAiMessage) _chatMessagesFlow.value = chatMessages - viewModelScope.launch(Dispatchers.IO) { + // Use application scope instead of viewModelScope to prevent cancellation when app goes to background + val job = PhotoReasoningApplication.applicationScope.launch(Dispatchers.IO) { try { val inputContent = content { for (bitmap in selectedImages) { @@ -126,36 +133,54 @@ class PhotoReasoningViewModel( .collect { response -> val newText = response.text ?: "" outputContent += newText - _uiState.value = PhotoReasoningUiState.Success(outputContent) - - // Update the AI message in chat history - updateAiMessage(outputContent) - // Parse and execute commands from the response - processCommands(newText) + // Update UI on main thread + withContext(Dispatchers.Main) { + _uiState.value = PhotoReasoningUiState.Success(outputContent) + + // Update the AI message in chat history + updateAiMessage(outputContent) + + // Parse and execute commands from the response + processCommands(newText) + } } // Save chat history after successful response - saveChatHistory(MainActivity.getInstance()?.applicationContext) + withContext(Dispatchers.Main) { + saveChatHistory(MainActivity.getInstance()?.applicationContext) + } } catch (e: Exception) { Log.e(TAG, "Error generating content: ${e.message}", e) - _uiState.value = PhotoReasoningUiState.Error(e.localizedMessage ?: "Unknown error") - _commandExecutionStatus.value = "Fehler bei der Generierung: ${e.localizedMessage}" - // Update chat with error message - _chatState.replaceLastPendingMessage() - _chatState.addMessage( - PhotoReasoningMessage( - text = e.localizedMessage ?: "Unknown error", - participant = PhotoParticipant.ERROR + // Update UI on main thread + withContext(Dispatchers.Main) { + _uiState.value = PhotoReasoningUiState.Error(e.localizedMessage ?: "Unknown error") + _commandExecutionStatus.value = "Fehler bei der Generierung: ${e.localizedMessage}" + + // Update chat with error message + _chatState.replaceLastPendingMessage() + _chatState.addMessage( + PhotoReasoningMessage( + text = e.localizedMessage ?: "Unknown error", + participant = PhotoParticipant.ERROR + ) ) - ) - _chatMessagesFlow.value = chatMessages - - // Save chat history even after error - saveChatHistory(MainActivity.getInstance()?.applicationContext) + _chatMessagesFlow.value = chatMessages + + // Save chat history even after error + saveChatHistory(MainActivity.getInstance()?.applicationContext) + } } } + + // Track the job to prevent cancellation + synchronized(activeJobs) { + activeJobs.add(job) + + // Clean up completed jobs to prevent memory leaks + activeJobs.removeAll { it.isCompleted } + } } /** @@ -207,7 +232,8 @@ class PhotoReasoningViewModel( * Process commands found in the AI response */ private fun processCommands(text: String) { - viewModelScope.launch(Dispatchers.Main) { + // Use application scope instead of viewModelScope to prevent cancellation when app goes to background + val job = PhotoReasoningApplication.applicationScope.launch(Dispatchers.Main) { try { // Parse commands from the text val commands = CommandParser.parseCommands(text) @@ -313,13 +339,30 @@ class PhotoReasoningViewModel( ) } } + + // Track the job to prevent cancellation + synchronized(activeJobs) { + activeJobs.add(job) + + // Clean up completed jobs to prevent memory leaks + activeJobs.removeAll { it.isCompleted } + } } /** * Add a screenshot to the conversation + * + * @param screenshotUri URI of the screenshot + * @param context Application context + * @param screenInfo Optional information about screen elements (null if not available) */ - fun addScreenshotToConversation(screenshotUri: Uri, context: android.content.Context) { - viewModelScope.launch(Dispatchers.Main) { + fun addScreenshotToConversation( + screenshotUri: Uri, + context: android.content.Context, + screenInfo: String? = null + ) { + // Use application scope instead of viewModelScope to prevent cancellation when app goes to background + val job = PhotoReasoningApplication.applicationScope.launch(Dispatchers.Main) { try { Log.d(TAG, "Adding screenshot to conversation: $screenshotUri") @@ -340,9 +383,16 @@ class PhotoReasoningViewModel( // Show toast Toast.makeText(context, "Verarbeite Screenshot...", Toast.LENGTH_SHORT).show() + // Create message text with screen information if available + val messageText = if (screenInfo != null) { + "Screenshot aufgenommen\n\n$screenInfo" + } else { + "Screenshot aufgenommen" + } + // Add screenshot message to chat history val screenshotMessage = PhotoReasoningMessage( - text = "Screenshot aufgenommen", + text = messageText, participant = PhotoParticipant.USER, imageUris = listOf(screenshotUri.toString()) ) @@ -377,8 +427,15 @@ class PhotoReasoningViewModel( // Show toast Toast.makeText(context, "Screenshot hinzugefügt, sende an KI...", Toast.LENGTH_SHORT).show() + // Create prompt with screen information if available + val prompt = if (screenInfo != null) { + "Analysiere diesen Screenshot. Hier sind die verfügbaren Bildschirmelemente: $screenInfo" + } else { + "Analysiere diesen Screenshot" + } + // Re-send the query with the updated images - reason("Analysiere diesen Screenshot", listOf(bitmap)) + reason(prompt, listOf(bitmap)) // Show a toast to indicate the screenshot was added Toast.makeText(context, "Screenshot zur Konversation hinzugefügt", Toast.LENGTH_SHORT).show() @@ -434,6 +491,14 @@ class PhotoReasoningViewModel( saveChatHistory(context) } } + + // Track the job to prevent cancellation + synchronized(activeJobs) { + activeJobs.add(job) + + // Clean up completed jobs to prevent memory leaks + activeJobs.removeAll { it.isCompleted } + } } /** @@ -470,6 +535,17 @@ class PhotoReasoningViewModel( } } + /** + * Called when ViewModel is cleared + * We override this to ensure our background jobs continue running + */ + override fun onCleared() { + super.onCleared() + Log.d(TAG, "ViewModel cleared, but background jobs will continue running") + // We intentionally do NOT cancel the jobs in activeJobs + // This allows them to continue running in the background + } + /** * Chat state management class */