diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
index a38f5148..ed9a5e5a 100644
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -26,8 +26,12 @@
+
+
+
+ android:exported="true"
+ android:enabled="true">
diff --git a/app/src/main/kotlin/com/google/ai/sample/PhotoReasoningApplication.kt b/app/src/main/kotlin/com/google/ai/sample/PhotoReasoningApplication.kt
new file mode 100644
index 00000000..444e2069
--- /dev/null
+++ b/app/src/main/kotlin/com/google/ai/sample/PhotoReasoningApplication.kt
@@ -0,0 +1,41 @@
+package com.google.ai.sample
+
+import kotlinx.coroutines.CoroutineExceptionHandler
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.SupervisorJob
+import android.app.Application
+import android.util.Log
+
+/**
+ * Process-wide [Application] subclass that owns a long-lived coroutine scope and a
+ * globally reachable reference to itself.
+ */
+class PhotoReasoningApplication : Application() {
+
+    /** Publishes this instance for [getInstance] and logs that the app was created. */
+    override fun onCreate() {
+        super.onCreate()
+        instance = this
+        Log.d(TAG, "Application created")
+    }
+
+    companion object {
+        private const val TAG = "PhotoReasoningApp"
+
+        // Logs any exception that escapes a coroutine launched on applicationScope.
+        private val uncaughtHandler = CoroutineExceptionHandler { _, error ->
+            Log.e(TAG, "Uncaught exception in application scope: ${error.message}", error)
+        }
+
+        /** Scope with a SupervisorJob on Dispatchers.Default; nothing in this class cancels it. */
+        val applicationScope =
+            CoroutineScope(SupervisorJob() + Dispatchers.Default + uncaughtHandler)
+
+        // Assigned in onCreate(); reading it earlier throws because it is lateinit.
+        private lateinit var instance: PhotoReasoningApplication
+
+        /** Returns the application instance stored by [onCreate]. */
+        fun getInstance(): PhotoReasoningApplication = instance
+    }
+}
diff --git a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt
index f6b7d367..4e55e1ea 100644
--- a/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt
@@ -604,12 +604,15 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
showToast("Nehme Screenshot auf durch Simulation der Hardware-Tasten...", false)
try {
+ // Capture screen information before taking the screenshot
+ val screenInfo = captureScreenInformation()
+
// Simulate pressing Power + Volume Down buttons to take a screenshot
simulateScreenshotButtonCombination()
// Wait a moment for the screenshot to be saved, then retrieve it
handler.postDelayed({
- retrieveLatestScreenshot()
+ retrieveLatestScreenshot(screenInfo)
}, 1000) // Wait 1 second for the screenshot to be saved
} catch (e: Exception) {
Log.e(TAG, "Error taking screenshot: ${e.message}")
@@ -617,6 +620,265 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
}
}
+    /**
+     * Builds a textual listing of every interactive element currently on screen.
+     *
+     * The root node is refreshed first. Each element returned by
+     * [findAllInteractiveElements] becomes one numbered line holding, where present, its
+     * view ID, text, content description and inferred name, followed by its class name,
+     * the centre of its on-screen bounds and whether it is clickable.
+     *
+     * The node copies handed out by [findAllInteractiveElements] are recycled before this
+     * function returns, even when formatting throws.
+     *
+     * @return the listing, or a fixed message when no root node is available
+     */
+    private fun captureScreenInformation(): String {
+        Log.d(TAG, "Capturing screen information")
+
+        // Refresh the root node to ensure we have the latest information
+        refreshRootNode()
+
+        // Read the property once into a local so the null check and the use see the same
+        // value; no `!!` assertion is needed.
+        val root = rootNode
+        if (root == null) {
+            Log.e(TAG, "Root node is null, cannot capture screen information")
+            return "Keine Bildschirminformationen verfügbar (Root-Knoten ist null)"
+        }
+
+        val interactiveElements = findAllInteractiveElements(root)
+        val screenInfo = StringBuilder("Bildschirmelemente:\n")
+
+        try {
+            if (interactiveElements.isEmpty()) {
+                screenInfo.append("Keine interaktiven Elemente gefunden.")
+            } else {
+                screenInfo.append("Gefundene interaktive Elemente (${interactiveElements.size}):\n\n")
+                val bounds = Rect() // reused; getBoundsInScreen overwrites it on every call
+
+                interactiveElements.forEachIndexed { index, element ->
+                    screenInfo.append("${index + 1}. ")
+
+                    // Optional attributes are only listed when they carry a value.
+                    val elementId = getNodeId(element)
+                    if (elementId.isNotEmpty()) {
+                        screenInfo.append("ID: \"$elementId\" ")
+                    }
+                    if (!element.text.isNullOrEmpty()) {
+                        screenInfo.append("Text: \"${element.text}\" ")
+                    }
+                    if (!element.contentDescription.isNullOrEmpty()) {
+                        screenInfo.append("Beschreibung: \"${element.contentDescription}\" ")
+                    }
+                    val buttonName = getButtonName(element)
+                    if (buttonName.isNotEmpty()) {
+                        screenInfo.append("Name: \"$buttonName\" ")
+                    }
+
+                    // Class name, centre point and clickability are always listed.
+                    element.getBoundsInScreen(bounds)
+                    screenInfo.append("Klasse: ${element.className} ")
+                    screenInfo.append("Position: (${bounds.centerX()}, ${bounds.centerY()}) ")
+                    screenInfo.append("Klickbar: ${if (element.isClickable) "Ja" else "Nein"}")
+                    screenInfo.append("\n")
+                }
+            }
+        } finally {
+            // Release every node copy, including those never reached if formatting threw,
+            // so no copy is leaked on an error path.
+            interactiveElements.forEach { it.recycle() }
+        }
+
+        Log.d(TAG, "Screen information captured: ${screenInfo.length} characters")
+        return screenInfo.toString()
+    }
+
+    /**
+     * Returns the short view ID of [node].
+     *
+     * When the resource name contains "/" (the usual `package:id/name` form) the second
+     * "/"-separated segment is returned; a name without "/" is returned unchanged.
+     * A missing or empty resource name yields "", as does any exception while reading
+     * it (the exception is logged).
+     */
+    private fun getNodeId(node: AccessibilityNodeInfo): String =
+        try {
+            val resourceName = node.viewIdResourceName
+            when {
+                resourceName.isNullOrEmpty() -> ""
+                else -> resourceName.split("/").getOrElse(1) { resourceName }
+            }
+        } catch (e: Exception) {
+            Log.e(TAG, "Error getting node ID: ${e.message}")
+            ""
+        }
+
+    /**
+     * Derives a human-readable name for [node], trying progressively weaker heuristics.
+     *
+     * Order of precedence:
+     * 1. the node's own text, then its content description;
+     * 2. its view ID (unless it starts with "android:"), with underscores and camelCase
+     *    boundaries turned into spaces, lower-cased, first letter capitalised;
+     * 3. for classes whose name contains "Button": the parent's text, else the first
+     *    sibling with text, else "New" for a FloatingActionButton that has a parent or
+     *    for a small square button in the bottom-left of the screen, else "Button";
+     * 4. for classes whose name contains "EditText": "Textfeld: " plus the hint when one
+     *    can be read reflectively, otherwise "Textfeld";
+     * 5. for plain `View`/`ImageView` nodes: "New" or "Send" for a small square element
+     *    in the bottom-left or bottom-right corner.
+     *
+     * Parent and sibling nodes obtained along the way are recycled on every exit path;
+     * [node] itself is never recycled here.
+     *
+     * @return the inferred name, or "" when no heuristic matches or an exception occurs
+     */
+    private fun getButtonName(node: AccessibilityNodeInfo): String {
+        try {
+            // 1. Explicit labels win.
+            if (!node.text.isNullOrEmpty()) {
+                return node.text.toString()
+            }
+
+            if (!node.contentDescription.isNullOrEmpty()) {
+                return node.contentDescription.toString()
+            }
+
+            // 2. Prettified view ID, e.g. "sendButton" or "send_button" -> "Send button".
+            val nodeId = getNodeId(node)
+            if (nodeId.isNotEmpty() && !nodeId.startsWith("android:")) {
+                val locale = Locale.getDefault()
+                return nodeId
+                    .replace("_", " ")
+                    .replace(Regex("([a-z])([A-Z])"), "$1 $2")
+                    .lowercase(locale)
+                    // Non-deprecated replacement for String.capitalize(locale).
+                    .replaceFirstChar { if (it.isLowerCase()) it.titlecase(locale) else it.toString() }
+            }
+
+            val className = node.className?.toString() ?: ""
+
+            // 3. Button-like classes. "ImageButton" and "FloatingActionButton" both contain
+            //    "Button", so one check covers all three.
+            if (className.contains("Button", ignoreCase = true)) {
+                val parent = node.parent
+                if (parent != null) {
+                    try {
+                        // A labelled parent most likely describes this button.
+                        if (!parent.text.isNullOrEmpty()) {
+                            return parent.text.toString()
+                        }
+
+                        // Otherwise borrow the text of the first labelled sibling.
+                        for (i in 0 until parent.childCount) {
+                            val sibling = parent.getChild(i) ?: continue
+                            try {
+                                if (sibling != node && !sibling.text.isNullOrEmpty()) {
+                                    return sibling.text.toString()
+                                }
+                            } finally {
+                                sibling.recycle()
+                            }
+                        }
+
+                        // A FAB (Floating Action Button) is often used as "New" or "Add".
+                        if (className.contains("FloatingActionButton", ignoreCase = true)) {
+                            return "New"
+                        }
+                    } finally {
+                        // Single release point for every exit path, including exceptions.
+                        parent.recycle()
+                    }
+                }
+
+                // A small square button low on the screen and towards the left edge is
+                // treated as a "New"/"Add" action.
+                val rect = Rect()
+                node.getBoundsInScreen(rect)
+                val displayMetrics = resources.displayMetrics
+                val isSmallSquare =
+                    rect.height() == rect.width() && rect.height() < displayMetrics.densityDpi / 4
+                if (isSmallSquare &&
+                    rect.bottom > displayMetrics.heightPixels * 0.8 &&
+                    rect.centerX() < displayMetrics.widthPixels * 0.3
+                ) {
+                    return "New"
+                }
+
+                // It is a button, but nothing better than a generic name was found.
+                return "Button"
+            }
+
+            // 4. Text fields: prefer the hint text when it can be read.
+            if (className.contains("EditText", ignoreCase = true)) {
+                try {
+                    // Looked up reflectively because the accessor is not always available.
+                    val hintTextMethod = node.javaClass.getMethod("getHintText")
+                    val hintText = hintTextMethod.invoke(node)?.toString()
+                    if (!hintText.isNullOrEmpty()) {
+                        return "Textfeld: $hintText"
+                    }
+                } catch (e: Exception) {
+                    // Reflection failed, ignore
+                }
+
+                return "Textfeld"
+            }
+
+            // 5. Generic views that are commonly used as icon buttons.
+            if (className == "android.view.View" || className == "android.widget.ImageView") {
+                val rect = Rect()
+                node.getBoundsInScreen(rect)
+                val displayMetrics = resources.displayMetrics
+                val screenWidth = displayMetrics.widthPixels
+                val isSmallSquare =
+                    rect.width() == rect.height() && rect.width() < displayMetrics.densityDpi / 3
+
+                // Only small square elements in the bottom tenth of the screen qualify.
+                if (isSmallSquare && rect.bottom > displayMetrics.heightPixels * 0.9) {
+                    // Bottom left is often "New" or "Add"
+                    if (rect.centerX() < screenWidth * 0.2) {
+                        return "New"
+                    }
+
+                    // Bottom right is often "Send" or "Next"
+                    if (rect.centerX() > screenWidth * 0.8) {
+                        return "Send"
+                    }
+                }
+            }
+        } catch (e: Exception) {
+            Log.e(TAG, "Error getting button name: ${e.message}")
+        }
+        return ""
+    }
+
+    /**
+     * Returns copies of [node] and its descendants that are clickable, long-clickable or
+     * focusable (depth-first, parent before children). The caller must recycle every returned node.
+     */
+    private fun findAllInteractiveElements(node: AccessibilityNodeInfo): List<AccessibilityNodeInfo> {
+        val elements = mutableListOf<AccessibilityNodeInfo>()
+        try {
+            if (node.isClickable || node.isLongClickable || node.isFocusable) {
+                elements.add(AccessibilityNodeInfo.obtain(node))
+            }
+            for (i in 0 until node.childCount) {
+                val child = node.getChild(i) ?: continue
+                try {
+                    elements.addAll(findAllInteractiveElements(child))
+                } finally {
+                    child.recycle() // released even if the recursive call throws
+                }
+            }
+        } catch (e: Exception) {
+            // Best effort: keep what was collected so far instead of failing the capture.
+            Log.e(TAG, "Error finding interactive elements: ${e.message}")
+        }
+        return elements
+    }
+
/**
* Simulate pressing Power + Volume Down buttons to take a screenshot
*/
@@ -686,7 +948,7 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
/**
* Retrieve the latest screenshot from the standard screenshot folder
*/
- private fun retrieveLatestScreenshot() {
+ private fun retrieveLatestScreenshot(screenInfo: String) {
try {
Log.d(TAG, "Retrieving latest screenshot")
showToast("Suche nach dem aufgenommenen Screenshot...", false)
@@ -701,8 +963,8 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
// Convert file to URI
val screenshotUri = Uri.fromFile(screenshotFile)
- // Add the screenshot to the conversation
- addScreenshotToConversation(screenshotUri)
+ // Add the screenshot to the conversation with screen information
+ addScreenshotToConversation(screenshotUri, screenInfo)
} else {
Log.e(TAG, "No screenshot file found")
showToast("Kein Screenshot gefunden. Bitte prüfen Sie die Berechtigungen.", true)
@@ -834,11 +1096,11 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
}
/**
- * Add the screenshot to the conversation
+ * Add the screenshot to the conversation with screen information
*/
- private fun addScreenshotToConversation(screenshotUri: Uri) {
+ private fun addScreenshotToConversation(screenshotUri: Uri, screenInfo: String) {
try {
- Log.d(TAG, "Adding screenshot to conversation: $screenshotUri")
+ Log.d(TAG, "Adding screenshot to conversation with screen information: $screenshotUri")
// Get the MainActivity instance
val mainActivity = MainActivity.getInstance()
@@ -856,11 +1118,11 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
return
}
- // Add the screenshot to the conversation
- photoReasoningViewModel.addScreenshotToConversation(screenshotUri, applicationContext)
+ // Add the screenshot to the conversation with screen information
+ photoReasoningViewModel.addScreenshotToConversation(screenshotUri, applicationContext, screenInfo)
- Log.d(TAG, "Screenshot added to conversation")
- showToast("Screenshot zur Konversation hinzugefügt", false)
+ Log.d(TAG, "Screenshot added to conversation with screen information")
+ showToast("Screenshot mit Bildschirminformationen zur Konversation hinzugefügt", false)
} catch (e: Exception) {
Log.e(TAG, "Error adding screenshot to conversation: ${e.message}")
showToast("Fehler beim Hinzufügen des Screenshots zur Konversation: ${e.message}", true)
diff --git a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
index b5a087d2..84e0c1a1 100644
--- a/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
+++ b/app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningViewModel.kt
@@ -14,16 +14,19 @@ import coil.size.Precision
import com.google.ai.client.generativeai.GenerativeModel
import com.google.ai.client.generativeai.type.content
import com.google.ai.sample.MainActivity
+import com.google.ai.sample.PhotoReasoningApplication
import com.google.ai.sample.ScreenOperatorAccessibilityService
import com.google.ai.sample.util.ChatHistoryPreferences
import com.google.ai.sample.util.Command
import com.google.ai.sample.util.CommandParser
import com.google.ai.sample.util.SystemMessagePreferences
import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.Job
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.launch
+import kotlinx.coroutines.withContext
class PhotoReasoningViewModel(
private val generativeModel: GenerativeModel
@@ -68,6 +71,9 @@ class PhotoReasoningViewModel(
// ImageLoader and ImageRequestBuilder for processing images
private var imageLoader: ImageLoader? = null
private var imageRequestBuilder: ImageRequest.Builder? = null
+
+    // Jobs launched on the application scope (bookkeeping only); guarded by synchronized(activeJobs).
+    private val activeJobs = mutableListOf<Job>()
fun reason(
userInput: String,
@@ -111,7 +117,8 @@ class PhotoReasoningViewModel(
_chatState.addMessage(pendingAiMessage)
_chatMessagesFlow.value = chatMessages
- viewModelScope.launch(Dispatchers.IO) {
+ // Use application scope instead of viewModelScope to prevent cancellation when app goes to background
+ val job = PhotoReasoningApplication.applicationScope.launch(Dispatchers.IO) {
try {
val inputContent = content {
for (bitmap in selectedImages) {
@@ -126,36 +133,54 @@ class PhotoReasoningViewModel(
.collect { response ->
val newText = response.text ?: ""
outputContent += newText
- _uiState.value = PhotoReasoningUiState.Success(outputContent)
-
- // Update the AI message in chat history
- updateAiMessage(outputContent)
- // Parse and execute commands from the response
- processCommands(newText)
+ // Update UI on main thread
+ withContext(Dispatchers.Main) {
+ _uiState.value = PhotoReasoningUiState.Success(outputContent)
+
+ // Update the AI message in chat history
+ updateAiMessage(outputContent)
+
+ // Parse and execute commands from the response
+ processCommands(newText)
+ }
}
// Save chat history after successful response
- saveChatHistory(MainActivity.getInstance()?.applicationContext)
+ withContext(Dispatchers.Main) {
+ saveChatHistory(MainActivity.getInstance()?.applicationContext)
+ }
} catch (e: Exception) {
Log.e(TAG, "Error generating content: ${e.message}", e)
- _uiState.value = PhotoReasoningUiState.Error(e.localizedMessage ?: "Unknown error")
- _commandExecutionStatus.value = "Fehler bei der Generierung: ${e.localizedMessage}"
- // Update chat with error message
- _chatState.replaceLastPendingMessage()
- _chatState.addMessage(
- PhotoReasoningMessage(
- text = e.localizedMessage ?: "Unknown error",
- participant = PhotoParticipant.ERROR
+ // Update UI on main thread
+ withContext(Dispatchers.Main) {
+ _uiState.value = PhotoReasoningUiState.Error(e.localizedMessage ?: "Unknown error")
+ _commandExecutionStatus.value = "Fehler bei der Generierung: ${e.localizedMessage}"
+
+ // Update chat with error message
+ _chatState.replaceLastPendingMessage()
+ _chatState.addMessage(
+ PhotoReasoningMessage(
+ text = e.localizedMessage ?: "Unknown error",
+ participant = PhotoParticipant.ERROR
+ )
)
- )
- _chatMessagesFlow.value = chatMessages
-
- // Save chat history even after error
- saveChatHistory(MainActivity.getInstance()?.applicationContext)
+ _chatMessagesFlow.value = chatMessages
+
+ // Save chat history even after error
+ saveChatHistory(MainActivity.getInstance()?.applicationContext)
+ }
}
}
+
+ // Track the job to prevent cancellation
+ synchronized(activeJobs) {
+ activeJobs.add(job)
+
+ // Clean up completed jobs to prevent memory leaks
+ activeJobs.removeAll { it.isCompleted }
+ }
}
/**
@@ -207,7 +232,8 @@ class PhotoReasoningViewModel(
* Process commands found in the AI response
*/
private fun processCommands(text: String) {
- viewModelScope.launch(Dispatchers.Main) {
+ // Use application scope instead of viewModelScope to prevent cancellation when app goes to background
+ val job = PhotoReasoningApplication.applicationScope.launch(Dispatchers.Main) {
try {
// Parse commands from the text
val commands = CommandParser.parseCommands(text)
@@ -313,13 +339,30 @@ class PhotoReasoningViewModel(
)
}
}
+
+ // Track the job to prevent cancellation
+ synchronized(activeJobs) {
+ activeJobs.add(job)
+
+ // Clean up completed jobs to prevent memory leaks
+ activeJobs.removeAll { it.isCompleted }
+ }
}
/**
* Add a screenshot to the conversation
+ *
+ * @param screenshotUri URI of the screenshot
+ * @param context Application context
+ * @param screenInfo Optional information about screen elements (null if not available)
*/
- fun addScreenshotToConversation(screenshotUri: Uri, context: android.content.Context) {
- viewModelScope.launch(Dispatchers.Main) {
+ fun addScreenshotToConversation(
+ screenshotUri: Uri,
+ context: android.content.Context,
+ screenInfo: String? = null
+ ) {
+ // Use application scope instead of viewModelScope to prevent cancellation when app goes to background
+ val job = PhotoReasoningApplication.applicationScope.launch(Dispatchers.Main) {
try {
Log.d(TAG, "Adding screenshot to conversation: $screenshotUri")
@@ -340,9 +383,16 @@ class PhotoReasoningViewModel(
// Show toast
Toast.makeText(context, "Verarbeite Screenshot...", Toast.LENGTH_SHORT).show()
+ // Create message text with screen information if available
+ val messageText = if (screenInfo != null) {
+ "Screenshot aufgenommen\n\n$screenInfo"
+ } else {
+ "Screenshot aufgenommen"
+ }
+
// Add screenshot message to chat history
val screenshotMessage = PhotoReasoningMessage(
- text = "Screenshot aufgenommen",
+ text = messageText,
participant = PhotoParticipant.USER,
imageUris = listOf(screenshotUri.toString())
)
@@ -377,8 +427,15 @@ class PhotoReasoningViewModel(
// Show toast
Toast.makeText(context, "Screenshot hinzugefügt, sende an KI...", Toast.LENGTH_SHORT).show()
+ // Create prompt with screen information if available
+ val prompt = if (screenInfo != null) {
+ "Analysiere diesen Screenshot. Hier sind die verfügbaren Bildschirmelemente: $screenInfo"
+ } else {
+ "Analysiere diesen Screenshot"
+ }
+
// Re-send the query with the updated images
- reason("Analysiere diesen Screenshot", listOf(bitmap))
+ reason(prompt, listOf(bitmap))
// Show a toast to indicate the screenshot was added
Toast.makeText(context, "Screenshot zur Konversation hinzugefügt", Toast.LENGTH_SHORT).show()
@@ -434,6 +491,14 @@ class PhotoReasoningViewModel(
saveChatHistory(context)
}
}
+
+ // Track the job to prevent cancellation
+ synchronized(activeJobs) {
+ activeJobs.add(job)
+
+ // Clean up completed jobs to prevent memory leaks
+ activeJobs.removeAll { it.isCompleted }
+ }
}
/**
@@ -470,6 +535,17 @@ class PhotoReasoningViewModel(
}
}
+    /**
+     * Lifecycle hook invoked when this ViewModel is cleared.
+     *
+     * Only delegates to the superclass and logs. The entries of activeJobs are
+     * deliberately left uncancelled so that work already launched keeps running.
+     */
+    override fun onCleared() {
+        super.onCleared()
+        Log.d(TAG, "ViewModel cleared, but background jobs will continue running")
+    }
+
/**
* Chat state management class
*/