diff --git a/droidbot/app.py b/droidbot/app.py
index a6db7292..dfb25795 100644
--- a/droidbot/app.py
+++ b/droidbot/app.py
@@ -28,6 +28,7 @@ def __init__(self, app_path, output_dir=None):
         from androguard.core.bytecodes.apk import APK
         self.apk = APK(self.app_path)
         self.package_name = self.apk.get_package()
+        self.app_name = self.apk.get_app_name()
         self.main_activity = self.apk.get_main_activity()
         self.permissions = self.apk.get_permissions()
         self.activities = self.apk.get_activities()
diff --git a/droidbot/device_state.py b/droidbot/device_state.py
index 3fdd1eb7..5cb85555 100644
--- a/droidbot/device_state.py
+++ b/droidbot/device_state.py
@@ -29,6 +29,7 @@ def __init__(self, device, views, foreground_activity, activity_stack, backgroun
         self.state_str = self.__get_state_str()
         self.structure_str = self.__get_content_free_state_str()
         self.search_content = self.__get_search_content()
+        self.text_representation = self.get_text_representation()
         self.possible_events = None
         self.width = device.get_width(refresh=True)
         self.height = device.get_height(refresh=False)
@@ -327,7 +328,9 @@ def __key_if_true(view_dict, key):
 
     @staticmethod
     def __safe_dict_get(view_dict, key, default=None):
-        return view_dict[key] if (key in view_dict) else default
+        # a key that is present but holds None also falls back to the default
+        value = view_dict.get(key)
+        return value if value is not None else default
 
     @staticmethod
     def get_view_center(view_dict):
@@ -428,10 +431,10 @@ def get_possible_input(self):
 
         for view_id in enabled_view_ids:
             if self.__safe_dict_get(self.views[view_id], 'scrollable'):
-                possible_events.append(ScrollEvent(view=self.views[view_id], direction="UP"))
-                possible_events.append(ScrollEvent(view=self.views[view_id], direction="DOWN"))
-                possible_events.append(ScrollEvent(view=self.views[view_id], direction="LEFT"))
-                possible_events.append(ScrollEvent(view=self.views[view_id], direction="RIGHT"))
+                possible_events.append(ScrollEvent(view=self.views[view_id], direction="up"))
+                possible_events.append(ScrollEvent(view=self.views[view_id], direction="down"))
+                possible_events.append(ScrollEvent(view=self.views[view_id], direction="left"))
+                possible_events.append(ScrollEvent(view=self.views[view_id], direction="right"))
 
         for view_id in enabled_view_ids:
             if self.__safe_dict_get(self.views[view_id], 'checkable'):
@@ -463,3 +466,180 @@ def get_possible_input(self):
 
         self.possible_events = possible_events
         return [] + possible_events
+
+    def get_text_representation(self, merge_buttons=False):
+        """
+        Get a text representation of current state
+        :param merge_buttons: if True, describe a clickable view together with
+                              the views returned by _extract_all_children() for
+                              its clickable (or checkable) ancestor
+        :return: (state_desc, activity, indexed_views): the element
+                 descriptions joined by newlines, the last '/'-separated part
+                 of the foreground activity, and the described view dicts
+        """
+        enabled_view_ids = []
+        for view_dict in self.views:
+            # exclude navigation bar if exists
+            if self.__safe_dict_get(view_dict, 'visible') and \
+                    self.__safe_dict_get(view_dict, 'resource_id') not in \
+               ['android:id/navigationBarBackground',
+                'android:id/statusBarBackground']:
+                enabled_view_ids.append(view_dict['temp_id'])
+
+        # NOTE(review): the markup of these frames was stripped in the reviewed copy
+        # of this patch; the placeholders (id=@, text='&', attr=null, bounds=null, #)
+        # are the ones substituted below, the tag names must be confirmed upstream.
+        text_frame = "<p id=@ text='&' attr=null bounds=null>#</p>"
+        btn_frame = "<button id=@ text='&' attr=null bounds=null>#</button>"
+        checkbox_frame = "<checkbox id=@ text='&' attr=null bounds=null>#</checkbox>"
+        input_frame = "<input id=@ text='&' attr=null bounds=null>#</input>"
+        scroll_frame = "<scrollbar id=@ attr=null bounds=null></scrollbar>"
+
+        view_descs = []
+        indexed_views = []
+        removed_view_ids = set()
+
+        for view_id in enabled_view_ids:
+            if view_id in removed_view_ids:
+                continue
+            view = self.views[view_id]
+            clickable = self._get_self_ancestors_property(view, 'clickable')
+            scrollable = self.__safe_dict_get(view, 'scrollable')
+            checkable = self._get_self_ancestors_property(view, 'checkable')
+            long_clickable = self._get_self_ancestors_property(view, 'long_clickable')
+            editable = self.__safe_dict_get(view, 'editable')
+            checked = self.__safe_dict_get(view, 'checked', default=False)
+            selected = self.__safe_dict_get(view, 'selected', default=False)
+            content_description = self.__safe_dict_get(view, 'content_description', default='')
+            view_text = self.__safe_dict_get(view, 'text', default='')
+            bounds = self.__safe_dict_get(view, 'bounds')
+            view_bounds = f'{bounds[0][0]},{bounds[0][1]},{bounds[1][0]},{bounds[1][1]}'
+            # views with nothing to show are skipped unless they can be scrolled
+            if not content_description and not view_text and not scrollable:
+                continue
+
+            view_local_id = str(len(view_descs))
+            if editable:
+                view_desc = input_frame.replace('@', view_local_id).replace('#', view_text)
+                if content_description:
+                    view_desc = view_desc.replace('&', content_description)
+                else:
+                    view_desc = view_desc.replace(" text='&'", "")
+            elif checkable:
+                view_desc = checkbox_frame.replace('@', view_local_id).replace('#', view_text)
+                if content_description:
+                    view_desc = view_desc.replace('&', content_description)
+                else:
+                    view_desc = view_desc.replace(" text='&'", "")
+            elif clickable:
+                if merge_buttons:
+                    # below is to merge buttons, led to bugs
+                    clickable_ancestor_id = self._get_ancestor_id(view=view, key='clickable')
+                    if not clickable_ancestor_id:
+                        clickable_ancestor_id = self._get_ancestor_id(view=view, key='checkable')
+                    clickable_children_ids = self._extract_all_children(id=clickable_ancestor_id)
+                    if view_id not in clickable_children_ids:
+                        clickable_children_ids.append(view_id)
+                    view_text, content_description = self._merge_text(clickable_children_ids)
+                    checked = self._get_children_checked(clickable_children_ids)
+                    # end of merging buttons
+                view_desc = btn_frame.replace('@', view_local_id).replace('#', view_text)
+                if content_description:
+                    view_desc = view_desc.replace('&', content_description)
+                else:
+                    view_desc = view_desc.replace(" text='&'", "")
+                if merge_buttons:
+                    for clickable_child in clickable_children_ids:
+                        if clickable_child in enabled_view_ids and clickable_child != view_id:
+                            removed_view_ids.add(clickable_child)
+            elif scrollable:
+                view_desc = scroll_frame.replace('@', view_local_id)
+            else:
+                view_desc = text_frame.replace('@', view_local_id).replace('#', view_text)
+                if content_description:
+                    view_desc = view_desc.replace('&', content_description)
+                else:
+                    view_desc = view_desc.replace(" text='&'", "")
+
+            allowed_actions = ['touch']
+            special_attrs = []
+            if editable:
+                allowed_actions.append('set_text')
+            if checkable:
+                allowed_actions.extend(['select', 'unselect'])
+            if scrollable:
+                allowed_actions.extend(['scroll up', 'scroll down'])
+            # a view can be both checkable (via an ancestor) and scrollable, so
+            # 'touch' is dropped once here instead of once per branch (ValueError)
+            if checkable or scrollable:
+                allowed_actions.remove('touch')
+            if long_clickable:
+                allowed_actions.append('long_touch')
+            if checked or selected:
+                special_attrs.append('selected')
+            view['allowed_actions'] = allowed_actions
+            view['special_attrs'] = special_attrs
+            view['local_id'] = view_local_id
+            if len(special_attrs) > 0:
+                special_attrs = ','.join(special_attrs)
+                view_desc = view_desc.replace("attr=null", f"attr={special_attrs}")
+            else:
+                view_desc = view_desc.replace(" attr=null", "")
+            view_desc = view_desc.replace("bounds=null", f"bound_box={view_bounds}")
+            view_descs.append(view_desc)
+            view['desc'] = view_desc.replace(f' id={view_local_id}', '').replace(f' attr={special_attrs}', '')
+            indexed_views.append(view)
+
+        state_desc = '\n'.join(view_descs)
+        activity = self.foreground_activity.split('/')[-1]
+        return state_desc, activity, indexed_views
+
+    def _get_self_ancestors_property(self, view, key, default=None):
+        """
+        Get the first truthy value of `key` among a view and its ancestors
+        :param view: dict, the view to start from
+        :param key: str, the property to look up
+        :param default: returned when neither the view nor any ancestor has a truthy value
+        """
+        all_views = [view] + [self.views[i] for i in self.get_all_ancestors(view)]
+        for v in all_views:
+            value = self.__safe_dict_get(v, key)
+            if value:
+                return value
+        return default
+
+    def _merge_text(self, children_ids):
+        """
+        Merge the texts and content descriptions of the given views
+        :param children_ids: indexes into self.views of the views to merge
+        :return: (merged_text, merged_desc); every part is truncated to 50
+                 characters before joining, invisible views are skipped
+        """
+        texts, content_descriptions = [], []
+        for childid in children_ids:
+            if not self.__safe_dict_get(self.views[childid], 'visible') or \
+                    self.__safe_dict_get(self.views[childid], 'resource_id') in \
+               ['android:id/navigationBarBackground',
+                'android:id/statusBarBackground']:
+                # if the successor is not visible, then ignore it!
+                continue
+
+            text = self.__safe_dict_get(self.views[childid], 'text', default='')
+            if len(text) > 50:
+                text = text[:50]
+
+            if text != '':
+                texts.append(text)
+
+            content_description = self.__safe_dict_get(self.views[childid], 'content_description', default='')
+            if len(content_description) > 50:
+                content_description = content_description[:50]
+
+            if content_description != '':
+                content_descriptions.append(content_description)
+
+        # NOTE(review): separator restored as '<br>' (stripped in the reviewed copy) - confirm
+        merged_text = '<br>'.join(texts) if len(texts) > 0 else ''
+        merged_desc = '<br>'.join(content_descriptions) if len(content_descriptions) > 0 else ''
+        return merged_text, merged_desc
+
diff --git a/droidbot/input_event.py b/droidbot/input_event.py
index 07373c96..2ef54467 100644
--- a/droidbot/input_event.py
+++ b/droidbot/input_event.py
@@ -74,6 +74,8 @@
 KEY_ExitEvent = "exit"
 KEY_TouchEvent = "touch"
 KEY_LongTouchEvent = "long_touch"
+KEY_SelectEvent = "select"
+KEY_UnselectEvent = "unselect"
 KEY_SwipeEvent = "swipe"
 KEY_ScrollEvent = "scroll"
 KEY_SetTextEvent = "set_text"
@@ -135,6 +137,8 @@ def from_dict(event_dict):
             return TouchEvent(event_dict=event_dict)
         elif event_type == KEY_LongTouchEvent:
             return LongTouchEvent(event_dict=event_dict)
+        elif event_type == KEY_SelectEvent or event_type == KEY_UnselectEvent:
+            return SelectEvent(event_dict=event_dict)
         elif event_type == KEY_SwipeEvent:
             return SwipeEvent(event_dict=event_dict)
         elif event_type == KEY_ScrollEvent:
@@ -483,6 +487,51 @@ def get_views(self):
         return [self.view] if self.view else []
 
 
+class SelectEvent(UIEvent):
+    """
+    select or unselect a checkable view (event_type is "select" or "unselect")
+    """
+
+    def __init__(self, event_type=KEY_SelectEvent, x=None, y=None, view=None, event_dict=None):
+        super().__init__()
+        self.event_type = event_type
+        self.x = x
+        self.y = y
+        self.view = view
+        if event_dict is not None:
+            self.__dict__.update(event_dict)
+
+    def send(self, device):
+        x, y = UIEvent.get_xy(x=self.x, y=self.y, view=self.view)
+        # get_text_representation() in device_state.py stores the attributes under
+        # 'special_attrs'; the singular spelling used here originally is still accepted
+        special_attrs = None
+        if self.view is not None:
+            special_attrs = self.view.get('special_attrs', self.view.get('special_attr'))
+        if special_attrs is None:
+            # selection state unknown: always touch
+            device.view_long_touch(x=x, y=y, duration=200)
+        elif self.event_type == KEY_UnselectEvent and 'selected' in special_attrs:
+            device.view_long_touch(x=x, y=y, duration=200)
+        elif self.event_type == KEY_SelectEvent and 'selected' not in special_attrs:
+            device.view_long_touch(x=x, y=y, duration=200)
+        return True
+
+    def get_event_str(self, state):
+        if self.view is not None:
+            return f"{self.__class__.__name__}(type={self.event_type}, {UIEvent.view_str(state, self.view)})"
+        elif self.x is not None and self.y is not None:
+            # class name first, then event type, matching the view branch above
+            return "%s(type=%s, state=%s, x=%s, y=%s)" % \
+                   (self.__class__.__name__, self.event_type, state.state_str, self.x, self.y)
+        else:
+            msg = "Invalid %s!" % self.__class__.__name__
+            raise InvalidEventException(msg)
+
+    def get_views(self):
+        return [self.view] if self.view else []
+
+
 class LongTouchEvent(UIEvent):
     """
     a long touch on screen
@@ -594,7 +643,7 @@ class ScrollEvent(UIEvent):
     swipe gesture
     """
 
-    def __init__(self, x=None, y=None, view=None, direction="DOWN", event_dict=None):
+    def __init__(self, x=None, y=None, view=None, direction="down", event_dict=None):
         super().__init__()
         self.event_type = KEY_ScrollEvent
         self.x = x
@@ -609,7 +658,7 @@ def __init__(self, x=None, y=None, view=None, direction="DOWN", event_dict=None)
     def get_random_instance(device, app):
         x = random.uniform(0, device.get_width())
         y = random.uniform(0, device.get_height())
-        direction = random.choice(["UP", "DOWN", "LEFT", "RIGHT"])
+        direction = random.choice(["up", "down", "left", "right"])
         return ScrollEvent(x, y, direction)
 
     def send(self, device):
diff --git a/droidbot/input_manager.py b/droidbot/input_manager.py
index 0ef7a052..13a6c8c3 100644
--- a/droidbot/input_manager.py
+++ b/droidbot/input_manager.py
@@ -9,7 +9,7 @@
                          ManualPolicy, \
                          POLICY_NAIVE_DFS, POLICY_GREEDY_DFS, \
                          POLICY_NAIVE_BFS, POLICY_GREEDY_BFS, \
-                         POLICY_REPLAY, POLICY_MEMORY_GUIDED, \
+                         POLICY_REPLAY, POLICY_MEMORY_GUIDED, POLICY_LLM_GUIDED, \
                          POLICY_MANUAL, POLICY_MONKEY, POLICY_NONE
 
 DEFAULT_POLICY = POLICY_GREEDY_DFS
@@ -75,6 +75,9 @@ def get_input_policy(self, device, app, master):
         elif self.policy_name == POLICY_MEMORY_GUIDED:
             from .input_policy2 import MemoryGuidedPolicy
             input_policy = MemoryGuidedPolicy(device, app, self.random_input)
+        elif self.policy_name == POLICY_LLM_GUIDED:
+            from .input_policy3 import LLM_Guided_Policy
+            input_policy = LLM_Guided_Policy(device, app, self.random_input)
         elif self.policy_name == POLICY_REPLAY:
             input_policy = UtgReplayPolicy(device, app, self.replay_output)
         elif self.policy_name == POLICY_MANUAL:
diff --git a/droidbot/input_policy.py b/droidbot/input_policy.py
index 0af0266e..547a18f4 100644
--- a/droidbot/input_policy.py
+++ b/droidbot/input_policy.py
@@ -33,6 +33,7 @@
 POLICY_MONKEY = "monkey"
 POLICY_NONE = "none"
 POLICY_MEMORY_GUIDED = "memory_guided"  # implemented in input_policy2
+POLICY_LLM_GUIDED = "llm_guided"  # implemented in input_policy3
 
 
 class InputInterruptedException(Exception):