% Journal article: Automated Software Engineering 33(2), 2026.
% NOTE(review): the abstract overlaps heavily with entry 56137 (ASEW '24);
% presumably this is the extended journal version -- confirm.
@article{63834,
  abstract     = {Many Android apps collect data from users, and the European Union's General Data Protection Regulation (GDPR) mandates clear disclosures of such data collection. However, apps often use third-party code, complicating accurate disclosures. This paper investigates how accurately current Android apps fulfill these requirements. In this work, we present a multi-layered definition of privacy-related data to correctly report data collection in Android apps. We further create a dataset of privacy-sensitive data classes that may be used as input by an Android app. This dataset takes into account data collected both through the user interface and system APIs. Based on this, we implement a semi-automated prototype that detects and labels privacy-related data collected by a given Android app. We manually examine the data safety sections of 70 Android apps to observe how data collection is reported, identifying instances of over- and under-reporting. We compare our prototype's results with the data safety sections of 20 apps revealing reporting discrepancies. Using the results from two Messaging and Social Media apps (Signal and Instagram), we discuss how app developers under-report and over-report data collection, respectively, and identify inaccurately reported data categories. A broader study of 7,500 Android apps reveals that apps most frequently collect data that can \emph{partially identify} users. Although system APIs consistently collect large amounts of privacy-related data, user interfaces exhibit some more diverse data collection patterns. A more focused study on various domains of apps reveals that the largest fraction of apps collecting personal data belong to the domain of \emph{Messaging and Social Media}. Our findings show that location is collected frequently by apps, specially from the \emph{E-commerce and Shopping} domain. However, it is often under-reported in app data safety sections. Our results highlight the need for greater consistency in privacy-aware app development and reporting practices.},
  author       = {Khedkar, Mugdha and Mondal, Ambuj Kumar and Bodden, Eric},
  issn         = {0928-8910},
  journal      = {Automated Software Engineering},
  number       = {2},
  publisher    = {Springer Science and Business Media LLC},
  title        = {A study of privacy-related data collected by {Android} apps},
  doi          = {10.1007/s10515-025-00589-3},
  volume       = {33},
  year         = {2026},
}

% Conference paper, MOBILESoft '26.
% `address` is the publisher's city; `venue` is where the conference took place.
% NOTE(review): the page range 65--68 is identical to the MOBILESoft '24 entry
% (key 52235) and no DOI is recorded here -- presumably carried over from that
% entry; confirm against the published proceedings.
@inproceedings{64823,
  abstract     = {Current legal frameworks enforce that Android developers accurately report the data their apps collect. However, large codebases can make this reporting challenging. This paper employs an empirical approach to understand developers' experience with Google Play Store's Data Safety Section (DSS) form.

We first survey 41 Android developers to understand how they categorize privacy-related data into DSS categories and how confident they feel when completing the DSS form. To gain a broader and more detailed view of the challenges developers encounter during the process, we complement the survey with an analysis of 172 online developer discussions, capturing the perspectives of 642 additional developers. Together, these two data sources represent insights from 683 developers.

Our findings reveal that developers often manually classify the privacy-related data their apps collect into the data categories defined by Google---or, in some cases, omit classification entirely---and rely heavily on existing online resources when completing the form. Moreover, developers are generally confident in recognizing the data their apps collect, yet they lack confidence in translating this knowledge into DSS-compliant disclosures. Key challenges include issues in identifying privacy-relevant data to complete the form, limited understanding of the form, and concerns about app rejection due to discrepancies with Google's privacy requirements.
These results underscore the need for clearer guidance and more accessible tooling to support developers in meeting privacy-aware reporting obligations.},
  author       = {Khedkar, Mugdha and Schlichtig, Michael and Soliman, Mohamed Aboubakr Mohamed and Bodden, Eric},
  booktitle    = {Proceedings of the IEEE/ACM 13th International Conference on Mobile Software Engineering and Systems (MOBILESoft '26)},
  keywords     = {static analysis, data collection, data protection, privacy-aware reporting},
  pages        = {65--68},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  venue        = {Rio de Janeiro, Brazil},
  title        = {Challenges in {Android} Data Disclosure: An Empirical Study},
  year         = {2026},
}

% Journal article: Automated Software Engineering 33(2), 2026.
% NOTE(review): publisher string differs from entry 63834 for the same
% journal ("Springer Science and Business Media LLC") -- pick one and confirm.
@article{64821,
  author       = {Khedkar, Mugdha and Schlichtig, Michael and Atakishiyev, Nihad and Bodden, Eric},
  issn         = {0928-8910},
  journal      = {Automated Software Engineering},
  number       = {2},
  publisher    = {Springer US},
  title        = {Between Law and Code: Challenges and Opportunities for Automating Privacy Assessments},
  doi          = {10.1007/s10515-026-00601-4},
  volume       = {33},
  year         = {2026},
}

% Conference paper, SANER 2026.
% Acronyms and the tool name "Assessor View" are brace-protected so that
% sentence-casing styles do not lowercase them.
@inproceedings{64909,
  author       = {Khedkar, Mugdha and Schlichtig, Michael and Bodden, Eric},
  booktitle    = {IEEE International Conference on Software Analysis, Evolution and Reengineering (SANER 2026)},
  title        = {Source Code-Driven {GDPR} Documentation: Supporting {RoPA} with {Assessor View}},
  year         = {2026},
}

% arXiv preprint.
% The arXiv identifier is carried in `note` (a required field for this entry
% type, and the only one of these printed by classic styles) and in
% eprint/eprinttype (used by biblatex and arXiv-aware styles).
@unpublished{65018,
  abstract     = {Android applications collecting data from users must protect it according to the current legal frameworks. Such data protection has become even more important since in 2018 the European Union rolled out the General Data Protection Regulation (GDPR). Since app developers are not legal experts, they find it difficult to integrate privacy-aware practices into source code development. Despite these legal obligations, developers have limited tool support to reason about data protection throughout their app development process.
  This paper explores the use of static program slicing and software visualization to analyze privacy-relevant data flows in Android apps. We introduce SliceViz, a web tool that analyzes an Android app by slicing all privacy-relevant data sources detected in the source code on the back-end. It then helps developers by visualizing these privacy-relevant program slices.
  We conducted a user study with 12 participants demonstrating that SliceViz effectively aids developers in identifying privacy-relevant properties in Android apps.
  Our findings indicate that program slicing can be employed to identify and reason about privacy-relevant data flows in Android applications. With further usability improvements, developers can be better equipped to handle privacy-sensitive information.},
  author       = {Khedkar, Mugdha and Schlichtig, Michael and Mohan, Santhosh and Bodden, Eric},
  eprint       = {2503.16640},
  eprinttype   = {arXiv},
  note         = {arXiv:2503.16640},
  title        = {Visualizing Privacy-Relevant Data Flows in {Android} Applications},
  year         = {2025},
}

% Conference paper, MOBILESoft '24.
% `address` is the publisher's city; `venue` is where the conference took place.
@inproceedings{52235,
  abstract     = {Android applications collecting data from users must protect it according to the current legal frameworks. Such data protection has become even more important since the European Union rolled out the General Data Protection Regulation (GDPR). Since app developers are not legal experts, they find it difficult to write privacy-aware source code. Moreover, they have limited tool support to reason about data protection throughout their app development process.
This paper motivates the need for a static analysis approach to diagnose and explain data protection in Android apps. The analysis will recognize personal data sources in the source code, and aims to further examine the data flow originating from these sources. App developers can then address key questions about data manipulation, derived data, and the presence of technical measures. Despite challenges, we explore to what extent one can realize this analysis through static taint analysis, a common method for identifying security vulnerabilities. This is a first step towards designing a tool-based approach that aids app developers and assessors in ensuring data protection in Android apps, based on automated static program analysis.},
  author       = {Khedkar, Mugdha and Bodden, Eric},
  booktitle    = {Proceedings of the IEEE/ACM 11th International Conference on Mobile Software Engineering and Systems (MOBILESoft '24)},
  keywords     = {static program analysis, data protection and privacy, GDPR compliance},
  pages        = {65--68},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  venue        = {Lisbon, Portugal},
  title        = {Toward an {Android} Static Analysis Approach for Data Protection},
  doi          = {10.1145/3647632.3651389},
  year         = {2024},
}

% Workshop paper, ASEW '24.
% `booktitle` carries no leading "In": bibliography styles add that word
% themselves. `venue` is where the conference took place.
@inproceedings{56137,
  abstract     = {Many Android applications collect data from users. The European Union's General Data Protection Regulation (GDPR) requires vendors to faithfully disclose which data their apps collect. This task is complicated because many apps use third-party code for which the same information is not readily available. Hence we ask: how accurately do current Android apps fulfill these requirements?
In this work, we first expose a multi-layered definition of privacy-related data to correctly report data collection in Android apps. We further create a dataset of privacy-sensitive data classes that may be used as input by an Android app. This dataset takes into account data collected both through the user interface and system APIs.
We manually examine the data safety sections of 70 Android apps to observe how data collection is reported, identifying instances of over- and under-reporting. Additionally, we develop a prototype to statically extract and label privacy-related data collected via app source code, user interfaces, and permissions. Comparing the prototype's results with the data safety sections of 20 apps reveals reporting discrepancies. Using the results from two Messaging and Social Media apps (Signal and Instagram), we discuss how app developers under-report and over-report data collection, respectively, and identify inaccurately reported data categories.
Our results show that app developers struggle to accurately report data collection, either due to Google's abstract definition of collected data or insufficient existing tool support.},
  author       = {Khedkar, Mugdha and Mondal, Ambuj Kumar and Bodden, Eric},
  booktitle    = {Proceedings of the 39th IEEE/ACM International Conference on Automated Software Engineering Workshops (ASEW '24)},
  venue        = {Sacramento, California},
  title        = {Do {Android} App Developers Accurately Report Collection of Privacy-Related Data?},
  doi          = {10.1145/3691621.3694949},
  year         = {2024},
}

% Workshop paper, ASEW '24; proposes the "Assessor View" tool (see abstract).
% `booktitle` carries no leading "In": bibliography styles add that word
% themselves. `venue` is where the conference took place.
@inproceedings{56140,
  abstract     = {Android apps collecting data from users must comply with legal frameworks to ensure data protection. This requirement has become even more important since the implementation of the General Data Protection Regulation (GDPR) by the European Union in 2018. Moreover, with the proposed Cyber Resilience Act on the horizon, stakeholders will soon need to assess software against even more stringent security and privacy standards. Effective privacy assessments require collaboration among groups with diverse expertise to function effectively as a cohesive unit.
    This paper motivates the need for an automated approach that enhances understanding of data protection in Android apps and improves communication between the various parties involved in privacy assessments. We propose the Assessor View, a tool designed to bridge the knowledge gap between these parties, facilitating more effective privacy assessments of Android applications.},
  author       = {Khedkar, Mugdha and Schlichtig, Michael and Bodden, Eric},
  booktitle    = {Proceedings of the 39th IEEE/ACM International Conference on Automated Software Engineering Workshops (ASEW '24)},
  venue        = {Sacramento, California},
  title        = {Advancing {Android} Privacy Assessments with Automation},
  doi          = {10.1145/3691621.3694953},
  year         = {2024},
}

% Conference paper, ICSE-Companion 2023 (the abstract describes thesis work).
% Venue and page range live in their own fields so the style can format them;
% the year is carried by the `year` field only.
@inproceedings{44146,
  abstract     = {Many Android applications collect data from users. When they do, they must
protect this collected data according to the current legal frameworks. Such
data protection has become even more important since the European Union rolled
out the General Data Protection Regulation (GDPR). App developers have limited
tool support to reason about data protection throughout their app development
process. Although many Android applications state a privacy policy, privacy
policy compliance checks are currently manual, expensive, and prone to error.
One of the major challenges in privacy audits is the significant gap between
legal privacy statements (in English text) and technical measures that Android
apps use to protect their user's privacy. In this thesis, we will explore to
what extent we can use static analysis to answer important questions regarding
data protection. Our main goal is to design a tool based approach that aids app
developers and auditors in ensuring data protection in Android applications,
based on automated static program analysis.},
  author       = {Khedkar, Mugdha},
  booktitle    = {2023 IEEE/ACM 45th International Conference on Software Engineering: Companion Proceedings (ICSE-Companion)},
  keywords     = {static analysis, data protection and privacy, GDPR compliance},
  pages        = {197--199},
  venue        = {Melbourne, Australia},
  title        = {Static Analysis for {Android} {GDPR} Compliance Assurance},
  doi          = {10.1109/ICSE-Companion58688.2023.00054},
  year         = {2023},
}

