% Conference paper (MOBILESoft '26): empirical study (survey plus analysis of online
% discussions) of Android developers' experience with the Google Play Data Safety Section form.
% Publisher, publisher address and page range were previously embedded in booktitle.
@inproceedings{64823,
  author       = {Khedkar, Mugdha and Schlichtig, Michael and Soliman, Mohamed Aboubakr Mohamed and Bodden, Eric},
  title        = {Challenges in {Android} Data Disclosure: An Empirical Study},
  booktitle    = {Proceedings of the {IEEE/ACM} 13th International Conference on Mobile Software Engineering and Systems ({MOBILESoft} '26)},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  venue        = {Rio de Janeiro, Brazil},
  pages        = {65--68},
  year         = {2026},
  keywords     = {static analysis, data collection, data protection, privacy-aware reporting},
  abstract     = {Current legal frameworks enforce that Android developers accurately report the data their apps collect. However, large codebases can make this reporting challenging. This paper employs an empirical approach to understand developers' experience with Google Play Store's Data Safety Section (DSS) form.
We first survey 41 Android developers to understand how they categorize privacy-related data into DSS categories and how confident they feel when completing the DSS form. To gain a broader and more detailed view of the challenges developers encounter during the process, we complement the survey with an analysis of 172 online developer discussions, capturing the perspectives of 642 additional developers. Together, these two data sources represent insights from 683 developers.
Our findings reveal that developers often manually classify the privacy-related data their apps collect into the data categories defined by Google---or, in some cases, omit classification entirely---and rely heavily on existing online resources when completing the form. Moreover, developers are generally confident in recognizing the data their apps collect, yet they lack confidence in translating this knowledge into DSS-compliant disclosures. Key challenges include issues in identifying privacy-relevant data to complete the form, limited understanding of the form, and concerns about app rejection due to discrepancies with Google's privacy requirements.
These results underscore the need for clearer guidance and more accessible tooling to support developers in meeting privacy-aware reporting obligations.},
}

% Misc entry: error-chain support in static analysis, evaluated with the tool CogniCryptSUBS.
% NOTE(review): no howpublished, url or doi is recorded for this entry; add an identifier if one exists.
@misc{52663,
  author       = {Wickert, Anna-Katharina and Schlichtig, Michael and Vogel, Marvin and Winter, Lukas and Mezini, Mira and Bodden, Eric},
  title        = {Supporting Error Chains in Static Analysis for Precise Evaluation Results and Enhanced Usability},
  year         = {2024},
  keywords     = {Static analysis, error chains, false positive reduction, empirical studies},
  abstract     = {Context: Static analyses are well-established to aid in understanding bugs or vulnerabilities during the development process or in large-scale studies. A low false-positive rate is essential for the adaption in practice and for precise results of empirical studies. Unfortunately, static analyses tend to report where a vulnerability manifests rather than the fix location. This can cause presumed false positives or imprecise results.
Method: To address this problem, we designed an adaption of an existing static analysis algorithm that can distinguish between a manifestation and fix location, and reports error chains. An error chain represents at least two interconnected errors that occur successively, thus building the connection between the fix and manifestation location. We used our tool CogniCryptSUBS for a case study on 471 GitHub repositories, a performance benchmark to compare different analysis configurations, and conducted an expert interview.
Result: We found that 50 \% of the projects with a report had at least one error chain. Our runtime benchmark demonstrated that our improvement caused only a minimal runtime overhead of less than 4 \%. The results of our expert interview indicate that with our adapted version participants require fewer executions of the analysis.
Conclusion: Our results indicate that error chains occur frequently in real-world projects, and ignoring them can lead to imprecise evaluation results. The runtime benchmark indicates that our tool is a feasible and efficient solution for detecting error chains in real-world projects. Further, our results gave a hint that the usability of static analyses may benefit from supporting error chains.},
}

% Two-page contribution in the volume "Software Engineering 2023"; its abstract appears to
% summarise the same study as entry 32410.
% Typed as inproceedings because the entry carries a booktitle, which classic inbook styles ignore.
% NOTE(review): confirm the volume is a proceedings; otherwise use incollection.
@inproceedings{52662,
  author       = {Nachtigall, Marcus and Schlichtig, Michael and Bodden, Eric},
  title        = {Evaluation of Usability Criteria Addressed by Static Analysis Tools on a Large Scale},
  booktitle    = {Software Engineering 2023},
  publisher    = {Gesellschaft für Informatik e.V.},
  pages        = {95--96},
  year         = {2023},
  isbn         = {978-3-88579-726-5},
  keywords     = {Automated static analysis, Software usability},
  abstract     = {Static analysis tools support developers in detecting potential coding issues, such as bugs or vulnerabilities. Research emphasizes technical challenges of such tools but also mentions severe usability shortcomings. These shortcomings hinder the adoption of static analysis tools, and user dissatisfaction may even lead to tool abandonment. To comprehensively assess the state of the art, we present the first systematic usability evaluation of a wide range of static analysis tools. We derived a set of 36 relevant criteria from the literature and used them to evaluate a total of 46 static analysis tools complying with our inclusion and exclusion criteria - a representative set of mainly non-proprietary tools. The evaluation against the usability criteria in a multiple-raters approach shows that two thirds of the considered tools offer poor warning messages, while about three-quarters provide hardly any fix support. Furthermore, the integration of user knowledge is strongly neglected, which could be used for instance, to improve handling of false positives. Finally, issues regarding workflow integration and specialized user interfaces are revealed. These findings should prove useful in guiding and focusing further research and development in user experience for static code analyses.},
}

% Two-page contribution in the volume "Software Engineering 2023" introducing FUM; its abstract
% appears to summarise the same work as entry 31133.
% Typed as inproceedings because the entry carries a booktitle, which classic inbook styles ignore.
% NOTE(review): confirm the volume is a proceedings; otherwise use incollection.
@inproceedings{52660,
  author       = {Schlichtig, Michael and Sassalla, Steffen and Narasimhan, Krishna and Bodden, Eric},
  title        = {Introducing {FUM}: A Framework for {API} Usage Constraint and Misuse Classification},
  booktitle    = {Software Engineering 2023},
  publisher    = {Gesellschaft für Informatik e.V.},
  pages        = {105--106},
  year         = {2023},
  isbn         = {978-3-88579-726-5},
  keywords     = {API misuses, API usage constraints, classification framework, API misuse detection, static analysis},
  abstract     = {Application Programming Interfaces (APIs) are the primary mechanism developers use to obtain access to third-party algorithms and services. Unfortunately, APIs can be misused, which can have catastrophic consequences, especially if the APIs provide security-critical functionalities like cryptography. Understanding what API misuses are, and how they are caused, is important to prevent them, e.g., with API misuse detectors. However, definitions for API misuses and related terms in literature vary. This paper presents a systematic literature review to clarify these terms and introduces FUM, a novel Framework for API Usage constraint and Misuse classification. The literature review revealed that API misuses are violations of API usage constraints. To address this, we provide unified definitions and use them to derive FUM. To assess the extent to which FUM aids in determining and guiding the improvement of an API misuses detector’s capabilities, we performed a case study on the state-of-the-art misuse detection tool CogniCrypt. The study showed that FUM can be used to properly assess CogniCrypt’s capabilities, identify weaknesses and assist in deriving mitigations and improvements.},
}

% ICSE-Companion 2023 paper: thesis outline on static analysis for GDPR data-protection
% compliance of Android apps. Venue and page range were previously embedded in booktitle.
@inproceedings{44146,
  author       = {Khedkar, Mugdha},
  title        = {Static Analysis for {Android} {GDPR} Compliance Assurance},
  booktitle    = {2023 {IEEE/ACM} 45th International Conference on Software Engineering: Companion Proceedings ({ICSE-Companion})},
  venue        = {Melbourne, Australia},
  pages        = {197--199},
  year         = {2023},
  doi          = {10.1109/ICSE-Companion58688.2023.00054},
  keywords     = {static analysis, data protection and privacy, GDPR compliance},
  abstract     = {Many Android applications collect data from users. When they do, they must
protect this collected data according to the current legal frameworks. Such
data protection has become even more important since the European Union rolled
out the General Data Protection Regulation (GDPR). App developers have limited
tool support to reason about data protection throughout their app development
process. Although many Android applications state a privacy policy, privacy
policy compliance checks are currently manual, expensive, and prone to error.
One of the major challenges in privacy audits is the significant gap between
legal privacy statements (in English text) and technical measures that Android
apps use to protect their user's privacy. In this thesis, we will explore to
what extent we can use static analysis to answer important questions regarding
data protection. Our main goal is to design a tool based approach that aids app
developers and auditors in ensuring data protection in Android applications,
based on automated static program analysis.},
}

% SANER 2023 paper presenting HeaderGen (automatic markdown headers for Jupyter notebook cells).
% The conference name was previously stored in publisher, leaving the required booktitle empty.
% The recorded DOI is the arXiv one; the arXiv id is also exposed through the eprint fields.
% NOTE(review): publisher is inferred from the "IEEE SANER 2023" string; confirm.
@inproceedings{36522,
  author       = {Shivarpatna Venkatesh, Ashwin Prasad and Wang, Jiawei and Li, Li and Bodden, Eric},
  title        = {Enhancing Comprehension and Navigation in {Jupyter} Notebooks with Static Analysis},
  booktitle    = {2023 {IEEE} International Conference on Software Analysis, Evolution and Reengineering ({SANER})},
  publisher    = {IEEE},
  year         = {2023},
  doi          = {10.48550/ARXIV.2301.04419},
  eprint       = {2301.04419},
  eprinttype   = {arXiv},
  keywords     = {static analysis, python, code comprehension, annotation, literate programming, jupyter notebook},
  abstract     = {Jupyter notebooks enable developers to interleave code snippets with rich-text and in-line visualizations. Data scientists use Jupyter notebook as the de-facto standard for creating and sharing machine-learning based solutions, primarily written in Python. Recent studies have demonstrated, however, that a large portion of Jupyter notebooks available on public platforms are undocumented and lacks a narrative structure. This reduces the readability of these notebooks. To address this shortcoming, this paper presents HeaderGen, a novel tool-based approach that automatically annotates code cells with categorical markdown headers based on a taxonomy of machine-learning operations, and classifies and displays function calls according to this taxonomy. For this functionality to be realized, HeaderGen enhances an existing call graph analysis in PyCG. To improve precision, HeaderGen extends PyCG's analysis with support for handling external library code and flow-sensitivity. The former is realized by facilitating the resolution of function return-types. Furthermore, HeaderGen uses type information to perform pattern matching on code syntax to annotate code cells.
The evaluation on 15 real-world Jupyter notebooks from Kaggle shows that HeaderGen's underlying call graph analysis yields high accuracy (96.4\% precision and 95.9\% recall). This is because HeaderGen can resolve return-types of external libraries where existing type inference tools such as pytype (by Google), pyright (by Microsoft), and Jedi fall short. The header generation has a precision of 82.2\% and a recall rate of 96.8\% with regard to headers created manually by experts. In a user study, HeaderGen helps participants finish comprehension and navigation tasks faster. All participants clearly perceive HeaderGen as useful to their task.},
}

% arXiv preprint (pre-registered work) proposing CamBench, a benchmark suite for
% cryptographic API misuse detection tools. The arXiv id is taken from the recorded DOI.
@misc{32409,
  author       = {Schlichtig, Michael and Wickert, Anna-Katharina and Krüger, Stefan and Bodden, Eric and Mezini, Mira},
  title        = {{CamBench} -- Cryptographic {API} Misuse Detection Tool Benchmark Suite},
  year         = {2022},
  doi          = {10.48550/ARXIV.2204.06447},
  eprint       = {2204.06447},
  eprinttype   = {arXiv},
  keywords     = {cryptography, benchmark, API misuse, static analysis},
  abstract     = {Context: Cryptographic APIs are often misused in real-world applications. Therefore, many cryptographic API misuse detection tools have been introduced. However, there exists no established reference benchmark for a fair and comprehensive comparison and evaluation of these tools. While there are benchmarks, they often only address a subset of the domain or were only used to evaluate a subset of existing misuse detection tools. Objective: To fairly compare cryptographic API misuse detection tools and to drive future development in this domain, we will devise such a benchmark. Openness and transparency in the generation process are key factors to fairly generate and establish the needed benchmark. Method: We propose an approach where we derive the benchmark generation methodology from the literature which consists of general best practices in benchmarking and domain-specific benchmark generation. A part of this methodology is transparency and openness of the generation process, which is achieved by pre-registering this work. Based on our methodology we design CamBench, a fair ``Cryptographic API Misuse Detection Tool Benchmark Suite''. We will implement the first version of CamBench limiting the domain to Java, the JCA, and static analyses. Finally, we will use CamBench to compare current misuse detection tools and compare CamBench to related benchmarks of its domain.},
}

% ISSTA 2022 paper: systematic usability evaluation of 46 static analysis tools against 36 criteria.
% NOTE(review): the doi has no article suffix (compare 10.1145/3092703.3092705 in entry 20559),
% so it probably identifies the whole proceedings; confirm the paper-level DOI.
@inproceedings{32410,
  author       = {Nachtigall, Marcus and Schlichtig, Michael and Bodden, Eric},
  title        = {A Large-Scale Study of Usability Criteria Addressed by Static Analysis Tools},
  booktitle    = {Proceedings of the 31st {ACM} {SIGSOFT} International Symposium on Software Testing and Analysis},
  publisher    = {ACM},
  pages        = {532--543},
  year         = {2022},
  doi          = {10.1145/3533767},
  isbn         = {9781450393799},
  keywords     = {Automated static analysis, Software usability},
  abstract     = {Static analysis tools support developers in detecting potential coding issues, such as bugs or vulnerabilities. Research on static analysis emphasizes its technical challenges but also mentions severe usability shortcomings. These shortcomings hinder the adoption of static analysis tools, and in some cases, user dissatisfaction even leads to tool abandonment.
To comprehensively assess the current state of the art, this paper presents the first systematic usability evaluation in a wide range of static analysis tools. We derived a set of 36 relevant criteria from the scientific literature and gathered a collection of 46 static analysis tools complying with our inclusion and exclusion criteria - a representative set of mainly non-proprietary tools. Then, we evaluated how well these tools fulfill the aforementioned criteria.
The evaluation shows that more than half of the considered tools offer poor warning messages, while about three-quarters of the tools provide hardly any fix support. Furthermore, the integration of user knowledge is strongly neglected, which could be used for improved handling of false positives and tuning the results for the corresponding developer. Finally, issues regarding workflow integration and specialized user interfaces are proved further.
These findings should prove useful in guiding and focusing further research and development in the area of user experience for static code analyses.},
}

% SANER 2022 paper introducing FUM, a classification framework for API usage constraints
% and misuses, with a case study on CogniCrypt. The doi is stored bare, without a resolver prefix.
@inproceedings{31133,
  author       = {Schlichtig, Michael and Sassalla, Steffen and Narasimhan, Krishna and Bodden, Eric},
  title        = {{FUM} -- A Framework for {API} Usage Constraint and Misuse Classification},
  booktitle    = {2022 {IEEE} International Conference on Software Analysis, Evolution and Reengineering ({SANER})},
  pages        = {673--684},
  year         = {2022},
  doi          = {10.1109/SANER53432.2022.00085},
  keywords     = {API misuses, API usage constraints, classification framework, API misuse detection, static analysis},
  abstract     = {Application Programming Interfaces (APIs) are the primary mechanism that developers use to obtain access to third-party algorithms and services. Unfortunately, APIs can be misused, which can have catastrophic consequences, especially if the APIs provide security-critical functionalities like cryptography. Understanding what API misuses are, and for what reasons they are caused, is important to prevent them, e.g., with API misuse detectors. However, definitions and nominations for API misuses and related terms in literature vary and are diverse. This paper addresses the problem of scattered knowledge and definitions of API misuses by presenting a systematic literature review on the subject and introducing FUM, a novel Framework for API Usage constraint and Misuse classification. The literature review revealed that API misuses are violations of API usage constraints. To capture this, we provide unified definitions and use them to derive FUM. To assess the extent to which FUM aids in determining and guiding the improvement of an API misuses detectors' capabilities, we performed a case study on CogniCrypt, a state-of-the-art misuse detector for cryptographic APIs. The study showed that FUM can be used to properly assess CogniCrypt's capabilities, identify weaknesses and assist in deriving mitigations and improvements. And it appears that also more generally FUM can aid the development and improvement of misuse detection tools.},
}

% Journal article (Automated Software Engineering 29(1)) presenting VarAlyzer, a
% variability-aware static analysis for C software product lines.
% The abstract is stored as plain text; the JATS XML tags from the metadata import were removed.
% NOTE(review): no pages or article number is recorded; add one if available.
@article{30511,
  author       = {Schubert, Philipp and Gazzillo, Paul and Patterson, Zach and Braha, Julian and Schiebel, Fabian Benedikt and Hermann, Ben and Wei, Shiyi and Bodden, Eric},
  title        = {Static Data-Flow Analysis for Software Product Lines in {C}},
  journal      = {Automated Software Engineering},
  volume       = {29},
  number       = {1},
  publisher    = {Springer Science and Business Media LLC},
  year         = {2022},
  doi          = {10.1007/s10515-022-00333-1},
  issn         = {0928-8910},
  keywords     = {inter-procedural static analysis, software product lines, preprocessor, LLVM, C/C++},
  abstract     = {Many critical codebases are written in C, and most of them use preprocessor directives to encode variability, effectively encoding software product lines. These preprocessor directives, however, challenge any static code analysis. SPLlift, a previously presented approach for analyzing software product lines, is limited to Java programs that use a rather simple feature encoding and to analysis problems with a finite and ideally small domain. Other approaches that allow the analysis of real-world C software product lines use special-purpose analyses, preventing the reuse of existing analysis infrastructures and ignoring the progress made by the static analysis community. This work presents VarAlyzer, a novel static analysis approach for software product lines. VarAlyzer first transforms preprocessor constructs to plain C while preserving their variability and semantics. It then solves any given distributive analysis problem on transformed product lines in a variability-aware manner. VarAlyzer's analysis results are annotated with feature constraints that encode in which configurations each result holds. Our experiments with 95 compilation units of OpenSSL show that applying VarAlyzer enables one to conduct inter-procedural, flow-, field- and context-sensitive data-flow analyses on entire product lines for the first time, outperforming the product-based approach for highly-configurable systems.},
}

% ICCQ 2021 paper comparing the call-graph generators Code2flow, Pyan and Wala on Python code.
% "Virtual" describes how the conference was held, so it is stored as venue.
% NOTE(review): this entry spells an author "Spaeth, Johannes" while entry 20533 has
% "Späth, Johannes"; unify if they are the same person.
@inproceedings{23388,
  author       = {Kummita, Sriteja and Piskachev, Goran and Spaeth, Johannes and Bodden, Eric},
  title        = {Qualitative and Quantitative Analysis of Callgraph Algorithms for {PYTHON}},
  booktitle    = {Proceedings of the 2021 International Conference on Code Quality ({ICCQ})},
  venue        = {Virtual},
  year         = {2021},
  doi          = {10.1109/ICCQ51190.2021.9392986},
  isbn         = {978-1-7281-8477-7},
  keywords     = {Static Analysis, Callgraph Analysis, Python, Qualitative Analysis, Quantitative Analysis, Empirical Evaluation},
  abstract     = {As one of the most popular programming languages, PYTHON has become a relevant target language for static analysis tools. The primary data structure for performing an inter-procedural static analysis is call-graph (CG), which links call sites to potential call targets in a program. There exists multiple algorithms for constructing callgraphs, tailored to specific languages. However, comparatively few implementations target PYTHON. Moreover, there is still lack of empirical evidence as to how these few algorithms perform in terms of precision and recall. This paper thus presents EVAL\_CG, an extensible framework for comparative analysis of Python call-graphs. We conducted two experiments which run the CG algorithms on different Python programming constructs and real-world applications. In both experiments, we evaluate three CG generation frameworks namely, Code2flow, Pyan, and Wala. We record precision, recall, and running time, and identify sources of unsoundness of each framework. Our evaluation shows that none of the current CG construction frameworks produce a sound CG. Moreover, the static CGs contain many spurious edges. Code2flow is also comparatively slow. Hence, further research is needed to support CG generation for Python programs.},
}

% Journal article (IEEE Transactions on Software Engineering) on CrySL.
% NOTE(review): pages 1--1 with no volume or number looks like an early-access placeholder;
% replace with the final volume, issue and page range once confirmed.
@article{20533,
  author       = {Krüger, Stefan and Späth, Johannes and Ali, Karim and Bodden, Eric and Mezini, Mira},
  title        = {{CrySL}: An Extensible Approach to Validating the Correct Usage of Cryptographic {APIs}},
  journal      = {IEEE Transactions on Software Engineering},
  pages        = {1--1},
  year         = {2019},
  doi          = {10.1109/TSE.2019.2948910},
  issn         = {2326-3881},
  keywords     = {Java, Encryption, Static analysis, Tools, Ciphers, Semantics, cryptography, domain-specific language},
}

% Technical report: SARIF export connector for the static analysis tool CogniCrypt.
% NOTE(review): the techreport type requires an institution, which is not recorded here;
% add it (and a report number, if any) once known.
@techreport{23389,
  author       = {Kummita, Sriteja and Piskachev, Goran},
  title        = {Integration of the {Static Analysis Results Interchange Format} in {CogniCrypt}},
  year         = {2019},
  keywords     = {Static Analysis, Static Analysis Results Interchange Format, SARIF, Static Analysis Server Protocol, SASP},
  abstract     = {Background - Software companies increasingly rely on static analysis tools to detect potential bugs and security vulnerabilities in their software products. In the past decade, more and more commercial and open-source static analysis tools have been developed and are maintained. Each tool comes with its own reporting format, preventing an easy integration of multiple analysis tools in a single interface, such as the Static Analysis Server Protocol (SASP). In 2017, a collaborative effort in industry, including Microsoft and GrammaTech, has proposed the Static Analysis Results Interchange Format (SARIF) to address this issue. SARIF is a standardized format in which static analysis warnings can be encoded, to allow the import and export of analysis reports between different tools.
Purpose - This paper explains the SARIF format through examples and presents a proof of concept of the connector that allows the static analysis tool CogniCrypt to generate and export its results in SARIF format.
Design/Approach - We conduct a cross-sectional study between the SARIF format and CogniCrypt's output format before detailing the implementation of the connector. The study aims to find the components of interest in CogniCrypt that the SARIF export module can complete.
Originality/Value - The integration of SARIF into CogniCrypt described in this paper can be reused to integrate SARIF into other static analysis tools.
Conclusion - After detailing the SARIF format, we present an initial implementation to integrate SARIF into CogniCrypt. After taking advantage of all the features provided by SARIF, CogniCrypt will be able to support SASP.},
}

% Journal article (IEEE Transactions on Software Engineering): "Debugging Static Analysis".
% NOTE(review): pages 1--1 with no volume or number looks like an early-access placeholder;
% replace with the final volume, issue and page range once confirmed.
@article{20543,
  author       = {Nguyen Quang Do, Lisa and Krüger, Stefan and Hill, Patrick and Ali, Karim and Bodden, Eric},
  title        = {Debugging Static Analysis},
  journal      = {IEEE Transactions on Software Engineering},
  pages        = {1--1},
  year         = {2018},
  doi          = {10.1109/TSE.2018.2868349},
  issn         = {2326-3881},
  keywords     = {Debugging, Static analysis, Tools, Computer bugs, Standards, Writing, Encoding, Testing and Debugging, Program analysis, Development tools, Integrated environments, Graphical environments, Usability testing},
}

% Journal article (IEEE Transactions on Software Engineering): "Tracking Load-time Configuration Options".
% NOTE(review): volume PP, number 99 and pages 1--1 look like early-access placeholders rather
% than real issue data; replace with the final values once confirmed.
@article{20557,
  author       = {Lillack, Max and Kästner, Christian and Bodden, Eric},
  title        = {Tracking Load-time Configuration Options},
  journal      = {IEEE Transactions on Software Engineering},
  volume       = {PP},
  number       = {99},
  pages        = {1--1},
  year         = {2017},
  doi          = {10.1109/TSE.2017.2756048},
  issn         = {0098-5589},
  keywords     = {Androids, Bluetooth, Humanoid robots, Java, Software, Tools, Configuration options, Static analysis, Variability mining},
}

% ISSTA 2017 paper: "Just-in-time Static Analysis".
% The first author is written "Nguyen Quang Do, Lisa" to match entry 20543, so that both
% entries sort and abbreviate identically.
% NOTE(review): confirm the surname split with the author.
@inproceedings{20559,
  author       = {Nguyen Quang Do, Lisa and Ali, Karim and Livshits, Benjamin and Bodden, Eric and Smith, Justin and Murphy-Hill, Emerson},
  title        = {Just-in-time Static Analysis},
  booktitle    = {Proceedings of the 26th {ACM} {SIGSOFT} International Symposium on Software Testing and Analysis},
  publisher    = {ACM},
  pages        = {307--317},
  year         = {2017},
  doi          = {10.1145/3092703.3092705},
  isbn         = {978-1-4503-5076-1},
  keywords     = {Just-in-Time, Layered analysis, Static analysis},
}

