@article{30511,
  author    = {Schubert, Philipp and Gazzillo, Paul and Patterson, Zach and Braha, Julian and Schiebel, Fabian and Hermann, Ben and Wei, Shiyi and Bodden, Eric},
  title     = {Static Data-Flow Analysis for Software Product Lines in {C}},
  journal   = {Automated Software Engineering},
  volume    = {29},
  number    = {1},
  year      = {2022},
  publisher = {Springer Science and Business Media LLC},
  issn      = {0928-8910},
  doi       = {10.1007/s10515-022-00333-1},
  keywords  = {inter-procedural static analysis, software product lines, preprocessor, LLVM, C/C++},
  abstract  = {Many critical codebases are written in C, and most of them use preprocessor directives to encode variability, effectively encoding software product lines. These preprocessor directives, however, challenge any static code analysis. SPLlift, a previously presented approach for analyzing software product lines, is limited to Java programs that use a rather simple feature encoding and to analysis problems with a finite and ideally small domain. Other approaches that allow the analysis of real-world C software product lines use special-purpose analyses, preventing the reuse of existing analysis infrastructures and ignoring the progress made by the static analysis community. This work presents VarAlyzer, a novel static analysis approach for software product lines. VarAlyzer first transforms preprocessor constructs to plain C while preserving their variability and semantics. It then solves any given distributive analysis problem on transformed product lines in a variability-aware manner. VarAlyzer's analysis results are annotated with feature constraints that encode in which configurations each result holds. Our experiments with 95 compilation units of OpenSSL show that applying VarAlyzer enables one to conduct inter-procedural, flow-, field- and context-sensitive data-flow analyses on entire product lines for the first time, outperforming the product-based approach for highly-configurable systems.},
}

@article{33835,
  author    = {Sayar, Imen and Bartel, Alexandre and Bodden, Eric and Le Traon, Yves},
  title     = {An In-Depth Study of {Java} Deserialization Remote-Code Execution Exploits and Vulnerabilities},
  journal   = {ACM Transactions on Software Engineering and Methodology},
  year      = {2022},
  publisher = {Association for Computing Machinery (ACM)},
  issn      = {1049-331X},
  doi       = {10.1145/3554732},
  keywords  = {Software},
  abstract  = {Nowadays, an increasing number of applications uses deserialization. This technique, based on rebuilding the instance of objects from serialized byte streams, can be dangerous since it can open the application to attacks such as remote code execution (RCE) if the data to deserialize is originating from an untrusted source. Deserialization vulnerabilities are so critical that they are in OWASP's list of top 10 security risks for web applications. This is mainly caused by faults in the development process of applications and by flaws in their dependencies, i.e., flaws in the libraries used by these applications. No previous work has studied deserialization attacks in-depth: How are they performed? How are weaknesses introduced and patched? And for how long are vulnerabilities present in the codebase? To yield a deeper understanding of this important kind of vulnerability, we perform two main analyses: one on attack gadgets, i.e., exploitable pieces of code, present in Java libraries, and one on vulnerabilities present in Java applications. For the first analysis, we conduct an exploratory large-scale study by running 256 515 experiments in which we vary the versions of libraries for each of the 19 publicly available exploits. Such attacks rely on a combination of gadgets present in one or multiple Java libraries. A gadget is a method which is using objects or fields that can be attacker-controlled. Our goal is to precisely identify library versions containing gadgets and to understand how gadgets have been introduced and how they have been patched. We observe that the modification of one innocent-looking detail in a class -- such as making it public -- can already introduce a gadget. Furthermore, we noticed that among the studied libraries, 37.5\% are not patched, leaving gadgets available for future attacks. For the second analysis, we manually analyze 104 deserialization vulnerabilities CVEs to understand how vulnerabilities are introduced and patched in real-life Java applications. Results indicate that the vulnerabilities are not always completely patched or that a workaround solution is proposed. With a workaround solution, applications are still vulnerable since the code itself is unchanged.},
}

@article{33836,
  author    = {Piskachev, Goran and Späth, Johannes and Budde, Ingo and Bodden, Eric},
  title     = {Fluently Specifying Taint-Flow Queries with {fluentTQL}},
  journal   = {Empirical Software Engineering},
  volume    = {27},
  number    = {5},
  pages     = {1--33},
  year      = {2022},
  publisher = {Springer},
}

@inproceedings{33838,
  author        = {Krishnamurthy, Ranjith and Piskachev, Goran and Bodden, Eric},
  title         = {To What Extent Can We Analyze {Kotlin} Programs Using Existing {Java} Taint Analysis Tools?},
  year          = {2022},
  internal-note = {booktitle missing in source data -- verify venue},
}

@inproceedings{33837,
  author        = {Piskachev, Goran and Dziwok, Stefan and Koch, Thorsten and Merschjohann, Sven and Bodden, Eric},
  title         = {How Far Are {German} Companies in Improving Security Through Static Program Analysis Tools?},
  year          = {2022},
  internal-note = {booktitle missing in source data -- verify venue},
}

@misc{33959,
  author    = {Wickert, Anna-Katharina and Baumgärtner, Lars and Schlichtig, Michael and Mezini, Mira},
  title     = {To Fix or Not to Fix: A Critical Study of Crypto-Misuses in the Wild},
  year      = {2022},
  eprint    = {2209.11103},
  eprinttype = {arXiv},
  doi       = {10.48550/arXiv.2209.11103},
  abstract  = {Recent studies have revealed that 87\% to 96\% of the Android apps using cryptographic APIs have a misuse which may cause security vulnerabilities. As previous studies did not conduct a qualitative examination of the validity and severity of the findings, our objective was to understand the findings in more depth. We analyzed a set of 936 open-source Java applications for cryptographic misuses. Our study reveals that 88.10\% of the analyzed applications fail to use cryptographic APIs securely. Through our manual analysis of a random sample, we gained new insights into effective false positives. For example, every fourth misuse of the frequently misused JCA class MessageDigest is an effective false positive due to its occurrence in a non-security context. As we wanted to gain deeper insights into the security implications of these misuses, we created an extensive vulnerability model for cryptographic API misuses. Our model includes previously undiscussed attacks in the context of cryptographic APIs such as DoS attacks. This model reveals that nearly half of the misuses are of high severity, e.g., hard-coded credentials and potential Man-in-the-Middle attacks.},
}

@article{27045,
  author    = {Luo, Linghui and Pauck, Felix and Piskachev, Goran and Benz, Manuel and Pashchenko, Ivan and Mory, Martin and Bodden, Eric and Hermann, Ben and Massacci, Fabio},
  title     = {{TaintBench}: Automatic Real-World Malware Benchmarking of {Android} Taint Analyses},
  journal   = {Empirical Software Engineering},
  year      = {2021},
  issn      = {1382-3256},
  doi       = {10.1007/s10664-021-10013-5},
  abstract  = {Due to the lack of established real-world benchmark suites for static taint analyses of Android applications, evaluations of these analyses are often restricted and hard to compare. Even in evaluations that do use real-world apps, details about the ground truth in those apps are rarely documented, which makes it difficult to compare and reproduce the results. To push Android taint analysis research forward, this paper thus recommends criteria for constructing real-world benchmark suites for this specific domain, and presents TaintBench, the first real-world malware benchmark suite with documented taint flows. TaintBench benchmark apps include taint flows with complex structures, and addresses static challenges that are commonly agreed on by the community. Together with the TaintBench suite, we introduce the TaintBench framework, whose goal is to simplify real-world benchmarking of Android taint analyses. First, a usability test shows that the framework improves experts' performance and perceived usability when documenting and inspecting taint flows. Second, experiments using TaintBench reveal new insights for the taint analysis tools Amandroid and FlowDroid: (i) They are less effective on real-world malware apps than on synthetic benchmark apps. (ii) Predefined lists of sources and sinks heavily impact the tools' accuracy. (iii) Surprisingly, up-to-date versions of both tools are less accurate than their predecessors.},
}

@phdthesis{27158,
  author = {Luo, Linghui},
  title  = {Improving Real-World Applicability of Static Taint Analysis},
  school = {Universität Paderborn},
  year   = {2021},
}

@article{21595,
  author  = {Stockmann, Lars and Laux, Sven and Bodden, Eric},
  title   = {Using Architectural Runtime Verification for Offline Data Analysis},
  journal = {Journal of Automotive Software Engineering},
  year    = {2021},
  issn    = {2589-2258},
  doi     = {10.2991/jase.d.210205.001},
}

@phdthesis{21596,
  author = {Fischer, Andreas},
  title  = {Computing on Encrypted Data Using Trusted Execution Environments},
  school = {Universität Paderborn},
  year   = {2021},
}

@inproceedings{21597,
  author    = {Holzinger, Philipp and Bodden, Eric},
  title     = {A Systematic Hardening of {Java}'s Information Hiding},
  booktitle = {International Symposium on Advanced Security on Software and Systems (ASSS)},
  year      = {2021},
}

@inproceedings{21599,
  author    = {Bonifacio, Rodrigo and Krüger, Stefan and Narasimhan, Krishna and Bodden, Eric and Mezini, Mira},
  title     = {Dealing with Variability in {API} Misuse Specification},
  booktitle = {European Conference on Object-Oriented Programming (ECOOP)},
  year      = {2021},
}

@inproceedings{22462,
  author    = {Shivarpatna Venkatesh, Ashwin Prasad and Bodden, Eric},
  title     = {Automated Cell Header Generator for {Jupyter} Notebooks},
  booktitle = {International Workshop on AI and Software Testing/Analysis (AISTA)},
  year      = {2021},
  doi       = {10.1145/3464968.3468410},
}

@inproceedings{23374,
  author    = {Kummita, Sriteja and Piskachev, Goran and Späth, Johannes and Bodden, Eric},
  title     = {Qualitative and Quantitative Analysis of Callgraph Algorithms for {Python}},
  booktitle = {2021 International Conference on Code Quality (ICCQ)},
  year      = {2021},
  doi       = {10.1109/iccq51190.2021.9392986},
}

@inproceedings{30084,
  author    = {Karakaya, Kadiray and Bodden, Eric},
  title     = {{SootFX}: A Static Code Feature Extraction Tool for {Java} and {Android}},
  booktitle = {2021 IEEE 21st International Working Conference on Source Code Analysis and Manipulation (SCAM)},
  year      = {2021},
  publisher = {IEEE},
  doi       = {10.1109/scam52516.2021.00030},
}

@inproceedings{21598,
  author    = {Schubert, Philipp and Hermann, Ben and Bodden, Eric},
  title     = {Lossless, Persisted Summarization of Static Callgraph, Points-To and Data-Flow Analysis},
  booktitle = {European Conference on Object-Oriented Programming (ECOOP)},
  year      = {2021},
  abstract  = {Static analysis is used to automatically detect bugs and security breaches, and aids compiler optimization. Whole-program analysis (WPA) can yield high precision, however causes long analysis times and thus does not match common software-development workflows, making it often impractical to use for large, real-world applications. This paper thus presents the design and implementation of ModAlyzer, a novel static-analysis approach that aims at accelerating whole-program analysis by making the analysis modular and compositional. It shows how to compute lossless, persisted summaries for callgraph, points-to and data-flow information, and it reports under which circumstances this function-level compositional analysis outperforms WPA. We implemented ModAlyzer as an extension to LLVM and PhASAR, and applied it to 12 real-world C and C++ applications. At analysis time, ModAlyzer modularly and losslessly summarizes the analysis effect of the library code those applications share, hence avoiding its repeated re-analysis. The experimental results show that the reuse of these summaries can save, on average, 72\% of analysis time over WPA. Moreover, because it is lossless, the module-wise analysis fully retains precision and recall. Surprisingly, as our results show, it sometimes even yields precision superior to WPA. The initial summary generation, on average, takes about 3.67 times as long as WPA.},
}

@article{31132,
  author    = {Dann, Andreas Peter and Plate, Henrik and Hermann, Ben and Ponta, Serena Elisa and Bodden, Eric},
  title     = {Identifying Challenges for {OSS} Vulnerability Scanners -- A Study \& Test Suite},
  journal   = {IEEE Transactions on Software Engineering},
  pages     = {1--1},
  year      = {2021},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  issn      = {0098-5589},
  doi       = {10.1109/tse.2021.3101739},
  keywords  = {Software},
}

@inproceedings{26407,
  author    = {Piskachev, Goran and Krishnamurthy, Ranjith and Bodden, Eric},
  title     = {{SecuCheck}: Engineering Configurable Taint Analysis for Software Developers},
  booktitle = {2021 IEEE 21st International Working Conference on Source Code Analysis and Manipulation (SCAM)},
  year      = {2021},
}

@inproceedings{22463,
  author    = {Luo, Linghui and Schäf, Martin and Sanchez, Daniel and Bodden, Eric},
  title     = {{IDE} Support for Cloud-Based Static Analyses},
  booktitle = {Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
  year      = {2021},
}

@comment{Entry 33840 appears to duplicate entry 30084 (same authors, title, venue, year).
  Both keys are retained because either may be cited elsewhere; consider merging and
  aliasing (biblatex: ids = {...}) after checking citations.}

@inproceedings{33840,
  author        = {Karakaya, Kadiray and Bodden, Eric},
  title         = {{SootFX}: A Static Code Feature Extraction Tool for {Java} and {Android}},
  booktitle     = {2021 IEEE 21st International Working Conference on Source Code Analysis and Manipulation (SCAM)},
  pages         = {181--186},
  year          = {2021},
  internal-note = {likely duplicate of key 30084 -- verify before citing},
}