@article{61108,
  abstract     = {Greybox fuzzing is used extensively in research and practice. There are umpteen publications that improve greybox fuzzing. However, to what extent do these improvements affect the internal components or internals of a given fuzzer is not yet understood as the improvements are mostly evaluated using code coverage and bug finding capability. Such an evaluation is insufficient to understand the effect of improvements on the fuzzer internals. Some of the literature visualizes the outcomes of fuzzing to enhance the understanding. However, they only focus on high-level information and no previous research on visualization has been dedicated to understanding fuzzing internals. To close this gap, we propose the first step towards development of a fuzzing-specific visualization framework: a taxonomy of visualization analysis tasks that fuzzing experts desire to help them understand the fuzzing internals. Our approach involves conducting interviews with fuzzing experts and using qualitative data analysis to systematically extract the task taxonomy from the interview data. We also evaluate the support of existing fuzzing visualization tools through the lens of our taxonomy. In our study, we have conducted 33 interviews with fuzzing practitioners and extracted a taxonomy of 120 visualization analysis tasks. Our evaluation shows that the existing fuzzing visualization tools only provide aids to support 10 of them.},
  author       = {Kummita, Sriteja and Miao, Miao and Bodden, Eric and Wei, Shiyi},
  issn         = {1049-331X},
  journal      = {ACM Transactions on Software Engineering and Methodology},
  publisher    = {Association for Computing Machinery (ACM)},
  title        = {Visualization Task Taxonomy to Understand the Fuzzing Internals},
  doi          = {10.1145/3718346},
  year         = {2025},
}

@article{61546,
  abstract     = {Fuzzing is a powerful software testing technique renowned for its effectiveness in identifying software vulnerabilities. Traditional fuzzing evaluations typically focus on overall fuzzer performance across a set of target programs, yet few benchmarks consider how fine-grained program features influence fuzzing effectiveness. To bridge this gap, we introduce FeatureBench, a novel benchmark designed to generate programs with configurable, fine-grained program features to enhance fuzzing evaluations. We reviewed 25 recent grey-box fuzzing studies, extracting 7 program features related to control-flow and data-flow that can impact fuzzer performance. Using these features, we generated a benchmark consisting of 153 programs controlled by 10 fine-grained configurable parameters. We evaluated 11 fuzzers using this benchmark, with each fuzzer representing either distinct claimed improvements or serving as a widely used baseline in fuzzing evaluations. The results indicate that fuzzer performance varies significantly based on the program features and their strengths, highlighting the importance of incorporating program characteristics into fuzzing evaluations.},
  author       = {Miao, Miao and Kummita, Sriteja and Bodden, Eric and Wei, Shiyi},
  issn         = {2994-970X},
  journal      = {Proceedings of the ACM on Software Engineering},
  number       = {ISSTA},
  pages        = {527--549},
  publisher    = {Association for Computing Machinery (ACM)},
  title        = {Program Feature-Based Benchmarking for Fuzz Testing},
  doi          = {10.1145/3728899},
  volume       = {2},
  year         = {2025},
}

@inproceedings{23388,
  abstract     = {As one of the most popular programming languages, PYTHON has become a relevant target language for static analysis tools. The primary data structure for performing an inter-procedural static analysis is call-graph (CG), which links call sites to potential call targets in a program. There exists multiple algorithms for constructing callgraphs, tailored to specific languages. However, comparatively few implementations target PYTHON. Moreover, there is still lack of empirical evidence as to how these few algorithms perform in terms of precision and recall. This paper thus presents EVAL\_CG, an extensible framework for comparative analysis of Python call-graphs. We conducted two experiments which run the CG algorithms on different Python programming constructs and real-world applications. In both experiments, we evaluate three CG generation frameworks namely, Code2flow, Pyan, and Wala. We record precision, recall, and running time, and identify sources of unsoundness of each framework. Our evaluation shows that none of the current CG construction frameworks produce a sound CG. Moreover, the static CGs contain many spurious edges. Code2flow is also comparatively slow. Hence, further research is needed to support CG generation for Python programs.},
  author       = {Kummita, Sriteja and Piskachev, Goran and Spaeth, Johannes and Bodden, Eric},
  booktitle    = {Proceedings of the 2021 International Conference on Code Quality ({ICCQ})},
  isbn         = {978-1-7281-8477-7},
  keywords     = {Static Analysis, Callgraph Analysis, Python, Qualitative Analysis, Quantitative Analysis, Empirical Evaluation},
  location     = {Virtual},
  publisher    = {IEEE},
  title        = {Qualitative and Quantitative Analysis of Callgraph Algorithms for {Python}},
  doi          = {10.1109/ICCQ51190.2021.9392986},
  year         = {2021},
}

@techreport{23389,
  abstract     = {Background - Software companies increasingly rely on static analysis tools to detect potential bugs and security vulnerabilities in their software products. In the past decade, more and more commercial and open-source static analysis tools have been developed and are maintained. Each tool comes with its own reporting format, preventing an easy integration of multiple analysis tools in a single interface, such as the Static Analysis Server Protocol (SASP). In 2017, a collaborative effort in industry, including Microsoft and GrammaTech, has proposed the Static Analysis Results Interchange Format (SARIF) to address this issue. SARIF is a standardized format in which static analysis warnings can be encoded, to allow the import and export of analysis reports between different tools.
Purpose - This paper explains the SARIF format through examples and presents a proof of concept of the connector that allows the static analysis tool CogniCrypt to generate and export its results in SARIF format.
Design/Approach - We conduct a cross-sectional study between the SARIF format and CogniCrypt's output format before detailing the implementation of the connector. The study aims to find the components of interest in CogniCrypt that the SARIF export module can complete.
Originality/Value - The integration of SARIF into CogniCrypt described in this paper can be reused to integrate SARIF into other static analysis tools.
Conclusion - After detailing the SARIF format, we present an initial implementation to integrate SARIF into CogniCrypt. After taking advantage of all the features provided by SARIF, CogniCrypt will be able to support SASP.},
  author       = {Kummita, Sriteja and Piskachev, Goran},
  institution  = {Fraunhofer IEM},
  internal-note = {institution inferred from author affiliation; required by @techreport -- verify against the published report},
  keywords     = {Static Analysis, Static Analysis Results Interchange Format, SARIF, Static Analysis Server Protocol, SASP},
  title        = {Integration of the {Static Analysis Results Interchange Format} in {CogniCrypt}},
  year         = {2019},
}

