@article{62973,
  abstract     = {Large Language Models (LLMs) are increasingly being explored for their potential in software engineering, particularly in static analysis tasks. In this study, we investigate the potential of current LLMs to enhance call-graph analysis and type inference for Python and JavaScript programs. We empirically evaluated 24 LLMs, including OpenAI's GPT series and open-source models like LLaMA and Mistral, using existing and newly developed benchmarks. Specifically, we enhanced TypeEvalPy, a micro-benchmarking framework for type inference in Python, with auto-generation capabilities, expanding its scope from 860 to 77,268 type annotations for Python. Additionally, we introduced SWARM-CG and SWARM-JS, comprehensive benchmarking suites for evaluating call-graph construction tools across multiple programming languages.
 Our findings reveal a contrasting performance of LLMs in static analysis tasks. For call-graph generation, traditional static analysis tools such as PyCG for Python and Jelly for JavaScript consistently outperform LLMs. While advanced models like mistral-large-it-2407-123b and gpt-4o show promise, they still struggle with completeness and soundness in call-graph analysis across both languages. In contrast, LLMs demonstrate a clear advantage in type inference for Python, surpassing traditional tools like HeaderGen and hybrid approaches such as HiTyper. These results suggest that, while LLMs hold promise in type inference, their limitations in call-graph analysis highlight the need for further research. Our study provides a foundation for integrating LLMs into static analysis workflows, offering insights into their strengths and current limitations.},
  author       = {Shivarpatna Venkatesh, Ashwin Prasad and Sunil, Rose and Sabu, Samkutty and Mir, Amir M. and Reis, Sofia and Bodden, Eric},
  journal      = {Empirical Software Engineering},
  number       = {6},
  publisher    = {Springer},
  title        = {An Empirical Study of Large Language Models for Type and Call Graph Analysis in {Python} and {JavaScript}},
  doi          = {10.48550/ARXIV.2410.00603},
  volume       = {30},
  year         = {2025},
}

@inproceedings{53959,
  abstract     = {In light of the growing interest in type inference research for Python, both researchers and practitioners require a standardized process to assess the performance of various type inference techniques. This paper introduces TypeEvalPy, a comprehensive micro-benchmarking framework for evaluating type inference tools. TypeEvalPy contains 154 code snippets with 845 type annotations across 18 categories that target various Python features. The framework manages the execution of containerized tools, transforms inferred types into a standardized format, and produces meaningful metrics for assessment. Through our analysis, we compare the performance of six type inference tools, highlighting their strengths and limitations. Our findings provide a foundation for further research and optimization in the domain of Python type inference.},
  author       = {Shivarpatna Venkatesh, Ashwin Prasad and Sabu, Samkutty and Wang, Jiawei and Mir, Amir M. and Li, Li and Bodden, Eric},
  booktitle    = {Proceedings of the 2024 IEEE/ACM 46th International Conference on Software Engineering: Companion Proceedings},
  isbn         = {9798400705021},
  location     = {Lisbon, Portugal},
  pages        = {49--53},
  publisher    = {Association for Computing Machinery},
  title        = {{TypeEvalPy}: A Micro-benchmarking Framework for {Python} Type Inference Tools},
  doi          = {10.1145/3639478.3640033},
  year         = {2024},
}

@inproceedings{55516,
  author       = {Shivarpatna Venkatesh, Ashwin Prasad and Sabu, Samkutty and Mir, Amir M. and Reis, Sofia and Bodden, Eric},
  booktitle    = {Proceedings of the 2024 IEEE/ACM First International Conference on AI Foundation Models and Software Engineering},
  publisher    = {ACM},
  title        = {The Emergence of Large Language Models in Static Analysis: A First Look through Micro-Benchmarks},
  doi          = {10.1145/3650105.3652288},
  year         = {2024},
}

@inproceedings{41813,
  author       = {Shivarpatna Venkatesh, Ashwin Prasad and Wang, Jiawei and Li, Li and Bodden, Eric},
  booktitle    = {IEEE International Conference on Software Analysis, Evolution and Reengineering (SANER)},
  title        = {Enhancing Comprehension and Navigation in {Jupyter} Notebooks with Static Analysis},
  year         = {2023},
}

@inproceedings{36522,
  abstract      = {Jupyter notebooks enable developers to interleave code snippets with rich-text and in-line visualizations. Data scientists use Jupyter notebook as the de-facto standard for creating and sharing machine-learning based solutions, primarily written in Python. Recent studies have demonstrated, however, that a large portion of Jupyter notebooks available on public platforms are undocumented and lacks a narrative structure. This reduces the readability of these notebooks. To address this shortcoming, this paper presents HeaderGen, a novel tool-based approach that automatically annotates code cells with categorical markdown headers based on a taxonomy of machine-learning operations, and classifies and displays function calls according to this taxonomy. For this functionality to be realized, HeaderGen enhances an existing call graph analysis in PyCG. To improve precision, HeaderGen extends PyCG's analysis with support for handling external library code and flow-sensitivity. The former is realized by facilitating the resolution of function return-types. Furthermore, HeaderGen uses type information to perform pattern matching on code syntax to annotate code cells.
The evaluation on 15 real-world Jupyter notebooks from Kaggle shows that HeaderGen's underlying call graph analysis yields high accuracy (96.4% precision and 95.9% recall). This is because HeaderGen can resolve return-types of external libraries where existing type inference tools such as pytype (by Google), pyright (by Microsoft), and Jedi fall short. The header generation has a precision of 82.2% and a recall rate of 96.8% with regard to headers created manually by experts. In a user study, HeaderGen helps participants finish comprehension and navigation tasks faster. All participants clearly perceive HeaderGen as useful to their task.},
  author        = {Shivarpatna Venkatesh, Ashwin Prasad and Wang, Jiawei and Li, Li and Bodden, Eric},
  keywords      = {static analysis, python, code comprehension, annotation, literate programming, jupyter notebook},
  booktitle     = {IEEE SANER 2023 (International Conference on Software Analysis, Evolution and Reengineering)},
  title         = {Enhancing Comprehension and Navigation in {Jupyter} Notebooks with Static Analysis},
  doi           = {10.48550/ARXIV.2301.04419},
  eprint        = {2301.04419},
  archiveprefix = {arXiv},
  year          = {2023},
  internal-note = {NOTE(review): DOI points to the arXiv preprint; this entry appears to duplicate key 41813 (same title, authors, year) -- confirm and consider merging},
}

@inproceedings{22462,
  author       = {Shivarpatna Venkatesh, Ashwin Prasad and Bodden, Eric},
  booktitle    = {International Workshop on AI and Software Testing/Analysis (AISTA)},
  title        = {Automated Cell Header Generator for {Jupyter} Notebooks},
  doi          = {10.1145/3464968.3468410},
  year         = {2021},
}

@inproceedings{16726,
  author       = {Razzaghi Kouchaksaraei, Hadi and Shivarpatna Venkatesh, Ashwin Prasad and Churi, Amey and Illian, Marvin and Karl, Holger},
  booktitle    = {European Conference on Networks and Communications (EUCNC 2020)},
  title        = {Dynamic Provisioning of Network Services on Heterogeneous Resources},
  year         = {2020},
}

@unpublished{20341,
  abstract      = {When implementing secure software, developers must ensure certain
requirements, such as the erasure of secret data after its use and execution in
real time. Such requirements are not explicitly captured by the C language and
could potentially be violated by compiler optimizations. As a result,
developers typically use indirect methods to hide their code's semantics from
the compiler and avoid unwanted optimizations. However, such workarounds are
not permanent solutions, as increasingly efficient compiler optimization causes
code that was considered secure in the past now vulnerable. This paper is a
literature review of (1) the security complications caused by compiler
optimizations, (2) approaches used by developers to mitigate optimization
problems, and (3) recent academic efforts towards enabling security engineers
to communicate implicit security requirements to the compiler. In addition, we
present a short study of six cryptographic libraries and how they approach the
issue of ensuring security requirements. With this paper, we highlight the need
for software developers and compiler designers to work together in order to
design efficient systems for writing secure software.},
  author        = {Shivarpatna Venkatesh, Ashwin Prasad and Handadi, A. Bhat and Mory, Martin},
  note          = {arXiv preprint arXiv:1907.02530},
  eprint        = {1907.02530},
  archiveprefix = {arXiv},
  title         = {Security Implications of Compiler Optimizations on {Cryptography} -- A Review},
  year          = {2019},
}

