@inproceedings{52827,
  author       = {Hu, Lijie and Habernal, Ivan and Shen, Lei and Wang, Di},
  booktitle    = {Findings of the Association for Computational Linguistics: {EACL} 2024, St. Julian's, Malta, March 17--22, 2024},
  editor       = {Graham, Yvette and Purver, Matthew},
  pages        = {478--499},
  publisher    = {Association for Computational Linguistics},
  title        = {Differentially Private Natural Language Models: Recent Advances and Future Directions},
  year         = {2024},
}

@inproceedings{52842,
  abstract     = {Neural machine translation (NMT) is a widely popular text generation task, yet there is a considerable research gap in the development of privacy-preserving NMT models, despite significant data privacy concerns for NMT systems. Differentially private stochastic gradient descent (DP-SGD) is a popular method for training machine learning models with concrete privacy guarantees; however, the implementation specifics of training a model with DP-SGD are not always clarified in existing models, with differing software libraries used and code bases not always being public, leading to reproducibility issues. To tackle this, we introduce DP-NMT, an open-source framework for carrying out research on privacy-preserving NMT with DP-SGD, bringing together numerous models, datasets, and evaluation metrics in one systematic software package. Our goal is to provide a platform for researchers to advance the development of privacy-preserving NMT systems, keeping the specific details of the DP-SGD algorithm transparent and intuitive to implement. We run a set of experiments on datasets from both general and privacy-related domains to demonstrate our framework in use. We make our framework publicly available and welcome feedback from the community.},
  author       = {Igamberdiev, Timour and Vu, Doan Nam Long and Kuennecke, Felix and Yu, Zhuo and Holmer, Jannik and Habernal, Ivan},
  booktitle    = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations},
  editor       = {Aletras, Nikolaos and De Clercq, Orphee},
  pages        = {94--105},
  publisher    = {Association for Computational Linguistics},
  title        = {{DP-NMT}: Scalable Differentially Private Machine Translation},
  year         = {2024},
}

@inproceedings{48289,
  author       = {Habernal, Ivan and Mireshghallah, Fatemehsadat and Thaine, Patricia and Ghanavati, Sepideh and Feyisetan, Oluwaseyi},
  booktitle    = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts},
  publisher    = {Association for Computational Linguistics},
  title        = {Privacy-Preserving Natural Language Processing},
  doi          = {10.18653/v1/2023.eacl-tutorials.6},
  year         = {2023},
}

@inproceedings{48288,
  author       = {Matzken, Cleo and Eger, Steffen and Habernal, Ivan},
  booktitle    = {Findings of the Association for Computational Linguistics: {ACL} 2023},
  publisher    = {Association for Computational Linguistics},
  title        = {Trade-Offs Between Fairness and Privacy in Language Modeling},
  doi          = {10.18653/v1/2023.findings-acl.434},
  year         = {2023},
}

@inproceedings{48291,
  author       = {Mouhammad, Nina and Daxenberger, Johannes and Schiller, Benjamin and Habernal, Ivan},
  booktitle    = {Proceedings of the 17th Linguistic Annotation Workshop ({LAW-XVII})},
  publisher    = {Association for Computational Linguistics},
  title        = {Crowdsourcing on Sensitive Data with Privacy-Preserving Text Rewriting},
  doi          = {10.18653/v1/2023.law-1.8},
  year         = {2023},
}

@inproceedings{48296,
  author       = {Yin, Ying and Habernal, Ivan},
  booktitle    = {Proceedings of the Natural Legal Language Processing Workshop 2022},
  publisher    = {Association for Computational Linguistics},
  title        = {Privacy-Preserving Models for Legal Natural Language Processing},
  doi          = {10.18653/v1/2022.nllp-1.14},
  year         = {2022},
}

@article{48294,
  abstract     = {Clinical NLP tasks such as mental health assessment from text, must take social constraints into account - the performance maximization must be constrained by the utmost importance of guaranteeing privacy of user data. Consumer protection regulations, such as GDPR, generally handle privacy by restricting data availability, such as requiring to limit user data to 'what is necessary' for a given purpose. In this work, we reason that providing stricter formal privacy guarantees, while increasing the volume of user data in the model, in most cases increases benefit for all parties involved, especially for the user. We demonstrate our arguments on two existing suicide risk assessment datasets of Twitter and Reddit posts. We present the first analysis juxtaposing user history length and differential privacy budgets and elaborate how modeling additional user context enables utility preservation while maintaining acceptable user privacy guarantees.},
  author       = {Sawhney, Ramit and Neerkaje, Atula and Habernal, Ivan and Flek, Lucie},
  issn         = {2334-0770},
  journal      = {Proceedings of the International {AAAI} Conference on Web and Social Media},
  pages        = {766--776},
  publisher    = {Association for the Advancement of Artificial Intelligence ({AAAI})},
  title        = {How Much User Context Do We Need? {Privacy} by Design in Mental Health {NLP} Applications},
  doi          = {10.1609/icwsm.v17i1.22186},
  volume       = {17},
  year         = {2023},
}

@inproceedings{48297,
  author       = {Senge, Manuel and Igamberdiev, Timour and Habernal, Ivan},
  booktitle    = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  publisher    = {Association for Computational Linguistics},
  title        = {One size does not fit all: {Investigating} strategies for differentially-private learning across {NLP} tasks},
  doi          = {10.18653/v1/2022.emnlp-main.496},
  year         = {2022},
}

@inproceedings{48292,
  author       = {Igamberdiev, Timour and Habernal, Ivan},
  booktitle    = {Findings of the Association for Computational Linguistics: {ACL} 2023},
  publisher    = {Association for Computational Linguistics},
  title        = {{DP-BART} for Privatized Text Rewriting under Local Differential Privacy},
  doi          = {10.18653/v1/2023.findings-acl.874},
  year         = {2023},
}

@inproceedings{48295,
  author       = {Bongard, Leonard and Held, Lena and Habernal, Ivan},
  booktitle    = {Proceedings of the Natural Legal Language Processing Workshop 2022},
  publisher    = {Association for Computational Linguistics},
  title        = {The Legal Argument Reasoning Task in Civil Procedure},
  doi          = {10.18653/v1/2022.nllp-1.17},
  year         = {2022},
}

@article{48290,
  abstract     = {Identifying, classifying, and analyzing arguments in legal discourse has been a prominent area of research since the inception of the argument mining field. However, there has been a major discrepancy between the way natural language processing (NLP) researchers model and annotate arguments in court decisions and the way legal experts understand and analyze legal argumentation. While computational approaches typically simplify arguments into generic premises and claims, arguments in legal research usually exhibit a rich typology that is important for gaining insights into the particular case and applications of law in general. We address this problem and make several substantial contributions to move the field forward. First, we design a new annotation scheme for legal arguments in proceedings of the European Court of Human Rights (ECHR) that is deeply rooted in the theory and practice of legal argumentation research. Second, we compile and annotate a large corpus of 373 court decisions (2.3M tokens and 15k annotated argument spans). Finally, we train an argument mining model that outperforms state-of-the-art models in the legal NLP domain and provide a thorough expert-based evaluation. All datasets and source codes are available under open licenses at https://github.com/trusthlt/mining-legal-arguments.},
  author       = {Habernal, Ivan and Faber, Daniel and Recchia, Nicola and Bretthauer, Sebastian and Gurevych, Iryna and Spiecker genannt D{\"o}hmann, Indra and Burchard, Christoph},
  issn         = {0924-8463},
  journal      = {Artificial Intelligence and Law},
  keywords     = {Law, Artificial Intelligence},
  publisher    = {Springer Science and Business Media LLC},
  title        = {Mining legal arguments in court decisions},
  doi          = {10.1007/s10506-023-09361-y},
  year         = {2023},
}

@unpublished{49649,
  author       = {Igamberdiev, Timour and Vu, Doan Nam Long and K{\"u}nnecke, Felix and Yu, Zhuo and Holmer, Jannik and Habernal, Ivan},
  title        = {{DP-NMT}: Scalable Differentially-Private Machine Translation},
  note         = {Preprint},
  internal-note = {NOTE(review): likely a preprint of entry 52842 (EACL 2024 demo); consider removing or merging -- confirm},
  year         = {2023},
}

@unpublished{49650,
  author       = {Held, Lena and Habernal, Ivan},
  title        = {{LaCour!}: Enabling Research on Argumentation in Hearings of the {European Court of Human Rights}},
  note         = {Preprint},
  year         = {2023},
}

@inproceedings{48299,
  abstract     = {Graph convolutional networks (GCNs) are a powerful architecture for representation learning on documents that naturally occur as graphs, e.g., citation or social networks. However, sensitive personal information, such as documents with people's profiles or relationships as edges, are prone to privacy leaks, as the trained model might reveal the original input. Although differential privacy (DP) offers a well-founded privacy-preserving framework, GCNs pose theoretical and practical challenges due to their training specifics. We address these challenges by adapting differentially-private gradient-based training to GCNs and conduct experiments using two optimizers on five NLP datasets in two languages. We propose a simple yet efficient method based on random graph splits that not only improves the baseline privacy bounds by a factor of 2.7 while retaining competitive F1 scores, but also provides strong privacy guarantees of epsilon = 1.0. We show that, under certain modeling choices, privacy-preserving GCNs perform up to 90\% of their non-private variants, while formally guaranteeing strong privacy measures.},
  author       = {Igamberdiev, Timour and Habernal, Ivan},
  booktitle    = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
  pages        = {338--350},
  publisher    = {European Language Resources Association},
  title        = {Privacy-Preserving Graph Convolutional Networks for Text Classification},
  year         = {2022},
}

@inproceedings{48300,
  abstract     = {Text rewriting with differential privacy (DP) provides concrete theoretical guarantees for protecting the privacy of individuals in textual documents. In practice, existing systems may lack the means to validate their privacy-preserving claims, leading to problems of transparency and reproducibility. We introduce DP-Rewrite, an open-source framework for differentially private text rewriting which aims to solve these problems by being modular, extensible, and highly customizable. Our system incorporates a variety of downstream datasets, models, pre-training procedures, and evaluation metrics to provide a flexible way to lead and validate private text rewriting research. To demonstrate our software in practice, we provide a set of experiments as a case study on the ADePT DP text rewriting system, detecting a privacy leak in its pre-training approach. Our system is publicly available, and we hope that it will help the community to make DP text rewriting research more accessible and transparent.},
  author       = {Igamberdiev, Timour and Arnold, Thomas and Habernal, Ivan},
  booktitle    = {Proceedings of the 29th International Conference on Computational Linguistics},
  pages        = {2927--2933},
  publisher    = {International Committee on Computational Linguistics},
  title        = {{DP-Rewrite}: Towards Reproducibility and Transparency in Differentially Private Text Rewriting},
  year         = {2022},
}

@inproceedings{48298,
  author       = {Habernal, Ivan},
  booktitle    = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  publisher    = {Association for Computational Linguistics},
  title        = {How reparametrization trick broke differentially-private text representation learning},
  doi          = {10.18653/v1/2022.acl-short.87},
  year         = {2022},
}

@inproceedings{48286,
  author       = {Habernal, Ivan},
  booktitle    = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  publisher    = {Association for Computational Linguistics},
  title        = {When differential privacy meets {NLP}: {The} devil is in the detail},
  doi          = {10.18653/v1/2021.emnlp-main.114},
  year         = {2021},
}

@inproceedings{48301,
  author       = {Glockner, Max and Habernal, Ivan and Gurevych, Iryna},
  booktitle    = {Findings of the Association for Computational Linguistics: {EMNLP} 2020},
  publisher    = {Association for Computational Linguistics},
  title        = {Why do you think that? {Exploring} Faithful Sentence-Level Rationales Without Supervision},
  doi          = {10.18653/v1/2020.findings-emnlp.97},
  year         = {2020},
}

@inproceedings{48303,
  author       = {Habernal, Ivan and Wachsmuth, Henning and Gurevych, Iryna and Stein, Benno},
  booktitle    = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
  publisher    = {Association for Computational Linguistics},
  title        = {The Argument Reasoning Comprehension Task: {Identification} and Reconstruction of Implicit Warrants},
  doi          = {10.18653/v1/n18-1175},
  year         = {2018},
}

@inproceedings{48302,
  author       = {Habernal, Ivan and Wachsmuth, Henning and Gurevych, Iryna and Stein, Benno},
  booktitle    = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
  publisher    = {Association for Computational Linguistics},
  title        = {Before Name-Calling: {Dynamics} and Triggers of Ad Hominem Fallacies in Web Argumentation},
  doi          = {10.18653/v1/n18-1036},
  year         = {2018},
}

