% Chapter "Explainable Integration of Knowledge Graphs Using Large Language Models"
% in the Springer volume "Natural Language Processing and Information Systems" (2023).
% Values use single braces so the author list is split on " and " into individual
% names and bibliography styles remain free to recase the title.
% NOTE(review): the first author's given names repeat the surname "Ahmed";
% confirm the intended name against the publication.
% NOTE(review): classic BibTeX styles ignore booktitle on inbook entries and expect
% chapter or pages; confirm the entry type against the target toolchain.
@inbook{46516,
  author       = {Ahmed, Abdullah Fathi Ahmed and Firmansyah, Asep Fajar and Sherif, Mohamed and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  title        = {Explainable Integration of Knowledge Graphs Using Large Language Models},
  booktitle    = {Natural Language Processing and Information Systems},
  publisher    = {Springer Nature Switzerland},
  year         = {2023},
  isbn         = {9783031353192},
  issn         = {0302-9743},
  doi          = {10.1007/978-3-031-35320-8_9},
  abstract     = {Linked knowledge graphs build the backbone of many data-driven applications such as search engines, conversational agents and e-commerce solutions. Declarative link discovery frameworks use complex link specifications to express the conditions under which a link between two resources can be deemed to exist. However, understanding such complex link specifications is a challenging task for non-expert users of link discovery frameworks. In this paper, we address this drawback by devising NMV-LS, a language model-based verbalization approach for translating complex link specifications into natural language. NMV-LS relies on the results of rule-based link specification verbalization to apply continuous training on T5, a large language model based on the Transformer architecture. We evaluated NMV-LS on English and German datasets using well-known machine translation metrics such as BLEU, METEOR, ChrF++ and TER. Our results suggest that our approach achieves a verbalization performance close to that of humans and outperforms state of the art approaches. Our source code and datasets are publicly available at https://github.com/dice-group/NMV-LS.},
}

% Chapter "IndQNER: Named Entity Recognition Benchmark Dataset from the Indonesian
% Translation of the Quran" in the Springer volume "Natural Language Processing and
% Information Systems" (2023).
% Values use single braces so the author list is split on " and " into individual
% names; only the words in the title that must keep their case are brace-protected.
% The place "Derby, UK" is stored in venue rather than location, because biblatex
% prints location as the publisher's place.
% NOTE(review): "Derby, UK" is presumed to be the conference venue, not the
% publisher's city; confirm, and confirm no consumer of this file reads location.
% NOTE(review): classic BibTeX styles ignore booktitle on inbook entries and expect
% chapter or pages; confirm the entry type against the target toolchain.
@inbook{46572,
  author       = {Gusmita, Ria Hari and Firmansyah, Asep Fajar and Moussallem, Diego and Ngonga Ngomo, Axel-Cyrille},
  title        = {{IndQNER}: Named Entity Recognition Benchmark Dataset from the {Indonesian} Translation of the {Quran}},
  booktitle    = {Natural Language Processing and Information Systems},
  publisher    = {Springer Nature Switzerland},
  venue        = {Derby, UK},
  year         = {2023},
  isbn         = {9783031353192},
  issn         = {0302-9743},
  doi          = {10.1007/978-3-031-35320-8_12},
  keywords     = {NER benchmark dataset, Indonesian, specific domain},
  abstract     = {Indonesian is classified as underrepresented in the Natural Language Processing (NLP) field, despite being the tenth most spoken language in the world with 198 million speakers. The paucity of datasets is recognized as the main reason for the slow advancements in NLP research for underrepresented languages. Significant attempts were made in 2020 to address this drawback for Indonesian. The Indonesian Natural Language Understanding (IndoNLU) benchmark was introduced alongside IndoBERT pre-trained language model. The second benchmark, Indonesian Language Evaluation Montage (IndoLEM), was presented in the same year. These benchmarks support several tasks, including Named Entity Recognition (NER). However, all NER datasets are in the public domain and do not contain domain-specific datasets. To alleviate this drawback, we introduce IndQNER, a manually annotated NER benchmark dataset in the religious domain that adheres to a meticulously designed annotation guideline. Since Indonesia has the world's largest Muslim population, we build the dataset from the Indonesian translation of the Quran. The dataset includes 2475 named entities representing 18 different classes. To assess the annotation quality of IndQNER, we perform experiments with BiLSTM and CRF-based NER, as well as IndoBERT fine-tuning. The results reveal that the first model outperforms the second model achieving 0.98 F1 points. This outcome indicates that IndQNER may be an acceptable evaluation metric for Indonesian NER tasks in the aforementioned domain, widening the research's domain range.},
}