@inproceedings{29030,
  abstract     = {{Geospatial data is at the core of the Semantic Web, whose largest knowledge base contains more than 30 billion facts. Reasoning on these large amounts of geospatial data requires efficient methods for the computation of links between the resources contained in these knowledge bases. In this paper, we present RADON, an efficient solution for the discovery of topological relations between geospatial resources according to the DE-9IM standard. Our evaluation shows that we outperform the state of the art by several orders of magnitude.}},
  author       = {{Sherif, Mohamed and Dreßler, Kevin and Smeros, Panayiotis and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Proceedings of The Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17)}},
  keywords     = {{radon sherif limes projecthobbit hobbit geiser group\_aksw SIMBA DICE sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:leds leds ngonga bioasq kevin}},
  title        = {{{RADON - Rapid Discovery of Topological Relations}}},
  year         = {{2017}},
}

@inproceedings{29024,
  abstract     = {{A significant portion of the evolution of Linked Data datasets lies in updating the links to other datasets. An important challenge when aiming to update these links automatically under the open-world assumption is the fact that usually only positive examples for the links exist. We address this challenge by presenting and evaluating WOMBAT, a novel approach for the discovery of links between knowledge bases that relies exclusively on positive examples. WOMBAT is based on generalisation via an upward refinement operator to traverse the space of link specifications. We study the theoretical characteristics of WOMBAT and evaluate it on 8 different benchmark datasets. Our evaluation suggests that WOMBAT outperforms state-of-the-art supervised approaches while relying on less information. Moreover, our evaluation suggests that WOMBAT's pruning algorithm allows it to scale well even on large datasets.}},
  author       = {{Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille and Lehmann, Jens}},
  booktitle    = {{14th Extended Semantic Web Conference, Portorož, Slovenia, 28th May - 1st June 2017}},
  keywords     = {{2017 group\_aksw sys:relevantFor:geoknow sys:relevantFor:infai sys:relevantFor:bis ngonga simba dice sherif group\_aksw geoknow wombat lehmann MOLE}},
  publisher    = {{Springer}},
  title        = {{{WOMBAT - A Generalization Approach for Automatic Link Discovery}}},
  year         = {{2017}},
}

@phdthesis{29027,
  abstract     = {{Over the last years, the Linked Open Data (LOD) Cloud has evolved from a mere 12 to more than 10,000 knowledge bases. These knowledge bases come from diverse domains including (but not limited to) publications, life sciences, social networking, government, media, and linguistics. Moreover, the LOD cloud also contains a large number of cross-domain knowledge bases such as DBpedia and Yago2. These knowledge bases are commonly managed in a decentralized fashion and contain partly overlapping information. This architectural choice has led to knowledge pertaining to the same domain being published by independent entities in the LOD cloud. For example, information on drugs can be found in Diseasome as well as DBpedia and Drugbank. Furthermore, certain knowledge bases such as DBLP have been published by several bodies, which in turn has led to duplicated content in the LOD cloud. In addition, large amounts of geospatial information have been made available with the growth of the heterogeneous Web of Data. The concurrent publication of knowledge bases containing related information promises to become a phenomenon of increasing importance with the growth of the number of independent data providers. Enabling the joint use of the knowledge bases published by these providers for tasks such as federated queries, cross-ontology question answering and data integration is most commonly tackled by creating links between the resources described within these knowledge bases. Within this thesis, we spur the transition from isolated knowledge bases to enriched Linked Data sets where information can be easily integrated and processed. To achieve this goal, we provide concepts, approaches and use cases that facilitate the integration and enrichment of information with other data types that are already present on the Linked Data Web, with a focus on geospatial data. The first challenge that motivates our work is the lack of measures that use geographic data for linking geospatial knowledge bases. This is partly due to geospatial resources being described by means of vector geometry. In particular, discrepancies in granularity and error measurements across knowledge bases render the selection of appropriate distance measures for geospatial resources difficult. We address this challenge by evaluating the existing literature for point-set measures that can be used to measure the similarity of vector geometries. Then, we present and evaluate the ten measures that we derived from the literature on samples of three real knowledge bases. The second challenge we address in this thesis is the lack of automatic Link Discovery (LD) approaches capable of dealing with geospatial knowledge bases with missing and erroneous data. To this end, we present Colibri, an unsupervised approach that allows discovering links between knowledge bases while improving the quality of the instance data in these knowledge bases. A Colibri iteration begins by generating links between knowledge bases. Then, the approach makes use of these links to detect resources with probably erroneous or missing information. This erroneous or missing information detected by the approach is finally corrected or added. The third challenge we address is the lack of scalable LD approaches for tackling big geospatial knowledge bases. Thus, we present Deterministic Particle-Swarm Optimization (DPSO), a novel load balancing technique for LD on parallel hardware based on particle-swarm optimization.
We combine this approach with the Orchid algorithm for geospatial linking and evaluate it on real and artificial datasets. The lack of approaches for the automatic updating of links of an evolving knowledge base is our fourth challenge. This challenge is addressed in this thesis by the Wombat algorithm. Wombat is a novel approach for the discovery of links between knowledge bases that relies exclusively on positive examples. Wombat is based on generalisation via an upward refinement operator to traverse the space of Link Specifications (LS). We study the theoretical characteristics of Wombat and evaluate it on different benchmark datasets. The last challenge addressed herein is the lack of automatic approaches for geospatial knowledge base enrichment. Thus, we propose Deer, a supervised learning approach based on a refinement operator for enriching Resource Description Framework (RDF) datasets. We show how we can use exemplary descriptions of enriched resources to generate accurate enrichment pipelines. We evaluate our approach against manually defined enrichment pipelines and show that our approach can learn accurate pipelines even when provided with a small number of training examples. Each of the proposed approaches is implemented and evaluated against state-of-the-art approaches on real and/or artificial datasets. Moreover, all approaches are peer-reviewed and published in a conference or journal paper. Throughout this thesis, we detail the ideas, implementation and evaluation of each of the approaches. Moreover, we discuss each approach and present lessons learned. Finally, we conclude this thesis by presenting a set of possible future extensions and use cases for each of the proposed approaches.}},
  author       = {{Sherif, Mohamed}},
  keywords     = {{2016 group\_aksw sys:relevantFor:geoknow sys:relevantFor:infai sys:relevantFor:bis ngonga simba dice sherif group\_aksw geoknow deer lehmann MOLE}},
  publisher    = {{University of Leipzig}},
  title        = {{{Automating Geospatial RDF Dataset Integration and Enrichment}}},
  year         = {{2016}},
}

@inproceedings{29031,
  author       = {{Sherif, Mohamed and Hassan, Mofeed and Soru, Tommaso and Ngonga Ngomo, Axel-Cyrille and Lehmann, Jens}},
  booktitle    = {{Proceedings of the Ontology Matching Workshop}},
  keywords     = {{sherif hassan soru lehmann ngonga geoknow group\_aksw SIMBA DICE sys:relevantFor:infai sys:relevantFor:bis limes}},
  title        = {{{Lion's Den: Feeding the LinkLion}}},
  year         = {{2016}},
}

@inproceedings{29016,
  author       = {{Georgala, Kleanthi and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{Proceedings of the 22nd European Conference on Artificial Intelligence (ECAI 2016), The Hague, 29 August - 2 September 2016}},
  keywords     = {{sys:relevantFor:infai group\_aksw simba georgala sherif ngonga sake projecthobbit limes dice}},
  title        = {{{An Efficient Approach for the Generation of Allen Relations}}},
  year         = {{2016}},
}

@inproceedings{29026,
  abstract     = {{With the adoption of RDF across several domains come growing requirements pertaining to the completeness and quality of RDF datasets. Currently, this problem is most commonly addressed by manually devising means of enriching an input dataset. The few tools that aim to support this endeavour usually focus on the manual definition of enrichment pipelines. In this paper, we present a supervised learning approach based on a refinement operator for enriching RDF datasets. We show how we can use exemplary descriptions of enriched resources to generate accurate enrichment pipelines. We evaluate our approach against eight manually defined enrichment pipelines and show that our approach can learn accurate pipelines even when provided with a small number of training examples.}},
  author       = {{Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille and Lehmann, Jens}},
  booktitle    = {{12th Extended Semantic Web Conference, Portorož, Slovenia, 31st May - 4th June 2015}},
  keywords     = {{2015 group\_aksw sys:relevantFor:geoknow sys:relevantFor:infai sys:relevantFor:bis ngonga simba dice sherif group\_aksw geoknow deer lehmann MOLE}},
  publisher    = {{Springer}},
  title        = {{{Automating RDF Dataset Transformation and Enrichment}}},
  year         = {{2015}},
}

@inproceedings{29035,
  abstract     = {{The combination of the advantages of widely used relational databases and semantic technologies has attracted significant research over the past decade. In particular, mapping languages for the conversion of databases to RDF knowledge bases have been developed and standardized in the form of R2RML. In this article, we first review those mapping languages and then work towards a unified formal model for them. Based on this, we present the Sparqlification Mapping Language (SML), which provides an intuitive way to declare mappings based on SQL views and SPARQL construct queries. We show that SML has the same expressivity as R2RML by enumerating the language features and showing the correspondences, and we outline how one syntax can be converted into the other. A user study conducted for this paper, juxtaposing SML and R2RML, provides evidence that SML is a more compact syntax which is easier to understand and read, thus lowering the barrier to offering SPARQL access to relational databases.}},
  author       = {{Stadler, Claus and Unbehauen, Joerg and Westphal, Patrick and Sherif, Mohamed and Lehmann, Jens}},
  booktitle    = {{Proceedings of the 8th Workshop on Linked Data on the Web (LDOW2015), Florence, Italy}},
  keywords     = {{2015 group\_aksw group\_mole mole stadler lehmann sherif simba dice sys:relevantFor:geoknow geoknow peer-reviewed MOLE westphal}},
  title        = {{{Simplified RDB2RDF Mapping}}},
  year         = {{2015}},
}

@inproceedings{29033,
  abstract     = {{Many of the available RDF datasets describe millions of resources by using billions of triples. Consequently, millions of links can potentially exist among such datasets. While parallel implementations of link discovery approaches have been developed in the past, little attention has been paid to load balancing for local implementations of link discovery algorithms. In this paper, we thus present a novel load balancing technique for link discovery on parallel hardware based on particle-swarm optimization. We combine this approach with the Orchid algorithm for geo-spatial linking and evaluate it on real and artificial datasets. Our evaluation suggests that while naïve approaches can be super-linear on small datasets, our deterministic particle swarm optimization outperforms both naïve and classical load balancing approaches such as greedy load balancing on large datasets.}},
  author       = {{Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  booktitle    = {{SEMANTiCS 2015}},
  keywords     = {{2015 sys:relevantFor:geoknow geoknow ngonga sherif simba dice group\_aksw sys:relevantFor:infai sys:relevantFor:bis SIMBA limes}},
  title        = {{{An Optimization Approach for Load Balancing in Parallel Link Discovery}}},
  year         = {{2015}},
}

@inbook{29020,
  author       = {{Lehmann, Jens and Athanasiou, Spiros and Both, Andreas and Garcia-Rojas, Alejandra and Giannopoulos, Giorgos and Hladky, Daniel and Hoeffner, Konrad and Jay Le Grange, Jon and Ngonga Ngomo, Axel-Cyrille and Sherif, Mohamed and Stadler, Claus and Wauer, Matthias and Westphal, Patrick and Zaslawski, Vadim}},
  booktitle    = {{The Semantic Web in Earth and Space Science. Current Status and Future Directions}},
  keywords     = {{2015 group\_aksw sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:geoknow lehmann ngonga MOLE sherif simba dice hoeffner geoknow wauer westphal}},
  pages        = {{51–78}},
  publisher    = {{IOS Press}},
  title        = {{{Managing Geospatial Linked Data in the GeoKnow Project}}},
  doi          = {{10.3233/978-1-61499-501-2-51}},
  volume       = {{20}},
  year         = {{2015}},
}

@techreport{29019,
  author       = {{Lehmann, Jens and Athanasiou, Spiros and Both, Andreas and Buehmann, Lorenz and Garcia-Rojas, Alejandra and Giannopoulos, Giorgos and Hladky, Daniel and Hoeffner, Konrad and Jay Le Grange, Jon and Ngonga Ngomo, Axel-Cyrille and Pietzsch, Rene and Isele, Robert and Sherif, Mohamed and Stadler, Claus and Wauer, Matthias and Westphal, Patrick}},
  keywords     = {{2015 group\_aksw sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:geoknow lehmann ngonga MOLE sherif simba dice hoeffner geoknow westphal buehmann}},
  title        = {{{The GeoKnow Handbook}}},
  year         = {{2015}},
}

@inproceedings{29023,
  abstract     = {{It is widely accepted that food supply and quality are major problems in the 21st century. Due to the growth of the world's population, there is a pressing need to improve the productivity of agricultural crops, which hinges on different factors such as geographical location, soil type, weather conditions and particular attributes of the crops to plant. In many regions of the world, information about those factors is not readily accessible and is dispersed across a multitude of different sources. One of those regions is Nepal, in which the lack of access to this knowledge poses a significant burden for agricultural planning and decision making. Making such knowledge more accessible can boost farmers' living standards and increase their competitiveness on national and global markets. In this article, we show how we converted several available, although not easily accessible, datasets to RDF, thereby lowering the barrier for data re-usage and integration. We describe the conversion, linking, and publication process as well as use cases which can be implemented using the farming datasets in Nepal.}},
  author       = {{Pokharel, Suresh and Sherif, Mohamed and Lehmann, Jens}},
  booktitle    = {{Proceedings of the International Conference on Web Intelligence}},
  keywords     = {{group\_aksw MOLE 2014 sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:geoknow topic\_geospatial lehmann sherif simba dice}},
  title        = {{{Ontology Based Data Access and Integration for Improving the Effectiveness of Farming in Nepal}}},
  year         = {{2014}},
}

@inproceedings{29028,
  abstract     = {{In the last couple of years, the amount of structured open government data has increased significantly. Already, citizens are able to leverage the advantages of open data through increased transparency and better opportunities to take part in governmental decision-making processes. Our approach increases the interoperability of existing but distributed open governmental datasets by converting them to the RDF-based NLP Interchange Format (NIF). Furthermore, we integrate the converted data into a geodata store and present a user interface for querying this data via a keyword-based search. The language resource generated in this project is publicly available for download and via a dedicated SPARQL endpoint.}},
  author       = {{Sherif, Mohamed and Coelho, Sandro and Usbeck, Ricardo and Hellmann, Sebastian and Lehmann, Jens and Brümmer, Martin and Both, Andreas}},
  booktitle    = {{Proceedings of the 9th Language Resources and Evaluation Conference (LREC 2014), 26-31 May 2014, Reykjavik, Iceland}},
  keywords     = {{2014 dice simba sherif sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:geoknow hellmann kilt lehmann usbeck bruemmer nif4oggd group\_aksw kilt Lidmole MOLE}},
  title        = {{{NIF4OGGD - NLP Interchange Format for Open German Governmental Data}}},
  year         = {{2014}},
}

@inproceedings{29022,
  abstract     = {{The Linked Data Web has developed into a compendium of partly very large datasets. Devising efficient approaches to compute links between these datasets is thus central to achieving the vision behind the Data Web. Several unsupervised approaches have been developed to achieve this goal. Yet, so far, none of these approaches makes use of the replication of resources across several knowledge bases to improve the accuracy it achieves while linking. In this paper, we present Colibri, an iterative unsupervised approach for link discovery. Colibri allows discovering links between n datasets (n ≥ 2) while improving the quality of the instance data in these datasets. To this end, Colibri combines error detection and correction with unsupervised link discovery. We evaluate our approach on benchmark datasets with respect to the F-score it achieves. Our results suggest that Colibri can significantly improve the results of unsupervised machine-learning approaches for link discovery while correctly detecting erroneous resources.}},
  author       = {{Ngonga Ngomo, Axel-Cyrille and Sherif, Mohamed and Lyko, Klaus}},
  booktitle    = {{Extended Semantic Web Conference (ESWC 2014)}},
  keywords     = {{ngonga sherif lyko simba group\_aksw sys:relevantFor:infai sys:relevantFor:bis SIMBA DICE limes}},
  title        = {{{Unsupervised Link Discovery Through Knowledge Base Repair}}},
  year         = {{2014}},
}

@article{29034,
  abstract     = {{In this paper we describe the Semantic Quran dataset, a multilingual RDF representation of translations of the Quran. The dataset was created by integrating data from two different semi-structured sources and aligning it to an ontology designed to represent multilingual data from sources with a hierarchical structure. The resulting RDF data encompasses 43 different languages, which belong to the most under-represented languages in the Linked Data Cloud, including Arabic, Amharic and Amazigh. We designed the dataset to be easily usable in natural-language processing applications, with the goal of facilitating the development of knowledge extraction tools for these languages. In particular, the Semantic Quran is compatible with the NLP Interchange Format and contains explicit morpho-syntactic information on the utilized terms. We present the ontology devised for structuring the data. We also provide the transformation rules implemented in our extraction framework. Finally, we detail the link creation process as well as possible usage scenarios for the Semantic Quran dataset.}},
  author       = {{Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}},
  journal      = {{Semantic Web Journal}},
  keywords     = {{group\_aksw SIMBA sys:relevantFor:infai sys:relevantFor:bis ngonga limes simba dice sherif 2014 limes semanticquran}},
  pages        = {{1–5}},
  title        = {{{Semantic Quran: A Multilingual Resource for Natural-Language Processing}}},
  volume       = {{XXX}},
  year         = {{2014}},
}

@inproceedings{29017,
  author       = {{Jay Le Grange, Jon and Lehmann, Jens and Athanasiou, Spiros and Garcia Rojas, Alejandra and Giannopoulos, Giorgos and Hladky, Daniel and Isele, Robert and Ngonga Ngomo, Axel-Cyrille and Sherif, Mohamed and Stadler, Claus and Wauer, Matthias}},
  booktitle    = {{Proceedings of the Linking Geospatial Data Workshop}},
  keywords     = {{2014 group\_aksw group\_mole mole ngonga lehmann sherif topic\_Lifecycle sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:lod2 sys:relevantFor:geoknow geoknow lod lod2page peer-reviewed MOLE simba dice wauer stadler}},
  title        = {{{The GeoKnow Generator: Managing Geospatial Data in the Linked Data Web}}},
  year         = {{2014}},
}

@article{29036,
  abstract     = {{The improvement of public health is one of the main indicators of societal progress. Statistical data for monitoring public health is highly relevant for a number of sectors, such as research (e.g. in the life sciences or economics), policy making, health care, the pharmaceutical industry and insurance. Such data is meanwhile available even on a global scale, e.g. in the Global Health Observatory (GHO) of the United Nations' World Health Organization (WHO). The GHO comprises more than 50 different datasets, covers all 198 WHO member countries and is updated as more recent or revised data becomes available or when there are changes to the methodology being used. However, this data is only accessible via complex spreadsheets, and therefore queries over the 50 different datasets as well as combinations with other datasets are very tedious and require a significant amount of manual work. By making the data available as RDF, we lower the barrier for data re-use and integration. In this article, we describe the conversion and publication process as well as use cases which can be implemented using the GHO data.}},
  author       = {{Zaveri, Amrapali and Lehmann, Jens and Auer, Sören and M. Hassan, Mofeed and Sherif, Mohamed and Martin, Michael}},
  journal      = {{Semantic Web Journal}},
  keywords     = {{2013 MOLE group\_aksw zaveri martin lehmann auer hassan sherif simba dice sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:lod2 lod2page peer-reviewed gho}},
  number       = {{3}},
  pages        = {{315–322}},
  title        = {{{Publishing and Interlinking the Global Health Observatory Dataset}}},
  volume       = {{Special Call for Linked Dataset descriptions}},
  year         = {{2013}},
}

