Source code for eds4jinja2.adapters.local_sparql_ds

#!/usr/bin/python3

# local_sparql_ds.py
# Date:  07/08/2020
# Author: Eugeniu Costetchi
# Email: costezki.eugen@gmail.com
from pathlib import Path

import pandas as pd
import rdflib

from eds4jinja2.adapters.base_data_source import DataSource, UnsupportedRepresentation
from eds4jinja2.adapters.substitution_template import SubstitutionTemplate

DEFAULT_ENCODING = 'utf-8'


class RDFFileDataSource(DataSource):
    """
        Accesses a local RDF file and provides the possibility to fetch data from it by SPARQL queries.

        Only the tabular representation is supported; requesting a tree raises
        ``UnsupportedRepresentation``.
    """

    def __init__(self, filename):
        """
            :param filename: location of the RDF file, passed as-is to ``rdflib.Graph.parse``
        """
        self.__can_be_tree = False
        self.__can_be_tabular = True
        self.__graph__ = rdflib.Graph()
        self.__query__ = ""
        self.__filename__ = filename

    def __reduce_bound_triple_to_string_format(self, dict_of_bound_variables: dict):
        """
            Convert one result binding into a plain dict with string keys and string values.
        """
        return {str(k): str(v) for k, v in dict_of_bound_variables.items()}

    def __set_query(self, sparql_query: str, substitution_variables: dict, prefixes: str) -> None:
        """
            Compose and store the final query text (shared by both ``with_query*`` methods).

            When substitution variables are given, the query is passed through
            ``SubstitutionTemplate.safe_substitute``; the prefixes are then prepended and
            surrounding whitespace is stripped.
        """
        if substitution_variables:
            sparql_query = SubstitutionTemplate(sparql_query).safe_substitute(substitution_variables)
        self.__query__ = (prefixes + " " + sparql_query).strip()

    def with_query(self, sparql_query: str, substitution_variables: dict = None,
                   prefixes: str = "") -> 'RDFFileDataSource':
        """
            Set the query text and return the reference to self for chaining.

            :param sparql_query: the SPARQL query text
            :param substitution_variables: optional values substituted into the query template
            :param prefixes: optional text prepended to the query
            :raises Exception: if a query was already set on this data source
            :return: self
        """
        if self.__query__ != "":
            raise Exception("The query was already set.")

        self.__set_query(sparql_query, substitution_variables, prefixes)
        return self

    def with_query_from_file(self, sparql_query_file_path: str, substitution_variables: dict = None,
                             prefixes: str = "") -> 'RDFFileDataSource':
        """
            Read the query text from a file, set it and return the reference to self for chaining.

            :param sparql_query_file_path: path to the file containing the SPARQL query
            :param substitution_variables: optional values substituted into the query template
            :param prefixes: optional text prepended to the query
            :raises Exception: if a query was already set on this data source
            :return: self
        """
        # Check before touching the file system so that a misuse is reported first.
        if self.__query__ != "":
            raise Exception("The query was already set.")

        # Decode explicitly: the platform default encoding is not UTF-8 everywhere,
        # which would corrupt queries that contain non-ASCII characters.
        query_text = Path(sparql_query_file_path).resolve().read_text(encoding=DEFAULT_ENCODING)

        self.__set_query(query_text, substitution_variables, prefixes)
        return self

    def with_file(self, file: str) -> 'RDFFileDataSource':
        """
            Set the RDF file to be queried and return the reference to self for chaining.

            :param file: location of the RDF file
            :return: self
        """
        self.__filename__ = file
        return self

    def _fetch_tabular(self):
        """
            Load the RDF file, run the configured query and return the bindings as a table.

            :raises Exception: if no query was set
            :return: a ``pandas.DataFrame`` with one row per result binding; every variable
                     name and value is converted to ``str``
        """
        if not self.__query__:
            raise Exception("The query is empty.")

        # Start from an empty graph on every fetch. Re-parsing into the same graph would
        # accumulate triples across repeated fetches and, after with_file(), would mix the
        # content of the previous file into the results of the new one.
        self.__graph__ = rdflib.Graph()
        self.__graph__.parse(self.__filename__)

        result = self.__graph__.query(self.__query__)
        rows = [self.__reduce_bound_triple_to_string_format(binding) for binding in result.bindings]
        return pd.DataFrame(rows)

    def _fetch_tree(self):
        """
            Tree representation is not available for this data source.

            :raises UnsupportedRepresentation: always
        """
        raise UnsupportedRepresentation("Only TABULAR representation is supported")

    def _can_be_tree(self) -> bool:
        """
            :return: whether a tree representation can be fetched (set to False in ``__init__``)
        """
        return self.__can_be_tree

    def _can_be_tabular(self) -> bool:
        """
            :return: whether a tabular representation can be fetched (set to True in ``__init__``)
        """
        return self.__can_be_tabular

    def __str__(self):
        # Abbreviated form: last 30 characters of the file name, first 60 of the query.
        return f"from <...{str(self.__filename__)[-30:]}> {str(self.__query__)[:60]} ..."