relpipe-data/examples/relpipe-in-sparql.sh
author František Kučera <franta-hg@frantovo.cz>
Fri, 26 Nov 2021 22:14:18 +0100
branch v_0
changeset 328 cc60c8dd7924
parent 310 aeda3cb4528d
permissions -rwxr-xr-x
relpipe-in-sparql: update to new relpipe-in-csv CLI API

#!/bin/bash

# Relational pipes
# Copyright © 2020 František Kučera (Frantovo.cz, GlobalCode.info)
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

relpipe_in_sparql_help() {
	cat <<-EOF
		SPARQL query is expected on the STDIN. Exapmple:
		  echo 'SELECT * WHERE { ?subject ?predicate ?object . } LIMIT 3' \
| relpipe-in-sparql | relpipe-out-tabular
		
		Namespace prefixes are part of the query.
		But because they are usually constant, they can be set as the 
		RELPIPE_IN_SPARQL_PREFIXES environmental variable.
		Then they are prepended to the query.
		
		SPARQL endpoint can be set ad-hoc by the --endpoint option.
		
		To configure default SPARQL endpoint, add something like this to your environment:
		  export RELPIPE_IN_SPARQL_ENDPOINT="https://query.wikidata.org/sparql"
		  export RELPIPE_IN_SPARQL_ENDPOINT="https://dbpedia.org/sparql"
		  export RELPIPE_IN_SPARQL_ENDPOINT="https://data.gov.cz/sparql"
		  export RELPIPE_IN_SPARQL_ENDPOINT="https://data.cssz.cz/sparql"

		The relation name defaults to "rdf". Custom name can be set using the --relation option.
EOF
}

# Defaults; both can be overridden by the CLI options parsed below.
relation="rdf";
endpoint="${RELPIPE_IN_SPARQL_ENDPOINT:-https://dbpedia.org/sparql}";

# Parses the CLI options.
# Globals:   relation (written by --relation), endpoint (written by --endpoint)
# Arguments: the command line arguments of the script
# Outputs:   the help on STDOUT (--help) or an error message on STDERR
# Exits:     with 0 after --help; with 1 on an unsupported option or on an
#            option that lacks its value (formerly an unsupported argument was
#            never shifted away, so the loop never terminated)
relpipe_in_sparql_parse_arguments() {
	local option;
	while [[ $# -gt 0 ]]; do
		option="$1";
		case "$option" in
			"--relation" | "--endpoint")
				if [[ $# -lt 2 ]]; then
					printf 'Option %s requires a value.\n' "$option" >&2;
					exit 1;
				fi
				if [[ "$option" == "--relation" ]]; then
					relation="$2";
				else
					endpoint="$2";
				fi
				shift 2;
				;;
			"--help")
				relpipe_in_sparql_help;
				exit;
				;;
			*)
				printf 'Unsupported option: %s (see --help)\n' "$option" >&2;
				exit 1;
				;;
		esac
	done
}

relpipe_in_sparql_parse_arguments "$@";

# Compose the query text: the optional namespace prefixes from the
# RELPIPE_IN_SPARQL_PREFIXES variable, two newlines, and then the SPARQL
# query read from the STDIN.
# The variable is assigned explicitly (not just appended to), so a variable
# called "query" inherited from the environment cannot leak into the request.
# The two newlines are emitted even when no prefixes are configured.
query="${RELPIPE_IN_SPARQL_PREFIXES:-}"$'\n\n';

query+="$(</dev/stdin)";


# Simple implementation that utilizes the CSV output of the SPARQL endpoint.
# Globals:   query, endpoint, relation (all read only)
# Outputs:   whatever relpipe-in-csv writes to the STDOUT
# Returns:   0 if both pipeline stages succeeded, otherwise the exit status
#            of the first stage that failed; thus an error reported by curl
#            (e.g. through --fail) is not hidden by the last stage
relpipe_in_sparql_implementation_csv() {
	local -a stage_status;
	local code;

	curl \
		--header "Accept: text/csv" \
		--data-urlencode query="$query" \
		--fail \
		--silent \
		--show-error \
		"$endpoint" \
		| relpipe-in-csv --relation "$relation";
	# PIPESTATUS must be captured right after the pipeline,
	# the next command would overwrite it.
	stage_status=("${PIPESTATUS[@]}");

	for code in "${stage_status[@]}"; do
		if [[ "$code" -ne 0 ]]; then
			return "$code";
		fi
	done
	return 0;
}

# More powerful implementation based on XML.
# Can be customized through XSLT.
# But: has more dependencies and does not work in a streaming fashion.
# The stylesheet relpipe-in-sparql.xsl is looked up in the directory
# of this script (resolved through realpath).
# Globals:   query, endpoint, relation (all read only)
# Outputs:   whatever relpipe-in-xml writes to the STDOUT
# Returns:   0 if all pipeline stages succeeded, otherwise the exit status
#            of the first stage that failed; thus an error reported by curl
#            or xsltproc is not hidden by the last stage
relpipe_in_sparql_implementation_xml() {
	local script_dir;
	local -a stage_status;
	local code;

	# Both command substitutions are quoted,
	# so a script path containing whitespace stays a single word.
	script_dir="$(dirname "$(realpath "$0")")";

	curl \
		--header "Accept: application/sparql-results+xml" \
		--data-urlencode query="$query" \
		--fail \
		--silent \
		--show-error \
		"$endpoint" \
		| xsltproc --stringparam "relation" "$relation" "$script_dir/relpipe-in-sparql.xsl" - \
		| relpipe-in-xml;
	# PIPESTATUS must be captured right after the pipeline,
	# the next command would overwrite it.
	stage_status=("${PIPESTATUS[@]}");

	for code in "${stage_status[@]}"; do
		if [[ "$code" -ne 0 ]]; then
			return "$code";
		fi
	done
	return 0;
}

# Run the implementation selected by the RELPIPE_IN_SPARQL_IMPLEMENTATION
# environment variable (default: csv). The name is verified to be a defined
# shell function first, so a wrong value is reported clearly instead of
# a generic "command not found"; quoting avoids word splitting of the value.
relpipe_in_sparql_implementation="relpipe_in_sparql_implementation_${RELPIPE_IN_SPARQL_IMPLEMENTATION:-csv}";
if ! declare -F "$relpipe_in_sparql_implementation" > /dev/null; then
	printf 'Unsupported RELPIPE_IN_SPARQL_IMPLEMENTATION: %s (expected: csv or xml)\n' "${RELPIPE_IN_SPARQL_IMPLEMENTATION:-}" >&2;
	exit 1;
fi
"$relpipe_in_sparql_implementation"