-
Notifications
You must be signed in to change notification settings - Fork 12
Query notebook
Gezim Sejdiu edited this page Mar 26, 2018
·
2 revisions
SANSA uses a vertical partitioning (VP) approach and is designed to support extensible partitioning of RDF data. Instead of dealing with a single three-column table (s, p, o), the data is partitioned into multiple tables based on the RDF predicates used, the RDF term types, and the literal datatypes. The first column of these tables is always a string representing the subject. The second column always represents the literal value as a Scala/Java datatype. Tables storing literals with language tags have an additional third string column for the language tag.
import org.apache.jena.riot.Lang
import net.sansa_stack.rdf.spark.io._
import net.sansa_stack.query.spark.query._
// Location of the N-Triples input on HDFS.
val inputPath = "hdfs://namenode:8020/data/rdf.nt"
// The serialization the reader should expect (N-Triples).
val syntax = Lang.NTRIPLES
// Load the file into an RDD of Jena triples via SANSA's RDF layer.
val tripleRdd = spark.rdf(syntax)(inputPath)
// A simple "first 10 triples" SPARQL query, answered by SANSA's
// query layer over the vertically partitioned data.
val query = """SELECT ?s ?p ?o
WHERE {?s ?p ?o }
LIMIT 10"""
// Render the query result in the notebook (Zeppelin display helper).
z.show(tripleRdd.sparql(query))
import java.net.URI
import net.sansa_stack.rdf.spark.io.NTripleReader
import net.sansa_stack.rdf.spark.partition.core.RdfPartitionUtilsSpark
import net.sansa_stack.query.spark.sparqlify.QueryExecutionFactorySparqlifySpark
import net.sansa_stack.query.spark.sparqlify.SparqlifyUtils3
import org.aksw.jena_sparql_api.server.utils.FactoryBeanSparqlServer
import org.apache.spark.sql.SparkSession
import scala.collection.mutable
// Location of the N-Triples input on HDFS.
val inputPath = "hdfs://namenode:8020/data/rdf.nt"
// Read the file into an RDD of Jena triples.
val triples = NTripleReader.load(spark, URI.create(inputPath))
// Vertically partition the graph: one table per predicate/term-type combination.
val vpTables = RdfPartitionUtilsSpark.partitionGraph(triples)
// Build the SPARQL-to-SQL rewriter over those partitions (Sparqlify).
val sparqlToSql = SparqlifyUtils3.createSparqlSqlRewriter(spark, vpTables)
// Query execution factory that answers SPARQL by rewriting to Spark SQL.
val queryExecFactory = new QueryExecutionFactorySparqlifySpark(spark, sparqlToSql)
// Port the HTTP SPARQL endpoint listens on.
val serverPort = 7531
// Stand up the SPARQL server and block this cell until it shuts down.
val sparqlServer = FactoryBeanSparqlServer.newInstance
  .setSparqlServiceFactory(queryExecFactory)
  .setPort(serverPort)
  .create()
sparqlServer.join()