PyLoDStorage: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
|||
| Line 99: | Line 99: | ||
-k | --kill: kill the running fuseki server | -k | --kill: kill the running fuseki server | ||
-l | --load [ttl file]: download jena / tdbloader and load given ttl file | -l | --load [ttl file]: download jena / tdbloader and load given ttl file | ||
</source> | |||
== SPARQL unit test == | |||
see https://github.com/WolfgangFahl/DgraphAndWeaviateTest/blob/master/tests/testJena.py | |||
<source lang='python'> | |||
''' | |||
Created on 2020-08-14 | |||
@author: wf | |||
''' | |||
import unittest | |||
import getpass | |||
from lodstorage.sparql import SPARQL | |||
from lodstorage.sample import Sample | |||
import time | |||
class TestSPARQL(unittest.TestCase): | |||
''' Test SPARQL access e.g. Apache Jena via Wrapper''' | |||
def setUp(self): | |||
self.debug=False | |||
pass | |||
def tearDown(self): | |||
pass | |||
def getJena(self,mode='query',debug=False,typedLiterals=False,profile=False): | |||
''' | |||
get the jena endpoint for the given mode | |||
Args: | |||
mode(string): query or update | |||
debug(boolean): True if debug information should be output | |||
typedLiterals(boolean): True if INSERT DATA SPARQL commands should use typed literals | |||
profile(boolean): True if profile/timing information should be shown | |||
''' | |||
endpoint="http://localhost:3030/example" | |||
jena=SPARQL(endpoint,mode=mode,debug=debug,typedLiterals=typedLiterals,profile=profile) | |||
return jena | |||
def testJenaQuery(self): | |||
''' | |||
test Apache Jena Fuseki SPARQL endpoint with example SELECT query | |||
''' | |||
jena=self.getJena() | |||
queryString = "SELECT * WHERE { ?s ?p ?o. }" | |||
results=jena.query(queryString) | |||
self.assertTrue(len(results)>20) | |||
pass | |||
def testJenaInsert(self): | |||
''' | |||
test a Jena INSERT DATA | |||
''' | |||
jena=self.getJena(mode="update") | |||
insertCommands = [ """ | |||
PREFIX cr: <http://cr.bitplan.com/> | |||
INSERT DATA { | |||
cr:version cr:author "Wolfgang Fahl". | |||
} | |||
""",'INVALID COMMAND'] | |||
for index,insertCommand in enumerate(insertCommands): | |||
result,ex=jena.insert(insertCommand) | |||
if index==0: | |||
self.assertTrue(ex is None) | |||
print(result) | |||
else: | |||
msg=ex.args[0] | |||
self.assertTrue("QueryBadFormed" in msg) | |||
self.assertTrue("Error 400" in msg) | |||
pass | |||
def checkErrors(self,errors,expected=0): | |||
''' | |||
check the given list of errors - print any errors if there are some | |||
and after that assert that the length of the list of errors is zero | |||
Args: | |||
errors(list): the list of errors to check | |||
''' | |||
if len(errors)>0: | |||
print("ERRORS:") | |||
for error in errors: | |||
print(error) | |||
self.assertEquals(expected,len(errors)) | |||
def testDob(self): | |||
''' | |||
test the DOB (date of birth) function that converts from ISO-Date to | |||
datetime.date | |||
''' | |||
dt=Sample.dob("1926-04-21") | |||
self.assertEqual(1926,dt.year) | |||
self.assertEqual(4,dt.month) | |||
self.assertEqual(21,dt.day) | |||
def testListOfDictInsert(self): | |||
''' | |||
test inserting a list of Dicts and retrieving the values again | |||
using a person based example | |||
instead of | |||
https://en.wikipedia.org/wiki/FOAF_(ontology) | |||
we use an object oriented derivate of FOAF with a focus on datatypes | |||
''' | |||
listofDicts=Sample.getRoyals() | |||
typedLiteralModes=[True,False] | |||
entityType='foafo:Person' | |||
primaryKey='name' | |||
prefixes='PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>' | |||
for typedLiteralMode in typedLiteralModes: | |||
jena=self.getJena(mode='update',typedLiterals=typedLiteralMode,debug=True) | |||
deleteString= """ | |||
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> | |||
PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/> | |||
DELETE WHERE { | |||
?person a 'foafo:Person'. | |||
?person ?p ?o. | |||
} | |||
""" | |||
jena.query(deleteString) | |||
errors=jena.insertListOfDicts(listofDicts,entityType,primaryKey,prefixes) | |||
self.checkErrors(errors) | |||
jena=self.getJena(mode="query",debug=True) | |||
queryString = """ | |||
PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/> | |||
SELECT ?name ?born ?numberInLine ?wikidataurl ?age ?ofAge ?lastmodified WHERE { | |||
?person a 'foafo:Person'. | |||
?person foafo:Person_name ?name. | |||
?person foafo:Person_born ?born. | |||
?person foafo:Person_numberInLine ?numberInLine. | |||
?person foafo:Person_wikidataurl ?wikidataurl. | |||
?person foafo:Person_age ?age. | |||
?person foafo:Person_ofAge ?ofAge. | |||
?person foafo:Person_lastmodified ?lastmodified. | |||
}""" | |||
personResults=jena.query(queryString) | |||
self.assertEqual(len(listofDicts),len(personResults)) | |||
personList=jena.asListOfDicts(personResults) | |||
for index,person in enumerate(personList): | |||
print("%d: %s" %(index,person)) | |||
# check the correct round-trip behavior | |||
self.assertEqual(listofDicts,personList) | |||
def testControlEscape(self): | |||
''' | |||
check the control-escaped version of an UTF-8 string | |||
''' | |||
controls="Α\tΩ\r\n"; | |||
expected="Α\\tΩ\\r\\n" | |||
esc=SPARQL.controlEscape(controls) | |||
self.assertEqual(expected,esc) | |||
def testSPARQLErrorMessage(self): | |||
''' | |||
test error handling | |||
see https://stackoverflow.com/questions/63486767/how-can-i-get-the-fuseki-api-via-sparqlwrapper-to-properly-report-a-detailed-err | |||
''' | |||
listOfDicts=[{ | |||
'title': '“Bioinformatics of Genome Regulation and Structure\Systems Biology” – BGRS\SB-2018', | |||
'url': 'https://thenode.biologists.com/event/11th-international-multiconference-bioinformatics-genome-regulation-structuresystems-biology-bgrssb-2018/'}] | |||
entityType="cr:Event" | |||
primaryKey='title' | |||
prefixes="PREFIX cr: <http://cr.bitplan.com/Event/0.1/>" | |||
jena=self.getJena(mode='update',typedLiterals=False,debug=True) | |||
errors=jena.insertListOfDicts(listOfDicts,entityType,primaryKey,prefixes) | |||
self.checkErrors(errors,1) | |||
error=errors[0] | |||
self.assertTrue("probably the sparql query is bad formed" in error) | |||
def testEscapeStringContent(self): | |||
''' | |||
test handling of double quoted strings | |||
''' | |||
helpListOfDicts=[{'topic':'edit','description': '''Use | |||
the "edit" | |||
button to start editing - you can use | |||
- tab \t | |||
- carriage return \r | |||
- newline \n | |||
as escape characters | |||
''' | |||
}] | |||
entityType='help:Topic' | |||
primaryKey='topic' | |||
prefixes='PREFIX help: <http://help.bitplan.com/help/0.0.1/>' | |||
jena=self.getJena(mode='update',debug=True) | |||
errors=jena.insertListOfDicts(helpListOfDicts, entityType, primaryKey, prefixes, profile=True) | |||
self.checkErrors(errors) | |||
query=""" | |||
PREFIX help: <http://help.bitplan.com/help/0.0.1/> | |||
SELECT ?topic ?description | |||
WHERE { | |||
?help help:Topic_topic ?topic. | |||
?help help:Topic_description ?description. | |||
} | |||
""" | |||
jena=self.getJena(mode='query') | |||
listOfDicts=jena.queryAsListOfDicts(query) | |||
# check round trip equality | |||
self.assertEqual(helpListOfDicts,listOfDicts) | |||
def testIssue7(self): | |||
''' | |||
test conversion of dates with timezone info | |||
''' | |||
value="2020-01-01T00:00:00Z" | |||
dt=SPARQL.strToDatetime(value) | |||
self.assertEqual(dt.year,2020) | |||
def testListOfDictSpeed(self): | |||
''' | |||
test the speed of adding data | |||
''' | |||
limit=5000 | |||
for batchSize in [None,1000]: | |||
listOfDicts=Sample.getSample(limit) | |||
jena=self.getJena(mode='update',profile=True) | |||
entityType="ex:TestRecord" | |||
primaryKey='pkey' | |||
prefixes='PREFIX ex: <http://example.com/>' | |||
startTime=time.time() | |||
errors=jena.insertListOfDicts(listOfDicts, entityType, primaryKey, prefixes,batchSize=batchSize) | |||
self.checkErrors(errors) | |||
elapsed=time.time()-startTime | |||
print ("adding %d records took %5.3f s => %5.f records/s" % (limit,elapsed,limit/elapsed)) | |||
def testLocalWikdata(self): | |||
''' | |||
check local wikidata | |||
''' | |||
# check we have local wikidata copy: | |||
if getpass.getuser()=="wf": | |||
# use 2018 wikidata copy | |||
endpoint="http://jena.zeus.bitplan.com/wikidata/" | |||
wd=SPARQL(endpoint) | |||
queryString="""# get a list of whisky distilleries | |||
PREFIX wd: <http://www.wikidata.org/entity/> | |||
PREFIX wdt: <http://www.wikidata.org/prop/direct/> | |||
SELECT ?item ?coord | |||
WHERE | |||
{ | |||
# instance of whisky distillery | |||
?item wdt:P31 wd:Q10373548. | |||
# get the coordinate | |||
?item wdt:P625 ?coord. | |||
} | |||
""" | |||
results=wd.query(queryString) | |||
self.assertTrue(238<=len(results)) | |||
if __name__ == "__main__": | |||
#import sys;sys.argv = ['', 'Test.testName'] | |||
unittest.main() | |||
</source> | </source> | ||
Revision as of 07:03, 22 September 2020
OsProject
| OsProject | |
|---|---|
| id | PyLodStorage |
| state | |
| owner | Wolfgang Fahl |
| title | python List of Dict (Table) Storage library |
| url | https://github.com/WolfgangFahl/pyLodStorage |
| version | 0.0.12 |
| description | |
| date | 2020/09/21 |
| since | |
| until | |
see also DgraphAndWeaviateTest
List of Dicts = Table
A list of dicts (hash tables) in Python can be interpreted as a table, which makes it suitable to be stored
- in a relational database like sqlite3
- as JSON
Royals example
@staticmethod
def getRoyals():
    '''
    get a sample list of dicts describing four members of the British royal family

    Each record starts with the keys name, born, numberInLine and wikidataurl
    and is then extended with three derived values:
        age: the days since 'born' divided by 365.2425 (a float number of years)
        ofAge: True if the age is at least 18
        lastmodified: the datetime at which the record was completed

    Returns:
        list: the list of royals records
    '''
    royals=[
        {'name': 'Elizabeth Alexandra Mary Windsor', 'born': Sample.dob('1926-04-21'), 'numberInLine': 0, 'wikidataurl': 'https://www.wikidata.org/wiki/Q9682'},
        {'name': 'Charles, Prince of Wales', 'born': Sample.dob('1948-11-14'), 'numberInLine': 1, 'wikidataurl': 'https://www.wikidata.org/wiki/Q43274'},
        {'name': 'George of Cambridge', 'born': Sample.dob('2013-07-22'), 'numberInLine': 3, 'wikidataurl': 'https://www.wikidata.org/wiki/Q1359041'},
        {'name': 'Harry Duke of Sussex', 'born': Sample.dob('1984-09-15'), 'numberInLine': 6, 'wikidataurl': 'https://www.wikidata.org/wiki/Q152316'},
    ]
    referenceDay=date.today()
    for royal in royals:
        # 365.2425 is the average length of a Gregorian calendar year in days
        ageInYears=(referenceDay - royal['born']).days / 365.2425
        royal['age']=ageInYears
        royal['ofAge']=ageInYears>=18
        royal['lastmodified']=datetime.now()
    return royals
The above list of dicts can be stored in a Person table with the following structure:

SQL
The idea is to derive the necessary DDL and SQL commands automatically:
CREATE TABLE Family(name TEXT PRIMARY KEY,country TEXT,lastmodified TIMESTAMP)
INSERT INTO Family (name,country,lastmodified) values (:name,:country,:lastmodified)
and use them via a simple API:
from lodstorage.sample import Sample
from lodstorage.sql import SQLDB, EntityInfo

# the list of dicts to be stored
listOfRecords=Sample.getRoyals()
sqlDB=SQLDB()
entityName='Person'
primaryKey='name'
# only the first ten records are handed to createTable
# NOTE(review): presumably they serve as samples for deriving the column types - confirm
entityInfo=sqlDB.createTable(listOfRecords[:10],entityName,primaryKey)
# store all records and read them back again
sqlDB.store(listOfRecords,entityInfo)
resultList=sqlDB.queryAll(entityInfo)
The resultList will be the same as the original listOfRecords.
JSON
Apache Jena
The jena -l and jena -f options will automatically download and unpack the needed Apache jena files.
Jena load example dataset
scripts/jena -l sampledata/example.ttl
Jena fuseki server start
scripts/jena -f example
You should be able to browse the admin GUI at http://localhost:3030 and have the example dataset ready for you.
Jena fuseki server stop
scripts/jena -k
jena script usage
scripts/jena -h
scripts/jena [-f|--fuseki|-h|--help|-k|--kill|-l|--load]
-f | --fuseki [dataset]: download and start fuseki server with the given dataset
-h | --help: show this usage
-k | --kill: kill the running fuseki server
-l | --load [ttl file]: download jena / tdbloader and load given ttl file
SPARQL unit test
see https://github.com/WolfgangFahl/DgraphAndWeaviateTest/blob/master/tests/testJena.py
'''
Created on 2020-08-14
@author: wf
'''
import unittest
import getpass
from lodstorage.sparql import SPARQL
from lodstorage.sample import Sample
import time
class TestSPARQL(unittest.TestCase):
''' Test SPARQL access e.g. Apache Jena via Wrapper'''
def setUp(self):
self.debug=False
pass
def tearDown(self):
pass
def getJena(self,mode='query',debug=False,typedLiterals=False,profile=False):
'''
get the jena endpoint for the given mode
Args:
mode(string): query or update
debug(boolean): True if debug information should be output
typedLiterals(boolean): True if INSERT DATA SPARQL commands should use typed literals
profile(boolean): True if profile/timing information should be shown
'''
endpoint="http://localhost:3030/example"
jena=SPARQL(endpoint,mode=mode,debug=debug,typedLiterals=typedLiterals,profile=profile)
return jena
def testJenaQuery(self):
'''
test Apache Jena Fuseki SPARQL endpoint with example SELECT query
'''
jena=self.getJena()
queryString = "SELECT * WHERE { ?s ?p ?o. }"
results=jena.query(queryString)
self.assertTrue(len(results)>20)
pass
def testJenaInsert(self):
'''
test a Jena INSERT DATA
'''
jena=self.getJena(mode="update")
insertCommands = [ """
PREFIX cr: <http://cr.bitplan.com/>
INSERT DATA {
cr:version cr:author "Wolfgang Fahl".
}
""",'INVALID COMMAND']
for index,insertCommand in enumerate(insertCommands):
result,ex=jena.insert(insertCommand)
if index==0:
self.assertTrue(ex is None)
print(result)
else:
msg=ex.args[0]
self.assertTrue("QueryBadFormed" in msg)
self.assertTrue("Error 400" in msg)
pass
def checkErrors(self,errors,expected=0):
'''
check the given list of errors - print any errors if there are some
and after that assert that the length of the list of errors is zero
Args:
errors(list): the list of errors to check
'''
if len(errors)>0:
print("ERRORS:")
for error in errors:
print(error)
self.assertEquals(expected,len(errors))
def testDob(self):
'''
test the DOB (date of birth) function that converts from ISO-Date to
datetime.date
'''
dt=Sample.dob("1926-04-21")
self.assertEqual(1926,dt.year)
self.assertEqual(4,dt.month)
self.assertEqual(21,dt.day)
def testListOfDictInsert(self):
'''
test inserting a list of Dicts and retrieving the values again
using a person based example
instead of
https://en.wikipedia.org/wiki/FOAF_(ontology)
we use an object oriented derivate of FOAF with a focus on datatypes
'''
listofDicts=Sample.getRoyals()
typedLiteralModes=[True,False]
entityType='foafo:Person'
primaryKey='name'
prefixes='PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>'
for typedLiteralMode in typedLiteralModes:
jena=self.getJena(mode='update',typedLiterals=typedLiteralMode,debug=True)
deleteString= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>
DELETE WHERE {
?person a 'foafo:Person'.
?person ?p ?o.
}
"""
jena.query(deleteString)
errors=jena.insertListOfDicts(listofDicts,entityType,primaryKey,prefixes)
self.checkErrors(errors)
jena=self.getJena(mode="query",debug=True)
queryString = """
PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>
SELECT ?name ?born ?numberInLine ?wikidataurl ?age ?ofAge ?lastmodified WHERE {
?person a 'foafo:Person'.
?person foafo:Person_name ?name.
?person foafo:Person_born ?born.
?person foafo:Person_numberInLine ?numberInLine.
?person foafo:Person_wikidataurl ?wikidataurl.
?person foafo:Person_age ?age.
?person foafo:Person_ofAge ?ofAge.
?person foafo:Person_lastmodified ?lastmodified.
}"""
personResults=jena.query(queryString)
self.assertEqual(len(listofDicts),len(personResults))
personList=jena.asListOfDicts(personResults)
for index,person in enumerate(personList):
print("%d: %s" %(index,person))
# check the correct round-trip behavior
self.assertEqual(listofDicts,personList)
def testControlEscape(self):
'''
check the control-escaped version of an UTF-8 string
'''
controls="Α\tΩ\r\n";
expected="Α\\tΩ\\r\\n"
esc=SPARQL.controlEscape(controls)
self.assertEqual(expected,esc)
def testSPARQLErrorMessage(self):
'''
test error handling
see https://stackoverflow.com/questions/63486767/how-can-i-get-the-fuseki-api-via-sparqlwrapper-to-properly-report-a-detailed-err
'''
listOfDicts=[{
'title': '“Bioinformatics of Genome Regulation and Structure\Systems Biology” – BGRS\SB-2018',
'url': 'https://thenode.biologists.com/event/11th-international-multiconference-bioinformatics-genome-regulation-structuresystems-biology-bgrssb-2018/'}]
entityType="cr:Event"
primaryKey='title'
prefixes="PREFIX cr: <http://cr.bitplan.com/Event/0.1/>"
jena=self.getJena(mode='update',typedLiterals=False,debug=True)
errors=jena.insertListOfDicts(listOfDicts,entityType,primaryKey,prefixes)
self.checkErrors(errors,1)
error=errors[0]
self.assertTrue("probably the sparql query is bad formed" in error)
def testEscapeStringContent(self):
'''
test handling of double quoted strings
'''
helpListOfDicts=[{'topic':'edit','description': '''Use
the "edit"
button to start editing - you can use
- tab \t
- carriage return \r
- newline \n
as escape characters
'''
}]
entityType='help:Topic'
primaryKey='topic'
prefixes='PREFIX help: <http://help.bitplan.com/help/0.0.1/>'
jena=self.getJena(mode='update',debug=True)
errors=jena.insertListOfDicts(helpListOfDicts, entityType, primaryKey, prefixes, profile=True)
self.checkErrors(errors)
query="""
PREFIX help: <http://help.bitplan.com/help/0.0.1/>
SELECT ?topic ?description
WHERE {
?help help:Topic_topic ?topic.
?help help:Topic_description ?description.
}
"""
jena=self.getJena(mode='query')
listOfDicts=jena.queryAsListOfDicts(query)
# check round trip equality
self.assertEqual(helpListOfDicts,listOfDicts)
def testIssue7(self):
'''
test conversion of dates with timezone info
'''
value="2020-01-01T00:00:00Z"
dt=SPARQL.strToDatetime(value)
self.assertEqual(dt.year,2020)
def testListOfDictSpeed(self):
'''
test the speed of adding data
'''
limit=5000
for batchSize in [None,1000]:
listOfDicts=Sample.getSample(limit)
jena=self.getJena(mode='update',profile=True)
entityType="ex:TestRecord"
primaryKey='pkey'
prefixes='PREFIX ex: <http://example.com/>'
startTime=time.time()
errors=jena.insertListOfDicts(listOfDicts, entityType, primaryKey, prefixes,batchSize=batchSize)
self.checkErrors(errors)
elapsed=time.time()-startTime
print ("adding %d records took %5.3f s => %5.f records/s" % (limit,elapsed,limit/elapsed))
def testLocalWikdata(self):
'''
check local wikidata
'''
# check we have local wikidata copy:
if getpass.getuser()=="wf":
# use 2018 wikidata copy
endpoint="http://jena.zeus.bitplan.com/wikidata/"
wd=SPARQL(endpoint)
queryString="""# get a list of whisky distilleries
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT ?item ?coord
WHERE
{
# instance of whisky distillery
?item wdt:P31 wd:Q10373548.
# get the coordinate
?item wdt:P625 ?coord.
}
"""
results=wd.query(queryString)
self.assertTrue(238<=len(results))
if __name__ == "__main__":
    # Run all tests of this module. To run a single test only, set
    # sys.argv to e.g. ['', 'Test.testName'] before calling unittest.main().
    unittest.main()