Difference between revisions of "PyLoDStorage"
(56 intermediate revisions by the same user not shown) | |||
Line 3: | Line 3: | ||
{{OsProject | {{OsProject | ||
|id=PyLodStorage | |id=PyLodStorage | ||
+ | |state=active | ||
|owner=Wolfgang Fahl | |owner=Wolfgang Fahl | ||
|title=python List of Dict (Table) Storage library | |title=python List of Dict (Table) Storage library | ||
|url=https://github.com/WolfgangFahl/pyLodStorage | |url=https://github.com/WolfgangFahl/pyLodStorage | ||
− | |version=0. | + | |version=0.13.3 |
− | |date=2020 | + | |date=2024-11-02 |
+ | |since=2020-09-10 | ||
|storemode=property | |storemode=property | ||
}} | }} | ||
+ | =tickets= | ||
+ | see also | ||
+ | * | ||
+ | |||
=Freitext= | =Freitext= | ||
+ | {{Link|target=DgraphAndWeaviateTest}} | ||
+ | * {{Link|target=Geograpy}} | ||
+ | * {{Link|target=ProceedingsTitleParser}} | ||
+ | |||
+ | == List of Dicts = Table == | ||
+ | a list of dicts(Hashtables) in python can be interpreted as Table which is suitable to | ||
+ | # be stored in a relational database like sqlite3 | ||
+ | # represented as JSON, CSV or example | ||
+ | # edited with a [https://github.com/WolfgangFahl/pyOnlineSpreadSheetEditing Spreadsheet editor] | ||
+ | |||
+ | == Royals example == | ||
+ | <source lang='python'> | ||
+ | @staticmethod | ||
+ | def getRoyals(): | ||
+ | listOfDicts=[ | ||
+ | {'name': 'Elizabeth Alexandra Mary Windsor', 'born': Sample.dob('1926-04-21'), 'numberInLine': 0, 'wikidataurl': 'https://www.wikidata.org/wiki/Q9682' }, | ||
+ | {'name': 'Charles, Prince of Wales', 'born': Sample.dob('1948-11-14'), 'numberInLine': 1, 'wikidataurl': 'https://www.wikidata.org/wiki/Q43274' }, | ||
+ | {'name': 'George of Cambridge', 'born': Sample.dob('2013-07-22'), 'numberInLine': 3, 'wikidataurl': 'https://www.wikidata.org/wiki/Q1359041'}, | ||
+ | {'name': 'Harry Duke of Sussex', 'born': Sample.dob('1984-09-15'), 'numberInLine': 6, 'wikidataurl': 'https://www.wikidata.org/wiki/Q152316'} | ||
+ | ] | ||
+ | today=date.today() | ||
+ | for person in listOfDicts: | ||
+ | born=person['born'] | ||
+ | age=(today - born).days / 365.2425 | ||
+ | person['age']=age | ||
+ | person['ofAge']=age>=18 | ||
+ | person['lastmodified']=datetime.now() | ||
+ | return listOfDicts | ||
+ | </source> | ||
+ | |||
+ | The above list of dict can be stored in a Person table with the following structure: | ||
+ | |||
+ | <uml> | ||
+ | package Royals { | ||
+ | class Person << Entity >> { | ||
+ | age : FLOAT | ||
+ | born : DATE | ||
+ | lastmodified : TIMESTAMP | ||
+ | name : TEXT <<PK>> | ||
+ | numberInLine : INTEGER | ||
+ | ofAge : BOOLEAN | ||
+ | wikidataurl : TEXT | ||
+ | } | ||
+ | } | ||
+ | </uml> | ||
+ | |||
+ | {{pip|pyLodStorage}} | ||
+ | |||
+ | = SQL = | ||
+ | The idea is to derive the necessary DDL and SQL command automatically: | ||
+ | <source lang='sql'> | ||
+ | CREATE TABLE Family(name TEXT PRIMARY KEY,country TEXT,lastmodified TIMESTAMP) | ||
+ | INSERT INTO Family (name,country,lastmodified) values (:name,:country,:lastmodified) | ||
+ | </source> | ||
+ | and use them via simple API | ||
+ | <source lang='python'> | ||
+ | from lodstorage.sample import Sample | ||
+ | from lodstorage.sql import SQLDB, EntityInfo | ||
+ | |||
+ | listOfRecords=Sample.getRoyals() | ||
+ | sqlDB=SQLDB() | ||
+ | entityName='Person' | ||
+ | primaryKey='name' | ||
+ | entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityName,primaryKey) | ||
+ | sqlDB.store(listOfRecords,entityInfo) | ||
+ | resultList=self.sqlDB.queryAll(entityInfo) | ||
+ | </source> | ||
+ | |||
+ | The resultList will be the same as the original listOfRecords. | ||
+ | |||
+ | = JSON = | ||
+ | Wrap the royals example into a JSONAble class and test using that class for "round-trip" JSON storage: | ||
+ | <source lang='python'> | ||
+ | ... | ||
+ | class Royals(JSONAble): | ||
+ | def __init__(self,load=False): | ||
+ | if load: | ||
+ | self.royals=Sample.getRoyals() | ||
+ | else: | ||
+ | self.royals=None | ||
+ | ... | ||
+ | def testRoyals(self): | ||
+ | ''' | ||
+ | test Royals example | ||
+ | ''' | ||
+ | royals1=Royals(load=True) | ||
+ | self.assertEqual(4,len(royals1.royals)) | ||
+ | json=royals1.toJSON() | ||
+ | print(json) | ||
+ | types=Types.forClass(royals1, "royals") | ||
+ | royals2=Royals() | ||
+ | royals2.fromJson(json,types=types) | ||
+ | self.assertEqual(4,len(royals2.royals)) | ||
+ | print(royals1.royals) | ||
+ | print(royals2.royals) | ||
+ | self.assertEqual(royals1.royals,royals2.royals) | ||
+ | </source> | ||
+ | == JSON encoding == | ||
+ | <source lang='json'> | ||
+ | { | ||
+ | "royals": [ | ||
+ | { | ||
+ | "age": 94.42767476402663, | ||
+ | "born": "1926-04-21", | ||
+ | "lastmodified": "2020-09-23T09:44:48.661740", | ||
+ | "name": "Elizabeth Alexandra Mary Windsor", | ||
+ | "numberInLine": 0, | ||
+ | "ofAge": true, | ||
+ | "wikidataurl": "https://www.wikidata.org/wiki/Q9682" | ||
+ | }, | ||
+ | { | ||
+ | "age": 71.85910730542037, | ||
+ | "born": "1948-11-14", | ||
+ | "lastmodified": "2020-09-23T09:44:48.661747", | ||
+ | "name": "Charles, Prince of Wales", | ||
+ | "numberInLine": 1, | ||
+ | "ofAge": true, | ||
+ | "wikidataurl": "https://www.wikidata.org/wiki/Q43274" | ||
+ | }, | ||
+ | { | ||
+ | "age": 7.17331635830989, | ||
+ | "born": "2013-07-22", | ||
+ | "lastmodified": "2020-09-23T09:44:48.661748", | ||
+ | "name": "George of Cambridge", | ||
+ | "numberInLine": 3, | ||
+ | "ofAge": false, | ||
+ | "wikidataurl": "https://www.wikidata.org/wiki/Q1359041" | ||
+ | }, | ||
+ | { | ||
+ | "age": 36.022642490947796, | ||
+ | "born": "1984-09-15", | ||
+ | "lastmodified": "2020-09-23T09:44:48.661749", | ||
+ | "name": "Harry Duke of Sussex", | ||
+ | "numberInLine": 6, | ||
+ | "ofAge": true, | ||
+ | "wikidataurl": "https://www.wikidata.org/wiki/Q152316" | ||
+ | } | ||
+ | ] | ||
+ | } | ||
+ | </source> | ||
+ | |||
+ | = Query Documentation = | ||
+ | see also https://github.com/WolfgangFahl/pyLoDStorage/issues/46 | ||
+ | == Examples == | ||
+ | === 15 Random substances with CAS number === | ||
+ | Wikidata SPARQL query showing the 15 random chemical substances with their CAS Number | ||
+ | ==== query ==== | ||
+ | <source lang='sparql'> | ||
+ | # List of 15 random chemical components with CAS-Number, formula and structure | ||
+ | # see also https://github.com/WolfgangFahl/pyLoDStorage/issues/46 | ||
+ | # WF 2021-08-23 | ||
+ | SELECT ?substance ?substanceLabel ?formula ?structure ?CAS | ||
+ | WHERE { | ||
+ | ?substance wdt:P31 wd:Q11173. | ||
+ | ?substance wdt:P231 ?CAS. | ||
+ | ?substance wdt:P274 ?formula. | ||
+ | ?substance wdt:P117 ?structure. | ||
+ | SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } | ||
+ | } | ||
+ | LIMIT 15 | ||
+ | |||
+ | </source> | ||
+ | |||
+ | [https://query.wikidata.org/#%23%20List%20of%2015%20random%20chemical%20components%20with%20CAS-Number%2C%20formula%20and%20structure%0A%23%20see%20also%20https%3A//github.com/WolfgangFahl/pyLoDStorage/issues/46%0A%23%20WF%202021-08-23%0ASELECT%20%3Fsubstance%20%3FsubstanceLabel%20%3Fformula%20%3Fstructure%20%3FCAS%0AWHERE%20%7B%20%0A%20%20%3Fsubstance%20wdt%3AP31%20wd%3AQ11173.%0A%20%20%3Fsubstance%20wdt%3AP231%20%3FCAS.%0A%20%20%3Fsubstance%20wdt%3AP274%20%3Fformula.%0A%20%20%3Fsubstance%20wdt%3AP117%20%20%3Fstructure.%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%7D%0ALIMIT%2015%0A try it!] | ||
+ | ==== result ==== | ||
+ | {| class="wikitable" style="text-align: left;" | ||
+ | |+ <!-- caption --> | ||
+ | |- | ||
+ | ! substance !! substanceLabel !! formula !! structure !! CAS | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q32703 Q32703] || aminomethylpropanol || C₄H₁₁NO || [http://commons.wikimedia.org/wiki/Special:FilePath/2-amino-2-methyl-1-propanol.svg 2-amino-2-methyl-1-propanol.svg] || 124-68-5 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q32703 Q32703] || aminomethylpropanol || C₄H₁₁NO || [http://commons.wikimedia.org/wiki/Special:FilePath/Isobutanolamine%20t.png Isobutanolamine%20t.png] || 124-68-5 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q43656 Q43656] || cholesterol || C₂₇H₄₆O || [http://commons.wikimedia.org/wiki/Special:FilePath/Structural%20formula%20of%20cholesterol.svg Structural%20formula%20of%20cholesterol.svg] || 57-88-5 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q45143 Q45143] || fulminic acid || CHNO || [http://commons.wikimedia.org/wiki/Special:FilePath/Fulminezuur.png Fulminezuur.png] || 506-85-4 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q49546 Q49546] || acetone || C₃H₆O || [http://commons.wikimedia.org/wiki/Special:FilePath/Acetone-2D-skeletal.svg Acetone-2D-skeletal.svg] || 67-64-1 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q49546 Q49546] || acetone || C₃H₆O || [http://commons.wikimedia.org/wiki/Special:FilePath/Acetone-structural.png Acetone-structural.png] || 67-64-1 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q52858 Q52858] || ethane || C₂H₆ || [http://commons.wikimedia.org/wiki/Special:FilePath/Ethan%20Keilstrich.svg Ethan%20Keilstrich.svg] || 74-84-0 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q58356 Q58356] || amoxapine || C₁₇H₁₆ClN₃O || [http://commons.wikimedia.org/wiki/Special:FilePath/Amoxapine.svg Amoxapine.svg] || 14028-44-5 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q58713 Q58713] || clomipramine || C₁₉H₂₃ClN₂ || [http://commons.wikimedia.org/wiki/Special:FilePath/Clomipramine.svg Clomipramine.svg] || 303-49-1 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q68484 Q68484] || prucalopride || C₁₈H₂₆ClN₃O₃ || [http://commons.wikimedia.org/wiki/Special:FilePath/Prucalopride.svg Prucalopride.svg] || 179474-81-8 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q68566 Q68566] || mosapride || C₂₁H₂₅ClFN₃O₃ || [http://commons.wikimedia.org/wiki/Special:FilePath/Mosapride.svg Mosapride.svg] || 112885-41-3 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q80232 Q80232] || cyclobutane || C₄H₈ || [http://commons.wikimedia.org/wiki/Special:FilePath/Cyclobutane2.svg Cyclobutane2.svg] || 287-23-0 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q80868 Q80868] || tolonium chloride || C₁₅H₁₆ClN₃S || [http://commons.wikimedia.org/wiki/Special:FilePath/Tolonium%20chloride.svg Tolonium%20chloride.svg] || 92-31-9 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q83320 Q83320] || nitric acid || HNO₃ || [http://commons.wikimedia.org/wiki/Special:FilePath/Nitric-acid.png Nitric-acid.png] || 12507-77-6 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q83320 Q83320] || nitric acid || HNO₃ || [http://commons.wikimedia.org/wiki/Special:FilePath/Nitric-acid.png Nitric-acid.png] || 7697-37-2 | ||
+ | |} | ||
+ | == Manually improved result == | ||
+ | {| class="wikitable" style="text-align: left;" | ||
+ | |+ <!-- caption --> | ||
+ | |- | ||
+ | ! substance !! substanceLabel !! formula !! structure !! CAS | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q32703 Q32703] || aminomethylpropanol || C₄H₁₁NO || [[File:2-amino-2-methyl-1-propanol.svg|150px|2-amino-2-methyl-1-propanol.svg]] || 124-68-5 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q32703 Q32703] || aminomethylpropanol || C₄H₁₁NO || [[File:Isobutanolamine%20t.png|150px|Isobutanolamine t.png]] || 124-68-5 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q43656 Q43656] || cholesterol || C₂₇H₄₆O || [[File:Structural%20formula%20of%20cholesterol.svg|150px|Structural formula of cholesterol.svg]] || 57-88-5 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q45143 Q45143] || fulminic acid || CHNO || [[File:Fulminezuur.png|150px|Fulminezuur.png]] || 506-85-4 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q49546 Q49546] || acetone || C₃H₆O || [[File:Acetone-2D-skeletal.svg|150px|Acetone-2D-skeletal.svg]] || 67-64-1 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q49546 Q49546] || acetone || C₃H₆O || [[File:Acetone-structural.png|150px|Acetone-structural.png]] || 67-64-1 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q52858 Q52858] || ethane || C₂H₆ || [[File:Ethan%20Keilstrich.svg|150px|Ethan Keilstrich.svg]] || 74-84-0 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q58356 Q58356] || amoxapine || C₁₇H₁₆ClN₃O || [[File:Amoxapine.svg|150px|Amoxapine.svg]] || 14028-44-5 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q58713 Q58713] || clomipramine || C₁₉H₂₃ClN₂ || [[File:Clomipramine.svg|150px|Clomipramine.svg]] || 303-49-1 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q68484 Q68484] || prucalopride || C₁₈H₂₆ClN₃O₃ || [[File:Prucalopride.svg|150px|Prucalopride.svg]] || 179474-81-8 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q68566 Q68566] || mosapride || C₂₁H₂₅ClFN₃O₃ || [[File:Mosapride.svg|150px|Mosapride.svg]] || 112885-41-3 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q80232 Q80232] || cyclobutane || C₄H₈ || [[File:Cyclobutane2.svg|150px|Cyclobutane2.svg]] || 287-23-0 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q80868 Q80868] || tolonium chloride || C₁₅H₁₆ClN₃S || [[File:Tolonium%20chloride.svg|150px|Tolonium chloride.svg]] || 92-31-9 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q83320 Q83320] || nitric acid || HNO₃ || [[File:Nitric-acid.png|150px|Nitric-acid.png]] || 12507-77-6 | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q83320 Q83320] || nitric acid || HNO₃ || [[File:Nitric-acid.png|150px|Nitric-acid.png]] || 7697-37-2 | ||
+ | |} | ||
+ | |||
+ | === Ten largest cities of the world === | ||
+ | Wikidata SPARQL query showing the 10 most populated cities of the world using the million city class Q1637706 for selection | ||
+ | ==== query ==== | ||
+ | <source lang='sparql'> | ||
+ | # Ten Largest cities of the world | ||
+ | # WF 2021-08-23 | ||
+ | # see also http://wiki.bitplan.com/index.php/PyLoDStorage#Examples | ||
+ | SELECT DISTINCT ?city ?cityLabel ?population ?country ?countryLabel | ||
+ | WHERE { | ||
+ | VALUES ?cityClass { wd:Q1637706}. | ||
+ | ?city wdt:P31 ?cityClass . | ||
+ | ?city wdt:P1082 ?population . | ||
+ | ?city wdt:P17 ?country . | ||
+ | SERVICE wikibase:label { | ||
+ | bd:serviceParam wikibase:language "en" . | ||
+ | } | ||
+ | } | ||
+ | ORDER BY DESC(?population) | ||
+ | LIMIT 10 | ||
+ | </source> | ||
+ | |||
+ | [https://query.wikidata.org/#%23%20Ten%20Largest%20cities%20of%20the%20world%20%0A%23%20WF%202021-08-23%0A%23%20see%20also%20http%3A//wiki.bitplan.com/index.php/PyLoDStorage%23Examples%0ASELECT%20DISTINCT%20%3Fcity%20%3FcityLabel%20%3Fpopulation%20%3Fcountry%20%3FcountryLabel%20%0AWHERE%20%7B%0A%20%20VALUES%20%3FcityClass%20%7B%20wd%3AQ1637706%7D.%0A%20%20%3Fcity%20wdt%3AP31%20%3FcityClass%20.%0A%20%20%3Fcity%20wdt%3AP1082%20%3Fpopulation%20.%0A%20%20%3Fcity%20wdt%3AP17%20%3Fcountry%20.%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%0A%20%20%20%20bd%3AserviceParam%20wikibase%3Alanguage%20%22en%22%20.%0A%20%20%7D%0A%7D%0AORDER%20BY%20DESC%28%3Fpopulation%29%0ALIMIT%2010 try it!] | ||
+ | ==== result ==== | ||
+ | {| class="wikitable" style="text-align: left;" | ||
+ | |+ <!-- caption --> | ||
+ | |- | ||
+ | ! city !! cityLabel !! align="right"| population !! country !! countryLabel | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q1353 Q1353] || Delhi || align="right"| 26495000 || [http://www.wikidata.org/entity/Q668 Q668] || India | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q8686 Q8686] || Shanghai || align="right"| 23390000 || [http://www.wikidata.org/entity/Q148 Q148] || People's Republic of China | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q956 Q956] || Beijing || align="right"| 21710000 || [http://www.wikidata.org/entity/Q148 Q148] || People's Republic of China | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q1354 Q1354] || Dhaka || align="right"| 16800000 || [http://www.wikidata.org/entity/Q902 Q902] || Bangladesh | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q1156 Q1156] || Mumbai || align="right"| 15414288 || [http://www.wikidata.org/entity/Q668 Q668] || India | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q8660 Q8660] || Karachi || align="right"| 14910352 || [http://www.wikidata.org/entity/Q843 Q843] || Pakistan | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q8673 Q8673] || Lagos || align="right"| 14862000 || [http://www.wikidata.org/entity/Q1033 Q1033] || Nigeria | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q406 Q406] || Istanbul || align="right"| 14657434 || [http://www.wikidata.org/entity/Q43 Q43] || Turkey | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q1490 Q1490] || Tokyo || align="right"| 13942024 || [http://www.wikidata.org/entity/Q17 Q17] || Japan | ||
+ | |- | ||
+ | | [http://www.wikidata.org/entity/Q11736 Q11736] || Tianjin || align="right"| 13245000 || [http://www.wikidata.org/entity/Q148 Q148] || People's Republic of China | ||
+ | |} | ||
+ | |||
+ | === count OpenStreetMap place type instances === | ||
+ | This SPARQL query | ||
+ | determines the number of instances available in the OpenStreetMap for the placeTypes city,town and village | ||
+ | |||
+ | ==== query ==== | ||
+ | <source lang='sparql'> | ||
+ | # count osm place type instances | ||
+ | # WF 2021-08-23 | ||
+ | SELECT (count(?instance) as ?count) ?placeType ?placeTypeLabel | ||
+ | WHERE { | ||
+ | VALUES ?placeType { | ||
+ | "city" | ||
+ | "town" | ||
+ | "village" | ||
+ | } | ||
+ | ?instance osmt:place ?placeType | ||
+ | SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } | ||
+ | } | ||
+ | GROUP BY ?placeType ?placeTypeLabel | ||
+ | ORDER BY ?count | ||
+ | </source> | ||
+ | |||
+ | [https://sophox.org/#%20count%20osm%20place%20type%20instances%0A#%20WF%202021-08-23%0ASELECT%20%28count%28%3Finstance%29%20as%20%3Fcount%29%20%3FplaceType%20%3FplaceTypeLabel%0AWHERE%20%7B%20%0A%20%20VALUES%20%3FplaceType%20%7B%0A%20%20%20%20%22city%22%0A%20%20%20%20%22town%22%0A%20%20%20%20%22village%22%0A%20%20%7D%0A%20%20%3Finstance%20osmt%3Aplace%20%3FplaceType%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%7D%0AGROUP%20BY%20%3FplaceType%20%3FplaceTypeLabel%0AORDER%20BY%20%3Fcount try it!] | ||
+ | |||
+ | ==== result ==== | ||
+ | {| class="wikitable" style="text-align: left;" | ||
+ | |+ <!-- caption --> | ||
+ | |- | ||
+ | ! align="right"| count !! placeType !! placeTypeLabel | ||
+ | |- | ||
+ | | align="right"| 13614 || city || city | ||
+ | |- | ||
+ | | align="right"| 23238 || town || town | ||
+ | |- | ||
+ | | align="right"| 153380 || village || village | ||
+ | |} | ||
+ | |||
+ | = Apache Jena = | ||
+ | The jena -l and jena -f options will automatically download and unpack the needed Apache jena files. | ||
+ | == Jena load example dataset == | ||
+ | <source lang='bash'> | ||
+ | scripts/jena -l sampledata/example.ttl | ||
+ | </source> | ||
+ | == Jena fuseki server start == | ||
+ | <source lang='bash'> | ||
+ | scripts/jena -f example | ||
+ | </source> | ||
+ | You should be able to browse the admin GUI at http://localhost:3030 and have the example dataset ready for you | ||
+ | == Jena fuseki server stop == | ||
+ | <source lang='bash'> | ||
+ | scripts/jena -k | ||
+ | </source> | ||
+ | |||
+ | == jena script usage == | ||
+ | <source lang='bash'> | ||
+ | scripts/jena -h | ||
+ | scripts/jena [-f|--fuseki|-h|--help|-k|--kill|-l|--load] | ||
+ | |||
+ | -f | --fuseki [dataset]: download and start fuseki server with the given dataset | ||
+ | -h | --help: show this usage | ||
+ | -k | --kill: kill the running fuseki server | ||
+ | -l | --load [ttl file]: download jena / tdbloader and load given ttl file | ||
+ | </source> | ||
+ | |||
+ | = Example Unit tests = | ||
+ | == JSON unit test == | ||
+ | <source lang='python'> | ||
+ | ''' | ||
+ | Created on 2020-09-12 | ||
+ | |||
+ | @author: wf | ||
+ | ''' | ||
+ | import unittest | ||
+ | import json | ||
+ | from lodstorage.sample import Royals,Cities | ||
+ | from lodstorage.jsonable import JSONAble | ||
+ | from lodstorage.types import Types | ||
+ | import time | ||
+ | |||
+ | class TestJsonAble(unittest.TestCase): | ||
+ | ''' | ||
+ | test JSON serialization with JsonAble mixin | ||
+ | ''' | ||
+ | |||
+ | def setUp(self): | ||
+ | self.profile=True | ||
+ | self.debug=True | ||
+ | pass | ||
+ | |||
+ | def tearDown(self): | ||
+ | pass | ||
+ | |||
+ | def testSingleToDoubleQuote(self): | ||
+ | jsonStr=''' | ||
+ | { | ||
+ | "cities": [ | ||
+ | { | ||
+ | "name": "Upper Hell's Gate" | ||
+ | }, | ||
+ | { | ||
+ | "name": "N'zeto" | ||
+ | } | ||
+ | ] | ||
+ | } | ||
+ | ''' | ||
+ | listOfDicts=json.loads(jsonStr) | ||
+ | dictStr=str(listOfDicts) | ||
+ | if self.debug: | ||
+ | print(dictStr) | ||
+ | jsonStr2=JSONAble.singleQuoteToDoubleQuote(dictStr) | ||
+ | if self.debug: | ||
+ | print(jsonStr2) | ||
+ | self.assertEqual('''{"cities": [{"name": "Upper Hell's Gate"}, {"name": "N'zeto"}]}''',jsonStr2) | ||
+ | |||
+ | def testSingleQuoteToDoubleQuoteStackoverflow(self): | ||
+ | """ | ||
+ | see | ||
+ | - https://stackoverflow.com/a/63862387/1497139 | ||
+ | - https://stackoverflow.com/a/50257217/1497139 | ||
+ | """ | ||
+ | singleQuotedExamples=[ | ||
+ | '''{'cities': [{'name': "Upper Hell's Gate"}, {'name': "N'zeto"}]'''] | ||
+ | for example in singleQuotedExamples: | ||
+ | print (example) | ||
+ | for useRegex in [False,True]: | ||
+ | doubleQuoted=JSONAble.singleQuoteToDoubleQuote(example,useRegex=useRegex) | ||
+ | print(doubleQuoted) | ||
+ | print | ||
+ | |||
+ | def dumpListOfDicts(self,listOfDicts,limit): | ||
+ | if self.debug: | ||
+ | for index,record in enumerate(listOfDicts[:limit]): | ||
+ | print("%2d:%s" % (index,record)) | ||
+ | |||
+ | def check(self,manager,manager1,listName,debugLimit): | ||
+ | self.dumpListOfDicts(manager.__dict__[listName], debugLimit) | ||
+ | self.dumpListOfDicts(manager1.__dict__[listName], debugLimit) | ||
+ | #self.assertEqual(manager.__dict__,manager1.__dict__) | ||
+ | |||
+ | def testJsonAble(self): | ||
+ | ''' | ||
+ | test JSONAble | ||
+ | ''' | ||
+ | examples=[{ | ||
+ | 'manager': Royals(), | ||
+ | 'listName': 'royals' | ||
+ | }, { | ||
+ | 'manager': Cities(), | ||
+ | 'listName': 'cities' | ||
+ | } | ||
+ | ] | ||
+ | debugLimit=10 | ||
+ | debugChars=debugLimit*100 | ||
+ | index=0 | ||
+ | for useToJson in [True,False]: | ||
+ | for example in examples: | ||
+ | starttime=time.time() | ||
+ | manager=example['manager'] | ||
+ | listName=example['listName'] | ||
+ | if useToJson: | ||
+ | jsonStr=manager.toJSON() | ||
+ | else: | ||
+ | jsonStr=manager.asJSON() | ||
+ | if self.debug: | ||
+ | print(jsonStr[:debugChars]) | ||
+ | #print(jsonStr,file=open('/tmp/example%d.json' %index,'w')) | ||
+ | index+=1 | ||
+ | if self.profile: | ||
+ | print("->JSON for %d took %7.3f s" % (index, (time.time()-starttime))) | ||
+ | self.assertTrue(isinstance(jsonStr,str)) | ||
+ | starttime=time.time() | ||
+ | jsonDict=json.loads(jsonStr) | ||
+ | self.assertTrue(isinstance(jsonDict,dict)) | ||
+ | if self.debug: | ||
+ | print(str(jsonDict)[:debugChars]) | ||
+ | if self.profile: | ||
+ | print("<-JSON for %d took %7.3f s" % (index, time.time()-starttime)) | ||
+ | cls=manager.__class__ | ||
+ | types=Types(cls.__name__) | ||
+ | types.getTypes(listName,manager.__dict__[listName]) | ||
+ | manager1=cls() | ||
+ | manager1.fromJson(jsonStr,types=types) | ||
+ | self.check(manager,manager1,listName,debugLimit=debugLimit) | ||
+ | pass | ||
+ | |||
+ | |||
+ | if __name__ == "__main__": | ||
+ | #import sys;sys.argv = ['', 'Test.testName'] | ||
+ | unittest.main() | ||
+ | </source> | ||
+ | == SQLDB unit test == | ||
+ | <source lang='python'> | ||
+ | ''' | ||
+ | Created on 2020-08-24 | ||
+ | |||
+ | @author: wf | ||
+ | ''' | ||
+ | import unittest | ||
+ | from datetime import datetime | ||
+ | import time | ||
+ | import os | ||
+ | import sys | ||
+ | from lodstorage.sample import Sample | ||
+ | from lodstorage.uml import UML | ||
+ | from lodstorage.sql import SQLDB, EntityInfo | ||
+ | |||
+ | |||
+ | class TestSQLDB(unittest.TestCase): | ||
+ | ''' | ||
+ | Test the SQLDB database wrapper | ||
+ | ''' | ||
+ | |||
+ | def setUp(self): | ||
+ | self.debug=True | ||
+ | pass | ||
+ | |||
+ | def tearDown(self): | ||
+ | pass | ||
+ | |||
+ | def checkListOfRecords(self,listOfRecords,entityName,primaryKey=None,executeMany=True,fixDates=False,debug=False,doClose=True): | ||
+ | ''' | ||
+ | check the handling of the given list of Records | ||
+ | |||
+ | Args: | ||
+ | |||
+ | listOfRecords(list): a list of dicts that contain the data to be stored | ||
+ | entityName(string): the name of the entity type to be used as a table name | ||
+ | primaryKey(string): the name of the key / column to be used as a primary key | ||
+ | executeMany(boolean): True if executeMany mode of sqlite3 should be used | ||
+ | debug(boolean): True if debug information e.g. CREATE TABLE and INSERT INTO commands should be shown | ||
+ | doClose(boolean): True if the connection should be closed | ||
+ | |||
+ | ''' | ||
+ | size=len(listOfRecords) | ||
+ | print("%s size is %d fixDates is: %r" % (entityName,size,fixDates)) | ||
+ | self.sqlDB=SQLDB(debug=debug,errorDebug=True) | ||
+ | entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityName,primaryKey) | ||
+ | startTime=time.time() | ||
+ | self.sqlDB.store(listOfRecords,entityInfo,executeMany=executeMany) | ||
+ | elapsed=time.time()-startTime | ||
+ | print ("adding %d %s records took %5.3f s => %5.f records/s" % (size,entityName,elapsed,size/elapsed)) | ||
+ | resultList=self.sqlDB.queryAll(entityInfo,fixDates=fixDates) | ||
+ | print ("selecting %d %s records took %5.3f s => %5.f records/s" % (len(resultList),entityName,elapsed,len(resultList)/elapsed)) | ||
+ | if doClose: | ||
+ | self.sqlDB.close() | ||
+ | return resultList | ||
+ | |||
+ | def testEntityInfo(self): | ||
+ | ''' | ||
+ | test creating entityInfo from the sample record | ||
+ | ''' | ||
+ | listOfRecords=Sample.getRoyals() | ||
+ | entityInfo=EntityInfo(listOfRecords[:3],'Person','name',debug=True) | ||
+ | self.assertEqual("CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN,lastmodified TIMESTAMP)",entityInfo.createTableCmd) | ||
+ | self.assertEqual("INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge,lastmodified) values (:name,:born,:numberInLine,:wikidataurl,:age,:ofAge,:lastmodified)",entityInfo.insertCmd) | ||
+ | self.sqlDB=SQLDB(debug=self.debug,errorDebug=True) | ||
+ | entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityInfo.name,entityInfo.primaryKey) | ||
+ | tableList=self.sqlDB.getTableList() | ||
+ | if self.debug: | ||
+ | print (tableList) | ||
+ | self.assertEqual(1,len(tableList)) | ||
+ | personTable=tableList[0] | ||
+ | self.assertEqual("Person",personTable['name']) | ||
+ | self.assertEqual(7,len(personTable['columns'])) | ||
+ | uml=UML() | ||
+ | plantUml=uml.tableListToPlantUml(tableList,packageName="Royals",withSkin=False) | ||
+ | if self.debug: | ||
+ | print(plantUml) | ||
+ | expected="""package Royals { | ||
+ | class Person << Entity >> { | ||
+ | age : FLOAT | ||
+ | born : DATE | ||
+ | lastmodified : TIMESTAMP | ||
+ | name : TEXT <<PK>> | ||
+ | numberInLine : INTEGER | ||
+ | ofAge : BOOLEAN | ||
+ | wikidataurl : TEXT | ||
+ | } | ||
+ | } | ||
+ | """ | ||
+ | self.assertEqual(expected,plantUml) | ||
+ | |||
+ | # testGeneralization | ||
+ | listOfRecords=[{'name': 'Royal family', 'country': 'UK', 'lastmodified':datetime.now()}] | ||
+ | entityInfo=self.sqlDB.createTable(listOfRecords[:10],'Family','name') | ||
+ | tableList=self.sqlDB.getTableList() | ||
+ | self.assertEqual(2,len(tableList)) | ||
+ | uml=UML() | ||
+ | plantUml=uml.tableListToPlantUml(tableList,generalizeTo="PersonBase",withSkin=False) | ||
+ | print(plantUml) | ||
+ | expected='''class PersonBase << Entity >> { | ||
+ | lastmodified : TIMESTAMP | ||
+ | name : TEXT <<PK>> | ||
+ | } | ||
+ | class Person << Entity >> { | ||
+ | age : FLOAT | ||
+ | born : DATE | ||
+ | numberInLine : INTEGER | ||
+ | ofAge : BOOLEAN | ||
+ | wikidataurl : TEXT | ||
+ | } | ||
+ | class Family << Entity >> { | ||
+ | country : TEXT | ||
+ | } | ||
+ | PersonBase <|-- Person | ||
+ | PersonBase <|-- Family | ||
+ | ''' | ||
+ | self.assertEqual(expected,plantUml) | ||
+ | |||
+ | def testUniqueConstraint(self): | ||
+ | ''' | ||
+ | test for https://github.com/WolfgangFahl/pyLoDStorage/issues/4 | ||
+ | sqlite3.IntegrityError: UNIQUE constraint failed: ... show debug info | ||
+ | ''' | ||
+ | listOfDicts=[ | ||
+ | {"name": "John Doe"}, | ||
+ | {"name": "Frank Doe"}, | ||
+ | {"name": "John Doe"}, | ||
+ | {"name":"Tim Doe"}] | ||
+ | sqlDB=SQLDB(debug=self.debug,errorDebug=True) | ||
+ | entityInfo=sqlDB.createTable(listOfDicts[:10],'Does','name') | ||
+ | try: | ||
+ | sqlDB.store(listOfDicts,entityInfo,executeMany=False) | ||
+ | self.fail("There should be an exception") | ||
+ | except Exception as ex: | ||
+ | expected="""INSERT INTO Does (name) values (:name) | ||
+ | failed:UNIQUE constraint failed: Does.name | ||
+ | record #3={'name': 'John Doe'}""" | ||
+ | errMsg=str(ex) | ||
+ | self.assertEqual(expected,errMsg) | ||
+ | |||
+ | def testSqlite3(self): | ||
+ | ''' | ||
+ | test sqlite3 with a few records from the royal family | ||
+ | ''' | ||
+ | listOfRecords=Sample.getRoyals() | ||
+ | resultList=self.checkListOfRecords(listOfRecords, 'Person', 'name',debug=True) | ||
+ | if self.debug: | ||
+ | print(resultList) | ||
+ | self.assertEqual(listOfRecords,resultList) | ||
+ | |||
+ | def testBindingError(self): | ||
+ | ''' | ||
+ | test list of Records with incomplete record leading to | ||
+ | "You did not supply a value for binding 2" | ||
+ | see https://bugs.python.org/issue41638 | ||
+ | ''' | ||
+ | listOfRecords=[{'name':'Pikachu', 'type':'Electric'},{'name':'Raichu' }] | ||
+ | for executeMany in [True,False]: | ||
+ | try: | ||
+ | self.checkListOfRecords(listOfRecords,'Pokemon','name',executeMany=executeMany) | ||
+ | self.fail("There should be an exception") | ||
+ | except Exception as ex: | ||
+ | if self.debug: | ||
+ | print(str(ex)) | ||
+ | self.assertTrue('no value supplied for column' in str(ex)) | ||
+ | |||
+ | def testListOfCities(self): | ||
+ | ''' | ||
+ | test sqlite3 with some 120000 city records | ||
+ | ''' | ||
+ | listOfRecords=Sample.getCities() | ||
+ | for fixDates in [True,False]: | ||
+ | retrievedList=self.checkListOfRecords(listOfRecords,'City',fixDates=fixDates) | ||
+ | self.assertEqual(len(listOfRecords),len(retrievedList)) | ||
+ | |||
+ | def testQueryParams(self): | ||
+ | ''' | ||
+ | test Query Params | ||
+ | ''' | ||
+ | listOfDicts=[ | ||
+ | {"city": "New York", "country": "US"}, | ||
+ | {"city": "Amsterdam", "country": "NL"}, | ||
+ | {"city": "Paris", "country": "FR"}] | ||
+ | sqlDB=SQLDB(debug=self.debug,errorDebug=True) | ||
+ | entityInfo=sqlDB.createTable(listOfDicts[:10],'cities','city') | ||
+ | sqlDB.store(listOfDicts,entityInfo,executeMany=False) | ||
+ | query="SELECT * from cities WHERE country in (?)" | ||
+ | params=('FR',) | ||
+ | frCities=sqlDB.query(query,params) | ||
+ | if self.debug: | ||
+ | print (frCities); | ||
+ | self.assertEqual([{'city': 'Paris', 'country': 'FR'}],frCities) | ||
+ | |||
+ | def testSqllite3Speed(self): | ||
+ | ''' | ||
+ | test sqlite3 speed with some 100000 artificial sample records | ||
+ | consisting of two columns with a running index | ||
+ | ''' | ||
+ | limit=100000 | ||
+ | listOfRecords=Sample.getSample(limit) | ||
+ | self.checkListOfRecords(listOfRecords, 'Sample', 'pKey') | ||
+ | |||
+ | def testBackup(self): | ||
+ | ''' | ||
+ | test creating a backup of the SQL database | ||
+ | ''' | ||
+ | if sys.version_info >= (3, 7): | ||
+ | listOfRecords=Sample.getCities() | ||
+ | self.checkListOfRecords(listOfRecords,'City',fixDates=True,doClose=False) | ||
+ | backupDB="/tmp/testSqlite.db" | ||
+ | self.sqlDB.backup(backupDB,profile=True,showProgress=200) | ||
+ | size=os.stat(backupDB).st_size | ||
+ | print ("size of backup DB is %d" % size) | ||
+ | self.assertTrue(size>600000) | ||
+ | self.sqlDB.close() | ||
+ | # restore | ||
+ | ramDB=SQLDB.restore(backupDB, SQLDB.RAM, profile=True) | ||
+ | entityInfo=EntityInfo(listOfRecords[:50],'City',debug=True) | ||
+ | allCities=ramDB.queryAll(entityInfo) | ||
+ | self.assertEqual(len(allCities),len(listOfRecords)) | ||
+ | |||
+ | def testCopy(self): | ||
+ | ''' | ||
+ | test copying databases into another database | ||
+ | ''' | ||
+ | dbFile="/tmp/DAWT_Sample3x1000.db" | ||
+ | copyDB=SQLDB(dbFile) | ||
+ | for sampleNo in range(3): | ||
+ | listOfRecords=Sample.getSample(1000) | ||
+ | self.checkListOfRecords(listOfRecords, 'Sample_%d_1000' %sampleNo, 'pKey',doClose=False) | ||
+ | self.sqlDB.copyTo(copyDB) | ||
+ | size=os.stat(dbFile).st_size | ||
+ | print ("size of copy DB is %d" % size) | ||
+ | self.assertTrue(size>70000) | ||
+ | tableList=copyDB.getTableList() | ||
+ | print(tableList) | ||
+ | for sampleNo in range(3): | ||
+ | self.assertEqual('Sample_%d_1000' %sampleNo,tableList[sampleNo]['name']) | ||
+ | |||
+ | |||
+ | if __name__ == "__main__": | ||
+ | #import sys;sys.argv = ['', 'Test.testSqllit3'] | ||
+ | unittest.main() | ||
+ | </source> | ||
+ | == SPARQL unit test == | ||
+ | see https://github.com/WolfgangFahl/pyLoDStorage/blob/master/tests/testSPARQL.py | ||
+ | <source lang='python'> | ||
+ | ''' | ||
+ | Created on 2020-08-14 | ||
+ | |||
+ | @author: wf | ||
+ | ''' | ||
+ | import unittest | ||
+ | import getpass | ||
+ | from lodstorage.sparql import SPARQL | ||
+ | from lodstorage.sample import Sample | ||
+ | import time | ||
+ | |||
+ | class TestSPARQL(unittest.TestCase): | ||
+ | ''' Test SPARQL access e.g. Apache Jena via Wrapper''' | ||
+ | |||
+ | def setUp(self): | ||
+ | self.debug=False | ||
+ | pass | ||
+ | |||
+ | |||
+ | def tearDown(self): | ||
+ | pass | ||
+ | |||
+ | def getJena(self,mode='query',debug=False,typedLiterals=False,profile=False): | ||
+ | ''' | ||
+ | get the jena endpoint for the given mode | ||
+ | |||
+ | Args: | ||
+ | mode(string): query or update | ||
+ | debug(boolean): True if debug information should be output | ||
+ | typedLiterals(boolean): True if INSERT DATA SPARQL commands should use typed literals | ||
+ | profile(boolean): True if profile/timing information should be shown | ||
+ | ''' | ||
+ | endpoint="http://localhost:3030/example" | ||
+ | jena=SPARQL(endpoint,mode=mode,debug=debug,typedLiterals=typedLiterals,profile=profile) | ||
+ | return jena | ||
+ | |||
+ | def testJenaQuery(self): | ||
+ | ''' | ||
+ | test Apache Jena Fuseki SPARQL endpoint with example SELECT query | ||
+ | ''' | ||
+ | jena=self.getJena() | ||
+ | queryString = "SELECT * WHERE { ?s ?p ?o. }" | ||
+ | results=jena.query(queryString) | ||
+ | self.assertTrue(len(results)>20) | ||
+ | pass | ||
+ | |||
+ | def testJenaInsert(self): | ||
+ | ''' | ||
+ | test a Jena INSERT DATA | ||
+ | ''' | ||
+ | jena=self.getJena(mode="update") | ||
+ | insertCommands = [ """ | ||
+ | PREFIX cr: <http://cr.bitplan.com/> | ||
+ | INSERT DATA { | ||
+ | cr:version cr:author "Wolfgang Fahl". | ||
+ | } | ||
+ | """,'INVALID COMMAND'] | ||
+ | for index,insertCommand in enumerate(insertCommands): | ||
+ | result,ex=jena.insert(insertCommand) | ||
+ | if index==0: | ||
+ | self.assertTrue(ex is None) | ||
+ | print(result) | ||
+ | else: | ||
+ | msg=ex.args[0] | ||
+ | self.assertTrue("QueryBadFormed" in msg) | ||
+ | self.assertTrue("Error 400" in msg) | ||
+ | pass | ||
+ | |||
+ | def checkErrors(self,errors,expected=0): | ||
+ | ''' | ||
+ | check the given list of errors - print any errors if there are some | ||
+ | and after that assert that the length of the list of errors is zero | ||
+ | |||
+ | Args: | ||
+ | errors(list): the list of errors to check | ||
+ | ''' | ||
+ | if len(errors)>0: | ||
+ | print("ERRORS:") | ||
+ | for error in errors: | ||
+ | print(error) | ||
+ | self.assertEquals(expected,len(errors)) | ||
+ | |||
+ | def testDob(self): | ||
+ | ''' | ||
+ | test the DOB (date of birth) function that converts from ISO-Date to | ||
+ | datetime.date | ||
+ | ''' | ||
+ | dt=Sample.dob("1926-04-21") | ||
+ | self.assertEqual(1926,dt.year) | ||
+ | self.assertEqual(4,dt.month) | ||
+ | self.assertEqual(21,dt.day) | ||
+ | |||
+ | def testListOfDictInsert(self): | ||
+ | ''' | ||
+ | test inserting a list of Dicts and retrieving the values again | ||
+ | using a person based example | ||
+ | instead of | ||
+ | https://en.wikipedia.org/wiki/FOAF_(ontology) | ||
+ | |||
+ | we use an object oriented derivate of FOAF with a focus on datatypes | ||
+ | ''' | ||
+ | listofDicts=Sample.getRoyals() | ||
+ | typedLiteralModes=[True,False] | ||
+ | entityType='foafo:Person' | ||
+ | primaryKey='name' | ||
+ | prefixes='PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>' | ||
+ | for typedLiteralMode in typedLiteralModes: | ||
+ | jena=self.getJena(mode='update',typedLiterals=typedLiteralMode,debug=True) | ||
+ | deleteString= """ | ||
+ | PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> | ||
+ | PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/> | ||
+ | DELETE WHERE { | ||
+ | ?person a 'foafo:Person'. | ||
+ | ?person ?p ?o. | ||
+ | } | ||
+ | """ | ||
+ | jena.query(deleteString) | ||
+ | errors=jena.insertListOfDicts(listofDicts,entityType,primaryKey,prefixes) | ||
+ | self.checkErrors(errors) | ||
+ | |||
+ | jena=self.getJena(mode="query",debug=True) | ||
+ | queryString = """ | ||
+ | PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/> | ||
+ | SELECT ?name ?born ?numberInLine ?wikidataurl ?age ?ofAge ?lastmodified WHERE { | ||
+ | ?person a 'foafo:Person'. | ||
+ | ?person foafo:Person_name ?name. | ||
+ | ?person foafo:Person_born ?born. | ||
+ | ?person foafo:Person_numberInLine ?numberInLine. | ||
+ | ?person foafo:Person_wikidataurl ?wikidataurl. | ||
+ | ?person foafo:Person_age ?age. | ||
+ | ?person foafo:Person_ofAge ?ofAge. | ||
+ | ?person foafo:Person_lastmodified ?lastmodified. | ||
+ | }""" | ||
+ | personResults=jena.query(queryString) | ||
+ | self.assertEqual(len(listofDicts),len(personResults)) | ||
+ | personList=jena.asListOfDicts(personResults) | ||
+ | for index,person in enumerate(personList): | ||
+ | print("%d: %s" %(index,person)) | ||
+ | # check the correct round-trip behavior | ||
+ | self.assertEqual(listofDicts,personList) | ||
+ | |||
+ | def testControlEscape(self): | ||
+ | ''' | ||
+ | check the control-escaped version of an UTF-8 string | ||
+ | ''' | ||
+ | controls="Α\tΩ\r\n"; | ||
+ | expected="Α\\tΩ\\r\\n" | ||
+ | esc=SPARQL.controlEscape(controls) | ||
+ | self.assertEqual(expected,esc) | ||
+ | |||
+ | def testSPARQLErrorMessage(self): | ||
+ | ''' | ||
+ | test error handling | ||
+ | see https://stackoverflow.com/questions/63486767/how-can-i-get-the-fuseki-api-via-sparqlwrapper-to-properly-report-a-detailed-err | ||
+ | ''' | ||
+ | listOfDicts=[{ | ||
+ | 'title': '“Bioinformatics of Genome Regulation and Structure\Systems Biology” – BGRS\SB-2018', | ||
+ | 'url': 'https://thenode.biologists.com/event/11th-international-multiconference-bioinformatics-genome-regulation-structuresystems-biology-bgrssb-2018/'}] | ||
+ | entityType="cr:Event" | ||
+ | primaryKey='title' | ||
+ | prefixes="PREFIX cr: <http://cr.bitplan.com/Event/0.1/>" | ||
+ | jena=self.getJena(mode='update',typedLiterals=False,debug=True) | ||
+ | errors=jena.insertListOfDicts(listOfDicts,entityType,primaryKey,prefixes) | ||
+ | self.checkErrors(errors,1) | ||
+ | error=errors[0] | ||
+ | self.assertTrue("probably the sparql query is bad formed" in error) | ||
+ | |||
+ | |||
+ | def testEscapeStringContent(self): | ||
+ | ''' | ||
+ | test handling of double quoted strings | ||
+ | ''' | ||
+ | helpListOfDicts=[{'topic':'edit','description': '''Use | ||
+ | the "edit" | ||
+ | button to start editing - you can use | ||
+ | - tab \t | ||
+ | - carriage return \r | ||
+ | - newline \n | ||
+ | |||
+ | as escape characters | ||
+ | ''' | ||
+ | }] | ||
+ | entityType='help:Topic' | ||
+ | primaryKey='topic' | ||
+ | prefixes='PREFIX help: <http://help.bitplan.com/help/0.0.1/>' | ||
+ | jena=self.getJena(mode='update',debug=True) | ||
+ | errors=jena.insertListOfDicts(helpListOfDicts, entityType, primaryKey, prefixes, profile=True) | ||
+ | self.checkErrors(errors) | ||
+ | query=""" | ||
+ | PREFIX help: <http://help.bitplan.com/help/0.0.1/> | ||
+ | SELECT ?topic ?description | ||
+ | WHERE { | ||
+ | ?help help:Topic_topic ?topic. | ||
+ | ?help help:Topic_description ?description. | ||
+ | } | ||
+ | """ | ||
+ | jena=self.getJena(mode='query') | ||
+ | listOfDicts=jena.queryAsListOfDicts(query) | ||
+ | # check round trip equality | ||
+ | self.assertEqual(helpListOfDicts,listOfDicts) | ||
+ | |||
+ | def testIssue7(self): | ||
+ | ''' | ||
+ | test conversion of dates with timezone info | ||
+ | ''' | ||
+ | value="2020-01-01T00:00:00Z" | ||
+ | dt=SPARQL.strToDatetime(value) | ||
+ | self.assertEqual(dt.year,2020) | ||
+ | |||
+ | def testListOfDictSpeed(self): | ||
+ | ''' | ||
+ | test the speed of adding data | ||
+ | ''' | ||
+ | limit=5000 | ||
+ | for batchSize in [None,1000]: | ||
+ | listOfDicts=Sample.getSample(limit) | ||
+ | jena=self.getJena(mode='update',profile=True) | ||
+ | entityType="ex:TestRecord" | ||
+ | primaryKey='pkey' | ||
+ | prefixes='PREFIX ex: <http://example.com/>' | ||
+ | startTime=time.time() | ||
+ | errors=jena.insertListOfDicts(listOfDicts, entityType, primaryKey, prefixes,batchSize=batchSize) | ||
+ | self.checkErrors(errors) | ||
+ | elapsed=time.time()-startTime | ||
+ | print ("adding %d records took %5.3f s => %5.f records/s" % (limit,elapsed,limit/elapsed)) | ||
+ | |||
+ | def testLocalWikdata(self): | ||
+ | ''' | ||
+ | check local wikidata | ||
+ | ''' | ||
+ | # check we have local wikidata copy: | ||
+ | if getpass.getuser()=="wf": | ||
+ | # use 2018 wikidata copy | ||
+ | endpoint="http://jena.zeus.bitplan.com/wikidata/" | ||
+ | wd=SPARQL(endpoint) | ||
+ | queryString="""# get a list of whisky distilleries | ||
+ | PREFIX wd: <http://www.wikidata.org/entity/> | ||
+ | PREFIX wdt: <http://www.wikidata.org/prop/direct/> | ||
+ | SELECT ?item ?coord | ||
+ | WHERE | ||
+ | { | ||
+ | # instance of whisky distillery | ||
+ | ?item wdt:P31 wd:Q10373548. | ||
+ | # get the coordinate | ||
+ | ?item wdt:P625 ?coord. | ||
+ | } | ||
+ | """ | ||
+ | results=wd.query(queryString) | ||
+ | self.assertTrue(238<=len(results)) | ||
+ | |||
+ | |||
+ | if __name__ == "__main__": | ||
+ | #import sys;sys.argv = ['', 'Test.testName'] | ||
+ | unittest.main() | ||
+ | |||
+ | </source> | ||
+ | = Tickets = | ||
+ | {{:PyLoDStorage/Tickets}} |
Latest revision as of 12:51, 2 November 2024
OsProject
OsProject | |
---|---|
edit | |
id | PyLodStorage |
state | active |
owner | Wolfgang Fahl |
title | python List of Dict (Table) Storage library |
url | https://github.com/WolfgangFahl/pyLodStorage |
version | 0.13.3 |
description | |
date | 2024-11-02 |
since | 2020-09-10 |
until |
tickets
see also
Freitext
List of Dicts = Table
a list of dicts(Hashtables) in python can be interpreted as Table which is suitable to
- be stored in a relational database like sqlite3
- represented as JSON, CSV or example
- edited with a Spreadsheet editor
Royals example
@staticmethod
def getRoyals():
listOfDicts=[
{'name': 'Elizabeth Alexandra Mary Windsor', 'born': Sample.dob('1926-04-21'), 'numberInLine': 0, 'wikidataurl': 'https://www.wikidata.org/wiki/Q9682' },
{'name': 'Charles, Prince of Wales', 'born': Sample.dob('1948-11-14'), 'numberInLine': 1, 'wikidataurl': 'https://www.wikidata.org/wiki/Q43274' },
{'name': 'George of Cambridge', 'born': Sample.dob('2013-07-22'), 'numberInLine': 3, 'wikidataurl': 'https://www.wikidata.org/wiki/Q1359041'},
{'name': 'Harry Duke of Sussex', 'born': Sample.dob('1984-09-15'), 'numberInLine': 6, 'wikidataurl': 'https://www.wikidata.org/wiki/Q152316'}
]
today=date.today()
for person in listOfDicts:
born=person['born']
age=(today - born).days / 365.2425
person['age']=age
person['ofAge']=age>=18
person['lastmodified']=datetime.now()
return listOfDicts
The above list of dict can be stored in a Person table with the following structure:
Installation
pip install pyLodStorage
# alternatively if your pip is not a python3 pip
pip3 install pyLodStorage
# local install from source directory of pyLodStorage
pip install .
upgrade
pip install pyLodStorage -U
# alternatively if your pip is not a python3 pip
pip3 install pyLodStorage -U
SQL
The idea is to derive the necessary DDL and SQL command automatically:
CREATE TABLE Family(name TEXT PRIMARY KEY,country TEXT,lastmodified TIMESTAMP)
INSERT INTO Family (name,country,lastmodified) values (:name,:country,:lastmodified)
and use them via simple API
from lodstorage.sample import Sample
from lodstorage.sql import SQLDB, EntityInfo
listOfRecords=Sample.getRoyals()
sqlDB=SQLDB()
entityName='Person'
primaryKey='name'
entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityName,primaryKey)
sqlDB.store(listOfRecords,entityInfo)
resultList=self.sqlDB.queryAll(entityInfo)
The resultList will be the same as the original listOfRecords.
JSON
Wrap the royals example into a JSONAble class and test using that class for "round-trip" JSON storage:
...
class Royals(JSONAble):
def __init__(self,load=False):
if load:
self.royals=Sample.getRoyals()
else:
self.royals=None
...
def testRoyals(self):
'''
test Royals example
'''
royals1=Royals(load=True)
self.assertEqual(4,len(royals1.royals))
json=royals1.toJSON()
print(json)
types=Types.forClass(royals1, "royals")
royals2=Royals()
royals2.fromJson(json,types=types)
self.assertEqual(4,len(royals2.royals))
print(royals1.royals)
print(royals2.royals)
self.assertEqual(royals1.royals,royals2.royals)
JSON encoding
{
"royals": [
{
"age": 94.42767476402663,
"born": "1926-04-21",
"lastmodified": "2020-09-23T09:44:48.661740",
"name": "Elizabeth Alexandra Mary Windsor",
"numberInLine": 0,
"ofAge": true,
"wikidataurl": "https://www.wikidata.org/wiki/Q9682"
},
{
"age": 71.85910730542037,
"born": "1948-11-14",
"lastmodified": "2020-09-23T09:44:48.661747",
"name": "Charles, Prince of Wales",
"numberInLine": 1,
"ofAge": true,
"wikidataurl": "https://www.wikidata.org/wiki/Q43274"
},
{
"age": 7.17331635830989,
"born": "2013-07-22",
"lastmodified": "2020-09-23T09:44:48.661748",
"name": "George of Cambridge",
"numberInLine": 3,
"ofAge": false,
"wikidataurl": "https://www.wikidata.org/wiki/Q1359041"
},
{
"age": 36.022642490947796,
"born": "1984-09-15",
"lastmodified": "2020-09-23T09:44:48.661749",
"name": "Harry Duke of Sussex",
"numberInLine": 6,
"ofAge": true,
"wikidataurl": "https://www.wikidata.org/wiki/Q152316"
}
]
}
Query Documentation
see also https://github.com/WolfgangFahl/pyLoDStorage/issues/46
Examples
15 Random substances with CAS number
Wikidata SPARQL query showing the 15 random chemical substances with their CAS Number
query
# List of 15 random chemical components with CAS-Number, formula and structure
# see also https://github.com/WolfgangFahl/pyLoDStorage/issues/46
# WF 2021-08-23
SELECT ?substance ?substanceLabel ?formula ?structure ?CAS
WHERE {
?substance wdt:P31 wd:Q11173.
?substance wdt:P231 ?CAS.
?substance wdt:P274 ?formula.
?substance wdt:P117 ?structure.
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT 15
result
substance | substanceLabel | formula | structure | CAS |
---|---|---|---|---|
Q32703 | aminomethylpropanol | C₄H₁₁NO | 2-amino-2-methyl-1-propanol.svg | 124-68-5 |
Q32703 | aminomethylpropanol | C₄H₁₁NO | Isobutanolamine%20t.png | 124-68-5 |
Q43656 | cholesterol | C₂₇H₄₆O | Structural%20formula%20of%20cholesterol.svg | 57-88-5 |
Q45143 | fulminic acid | CHNO | Fulminezuur.png | 506-85-4 |
Q49546 | acetone | C₃H₆O | Acetone-2D-skeletal.svg | 67-64-1 |
Q49546 | acetone | C₃H₆O | Acetone-structural.png | 67-64-1 |
Q52858 | ethane | C₂H₆ | Ethan%20Keilstrich.svg | 74-84-0 |
Q58356 | amoxapine | C₁₇H₁₆ClN₃O | Amoxapine.svg | 14028-44-5 |
Q58713 | clomipramine | C₁₉H₂₃ClN₂ | Clomipramine.svg | 303-49-1 |
Q68484 | prucalopride | C₁₈H₂₆ClN₃O₃ | Prucalopride.svg | 179474-81-8 |
Q68566 | mosapride | C₂₁H₂₅ClFN₃O₃ | Mosapride.svg | 112885-41-3 |
Q80232 | cyclobutane | C₄H₈ | Cyclobutane2.svg | 287-23-0 |
Q80868 | tolonium chloride | C₁₅H₁₆ClN₃S | Tolonium%20chloride.svg | 92-31-9 |
Q83320 | nitric acid | HNO₃ | Nitric-acid.png | 12507-77-6 |
Q83320 | nitric acid | HNO₃ | Nitric-acid.png | 7697-37-2 |
Manually improved result
substance | substanceLabel | formula | structure | CAS |
---|---|---|---|---|
Q32703 | aminomethylpropanol | C₄H₁₁NO | 124-68-5 | |
Q32703 | aminomethylpropanol | C₄H₁₁NO | 124-68-5 | |
Q43656 | cholesterol | C₂₇H₄₆O | 57-88-5 | |
Q45143 | fulminic acid | CHNO | 506-85-4 | |
Q49546 | acetone | C₃H₆O | 67-64-1 | |
Q49546 | acetone | C₃H₆O | 67-64-1 | |
Q52858 | ethane | C₂H₆ | 74-84-0 | |
Q58356 | amoxapine | C₁₇H₁₆ClN₃O | 14028-44-5 | |
Q58713 | clomipramine | C₁₉H₂₃ClN₂ | 303-49-1 | |
Q68484 | prucalopride | C₁₈H₂₆ClN₃O₃ | 179474-81-8 | |
Q68566 | mosapride | C₂₁H₂₅ClFN₃O₃ | 112885-41-3 | |
Q80232 | cyclobutane | C₄H₈ | 287-23-0 | |
Q80868 | tolonium chloride | C₁₅H₁₆ClN₃S | 92-31-9 | |
Q83320 | nitric acid | HNO₃ | 12507-77-6 | |
Q83320 | nitric acid | HNO₃ | 7697-37-2 |
Ten largest cities of the world
Wikidata SPARQL query showing the 10 most populated cities of the world using the million city class Q1637706 for selection
query
# Ten Largest cities of the world
# WF 2021-08-23
# see also http://wiki.bitplan.com/index.php/PyLoDStorage#Examples
SELECT DISTINCT ?city ?cityLabel ?population ?country ?countryLabel
WHERE {
VALUES ?cityClass { wd:Q1637706}.
?city wdt:P31 ?cityClass .
?city wdt:P1082 ?population .
?city wdt:P17 ?country .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
}
ORDER BY DESC(?population)
LIMIT 10
result
city | cityLabel | population | country | countryLabel |
---|---|---|---|---|
Q1353 | Delhi | 26495000 | Q668 | India |
Q8686 | Shanghai | 23390000 | Q148 | People's Republic of China |
Q956 | Beijing | 21710000 | Q148 | People's Republic of China |
Q1354 | Dhaka | 16800000 | Q902 | Bangladesh |
Q1156 | Mumbai | 15414288 | Q668 | India |
Q8660 | Karachi | 14910352 | Q843 | Pakistan |
Q8673 | Lagos | 14862000 | Q1033 | Nigeria |
Q406 | Istanbul | 14657434 | Q43 | Turkey |
Q1490 | Tokyo | 13942024 | Q17 | Japan |
Q11736 | Tianjin | 13245000 | Q148 | People's Republic of China |
count OpenStreetMap place type instances
This SPARQL query determines the number of instances available in the OpenStreetMap for the placeTypes city,town and village
query
# count osm place type instances
# WF 2021-08-23
SELECT (count(?instance) as ?count) ?placeType ?placeTypeLabel
WHERE {
VALUES ?placeType {
"city"
"town"
"village"
}
?instance osmt:place ?placeType
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?placeType ?placeTypeLabel
ORDER BY ?count
result
count | placeType | placeTypeLabel |
---|---|---|
13614 | city | city |
23238 | town | town |
153380 | village | village |
Apache Jena
The jena -l and jena -f options will automatically download and unpack the needed Apache jena files.
Jena load example dataset
scripts/jena -l sampledata/example.ttl
Jena fuseki server start
scripts/jena -f example
You should be able to browse the admin GUI at http://localhost:3030 and have the example dataset ready for you
Jena fuseki server stop
scripts/jena -k
jena script usage
scripts/jena -h
scripts/jena [-f|--fuseki|-h|--help|-k|--kill|-l|--load]
-f | --fuseki [dataset]: download and start fuseki server with the given dataset
-h | --help: show this usage
-k | --kill: kill the running fuseki server
-l | --load [ttl file]: download jena / tdbloader and load given ttl file
Example Unit tests
JSON unit test
'''
Created on 2020-09-12
@author: wf
'''
import unittest
import json
from lodstorage.sample import Royals,Cities
from lodstorage.jsonable import JSONAble
from lodstorage.types import Types
import time
class TestJsonAble(unittest.TestCase):
'''
test JSON serialization with JsonAble mixin
'''
def setUp(self):
self.profile=True
self.debug=True
pass
def tearDown(self):
pass
def testSingleToDoubleQuote(self):
jsonStr='''
{
"cities": [
{
"name": "Upper Hell's Gate"
},
{
"name": "N'zeto"
}
]
}
'''
listOfDicts=json.loads(jsonStr)
dictStr=str(listOfDicts)
if self.debug:
print(dictStr)
jsonStr2=JSONAble.singleQuoteToDoubleQuote(dictStr)
if self.debug:
print(jsonStr2)
self.assertEqual('''{"cities": [{"name": "Upper Hell's Gate"}, {"name": "N'zeto"}]}''',jsonStr2)
def testSingleQuoteToDoubleQuoteStackoverflow(self):
"""
see
- https://stackoverflow.com/a/63862387/1497139
- https://stackoverflow.com/a/50257217/1497139
"""
singleQuotedExamples=[
'''{'cities': [{'name': "Upper Hell's Gate"}, {'name': "N'zeto"}]''']
for example in singleQuotedExamples:
print (example)
for useRegex in [False,True]:
doubleQuoted=JSONAble.singleQuoteToDoubleQuote(example,useRegex=useRegex)
print(doubleQuoted)
print
def dumpListOfDicts(self,listOfDicts,limit):
if self.debug:
for index,record in enumerate(listOfDicts[:limit]):
print("%2d:%s" % (index,record))
def check(self,manager,manager1,listName,debugLimit):
self.dumpListOfDicts(manager.__dict__[listName], debugLimit)
self.dumpListOfDicts(manager1.__dict__[listName], debugLimit)
#self.assertEqual(manager.__dict__,manager1.__dict__)
def testJsonAble(self):
'''
test JSONAble
'''
examples=[{
'manager': Royals(),
'listName': 'royals'
}, {
'manager': Cities(),
'listName': 'cities'
}
]
debugLimit=10
debugChars=debugLimit*100
index=0
for useToJson in [True,False]:
for example in examples:
starttime=time.time()
manager=example['manager']
listName=example['listName']
if useToJson:
jsonStr=manager.toJSON()
else:
jsonStr=manager.asJSON()
if self.debug:
print(jsonStr[:debugChars])
#print(jsonStr,file=open('/tmp/example%d.json' %index,'w'))
index+=1
if self.profile:
print("->JSON for %d took %7.3f s" % (index, (time.time()-starttime)))
self.assertTrue(isinstance(jsonStr,str))
starttime=time.time()
jsonDict=json.loads(jsonStr)
self.assertTrue(isinstance(jsonDict,dict))
if self.debug:
print(str(jsonDict)[:debugChars])
if self.profile:
print("<-JSON for %d took %7.3f s" % (index, time.time()-starttime))
cls=manager.__class__
types=Types(cls.__name__)
types.getTypes(listName,manager.__dict__[listName])
manager1=cls()
manager1.fromJson(jsonStr,types=types)
self.check(manager,manager1,listName,debugLimit=debugLimit)
pass
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()
SQLDB unit test
'''
Created on 2020-08-24
@author: wf
'''
import unittest
from datetime import datetime
import time
import os
import sys
from lodstorage.sample import Sample
from lodstorage.uml import UML
from lodstorage.sql import SQLDB, EntityInfo
class TestSQLDB(unittest.TestCase):
'''
Test the SQLDB database wrapper
'''
def setUp(self):
self.debug=True
pass
def tearDown(self):
pass
def checkListOfRecords(self,listOfRecords,entityName,primaryKey=None,executeMany=True,fixDates=False,debug=False,doClose=True):
'''
check the handling of the given list of Records
Args:
listOfRecords(list): a list of dicts that contain the data to be stored
entityName(string): the name of the entity type to be used as a table name
primaryKey(string): the name of the key / column to be used as a primary key
executeMany(boolean): True if executeMany mode of sqlite3 should be used
debug(boolean): True if debug information e.g. CREATE TABLE and INSERT INTO commands should be shown
doClose(boolean): True if the connection should be closed
'''
size=len(listOfRecords)
print("%s size is %d fixDates is: %r" % (entityName,size,fixDates))
self.sqlDB=SQLDB(debug=debug,errorDebug=True)
entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityName,primaryKey)
startTime=time.time()
self.sqlDB.store(listOfRecords,entityInfo,executeMany=executeMany)
elapsed=time.time()-startTime
print ("adding %d %s records took %5.3f s => %5.f records/s" % (size,entityName,elapsed,size/elapsed))
resultList=self.sqlDB.queryAll(entityInfo,fixDates=fixDates)
print ("selecting %d %s records took %5.3f s => %5.f records/s" % (len(resultList),entityName,elapsed,len(resultList)/elapsed))
if doClose:
self.sqlDB.close()
return resultList
def testEntityInfo(self):
'''
test creating entityInfo from the sample record
'''
listOfRecords=Sample.getRoyals()
entityInfo=EntityInfo(listOfRecords[:3],'Person','name',debug=True)
self.assertEqual("CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN,lastmodified TIMESTAMP)",entityInfo.createTableCmd)
self.assertEqual("INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge,lastmodified) values (:name,:born,:numberInLine,:wikidataurl,:age,:ofAge,:lastmodified)",entityInfo.insertCmd)
self.sqlDB=SQLDB(debug=self.debug,errorDebug=True)
entityInfo=self.sqlDB.createTable(listOfRecords[:10],entityInfo.name,entityInfo.primaryKey)
tableList=self.sqlDB.getTableList()
if self.debug:
print (tableList)
self.assertEqual(1,len(tableList))
personTable=tableList[0]
self.assertEqual("Person",personTable['name'])
self.assertEqual(7,len(personTable['columns']))
uml=UML()
plantUml=uml.tableListToPlantUml(tableList,packageName="Royals",withSkin=False)
if self.debug:
print(plantUml)
expected="""package Royals {
class Person << Entity >> {
age : FLOAT
born : DATE
lastmodified : TIMESTAMP
name : TEXT <<PK>>
numberInLine : INTEGER
ofAge : BOOLEAN
wikidataurl : TEXT
}
}
"""
self.assertEqual(expected,plantUml)
# testGeneralization
listOfRecords=[{'name': 'Royal family', 'country': 'UK', 'lastmodified':datetime.now()}]
entityInfo=self.sqlDB.createTable(listOfRecords[:10],'Family','name')
tableList=self.sqlDB.getTableList()
self.assertEqual(2,len(tableList))
uml=UML()
plantUml=uml.tableListToPlantUml(tableList,generalizeTo="PersonBase",withSkin=False)
print(plantUml)
expected='''class PersonBase << Entity >> {
lastmodified : TIMESTAMP
name : TEXT <<PK>>
}
class Person << Entity >> {
age : FLOAT
born : DATE
numberInLine : INTEGER
ofAge : BOOLEAN
wikidataurl : TEXT
}
class Family << Entity >> {
country : TEXT
}
PersonBase <|-- Person
PersonBase <|-- Family
'''
self.assertEqual(expected,plantUml)
def testUniqueConstraint(self):
'''
test for https://github.com/WolfgangFahl/pyLoDStorage/issues/4
sqlite3.IntegrityError: UNIQUE constraint failed: ... show debug info
'''
listOfDicts=[
{"name": "John Doe"},
{"name": "Frank Doe"},
{"name": "John Doe"},
{"name":"Tim Doe"}]
sqlDB=SQLDB(debug=self.debug,errorDebug=True)
entityInfo=sqlDB.createTable(listOfDicts[:10],'Does','name')
try:
sqlDB.store(listOfDicts,entityInfo,executeMany=False)
self.fail("There should be an exception")
except Exception as ex:
expected="""INSERT INTO Does (name) values (:name)
failed:UNIQUE constraint failed: Does.name
record #3={'name': 'John Doe'}"""
errMsg=str(ex)
self.assertEqual(expected,errMsg)
def testSqlite3(self):
'''
test sqlite3 with a few records from the royal family
'''
listOfRecords=Sample.getRoyals()
resultList=self.checkListOfRecords(listOfRecords, 'Person', 'name',debug=True)
if self.debug:
print(resultList)
self.assertEqual(listOfRecords,resultList)
def testBindingError(self):
'''
test list of Records with incomplete record leading to
"You did not supply a value for binding 2"
see https://bugs.python.org/issue41638
'''
listOfRecords=[{'name':'Pikachu', 'type':'Electric'},{'name':'Raichu' }]
for executeMany in [True,False]:
try:
self.checkListOfRecords(listOfRecords,'Pokemon','name',executeMany=executeMany)
self.fail("There should be an exception")
except Exception as ex:
if self.debug:
print(str(ex))
self.assertTrue('no value supplied for column' in str(ex))
def testListOfCities(self):
'''
test sqlite3 with some 120000 city records
'''
listOfRecords=Sample.getCities()
for fixDates in [True,False]:
retrievedList=self.checkListOfRecords(listOfRecords,'City',fixDates=fixDates)
self.assertEqual(len(listOfRecords),len(retrievedList))
def testQueryParams(self):
'''
test Query Params
'''
listOfDicts=[
{"city": "New York", "country": "US"},
{"city": "Amsterdam", "country": "NL"},
{"city": "Paris", "country": "FR"}]
sqlDB=SQLDB(debug=self.debug,errorDebug=True)
entityInfo=sqlDB.createTable(listOfDicts[:10],'cities','city')
sqlDB.store(listOfDicts,entityInfo,executeMany=False)
query="SELECT * from cities WHERE country in (?)"
params=('FR',)
frCities=sqlDB.query(query,params)
if self.debug:
print (frCities);
self.assertEqual([{'city': 'Paris', 'country': 'FR'}],frCities)
def testSqllite3Speed(self):
'''
test sqlite3 speed with some 100000 artificial sample records
consisting of two columns with a running index
'''
limit=100000
listOfRecords=Sample.getSample(limit)
self.checkListOfRecords(listOfRecords, 'Sample', 'pKey')
def testBackup(self):
'''
test creating a backup of the SQL database
'''
if sys.version_info >= (3, 7):
listOfRecords=Sample.getCities()
self.checkListOfRecords(listOfRecords,'City',fixDates=True,doClose=False)
backupDB="/tmp/testSqlite.db"
self.sqlDB.backup(backupDB,profile=True,showProgress=200)
size=os.stat(backupDB).st_size
print ("size of backup DB is %d" % size)
self.assertTrue(size>600000)
self.sqlDB.close()
# restore
ramDB=SQLDB.restore(backupDB, SQLDB.RAM, profile=True)
entityInfo=EntityInfo(listOfRecords[:50],'City',debug=True)
allCities=ramDB.queryAll(entityInfo)
self.assertEqual(len(allCities),len(listOfRecords))
def testCopy(self):
'''
test copying databases into another database
'''
dbFile="/tmp/DAWT_Sample3x1000.db"
copyDB=SQLDB(dbFile)
for sampleNo in range(3):
listOfRecords=Sample.getSample(1000)
self.checkListOfRecords(listOfRecords, 'Sample_%d_1000' %sampleNo, 'pKey',doClose=False)
self.sqlDB.copyTo(copyDB)
size=os.stat(dbFile).st_size
print ("size of copy DB is %d" % size)
self.assertTrue(size>70000)
tableList=copyDB.getTableList()
print(tableList)
for sampleNo in range(3):
self.assertEqual('Sample_%d_1000' %sampleNo,tableList[sampleNo]['name'])
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testSqllit3']
unittest.main()
SPARQL unit test
see https://github.com/WolfgangFahl/pyLoDStorage/blob/master/tests/testSPARQL.py
'''
Created on 2020-08-14
@author: wf
'''
import unittest
import getpass
from lodstorage.sparql import SPARQL
from lodstorage.sample import Sample
import time
class TestSPARQL(unittest.TestCase):
''' Test SPARQL access e.g. Apache Jena via Wrapper'''
def setUp(self):
self.debug=False
pass
def tearDown(self):
pass
def getJena(self,mode='query',debug=False,typedLiterals=False,profile=False):
'''
get the jena endpoint for the given mode
Args:
mode(string): query or update
debug(boolean): True if debug information should be output
typedLiterals(boolean): True if INSERT DATA SPARQL commands should use typed literals
profile(boolean): True if profile/timing information should be shown
'''
endpoint="http://localhost:3030/example"
jena=SPARQL(endpoint,mode=mode,debug=debug,typedLiterals=typedLiterals,profile=profile)
return jena
def testJenaQuery(self):
'''
test Apache Jena Fuseki SPARQL endpoint with example SELECT query
'''
jena=self.getJena()
queryString = "SELECT * WHERE { ?s ?p ?o. }"
results=jena.query(queryString)
self.assertTrue(len(results)>20)
pass
def testJenaInsert(self):
'''
test a Jena INSERT DATA
'''
jena=self.getJena(mode="update")
insertCommands = [ """
PREFIX cr: <http://cr.bitplan.com/>
INSERT DATA {
cr:version cr:author "Wolfgang Fahl".
}
""",'INVALID COMMAND']
for index,insertCommand in enumerate(insertCommands):
result,ex=jena.insert(insertCommand)
if index==0:
self.assertTrue(ex is None)
print(result)
else:
msg=ex.args[0]
self.assertTrue("QueryBadFormed" in msg)
self.assertTrue("Error 400" in msg)
pass
def checkErrors(self,errors,expected=0):
'''
check the given list of errors - print any errors if there are some
and after that assert that the length of the list of errors is zero
Args:
errors(list): the list of errors to check
'''
if len(errors)>0:
print("ERRORS:")
for error in errors:
print(error)
self.assertEquals(expected,len(errors))
def testDob(self):
'''
test the DOB (date of birth) function that converts from ISO-Date to
datetime.date
'''
dt=Sample.dob("1926-04-21")
self.assertEqual(1926,dt.year)
self.assertEqual(4,dt.month)
self.assertEqual(21,dt.day)
def testListOfDictInsert(self):
'''
test inserting a list of Dicts and retrieving the values again
using a person based example
instead of
https://en.wikipedia.org/wiki/FOAF_(ontology)
we use an object oriented derivate of FOAF with a focus on datatypes
'''
listofDicts=Sample.getRoyals()
typedLiteralModes=[True,False]
entityType='foafo:Person'
primaryKey='name'
prefixes='PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>'
for typedLiteralMode in typedLiteralModes:
jena=self.getJena(mode='update',typedLiterals=typedLiteralMode,debug=True)
deleteString= """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>
DELETE WHERE {
?person a 'foafo:Person'.
?person ?p ?o.
}
"""
jena.query(deleteString)
errors=jena.insertListOfDicts(listofDicts,entityType,primaryKey,prefixes)
self.checkErrors(errors)
jena=self.getJena(mode="query",debug=True)
queryString = """
PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>
SELECT ?name ?born ?numberInLine ?wikidataurl ?age ?ofAge ?lastmodified WHERE {
?person a 'foafo:Person'.
?person foafo:Person_name ?name.
?person foafo:Person_born ?born.
?person foafo:Person_numberInLine ?numberInLine.
?person foafo:Person_wikidataurl ?wikidataurl.
?person foafo:Person_age ?age.
?person foafo:Person_ofAge ?ofAge.
?person foafo:Person_lastmodified ?lastmodified.
}"""
personResults=jena.query(queryString)
self.assertEqual(len(listofDicts),len(personResults))
personList=jena.asListOfDicts(personResults)
for index,person in enumerate(personList):
print("%d: %s" %(index,person))
# check the correct round-trip behavior
self.assertEqual(listofDicts,personList)
def testControlEscape(self):
'''
check the control-escaped version of an UTF-8 string
'''
controls="Α\tΩ\r\n";
expected="Α\\tΩ\\r\\n"
esc=SPARQL.controlEscape(controls)
self.assertEqual(expected,esc)
def testSPARQLErrorMessage(self):
'''
test error handling
see https://stackoverflow.com/questions/63486767/how-can-i-get-the-fuseki-api-via-sparqlwrapper-to-properly-report-a-detailed-err
'''
listOfDicts=[{
'title': '“Bioinformatics of Genome Regulation and Structure\Systems Biology” – BGRS\SB-2018',
'url': 'https://thenode.biologists.com/event/11th-international-multiconference-bioinformatics-genome-regulation-structuresystems-biology-bgrssb-2018/'}]
entityType="cr:Event"
primaryKey='title'
prefixes="PREFIX cr: <http://cr.bitplan.com/Event/0.1/>"
jena=self.getJena(mode='update',typedLiterals=False,debug=True)
errors=jena.insertListOfDicts(listOfDicts,entityType,primaryKey,prefixes)
self.checkErrors(errors,1)
error=errors[0]
self.assertTrue("probably the sparql query is bad formed" in error)
def testEscapeStringContent(self):
'''
test handling of double quoted strings
'''
helpListOfDicts=[{'topic':'edit','description': '''Use
the "edit"
button to start editing - you can use
- tab \t
- carriage return \r
- newline \n
as escape characters
'''
}]
entityType='help:Topic'
primaryKey='topic'
prefixes='PREFIX help: <http://help.bitplan.com/help/0.0.1/>'
jena=self.getJena(mode='update',debug=True)
errors=jena.insertListOfDicts(helpListOfDicts, entityType, primaryKey, prefixes, profile=True)
self.checkErrors(errors)
query="""
PREFIX help: <http://help.bitplan.com/help/0.0.1/>
SELECT ?topic ?description
WHERE {
?help help:Topic_topic ?topic.
?help help:Topic_description ?description.
}
"""
jena=self.getJena(mode='query')
listOfDicts=jena.queryAsListOfDicts(query)
# check round trip equality
self.assertEqual(helpListOfDicts,listOfDicts)
def testIssue7(self):
'''
test conversion of dates with timezone info
'''
value="2020-01-01T00:00:00Z"
dt=SPARQL.strToDatetime(value)
self.assertEqual(dt.year,2020)
def testListOfDictSpeed(self):
'''
test the speed of adding data
'''
limit=5000
for batchSize in [None,1000]:
listOfDicts=Sample.getSample(limit)
jena=self.getJena(mode='update',profile=True)
entityType="ex:TestRecord"
primaryKey='pkey'
prefixes='PREFIX ex: <http://example.com/>'
startTime=time.time()
errors=jena.insertListOfDicts(listOfDicts, entityType, primaryKey, prefixes,batchSize=batchSize)
self.checkErrors(errors)
elapsed=time.time()-startTime
print ("adding %d records took %5.3f s => %5.f records/s" % (limit,elapsed,limit/elapsed))
def testLocalWikdata(self):
'''
check local wikidata
'''
# check we have local wikidata copy:
if getpass.getuser()=="wf":
# use 2018 wikidata copy
endpoint="http://jena.zeus.bitplan.com/wikidata/"
wd=SPARQL(endpoint)
queryString="""# get a list of whisky distilleries
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT ?item ?coord
WHERE
{
# instance of whisky distillery
?item wdt:P31 wd:Q10373548.
# get the coordinate
?item wdt:P625 ?coord.
}
"""
results=wd.query(queryString)
self.assertTrue(238<=len(results))
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()
Tickets
- Issue 57 - Support conversion to rdf triples
- Issue 56 - addWikiData Linker to QueryResultDocumentation and callback for further such handlers while at it✓
- Issue 55 - need to export pylatexenc dependency✓
- Issue 54 - addQueryDocumentation✓
- Issue 53 - add limit for query.documentQueryResult✓
- Issue 52 - add pewee adaption
- Issue 51 - datetime handling sqlite error should lead to warning and not raise an exception
- Issue 50 - add initSqlDB✓
- Issue 49 - add append option to store API✓
- Issue 48 - isCached should be True even if count is less than 100✓
- Issue 47 - queryDocumentation: Fix tryItMarkup if lang is not SPARQL and fix links with special chars✓
- Issue 46 - add try it! button to Query documentation✓
- Issue 45 - Add option to specify cacheDir location in StorageConfig✓
- Issue 44 - use setAttr instead of assigning __dict__✓
- Issue 43 - offer warnOnNone and warnOnUnsupportedTypes for Type handling✓
- Issue 42 - fix behavior for invalid json✓
- Issue 41 - improve error message when create table command fails✓
- Issue 40 - make limit and batchSize available again for store✓
- Issue 39 - set sampleRecordCount to -1 / all as a default for fromCache handling✓
- Issue 38 - fix getCacheFile to use self.config if config is None✓
- Issue 37 - regression - getListOfDicts callback not handled as function✓
- Issue 36 - regression return of lists needed for restoreFromJsonFile and restoreFromJsonStr for backward compatiblity✓
- Issue 35 - support pydantic as a schema support
- Issue 34 - refactor EntityManager from ProceedingTitleParser here for reuse✓
- Issue 33 - SQL handling of Lists✓
- Issue 32 - getLookup fails on duplicates✓
- Issue 31 - getLookup fails if value is list✓
- Issue 30 - Limit stored attributes of JSONAble to attributes of the samples
- Issue 29 - Functionality to convert between LoD and CSV
- Issue 28 - add plot option✓
- Issue 27 - add Lookup map option✓
- Issue 26 - add intersect methods✓
- Issue 25 - integrate pandas dataframes✓
- Issue 24 - integrate tabulate✓
- Issue 23 - date type handling should be more robust - list handling should return a list of problems instead of failing✓
- Issue 22 - Regression: storeToJsonFile and restoreFromJsonFile missing in JSONAble✓
- Issue 21 - FromJson with taking type information from getSamples✓
- Issue 20 - add fixNone option to SPARQL results (same functionality as in SQL)✓
- Issue 19 - migrate query manager from ptp here✓
- Issue 18 - add getTableDict convenience function✓
- Issue 17 - add check_same_thread to be able to set it to False✓
- Issue 16 - allow to only warn if samplerecordcount is higher than number of available records✓
- Issue 15 - auto create generalization view ddl for tablelist✓
- Issue 14 - offer execute wrapper directly via sqlDB✓
- Issue 13 - set None value for undefined LoD entries✓
- Issue 12 - transitive dependency for SPARQLWrapper missing✓
- Issue 11 - make Types JSONAble to allow to store type information along side json data files✓
- Issue 10 - need to fix round-trip json behavior✓
- Issue 9 - Avoid wikidata 403✓
- Issue 8 - strToDatetime fails on invalid datetime strings e.g. with year out of range✓
- Issue 7 - support date values in format supplied e.g. by wikidata 2020-01-01T00:00:00Z✓
- Issue 6 - add SPARQL support✓
- Issue 5 - Allow use of params in queries✓
- Issue 4 - sqlite3.IntegrityError: UNIQUE constraint failed: ... show debug info✓
- Issue 3 - Refactor JSON support from DgraphAndWeaviateTest✓
- Issue 2 - add improved UML support from Proceedings Title Parser✓
- Issue 1 - Make available via pypi✓