Truly Tabular RDF/GND
Jump to navigation
Jump to search
GND property multiplicity
The following table shows the cardinality/multiplicity of properties per conference record. E.g. there are up to four different acronyms for a conference.
property | gnd | total | unique | min | max | avg |
---|---|---|---|---|---|---|
eventId | gnd:gndIdentifier | 731651 | 731651 | 1 | 1 | 1 |
title | gnd:preferredNameForTheConferenceOrEvent | 731645 | 731645 | 0 | 1 | 0.999991799 |
acronym | gnd:abbreviatedNameForTheConferenceOrEvent | 3537 | 3206 | 0 | 4 | 0.00483 |
sameAs | owl:sameAs | 769120 | 693077 | 0 | 20 | 1.05 |
variant | gnd:variantNameForTheConferenceOrEvent | 632368 | 229268 | 0 | 41 | 0.86 |
date | gnd:dateOfConferenceOrEvent | 710819 | 704949 | 0 | 9 | 0.971 |
areaCode | gnd:geographicAreaCode | 797037 | 612631 | 0 | 11 | 1.089 |
place | gnd:placeOfConferenceOrEvent | 659305 | 624667 | 0 | 18 | 0.901 |
topic | gnd:topic | 5061 | 3520 | 0 | 6 | 0.00691 |
homepage | gnd:homepage | 19011 | 18702 | 0 | 3 | 0.026 |
prec | gnd:precedingConferenceOrEvent | 12182 | 12106 | 0 | 3 | 0.0166 |
succ | gnd:succeedingConferenceOrEvent | 11974 | 11929 | 0 | 3 | 0.0163 |
query to analyze multiplicity
# Aggregate usage statistics (sum/min/max/avg of values per record) for ONE
# property of gnd:ConferenceOrEvent records.
#
# This is a template: to analyze a different property, activate exactly one
# triple pattern inside the OPTIONAL block and use its variable in the
# COUNT(...) of the inner SELECT and in the HAVING clause.
# Run it twice per property - once with the HAVING clause commented out and
# once with it enabled - to fill the multiplicity table on this wiki page.
# WF 2021-12-05
PREFIX gndi: <https://d-nb.info/gnd>
PREFIX gnd: <https://d-nb.info/standards/elementset/gnd#>
PREFIX gndo: <https://d-nb.info/standards/vocab/gnd/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dc: <http://purl.org/dc/terms/>
PREFIX wdrs: <http://www.w3.org/2007/05/powder-s#>

SELECT
  (SUM(?valueCount) AS ?sum)
  (MIN(?valueCount) AS ?min)
  (MAX(?valueCount) AS ?max)
  (AVG(?valueCount) AS ?avg)
WHERE {
  {
    # Inner query: one row per event with the number of values it has for
    # the selected property (0 when the OPTIONAL pattern does not match).
    SELECT ?event ?eventId (COUNT(?title) AS ?valueCount)
    WHERE {
      ?event a gnd:ConferenceOrEvent ;
             gnd:gndIdentifier ?eventId .
      OPTIONAL {
        # ?event gnd:abbreviatedNameForTheConferenceOrEvent ?acronym .
        # ?event owl:sameAs ?sameAs .
        # ?event gnd:variantNameForTheConferenceOrEvent ?variant .
        ?event gnd:preferredNameForTheConferenceOrEvent ?title .
        # ?event gnd:dateOfConferenceOrEvent ?date .
        # ?event gnd:geographicAreaCode ?areaCode .
        # ?event gnd:placeOfConferenceOrEvent ?place .
        # ?event gnd:topic ?topic .
        # ?event gnd:homepage ?homepage .
        # ?event gnd:precedingConferenceOrEvent ?prec .
        # ?event gnd:succeedingConferenceOrEvent ?succ .
      }
    }
    GROUP BY ?event ?eventId
    # Second run: restrict to events with exactly one value.
    # HAVING (COUNT(?title) = 1)
  }
}
GND Query
# Performance-optimized query for GND event details.
# Returns one row per event. Multi-valued properties are aggregated into
# up to three columns each: a single representative value (MIN), the number
# of distinct values (COUNT DISTINCT) and a "| "-separated list
# (GROUP_CONCAT DISTINCT).
# WF 2021-12-05
PREFIX gndi: <https://d-nb.info/gnd>
PREFIX gnd: <https://d-nb.info/standards/elementset/gnd#>
PREFIX gndo: <https://d-nb.info/standards/vocab/gnd/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dc: <http://purl.org/dc/terms/>
PREFIX wdrs: <http://www.w3.org/2007/05/powder-s#>

SELECT
  ?event
  ?eventId
  # title
  (MIN(?titleValue) AS ?fulltitle)
  # date
  (COUNT(DISTINCT ?dateValue) AS ?dateCount)
  (MIN(?dateValue) AS ?date)
  # acronym
  (MIN(?acronymValue) AS ?acronym)
  (COUNT(DISTINCT ?acronymValue) AS ?acronymCount)
  (GROUP_CONCAT(DISTINCT ?acronymValue; SEPARATOR="| ") AS ?acronyms)
  # variant name
  (MIN(?variantValue) AS ?variant)
  (COUNT(DISTINCT ?variantValue) AS ?variantCount)
  (GROUP_CONCAT(DISTINCT ?variantValue; SEPARATOR="| ") AS ?variants)
  # place
  (MIN(?placeValue) AS ?place)
  (COUNT(DISTINCT ?placeValue) AS ?placeCount)
  (GROUP_CONCAT(DISTINCT ?placeValue; SEPARATOR="| ") AS ?places)
  # homepage
  (MIN(?homepageValue) AS ?homepage)
WHERE {
  # Mandatory: only events that have an identifier and a preferred name.
  ?event a gnd:ConferenceOrEvent ;
         gnd:gndIdentifier ?eventId ;
         gnd:preferredNameForTheConferenceOrEvent ?titleValue .

  # Optional properties - an event is kept even when these are missing.
  OPTIONAL { ?event gnd:abbreviatedNameForTheConferenceOrEvent ?acronymValue . }
  OPTIONAL { ?event gnd:homepage ?homepageValue . }
  OPTIONAL { ?event gnd:variantNameForTheConferenceOrEvent ?variantValue . }
  OPTIONAL { ?event gnd:dateOfConferenceOrEvent ?dateValue . }
  OPTIONAL { ?event gnd:placeOfConferenceOrEvent ?placeValue . }

  # Not queried - rarely populated (figures as of 2021-12):
  #   only available 3520 times
  #   ?event gnd:topic ?topic .
  #   only available 12106 times
  #   ?event gnd:precedingConferenceOrEvent ?prec .
  #   only available 11929 times
  #   ?event gnd:succeedingConferenceOrEvent ?succ .
}
GROUP BY ?event ?eventId
dateCardinality after import to relational database
query
-- Distribution of dateCount in event_gnd: one result row per distinct
-- dateCount value, giving the number of rows that have that value,
-- most frequent first.
select
    count(dateCount)
from event_gnd
group by dateCount
order by count(dateCount) desc
result
count(dateCount) |
---|
715987 |
23948 |
2815 |
141 |
30 |
5 |
4 |
3 |
1 |
1 |