Truly Tabular RDF/GND

From BITPlan Wiki
Revision as of 08:35, 5 August 2022 by Wf (talk | contribs) (Created page with "== GND property multiplicity == The following table shows the cardinality/multiplicity of properties per conference record. E.g. there are up to four different acronyms for a...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

GND property multiplicity

The following table shows the cardinality/multiplicity of properties per conference record. E.g. there are up to four different acronyms for a conference.

Property multiplicity
property gnd total unique min max avg
eventId gnd:gndIdentifier 731651 731651 1 1 1
title gnd:preferredNameForTheConferenceOrEvent 731645 731645 0 1 0.999991799
acronym gnd:abbreviatedNameForTheConferenceOrEvent 3537 3206 0 4 0.00483
sameAs owl:sameAs 769120 693077 0 20 1.05
variant gnd:variantNameForTheConferenceOrEvent 632368 229268 0 41 0.86
date gnd:dateOfConferenceOrEvent 710819 704949 0 9 0.971
areaCode gnd:geographicAreaCode 797037 612631 0 11 1.089
place gnd:placeOfConferenceOrEvent 659305 624667 0 18 0.901
topic gnd:topic 5061 3520 0 6 0.00691
homepage gnd:homepage 19011 18702 0 3 0.026
prec gnd:precedingConferenceOrEvent 12182 12106 0 3 0.0166
succ gnd:succeedingConferenceOrEvent 11974 11929 0 3 0.0163

query to analyze multiplicity

# Aggregate usage counts of a single GND property over all conference/event records.
#
# The inner query counts, per event, how many values the selected property has;
# the outer query folds these per-event counts into sum, min, max and avg.
#
# Usage:
#  - enable exactly one triple pattern inside the OPTIONAL block; every
#    alternative binds the same ?value variable, so neither the COUNT nor the
#    HAVING clause has to be edited when switching properties
#  - run the query twice per property: once as is and once with the HAVING
#    clause enabled, which restricts the aggregation to events that have
#    exactly one value
#  - the results are used to create the property multiplicity table of this wiki page
# WF 2021-12-05
PREFIX gndi:  <https://d-nb.info/gnd>
PREFIX gnd:  <https://d-nb.info/standards/elementset/gnd#>
PREFIX gndo: <https://d-nb.info/standards/vocab/gnd/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dc: <http://purl.org/dc/terms/>
PREFIX wdrs: <http://www.w3.org/2007/05/powder-s#>

SELECT
  (SUM(?itemCount) AS ?sum)
  (MIN(?itemCount) AS ?min)
  (MAX(?itemCount) AS ?max)
  (AVG(?itemCount) AS ?avg)
WHERE {
  SELECT ?event ?eventId (COUNT(?value) AS ?itemCount)
  WHERE {
    ?event a gnd:ConferenceOrEvent.
    ?event gnd:gndIdentifier ?eventId.
    # OPTIONAL keeps events that lack the property; COUNT(?value) is 0 for them
    OPTIONAL {
      # ?event gnd:abbreviatedNameForTheConferenceOrEvent ?value.
      # ?event owl:sameAs ?value.
      # ?event gnd:variantNameForTheConferenceOrEvent ?value.
      ?event gnd:preferredNameForTheConferenceOrEvent ?value.
      # ?event gnd:dateOfConferenceOrEvent ?value.
      # ?event gnd:geographicAreaCode ?value.
      # ?event gnd:placeOfConferenceOrEvent ?value.
      # ?event gnd:topic ?value.
      # ?event gnd:homepage ?value.
      # ?event gnd:precedingConferenceOrEvent ?value.
      # ?event gnd:succeedingConferenceOrEvent ?value.
    }
  }
  GROUP BY ?event ?eventId
  # HAVING(COUNT(?value) = 1)
}

GND Query

# performance optimized query of GND event details, one result row per event
# multi-valued properties are aggregated into a single value (MIN),
# a distinct count and/or a "| " separated list column
# note: the preferred name pattern is mandatory, so events without a
# preferred name are not returned; all other properties are optional
# WF 2021-12-05
PREFIX gndi:  <https://d-nb.info/gnd>
PREFIX gnd:  <https://d-nb.info/standards/elementset/gnd#>
PREFIX gndo: <https://d-nb.info/standards/vocab/gnd/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dc: <http://purl.org/dc/terms/>
PREFIX wdrs: <http://www.w3.org/2007/05/powder-s#>

SELECT
  ?event
  ?eventId
  (MIN(?eventTitle) AS ?fulltitle)

  # date: distinct count and one representative value
  (COUNT(DISTINCT ?eventDate) AS ?dateCount)
  (MIN(?eventDate) AS ?date)

  # acronym: representative value, distinct count, list
  (MIN(?eventAcronym) AS ?acronym)
  (COUNT(DISTINCT ?eventAcronym) AS ?acronymCount)
  (GROUP_CONCAT(DISTINCT ?eventAcronym; SEPARATOR="| ") AS ?acronyms)

  # variant name: representative value, distinct count, list
  (MIN(?eventVariant) AS ?variant)
  (COUNT(DISTINCT ?eventVariant) AS ?variantCount)
  (GROUP_CONCAT(DISTINCT ?eventVariant; SEPARATOR="| ") AS ?variants)

  # place: representative value, distinct count, list
  (MIN(?eventPlace) AS ?place)
  (COUNT(DISTINCT ?eventPlace) AS ?placeCount)
  (GROUP_CONCAT(DISTINCT ?eventPlace; SEPARATOR="| ") AS ?places)

  # homepage: representative value only
  (MIN(?eventHomepage) AS ?homepage)
WHERE {
  # mandatory part: type, identifier and preferred name
  ?event a gnd:ConferenceOrEvent ;
         gnd:gndIdentifier ?eventId ;
         gnd:preferredNameForTheConferenceOrEvent ?eventTitle .

  # optional properties, each in its own OPTIONAL so a missing one
  # does not remove the event from the result
  OPTIONAL { ?event gnd:dateOfConferenceOrEvent ?eventDate . }
  OPTIONAL { ?event gnd:abbreviatedNameForTheConferenceOrEvent ?eventAcronym . }
  OPTIONAL { ?event gnd:variantNameForTheConferenceOrEvent ?eventVariant . }
  OPTIONAL { ?event gnd:placeOfConferenceOrEvent ?eventPlace . }
  OPTIONAL { ?event gnd:homepage ?eventHomepage . }

  # only available 3520 times 2021-12
  # ?event gnd:topic ?topic.
  # only available 12106 times 2021-12
  # ?event gnd:precedingConferenceOrEvent ?prec
  # only available 11929 times 2021-12
  # ?event gnd:succeedingConferenceOrEvent ?succ
}
GROUP BY ?event ?eventId

dateCardinality after import to relational database

query

-- Distribution of dateCount in event_gnd: one output row per distinct
-- dateCount value, holding the number of rows with that value,
-- largest group first.
SELECT
    count(dateCount)
FROM event_gnd
GROUP BY dateCount
ORDER BY count(dateCount) DESC

result

count(dateCount)
715987
23948
2815
141
30
5
4
3
1
1

}}