Difference between revisions of "ConferenceCorpus/statistics"
(16 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
− | = | + | = Introduction = |
− | == | + | == SQL Query for Event Series completion == |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
<source lang='sql'> | <source lang='sql'> | ||
SELECT | SELECT | ||
Line 50: | Line 13: | ||
group by series | group by series | ||
order by 6 desc | order by 6 desc | ||
− | |||
</source> | </source> | ||
− | === | + | = Event Signature Availability = |
+ | see also http://conferencecorpus.bitplan.com/query/SignatureAvailability | ||
+ | === signature completeness combined === | ||
+ | [[File:completeSignature_complete.png|600px]] | ||
+ | === signature completeness of acronym === | ||
+ | [[File:completeSignature_acronym.png|600px]] | ||
+ | === signature completeness of startDate === | ||
+ | [[File:completeSignature_startDate.png|600px]] | ||
+ | === signature completeness of ordinal === | ||
+ | [[File:completeSignature_ordinal.png|600px]] | ||
+ | === signature completeness of year === | ||
+ | [[File:completeSignature_year.png|600px]] | ||
+ | === signature completeness of title === | ||
+ | [[File:completeSignature_title.png|600px]] | ||
+ | === signature completeness of city === | ||
+ | [[File:completeSignature_city.png|600px]] | ||
+ | === signature completeness of country === | ||
+ | [[File:completeSignature_country.png|600px]] | ||
+ | |||
+ | = Ordinal histograms = | ||
+ | The Zipf diagrams show the log frequency (leaving out the first event, since the decline typically begins with the second event). | ||
+ | |||
+ | == confref== | ||
+ | Too few ordinals available for analysis. | ||
+ | |||
+ | == CEUR-WS == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_ceurws | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === ceurws ordinals === | ||
+ | [[File:ordinalhistogramm_event_ceurws.png|600px]][[File:zipf_event_ceurws.png|600px]] | ||
+ | == confref == | ||
+ | not enough data | ||
+ | |||
+ | == Crossref == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_crossref | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === crossref ordinals === | ||
+ | [[File:ordinalhistogramm_event_crossref.png|600px]][[File:zipf_event_crossref.png|600px]] | ||
+ | == dblp == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_dblp | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === dblp ordinals === | ||
+ | [[File:ordinalhistogramm_event_dblp.png|600px]][[File:zipf_event_dblp.png|600px]] | ||
+ | == GND == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_gnd | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === gnd ordinals === | ||
+ | [[File:ordinalhistogramm_event_gnd.png|600px]][[File:zipf_event_gnd.png|600px]] | ||
+ | == OpenResearch == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_or | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === or ordinals === | ||
+ | [[File:ordinalhistogramm_event_or.png|600px]][[File:zipf_event_or.png|600px]] | ||
+ | == OpenResearch == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_orbackup | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === orbackup ordinals === | ||
+ | [[File:ordinalhistogramm_event_orbackup.png|600px]][[File:zipf_event_orbackup.png|600px]] | ||
+ | == OpenResearch == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_orclone | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === orclone ordinals === | ||
+ | [[File:ordinalhistogramm_event_orclone.png|600px]][[File:zipf_event_orclone.png|600px]] | ||
+ | == OpenResearch == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_orclonebackup | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === orclonebackup ordinals === | ||
+ | [[File:ordinalhistogramm_event_orclonebackup.png|600px]][[File:zipf_event_orclonebackup.png|600px]] | ||
+ | == TIBKAT == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_tibkat | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === tibkat ordinals === | ||
+ | [[File:ordinalhistogramm_event_tibkat.png|600px]][[File:zipf_event_tibkat.png|600px]] | ||
+ | == WikiCFP == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_wikicfp | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === wikicfp ordinals === | ||
+ | [[File:ordinalhistogramm_event_wikicfp.png|600px]][[File:zipf_event_wikicfp.png|600px]] | ||
+ | == Wikidata == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT ordinal | ||
+ | FROM event_wikidata | ||
+ | where ordinal is not null | ||
+ | and ordinal < 50 | ||
+ | |||
+ | </source> | ||
+ | === wikidata ordinals === | ||
+ | [[File:ordinalhistogramm_event_wikidata.png|600px]][[File:zipf_event_wikidata.png|600px]] | ||
+ | |||
+ | = Eventseries completeness = | ||
+ | The queries here only give a rough estimate of an upper bound, since they assume that all events between min(ordinal) and max(ordinal) are available, which is often not the case. | ||
+ | == dblp == | ||
+ | === sql query === | ||
+ | <source lang='sql'> | ||
+ | SELECT | ||
+ | series, | ||
+ | min(ordinal) as minOrdinal, | ||
+ | max(ordinal) as maxOrdinal, | ||
+ | avg(ordinal) as avgOrdinal, | ||
+ | max(Ordinal)-min(Ordinal) as ordinalRange, | ||
+ | (max(Ordinal)-min(Ordinal)) /(max(Ordinal)-1.0) as completeness | ||
+ | FROM event_dblp | ||
+ | WHERE ordinal is not null | ||
+ | GROUP BY series | ||
+ | ORDER by 6 DESC | ||
+ | |||
+ | </source> | ||
+ | === event series completeness of dblp === | ||
[[File:dblp_series_completeness.png|600px]] | [[File:dblp_series_completeness.png|600px]] | ||
− | + | == OpenResearch == | |
− | == | ||
=== sql query === | === sql query === | ||
<source lang='sql'> | <source lang='sql'> | ||
SELECT | SELECT | ||
− | + | inEventSeries, | |
− | + | min(ordinal) as minOrdinal, | |
− | + | max(ordinal) as maxOrdinal, | |
− | + | avg(ordinal) as avgOrdinal, | |
− | + | max(Ordinal)-min(Ordinal) as ordinalRange, | |
− | + | (max(Ordinal)-min(Ordinal)) /(max(Ordinal)-1.0) as completeness | |
FROM event_orclone | FROM event_orclone | ||
− | + | WHERE ordinal is not null | |
− | + | GROUP BY inEventSeries | |
− | + | ORDER by 6 DESC | |
− | + | ||
</source> | </source> | ||
− | === | + | === event series completeness of orclone === |
[[File:orclone_series_completeness.png|600px]] | [[File:orclone_series_completeness.png|600px]] | ||
− | + | == WikiCFP == | |
− | == | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
=== sql query === | === sql query === | ||
<source lang='sql'> | <source lang='sql'> | ||
SELECT | SELECT | ||
− | + | seriesId, | |
− | + | min(ordinal) as minOrdinal, | |
− | + | max(ordinal) as maxOrdinal, | |
− | + | avg(ordinal) as avgOrdinal, | |
− | + | max(Ordinal)-min(Ordinal) as ordinalRange, | |
− | + | (max(Ordinal)-min(Ordinal)) /(max(Ordinal)-1.0) as completeness | |
FROM event_wikicfp | FROM event_wikicfp | ||
− | + | WHERE ordinal is not null | |
− | + | GROUP BY seriesId | |
− | + | ORDER by 6 DESC | |
− | + | ||
</source> | </source> | ||
− | === | + | === event series completeness of wikicfp === |
[[File:wikicfp_series_completeness.png|600px]] | [[File:wikicfp_series_completeness.png|600px]] | ||
− | + | == Wikidata == | |
− | == | ||
=== sql query === | === sql query === | ||
<source lang='sql'> | <source lang='sql'> | ||
SELECT | SELECT | ||
− | + | eventInSeriesId, | |
− | + | min(ordinal) as minOrdinal, | |
− | + | max(ordinal) as maxOrdinal, | |
− | + | avg(ordinal) as avgOrdinal, | |
− | + | max(Ordinal)-min(Ordinal) as ordinalRange, | |
− | + | (max(Ordinal)-min(Ordinal)) /(max(Ordinal)-1.0) as completeness | |
FROM event_wikidata | FROM event_wikidata | ||
− | + | WHERE ordinal is not null | |
− | + | GROUP BY eventInSeriesId | |
− | + | ORDER by 6 DESC | |
− | + | ||
</source> | </source> | ||
− | === | + | === event series completeness of wikidata === |
[[File:wikidata_series_completeness.png|600px]] | [[File:wikidata_series_completeness.png|600px]] | ||
+ | |||
+ | = Eventseries completeness by acronym = | ||
+ | Not all data sources have an event series reference per event. As an alternative approach we analyzed the acronyms to group event series. Here we also | ||
+ | calculated the number of distinct ordinals. | ||
+ | |||
+ | The results will in some cases mix data from event series with ambiguous acronyms and therefore give a more positive impression again. So again this is an estimate for an upper bound of the event series completion. | ||
+ | == Python Code for Eventseries completeness analysis == | ||
+ | <source lang='python'> | ||
+ | def testSeriesCompletenessHistogrammByAcronym(self): | ||
+ | ''' | ||
+ | acronym based histogramms | ||
+ | ''' | ||
+ | def histogrammSettings(plot): | ||
+ | ''' | ||
+ | optional callback to add more data to histogramm | ||
+ | ''' | ||
+ | pass | ||
+ | |||
+ | debug = False | ||
+ | self.figureList=FigureList(caption="event Series completeness by acronym",figureListLabel="eventcompa",cols=3) | ||
+ | for dataSource in DataSource.sources.values(): | ||
+ | if dataSource.name in ["acm","confref"]: | ||
+ | continue | ||
+ | print(dataSource) | ||
+ | histOutputFileName=f"eventSeriesCompletionByAcronymHistogramm_{dataSource.name}.png" | ||
+ | sqlQuery = """SELECT acronym, ordinal | ||
+ | FROM %s | ||
+ | """ % (dataSource.tableName) | ||
+ | sqlDB = EventStorage.getSqlDB() | ||
+ | lod = sqlDB.query(sqlQuery) | ||
+ | series = {} | ||
+ | acronymRegexp = r'(?P<acronym>[A-Z]+)\s*[0-9]+' | ||
+ | for d in lod: | ||
+ | acronym = d.get('acronym') | ||
+ | if acronym: | ||
+ | match = re.fullmatch(acronymRegexp, acronym) | ||
+ | if match is None: | ||
+ | continue | ||
+ | seriesAcronym = match.group("acronym") | ||
+ | if isinstance(seriesAcronym, str): | ||
+ | if seriesAcronym in series: | ||
+ | series[seriesAcronym].append(d) | ||
+ | else: | ||
+ | series[seriesAcronym] = [d] | ||
+ | aggLod = [] | ||
+ | for series, eventRecords in series.items(): | ||
+ | # set operation | ||
+ | ordinals: List[int] = [int(r.get("ordinal")) | ||
+ | for r in eventRecords | ||
+ | if r.get("ordinal") | ||
+ | and ((isinstance(r.get("ordinal"), str) and r.get("ordinal").isnumeric()) or isinstance(r.get("ordinal"), int))] | ||
+ | if len(ordinals) == 0: | ||
+ | continue | ||
+ | minOrd = min(ordinals) | ||
+ | maxOrd = max(ordinals) | ||
+ | numberOfDistinctOrds = len(set(ordinals)) | ||
+ | # count set content | ||
+ | res = { | ||
+ | "series": series, | ||
+ | "minOrdinal": minOrd, | ||
+ | "maxOrdinal": maxOrd, | ||
+ | "avgOrdinal": mean(ordinals), | ||
+ | "span": maxOrd-minOrd, | ||
+ | "available": numberOfDistinctOrds, | ||
+ | "completeness": numberOfDistinctOrds / maxOrd if maxOrd>1 else 1.0 | ||
+ | } | ||
+ | aggLod.append(res) | ||
+ | figure=Figure(dataSource.title,caption=f"event series completeness of {dataSource.name}",figLabel=f"esca-{dataSource.name}",sqlQuery=None,fileNames=[histOutputFileName]) | ||
+ | self.figureList.add(figure) | ||
+ | |||
+ | values = [round(record["completeness"], 2) for record in aggLod if isinstance(record["completeness"], float)] | ||
+ | values.sort() | ||
+ | threshold =values[len(values)//2] | ||
+ | h = Histogramm(x=values) | ||
+ | hps = PlotSettings(outputFile=f"{self.histroot}/{histOutputFileName}", callback=histogrammSettings) | ||
+ | h.show(xLabel='completeness', | ||
+ | yLabel='distribution', | ||
+ | title=f'{figure.title}', | ||
+ | alpha=self.alpha, | ||
+ | density=True, | ||
+ | ps=hps, | ||
+ | bins=10, | ||
+ | vlineAt=threshold) | ||
+ | |||
+ | print(dataSource, len(values), "→", len(values) // 2) | ||
+ | self.figureList.printAllMarkups() | ||
+ | </source> | ||
+ | == CEUR-WS == | ||
+ | === event series completeness of ceurws === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_ceurws.png|600px]] | ||
+ | == confref == | ||
+ | == Crossref == | ||
+ | === event series completeness of crossref === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_crossref.png|600px]] | ||
+ | == dblp == | ||
+ | === event series completeness of dblp === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_dblp.png|600px]] | ||
+ | == GND == | ||
+ | === event series completeness of gnd === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_gnd.png|600px]] | ||
+ | == OpenResearch == | ||
+ | === event series completeness of or === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_or.png|600px]] | ||
+ | == OpenResearch == | ||
+ | === event series completeness of orbackup === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_orbackup.png|600px]] | ||
+ | == OpenResearch == | ||
+ | === event series completeness of orclone === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_orclone.png|600px]] | ||
+ | == OpenResearch == | ||
+ | === event series completeness of orclonebackup === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_orclonebackup.png|600px]] | ||
+ | == TIBKAT == | ||
+ | === event series completeness of tibkat === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_tibkat.png|600px]] | ||
+ | == WikiCFP == | ||
+ | === event series completeness of wikicfp === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_wikicfp.png|600px]] | ||
+ | == Wikidata == | ||
+ | === event series completeness of wikidata === | ||
+ | [[File:eventSeriesCompletionByAcronymHistogramm_wikidata.png|600px]] |
Latest revision as of 10:36, 29 May 2022
Introduction
SQL Query for Event Series completion
-- Ordinal statistics per dblp event series, sorted by column 6 (completeness) in descending order.
-- NOTE(review): the denominator MAX(ordinal) - 1.0 is 0 for a series whose highest ordinal is 1 - confirm how that row should be treated.
SELECT
  series,
  MIN(ordinal) AS minOrdinal,
  MAX(ordinal) AS maxOrdinal,
  AVG(ordinal) AS avgOrdinal,
  MAX(ordinal) - MIN(ordinal) AS available,
  (MAX(ordinal) - MIN(ordinal)) / (MAX(ordinal) - 1.0) AS completeness
FROM event_dblp
WHERE ordinal IS NOT NULL
GROUP BY series
ORDER BY 6 DESC
Event Signature Availability
see also http://conferencecorpus.bitplan.com/query/SignatureAvailability
signature completeness combined
signature completeness of acronym
signature completeness of startDate
signature completeness of ordinal
signature completeness of year
signature completeness of title
signature completeness of city
signature completeness of country
[[File:completeSignature_country.png|600px]]
Ordinal histograms
The Zipf diagrams show the log frequency (leaving out the first event, since the decline typically begins with the second event).
confref
Too few ordinals available for analysis.
CEUR-WS
sql query
-- All non-null ordinals below 50 from the event_ceurws table.
SELECT ordinal
FROM event_ceurws
WHERE ordinal IS NOT NULL
  AND ordinal < 50
ceurws ordinals
confref
not enough data
Crossref
sql query
-- All non-null ordinals below 50 from the event_crossref table.
SELECT ordinal
FROM event_crossref
WHERE ordinal IS NOT NULL
  AND ordinal < 50
crossref ordinals
dblp
sql query
-- All non-null ordinals below 50 from the event_dblp table.
SELECT ordinal
FROM event_dblp
WHERE ordinal IS NOT NULL
  AND ordinal < 50
dblp ordinals
GND
sql query
-- All non-null ordinals below 50 from the event_gnd table.
SELECT ordinal
FROM event_gnd
WHERE ordinal IS NOT NULL
  AND ordinal < 50
gnd ordinals
OpenResearch
sql query
-- All non-null ordinals below 50 from the event_or table.
SELECT ordinal
FROM event_or
WHERE ordinal IS NOT NULL
  AND ordinal < 50
or ordinals
OpenResearch
sql query
-- All non-null ordinals below 50 from the event_orbackup table.
SELECT ordinal
FROM event_orbackup
WHERE ordinal IS NOT NULL
  AND ordinal < 50
orbackup ordinals
OpenResearch
sql query
-- All non-null ordinals below 50 from the event_orclone table.
SELECT ordinal
FROM event_orclone
WHERE ordinal IS NOT NULL
  AND ordinal < 50
orclone ordinals
OpenResearch
sql query
-- All non-null ordinals below 50 from the event_orclonebackup table.
SELECT ordinal
FROM event_orclonebackup
WHERE ordinal IS NOT NULL
  AND ordinal < 50
orclonebackup ordinals
TIBKAT
sql query
-- All non-null ordinals below 50 from the event_tibkat table.
SELECT ordinal
FROM event_tibkat
WHERE ordinal IS NOT NULL
  AND ordinal < 50
tibkat ordinals
WikiCFP
sql query
-- All non-null ordinals below 50 from the event_wikicfp table.
SELECT ordinal
FROM event_wikicfp
WHERE ordinal IS NOT NULL
  AND ordinal < 50
wikicfp ordinals
Wikidata
sql query
-- All non-null ordinals below 50 from the event_wikidata table.
SELECT ordinal
FROM event_wikidata
WHERE ordinal IS NOT NULL
  AND ordinal < 50
wikidata ordinals
Eventseries completeness
The queries here only give a rough estimate of an upper bound, since they assume that all events between min(ordinal) and max(ordinal) are available, which is often not the case.
dblp
sql query
-- Ordinal statistics per dblp event series (grouped by series), sorted by column 6 (completeness) in descending order.
-- NOTE(review): the denominator MAX(ordinal) - 1.0 is 0 for a series whose highest ordinal is 1 - confirm how that row should be treated.
SELECT
  series,
  MIN(ordinal) AS minOrdinal,
  MAX(ordinal) AS maxOrdinal,
  AVG(ordinal) AS avgOrdinal,
  MAX(ordinal) - MIN(ordinal) AS ordinalRange,
  (MAX(ordinal) - MIN(ordinal)) / (MAX(ordinal) - 1.0) AS completeness
FROM event_dblp
WHERE ordinal IS NOT NULL
GROUP BY series
ORDER BY 6 DESC
event series completeness of dblp
OpenResearch
sql query
-- Ordinal statistics per event series in event_orclone (grouped by inEventSeries), sorted by column 6 (completeness) in descending order.
-- NOTE(review): the denominator MAX(ordinal) - 1.0 is 0 for a series whose highest ordinal is 1 - confirm how that row should be treated.
SELECT
  inEventSeries,
  MIN(ordinal) AS minOrdinal,
  MAX(ordinal) AS maxOrdinal,
  AVG(ordinal) AS avgOrdinal,
  MAX(ordinal) - MIN(ordinal) AS ordinalRange,
  (MAX(ordinal) - MIN(ordinal)) / (MAX(ordinal) - 1.0) AS completeness
FROM event_orclone
WHERE ordinal IS NOT NULL
GROUP BY inEventSeries
ORDER BY 6 DESC
event series completeness of orclone
WikiCFP
sql query
-- Ordinal statistics per event series in event_wikicfp (grouped by seriesId), sorted by column 6 (completeness) in descending order.
-- NOTE(review): the denominator MAX(ordinal) - 1.0 is 0 for a series whose highest ordinal is 1 - confirm how that row should be treated.
SELECT
  seriesId,
  MIN(ordinal) AS minOrdinal,
  MAX(ordinal) AS maxOrdinal,
  AVG(ordinal) AS avgOrdinal,
  MAX(ordinal) - MIN(ordinal) AS ordinalRange,
  (MAX(ordinal) - MIN(ordinal)) / (MAX(ordinal) - 1.0) AS completeness
FROM event_wikicfp
WHERE ordinal IS NOT NULL
GROUP BY seriesId
ORDER BY 6 DESC
event series completeness of wikicfp
Wikidata
sql query
-- Ordinal statistics per event series in event_wikidata (grouped by eventInSeriesId), sorted by column 6 (completeness) in descending order.
-- NOTE(review): the denominator MAX(ordinal) - 1.0 is 0 for a series whose highest ordinal is 1 - confirm how that row should be treated.
SELECT
  eventInSeriesId,
  MIN(ordinal) AS minOrdinal,
  MAX(ordinal) AS maxOrdinal,
  AVG(ordinal) AS avgOrdinal,
  MAX(ordinal) - MIN(ordinal) AS ordinalRange,
  (MAX(ordinal) - MIN(ordinal)) / (MAX(ordinal) - 1.0) AS completeness
FROM event_wikidata
WHERE ordinal IS NOT NULL
GROUP BY eventInSeriesId
ORDER BY 6 DESC
event series completeness of wikidata
Eventseries completeness by acronym
Not all data sources have an event series reference per event. As an alternative approach we analyzed the acronyms to group event series. Here we also calculated the number of distinct ordinals.
The results will in some cases mix data from event series with ambiguous acronyms and therefore give a more positive impression again. So again this is an estimate for an upper bound of the event series completion.
Python Code for Eventseries completeness analysis
def testSeriesCompletenessHistogrammByAcronym(self):
    '''
    Create one event series completeness histogram per data source,
    grouping the events into series by the letter part of their acronym.

    For every data source except "acm" and "confref" the acronym and
    ordinal columns are read from the data source's table. Acronyms that
    consist of upper case letters followed by digits (e.g. "ABC 2020")
    are grouped by their letter part. For each group the completeness is
    the number of distinct ordinals divided by the highest ordinal
    (1.0 if the highest ordinal is not greater than 1).

    The rounded completeness values are handed to a Histogramm, with the
    upper median passed as vlineAt, and a Figure is added to
    self.figureList. Data sources without any usable series are skipped
    instead of aborting the whole run. Finally the markup of all
    collected figures is printed.
    '''
    def histogrammSettings(plot):
        '''
        optional callback to add more data to histogramm
        '''
        pass

    # compiled once: the pattern is identical for every record of every data source
    acronymPattern = re.compile(r'(?P<acronym>[A-Z]+)\s*[0-9]+')
    self.figureList=FigureList(caption="event Series completeness by acronym",figureListLabel="eventcompa",cols=3)
    for dataSource in DataSource.sources.values():
        if dataSource.name in ["acm","confref"]:
            continue
        print(dataSource)
        histOutputFileName=f"eventSeriesCompletionByAcronymHistogramm_{dataSource.name}.png"
        # the table name cannot be bound as a query parameter, hence the string formatting
        sqlQuery = """SELECT acronym, ordinal
            FROM %s
            """ % (dataSource.tableName)
        sqlDB = EventStorage.getSqlDB()
        lod = sqlDB.query(sqlQuery)

        # group the event records by the letter part of their acronym
        recordsBySeries = {}
        for d in lod:
            acronym = d.get('acronym')
            if not acronym:
                continue
            match = acronymPattern.fullmatch(acronym)
            if match is None:
                continue
            recordsBySeries.setdefault(match.group("acronym"), []).append(d)

        aggLod = []
        for seriesAcronym, eventRecords in recordsBySeries.items():
            ordinals = []
            for record in eventRecords:
                ordinal = record.get("ordinal")
                if not ordinal:
                    continue
                # isdecimal() rather than isnumeric(): strings such as "²" or "½"
                # count as numeric but int() raises a ValueError for them
                if isinstance(ordinal, int) or (isinstance(ordinal, str) and ordinal.isdecimal()):
                    ordinals.append(int(ordinal))
            if not ordinals:
                continue
            minOrd = min(ordinals)
            maxOrd = max(ordinals)
            # duplicates of the same ordinal must only count once
            numberOfDistinctOrds = len(set(ordinals))
            aggLod.append({
                "series": seriesAcronym,
                "minOrdinal": minOrd,
                "maxOrdinal": maxOrd,
                "avgOrdinal": mean(ordinals),
                "span": maxOrd-minOrd,
                "available": numberOfDistinctOrds,
                "completeness": numberOfDistinctOrds / maxOrd if maxOrd>1 else 1.0
            })

        values = sorted(round(record["completeness"], 2) for record in aggLod)
        if not values:
            # without any series the median lookup below would raise an IndexError
            print(f"{dataSource.name}: no series with ordinals found - skipping histogram")
            continue

        figure=Figure(dataSource.title,caption=f"event series completeness of {dataSource.name}",figLabel=f"esca-{dataSource.name}",sqlQuery=None,fileNames=[histOutputFileName])
        self.figureList.add(figure)

        # upper median of the sorted completeness values
        threshold = values[len(values)//2]
        h = Histogramm(x=values)
        hps = PlotSettings(outputFile=f"{self.histroot}/{histOutputFileName}", callback=histogrammSettings)
        h.show(xLabel='completeness',
               yLabel='distribution',
               title=f'{figure.title}',
               alpha=self.alpha,
               density=True,
               ps=hps,
               bins=10,
               vlineAt=threshold)
        print(dataSource, len(values), "→", len(values) // 2)
    self.figureList.printAllMarkups()