Difference between revisions of "Wikidata Import 2023-05-10"

From BITPlan Wiki
Jump to navigation Jump to search
(Created page with "{{PageSequence|prev=Wikidata Import 2023-05-05|next=Wikidata Import 2023-05-10|category=Wikidata|categoryIcon=cloud-download}} = Download = == Download Options == https://dump...")
 
Line 3: Line 3:
 
== Download Options ==
 
== Download Options ==
 
https://dumps.wikimedia.org/wikidatawiki/entities
 
https://dumps.wikimedia.org/wikidatawiki/entities
 +
<pre>
 +
dcatap.rdf                                        06-May-2023 02:08              84753
 +
latest-all.json.bz2                                03-May-2023 21:06        81640390615
 +
latest-all.json.gz                                10-May-2023 13:49        124070020402
 +
latest-all.nt.bz2                                  04-May-2023 16:07        158382342866
 +
latest-all.nt.gz                                  03-May-2023 22:23        205171447838
 +
latest-all.ttl.bz2                                04-May-2023 03:24        101606862077
 +
latest-all.ttl.gz                                  03-May-2023 17:08        124093922794
 +
latest-lexemes.json.bz2                            10-May-2023 03:57          306901617
 +
latest-lexemes.json.gz                            10-May-2023 03:55          418171562
 +
latest-lexemes.nt.bz2                              05-May-2023 23:36          793805750
 +
latest-lexemes.nt.gz                              05-May-2023 23:30          1035632811
 +
latest-lexemes.ttl.bz2                            05-May-2023 23:31          450346788
 +
latest-lexemes.ttl.gz                              05-May-2023 23:27          559471601
 +
latest-truthy.nt.bz2                              06-May-2023 01:38        36065028020
 +
latest-truthy.nt.gz                                05-May-2023 22:20        59829390689
 +
</pre>
 +
== download script ==
 +
<source lang='bash'>
 +
cat download.sh
 +
#/bin/bash
 +
# WF 2023-04-26
 +
# download wikidata dumps
 +
baseurl=https://dumps.wikimedia.org/wikidatawiki/entities/
 +
for file in latest-all latest-lexemes
 +
do
 +
  for ext in ttl.bz2
 +
  do
 +
    url=$baseurl/$file.$ext
 +
    log=$file-$ext.log
 +
    nohup wget $url >> $log&
 +
  done
 +
done
 +
 +
</source>
 +
 +
= Munging ~29 h =
 +
== Preparation ==
 +
see  [[Wikidata_Import_2023-04-26#Preparation_.7E20-30_min]]
 +
 +
== calling munge.sh ==
 +
=== domunge.sh ===
 +
<source lang='bash'>
 +
#!/bin/bash
 +
# WF 2023-04-29
 +
# munge the full wikidata turtle dump
 +
# the pipeline itself runs in the foreground; to run it in the background
 +
# start this script accordingly, e.g. nohup ./domunge.sh &
 +
# report a failing bzcat (missing or corrupt dump) instead of only munge.sh's status
 +
set -o pipefail
 +
bzcat latest-all.ttl.bz2 | service/munge.sh -f - -d data -- --skolemize
 +
</source>
 +
=== start domunge.sh and show nohup.out log ===

Revision as of 19:14, 10 May 2023

Download

Download Options

https://dumps.wikimedia.org/wikidatawiki/entities

dcatap.rdf                                         06-May-2023 02:08               84753
latest-all.json.bz2                                03-May-2023 21:06         81640390615
latest-all.json.gz                                 10-May-2023 13:49        124070020402
latest-all.nt.bz2                                  04-May-2023 16:07        158382342866
latest-all.nt.gz                                   03-May-2023 22:23        205171447838
latest-all.ttl.bz2                                 04-May-2023 03:24        101606862077
latest-all.ttl.gz                                  03-May-2023 17:08        124093922794
latest-lexemes.json.bz2                            10-May-2023 03:57           306901617
latest-lexemes.json.gz                             10-May-2023 03:55           418171562
latest-lexemes.nt.bz2                              05-May-2023 23:36           793805750
latest-lexemes.nt.gz                               05-May-2023 23:30          1035632811
latest-lexemes.ttl.bz2                             05-May-2023 23:31           450346788
latest-lexemes.ttl.gz                              05-May-2023 23:27           559471601
latest-truthy.nt.bz2                               06-May-2023 01:38         36065028020
latest-truthy.nt.gz                                05-May-2023 22:20         59829390689

download script

cat download.sh 
#!/bin/bash
# WF 2023-04-26
# download wikidata dumps
# starts one background wget per dump file and appends its output to <file>-<ext>.log
# no trailing slash here: the separator is added when the url is composed
baseurl=https://dumps.wikimedia.org/wikidatawiki/entities
for file in latest-all latest-lexemes
do
  for ext in ttl.bz2
  do
    url="$baseurl/$file.$ext"
    log="$file-$ext.log"
    # wget reports its progress on stderr, so send that to the log as well
    nohup wget "$url" >> "$log" 2>&1 &
  done
done

Munging ~29 h

Preparation

see Wikidata_Import_2023-04-26#Preparation_.7E20-30_min

calling munge.sh

domunge.sh

#!/bin/bash
# WF 2023-04-29
# munge the full wikidata turtle dump
# the pipeline itself runs in the foreground; to run it in the background
# start this script accordingly, e.g. nohup ./domunge.sh &
# report a failing bzcat (missing or corrupt dump) instead of only munge.sh's status
set -o pipefail
bzcat latest-all.ttl.bz2 | service/munge.sh -f - -d data -- --skolemize

start domunge.sh and show nohup.out log