Difference between revisions of "Wikidata Import 2025-11-23"

From BITPlan Wiki
Jump to navigation Jump to search
Line 24: Line 24:
 
* https://www.wikidata.org/wiki/Q118954995
 
* https://www.wikidata.org/wiki/Q118954995
 
== Setup ==
 
== Setup ==
=== Install prerequisite libraries ===
+
 
<source lang='bash'>
+
== setup_milleniumdb.sh ==
sudo apt update && sudo apt install git g++ cmake libssl-dev libncurses-dev less python3 python3-venv libicu-dev
 
</source>
 
=== setup_milleniumdb.sh ===
 
 
<source lang='bash'>
 
<source lang='bash'>
 
#!/bin/bash
 
#!/bin/bash
 
+
# MillenniumDB installation script
# Exit immediately if a command exits with a non-zero status.
+
# Created by Gemini 3 Pro Preview
 +
# based on https://github.com/MillenniumDB/MillenniumDB/wiki/Setup
 +
# initiated by  WF 2025-11-23
 +
# for https://wiki.bitplan.com/index.php/Wikidata_Import_2025-11-23
 +
# Links
 +
#
 +
# https://github.com/MillenniumDB/MillenniumDB
 +
# https://github.com/MillenniumDB/MillenniumDB/wiki
 +
# https://www.wikidata.org/wiki/Wikidata:Scaling_Wikidata/Benchmarking/MilleniumDB
 +
# https://www.wikidata.org/wiki/Q118954995
 +
#
 
set -e
 
set -e
 
# --- Configuration ---
 
# You can change the base directory where 'source/cpp' will be created.
 
# Defaults to the current user's home directory.
 
 
BASE_DIR="${HOME}"
 
BASE_DIR="${HOME}"
 
# Number of cores to use for compilation.
 
# Defaults to the number of available processing units.
 
# You can override this by passing a number as the first argument to the script.
 
# e.g., ./setup_millenniumdb.sh 8
 
 
N_CORES=${1:-$(nproc)}
 
N_CORES=${1:-$(nproc)}
  
# --- Script Start ---
+
function install_dependencies() {
echo "Starting MillenniumDB setup..."
+
    echo "Installing dependencies..."
echo "Using base directory: ${BASE_DIR}"
+
     sudo apt update
echo "----------------------------------------"
+
     sudo apt install -y git g++ cmake libssl-dev libncurses-dev less python3 python3-venv libicu-dev
 
+
}
# 1. Create directory structure and clone the repository
 
echo "Step 1: Cloning MillenniumDB repository..."
 
mkdir -p "${BASE_DIR}/source/cpp"
 
cd "${BASE_DIR}/source/cpp"
 
 
 
if [ -d "MillenniumDB" ]; then
 
     echo "MillenniumDB directory already exists. Skipping clone."
 
else
 
     git clone https://github.com/MillenniumDB/MillenniumDB.git
 
fi
 
 
 
cd MillenniumDB
 
export MDB_HOME=$(pwd)
 
echo "Project path (MDB_HOME) set to: ${MDB_HOME}"
 
echo "----------------------------------------"
 
 
 
 
 
# 2. Download and install the required Boost version
 
echo "Step 2: Installing Boost 1.82.0 locally for the project..."
 
 
 
# Variables for Boost
 
BOOST_VERSION_URL="1.82.0"
 
BOOST_VERSION_UNDERSCORE="1_82_0"
 
BOOST_DIR_NAME="boost_${BOOST_VERSION_UNDERSCORE}"
 
BOOST_TARBALL="${BOOST_DIR_NAME}.tar.gz"
 
BOOST_URL="https://archives.boost.io/release/${BOOST_VERSION_URL}/source/${BOOST_TARBALL}"
 
BOOST_INSTALL_PATH="${MDB_HOME}/third_party/boost_1_82/include"
 
 
 
# Download in the MDB_HOME directory
 
cd "${MDB_HOME}"
 
 
 
echo "Downloading Boost from ${BOOST_URL}..."
 
wget -q --show-progress -O "${BOOST_TARBALL}" "${BOOST_URL}"
 
 
 
echo "Extracting ${BOOST_TARBALL}..."
 
tar -xf "${BOOST_TARBALL}"
 
 
 
echo "Creating target directory at ${BOOST_INSTALL_PATH}"
 
mkdir -p "${BOOST_INSTALL_PATH}"
 
 
 
echo "Moving Boost headers into place..."
 
mv "${BOOST_DIR_NAME}/boost" "${BOOST_INSTALL_PATH}/"
 
 
 
echo "Cleaning up downloaded files..."
 
rm -r "${BOOST_TARBALL}" "${BOOST_DIR_NAME}"
 
echo "Boost installation complete."
 
echo "----------------------------------------"
 
 
 
  
# 3. Build the project using CMake
+
function clone_repo() {
echo "Step 3: Building MillenniumDB with CMake..."
+
    echo "Cloning MillenniumDB repository..."
echo "This will use ${N_CORES} core(s). This may take several minutes."
+
    mkdir -p "${BASE_DIR}/source/cpp"
 +
    cd "${BASE_DIR}/source/cpp"
 +
    if [ -d "MillenniumDB" ]; then
 +
        echo "Repository already cloned. Skipping clone."
 +
    else
 +
        git clone https://github.com/MillenniumDB/MillenniumDB.git
 +
    fi
 +
    cd MillenniumDB
 +
    export MDB_HOME=$(pwd)
 +
    echo "MDB_HOME set to ${MDB_HOME}"
 +
}
  
# Go back to the repository root
+
function install_boost() {
cd "${MDB_HOME}"
+
    echo "Installing Boost 1.82.0 locally..."
 +
    cd "${MDB_HOME}"
 +
    BOOST_INSTALL_PATH="${MDB_HOME}/third_party/boost_1_82/include"
 +
    if [ -d "${BOOST_INSTALL_PATH}/boost" ]; then
 +
        echo "Boost headers already installed. Skipping."
 +
    else
 +
        BOOST_VERSION_URL="1.82.0"
 +
        BOOST_VERSION_UNDERSCORE="1_82_0"
 +
        BOOST_DIR_NAME="boost_${BOOST_VERSION_UNDERSCORE}"
 +
        BOOST_TARBALL="${BOOST_DIR_NAME}.tar.gz"
 +
        BOOST_URL="https://archives.boost.io/release/${BOOST_VERSION_URL}/source/${BOOST_TARBALL}"
  
cmake -B build/Release -D CMAKE_BUILD_TYPE=Release
+
        wget -q --show-progress -O "${BOOST_TARBALL}" "${BOOST_URL}"
cmake --build build/Release/ -j "${N_CORES}"
+
        tar -xf "${BOOST_TARBALL}"
 +
        mkdir -p "${BOOST_INSTALL_PATH}"
 +
        mv "${BOOST_DIR_NAME}/boost" "${BOOST_INSTALL_PATH}/"
 +
        rm -r "${BOOST_TARBALL}" "${BOOST_DIR_NAME}"
 +
    fi
 +
}
  
echo "Build complete."
+
function build_project() {
echo "----------------------------------------"
+
    echo "Building MillenniumDB using ${N_CORES} cores..."
 +
    cd "${MDB_HOME}"
 +
    cmake -B build/Release -D CMAKE_BUILD_TYPE=Release
 +
    cmake --build build/Release -j "${N_CORES}"
 +
    echo "Build complete."
 +
}
  
 +
function verify_build() {
 +
    echo "Verifying build by running help command..."
 +
    "${MDB_HOME}/build/Release/bin/mdb" help
 +
}
  
# 4. Verify the build
+
# Main script execution flow
echo "Step 4: Verifying the build by running the help command..."
+
install_dependencies
build/Release/bin/mdb help
+
clone_repo
echo "----------------------------------------"
+
install_boost
 +
build_project
 +
verify_build
  
echo ""
+
echo "MillenniumDB setup and build completed successfully!"
echo "✅ MillenniumDB setup and build completed successfully!"
+
echo "To use 'mdb', export MDB_HOME:"
echo ""
 
echo "IMPORTANT:"
 
echo "To use 'mdb' from the command line in this terminal session, you must export MDB_HOME:"
 
 
echo "  export MDB_HOME=${MDB_HOME}"
 
echo "  export MDB_HOME=${MDB_HOME}"
echo ""
+
echo "Add this line to your shell startup file for permanence."
echo "To make this setting permanent, add that line to your shell's startup file (e.g., ~/.bashrc or ~/.zshrc)."
 
 
</source>
 
</source>

Revision as of 14:30, 23 November 2025

Import

Import
edit
state  ?
url  https://wiki.bitplan.com/index.php/Wikidata_Import_2025-11-23
target  Millenium DB
start  2025-11-23
end  
days  7
os  Ubuntu 22.04.3 LTS
cpu  Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz (16 cores)
ram  512
triples  
comment  Try replicating https://www.wikidata.org/wiki/Wikidata:Scaling_Wikidata/Benchmarking/MilleniumDB

Millenium DB

Links

Setup

setup_milleniumdb.sh

#!/bin/bash
# MillenniumDB installation script
# Created by Gemini 3 Pro Preview
# based on https://github.com/MillenniumDB/MillenniumDB/wiki/Setup
# initiated by  WF 2025-11-23
# for https://wiki.bitplan.com/index.php/Wikidata_Import_2025-11-23
# Links
#
# https://github.com/MillenniumDB/MillenniumDB
# https://github.com/MillenniumDB/MillenniumDB/wiki
# https://www.wikidata.org/wiki/Wikidata:Scaling_Wikidata/Benchmarking/MilleniumDB
# https://www.wikidata.org/wiki/Q118954995
# 
# Abort as soon as any command exits with a non-zero status.
set -e

# --- Configuration ---
# Base directory under which 'source/cpp' is created for the checkout.
BASE_DIR="${HOME}"

# Number of parallel jobs passed to 'cmake --build -j'.
# The first script argument overrides it (e.g. ./setup_milleniumdb.sh 8);
# otherwise the count reported by nproc is used, falling back to 1 when nproc
# is not available.
N_CORES="${1:-$(nproc 2>/dev/null || echo 1)}"

# Validate up front so that a mistyped argument is reported immediately
# instead of surfacing only at the build step, after the clone and the Boost
# download have already run.
if ! [[ "${N_CORES}" =~ ^[1-9][0-9]*$ ]]; then
    echo "Invalid core count '${N_CORES}': expected a positive integer." >&2
    exit 1
fi

# Install the system packages needed to fetch and build MillenniumDB.
# Globals:   none
# Arguments: none
# Outputs:   a progress message on stdout, plus whatever apt-get prints
# Returns:   non-zero if either apt-get invocation fails
function install_dependencies() {
    # wget is listed explicitly: install_boost downloads the Boost tarball
    # with it, and it is not guaranteed to be present on minimal installs.
    local -a packages=(
        git g++ cmake libssl-dev libncurses-dev less
        python3 python3-venv libicu-dev wget
    )

    echo "Installing dependencies..."
    # apt-get instead of apt: the apt man page warns that apt's command line
    # may change between versions and recommends apt-get for scripts.
    sudo apt-get update || return 1
    sudo apt-get install -y "${packages[@]}" || return 1
}

# Clone the MillenniumDB sources unless a checkout directory already exists,
# then publish the checkout location.
# Globals:   BASE_DIR (read), MDB_HOME (written and exported)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   non-zero if a directory cannot be created/entered or the clone
#            fails
# Note:      leaves the working directory at the checkout root.
function clone_repo() {
    local parent_dir="${BASE_DIR}/source/cpp"
    local repo_url="https://github.com/MillenniumDB/MillenniumDB.git"

    echo "Cloning MillenniumDB repository..."
    mkdir -p "${parent_dir}" || return 1
    cd "${parent_dir}" || return 1
    if [[ -d "MillenniumDB" ]]; then
        echo "Repository already cloned. Skipping clone."
    else
        git clone "${repo_url}" || return 1
    fi
    cd MillenniumDB || return 1

    # Assign first, export second: 'export VAR=$(cmd)' reports the status of
    # export itself and would hide a failing pwd.
    MDB_HOME=$(pwd) || return 1
    export MDB_HOME
    echo "MDB_HOME set to ${MDB_HOME}"
}

# Download the Boost 1.82.0 source tarball and move its 'boost' header
# directory to ${MDB_HOME}/third_party/boost_1_82/include.
# Does nothing when that header directory already exists.
# Globals:   MDB_HOME (read)
# Arguments: none
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   0 on success or when already installed; non-zero if MDB_HOME is
#            empty or a download/extract/move step fails
# Note:      leaves the working directory at ${MDB_HOME}.
function install_boost() {
    local version_dotted="1.82.0"
    local version_underscored="1_82_0"
    local dir_name="boost_${version_underscored}"
    local tarball="${dir_name}.tar.gz"
    local url="https://archives.boost.io/release/${version_dotted}/source/${tarball}"
    local install_path

    echo "Installing Boost 1.82.0 locally..."

    # With an empty MDB_HOME the install path below would resolve to
    # /third_party/..., so refuse to continue.
    if [[ -z "${MDB_HOME:-}" ]]; then
        echo "MDB_HOME is not set; run clone_repo before install_boost." >&2
        return 1
    fi
    cd "${MDB_HOME}" || return 1

    install_path="${MDB_HOME}/third_party/boost_1_82/include"
    if [[ -d "${install_path}/boost" ]]; then
        echo "Boost headers already installed. Skipping."
        return 0
    fi

    # wget -O creates the output file even when the transfer fails; remove it
    # so no truncated tarball is left behind in the checkout.
    if ! wget -q --show-progress -O "${tarball}" "${url}"; then
        echo "Failed to download ${url}" >&2
        rm -f -- "${tarball}"
        return 1
    fi

    # Likewise drop a partially extracted tree if tar fails.
    if ! tar -xf "${tarball}"; then
        echo "Failed to extract ${tarball}" >&2
        rm -rf -- "${tarball}" "${dir_name}"
        return 1
    fi

    mkdir -p "${install_path}" || return 1
    mv "${dir_name}/boost" "${install_path}/" || return 1

    # Only the headers are kept; the tarball and the rest of the tree go.
    rm -rf -- "${tarball}" "${dir_name}"
}

# Configure and compile a Release build of MillenniumDB with CMake.
# Globals:   MDB_HOME (read) - checkout root that is entered before building
#            N_CORES  (read) - value passed to 'cmake --build -j'
# Arguments: none
# Outputs:   progress messages on stdout, plus whatever cmake prints
# Note:      leaves the working directory at ${MDB_HOME}.
function build_project() {
    local release_dir="build/Release"

    printf 'Building MillenniumDB using %s cores...\n' "${N_CORES}"
    cd "${MDB_HOME}"

    # First generate the build tree, then compile it in parallel.
    cmake -B "${release_dir}" -D CMAKE_BUILD_TYPE=Release
    cmake --build "${release_dir}" -j "${N_CORES}"

    printf '%s\n' "Build complete."
}

# Smoke-test the build by invoking the freshly built mdb binary with 'help'.
# Globals:   MDB_HOME (read) - checkout root containing build/Release/bin/mdb
# Arguments: none
# Outputs:   a progress message on stdout, followed by the binary's output
# Returns:   the exit status of the mdb invocation
function verify_build() {
    local mdb_binary="${MDB_HOME}/build/Release/bin/mdb"

    printf '%s\n' "Verifying build by running help command..."
    "${mdb_binary}" help
}

# Main script execution flow: run the setup stages in their required order.
# Each stage is invoked as a plain command, so when errexit ('set -e') is in
# effect a failing stage stops the script before the next one starts.
for stage in install_dependencies clone_repo install_boost build_project verify_build; do
    "${stage}"
done

# Closing summary: tell the user how to make MDB_HOME available in their own
# shell, since the export done by this script ends with the script's process.
printf '%s\n' \
    "MillenniumDB setup and build completed successfully!" \
    "To use 'mdb', export MDB_HOME:" \
    "  export MDB_HOME=${MDB_HOME}" \
    "Add this line to your shell startup file for permanence."