Difference between revisions of "Wikidata Import 2025-11-23"

From BITPlan Wiki
Jump to navigation Jump to search
Line 28: Line 28:
 
sudo apt update && sudo apt install git g++ cmake libssl-dev libncurses-dev less python3 python3-venv libicu-dev
 
sudo apt update && sudo apt install git g++ cmake libssl-dev libncurses-dev less python3 python3-venv libicu-dev
 
</source>
 
</source>
=== Clone MilleniumDB source code ===
+
=== setup_milleniumdb.sh ===
 
<source lang='bash'>
 
<source lang='bash'>
mkdir -p source/cpp
+
#!/bin/bash
cd source/cpp
+
 
git clone https://github.com/MillenniumDB/MillenniumDB
+
# Exit immediately if a command exits with a non-zero status.
ccd MillenniumDB
+
set -e
 +
 
 +
# --- Configuration ---
 +
# You can change the base directory where 'source/cpp' will be created.
 +
# Defaults to the current user's home directory.
 +
BASE_DIR="${HOME}"
 +
 
 +
# Number of cores to use for compilation.
 +
# Defaults to the number of available processing units.
 +
# You can override this by passing a number as the first argument to the script.
 +
# e.g., ./setup_millenniumdb.sh 8
 +
N_CORES=${1:-$(nproc)}
 +
 
 +
# --- Script Start ---
 +
echo "Starting MillenniumDB setup..."
 +
echo "Using base directory: ${BASE_DIR}"
 +
echo "----------------------------------------"
 +
 
 +
# 1. Create directory structure and clone the repository
 +
echo "Step 1: Cloning MillenniumDB repository..."
 +
mkdir -p "${BASE_DIR}/source/cpp"
 +
cd "${BASE_DIR}/source/cpp"
 +
 
 +
if [ -d "MillenniumDB" ]; then
 +
    echo "MillenniumDB directory already exists. Skipping clone."
 +
else
 +
    git clone https://github.com/MillenniumDB/MillenniumDB.git
 +
fi
 +
 
 +
cd MillenniumDB
 
export MDB_HOME=$(pwd)
 
export MDB_HOME=$(pwd)
 +
echo "Project path (MDB_HOME) set to: ${MDB_HOME}"
 +
echo "----------------------------------------"
 +
 +
 +
# 2. Download and install the required Boost version
 +
echo "Step 2: Installing Boost 1.82.0 locally for the project..."
 +
 +
# Variables for Boost
 +
BOOST_VERSION_URL="1.82.0"
 +
BOOST_VERSION_UNDERSCORE="1_82_0"
 +
BOOST_DIR_NAME="boost_${BOOST_VERSION_UNDERSCORE}"
 +
BOOST_TARBALL="${BOOST_DIR_NAME}.tar.gz"
 +
BOOST_URL="https://archives.boost.io/release/${BOOST_VERSION_URL}/source/${BOOST_TARBALL}"
 +
BOOST_INSTALL_PATH="${MDB_HOME}/third_party/boost_1_82/include"
 +
 +
# Download in the MDB_HOME directory
 +
cd "${MDB_HOME}"
 +
 +
echo "Downloading Boost from ${BOOST_URL}..."
 +
wget -q --show-progress -O "${BOOST_TARBALL}" "${BOOST_URL}"
 +
 +
echo "Extracting ${BOOST_TARBALL}..."
 +
tar -xf "${BOOST_TARBALL}"
 +
 +
echo "Creating target directory at ${BOOST_INSTALL_PATH}"
 +
mkdir -p "${BOOST_INSTALL_PATH}"
 +
 +
echo "Moving Boost headers into place..."
 +
mv "${BOOST_DIR_NAME}/boost" "${BOOST_INSTALL_PATH}/"
 +
 +
echo "Cleaning up downloaded files..."
 +
rm -r "${BOOST_TARBALL}" "${BOOST_DIR_NAME}"
 +
echo "Boost installation complete."
 +
echo "----------------------------------------"
 +
 +
 +
# 3. Build the project using CMake
 +
echo "Step 3: Building MillenniumDB with CMake..."
 +
echo "This will use ${N_CORES} core(s). This may take several minutes."
 +
 +
# Go back to the repository root
 +
cd "${MDB_HOME}"
 +
 +
cmake -B build/Release -D CMAKE_BUILD_TYPE=Release
 +
cmake --build build/Release/ -j "${N_CORES}"
 +
 +
echo "Build complete."
 +
echo "----------------------------------------"
 +
 +
 +
# 4. Verify the build
 +
echo "Step 4: Verifying the build by running the help command..."
 +
build/Release/bin/mdb help
 +
echo "----------------------------------------"
 +
 +
echo ""
 +
echo "✅ MillenniumDB setup and build completed successfully!"
 +
echo ""
 +
echo "IMPORTANT:"
 +
echo "To use 'mdb' from the command line in this terminal session, you must export MDB_HOME:"
 +
echo "  export MDB_HOME=${MDB_HOME}"
 +
echo ""
 +
echo "To make this setting permanent, add that line to your shell's startup file (e.g., ~/.bashrc or ~/.zshrc)."
 
</source>
 
</source>

Revision as of 14:21, 23 November 2025

Import

Import
edit
state  ?
url  https://wiki.bitplan.com/index.php/Wikidata_Import_2025-11-23
target  MillenniumDB
start  2025-11-23
end  
days  7
os  Ubuntu 22.04.3 LTS
cpu  Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz (16 cores)
ram  512
triples  
comment  Try replicating https://www.wikidata.org/wiki/Wikidata:Scaling_Wikidata/Benchmarking/MilleniumDB

MillenniumDB

Links

Setup

Install prerequisite libraries

# Build prerequisites for MillenniumDB; wget is included because the setup script uses it to download Boost.
sudo apt update && sudo apt install git g++ cmake libssl-dev libncurses-dev less python3 python3-venv libicu-dev wget

setup_millenniumdb.sh

#!/bin/bash
#
# setup_millenniumdb.sh - clone MillenniumDB, install the Boost 1.82.0 headers
# into the checkout, build a Release binary with CMake and run `mdb help`.
#
# Usage:
#   ./setup_millenniumdb.sh [N_CORES]
#
# Arguments:
#   N_CORES  optional positive integer: parallel build jobs (default: nproc)
#
# The script is safe to re-run: an existing clone and an existing Boost
# header installation are detected and reused instead of causing a failure.

# Exit on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail

# --- Configuration ---
# You can change the base directory where 'source/cpp' will be created.
# Defaults to the current user's home directory.
BASE_DIR="${HOME}"

# Number of cores to use for compilation.
# Defaults to the number of available processing units.
# You can override this by passing a number as the first argument to the script.
# e.g., ./setup_millenniumdb.sh 8
N_CORES=${1:-$(nproc)}

# Reject a non-numeric or zero job count here instead of failing late inside
# the CMake build step.
if ! [[ "${N_CORES}" =~ ^[1-9][0-9]*$ ]]; then
    echo "Error: number of cores must be a positive integer, got '${N_CORES}'." >&2
    exit 2
fi

# Fail early with a clear message if a tool used further down is missing.
for required_tool in git wget tar cmake; do
    if ! command -v "${required_tool}" >/dev/null 2>&1; then
        echo "Error: required tool '${required_tool}' is not installed." >&2
        exit 1
    fi
done

# --- Script Start ---
echo "Starting MillenniumDB setup..."
echo "Using base directory: ${BASE_DIR}"
echo "----------------------------------------"

# 1. Create directory structure and clone the repository
echo "Step 1: Cloning MillenniumDB repository..."
mkdir -p "${BASE_DIR}/source/cpp"
cd "${BASE_DIR}/source/cpp"

if [ -d "MillenniumDB" ]; then
    echo "MillenniumDB directory already exists. Skipping clone."
else
    git clone https://github.com/MillenniumDB/MillenniumDB.git
fi

cd MillenniumDB
# Assign and export separately so a failing $(pwd) is not masked by 'export'.
MDB_HOME=$(pwd)
export MDB_HOME
echo "Project path (MDB_HOME) set to: ${MDB_HOME}"
echo "----------------------------------------"


# 2. Download and install the required Boost version
echo "Step 2: Installing Boost 1.82.0 locally for the project..."

# Variables for Boost
BOOST_VERSION_URL="1.82.0"
BOOST_VERSION_UNDERSCORE="1_82_0"
BOOST_DIR_NAME="boost_${BOOST_VERSION_UNDERSCORE}"
BOOST_TARBALL="${BOOST_DIR_NAME}.tar.gz"
BOOST_URL="https://archives.boost.io/release/${BOOST_VERSION_URL}/source/${BOOST_TARBALL}"
BOOST_INSTALL_PATH="${MDB_HOME}/third_party/boost_1_82/include"

# Download in the MDB_HOME directory
cd "${MDB_HOME}"

# Removes the downloaded tarball and the extracted source tree (if present).
cleanup_boost_download() {
    rm -rf -- "${MDB_HOME:?}/${BOOST_TARBALL}" "${MDB_HOME:?}/${BOOST_DIR_NAME}"
}

# boost/version.hpp is used as the marker for a completed header installation.
if [ -f "${BOOST_INSTALL_PATH}/boost/version.hpp" ]; then
    echo "Boost headers already present at ${BOOST_INSTALL_PATH}. Skipping download."
else
    # Make sure a failed download/extraction does not leave large temporary
    # files behind inside the repository checkout.
    trap cleanup_boost_download EXIT

    echo "Downloading Boost from ${BOOST_URL}..."
    wget -q --show-progress -O "${BOOST_TARBALL}" "${BOOST_URL}"

    echo "Extracting ${BOOST_TARBALL}..."
    tar -xf "${BOOST_TARBALL}"

    echo "Creating target directory at ${BOOST_INSTALL_PATH}"
    mkdir -p "${BOOST_INSTALL_PATH}"

    echo "Moving Boost headers into place..."
    # Drop any incomplete leftover first; mv cannot replace a non-empty directory.
    rm -rf -- "${BOOST_INSTALL_PATH:?}/boost"
    mv "${BOOST_DIR_NAME}/boost" "${BOOST_INSTALL_PATH}/"

    echo "Cleaning up downloaded files..."
    cleanup_boost_download
    trap - EXIT
fi
echo "Boost installation complete."
echo "----------------------------------------"


# 3. Build the project using CMake
echo "Step 3: Building MillenniumDB with CMake..."
echo "This will use ${N_CORES} core(s). This may take several minutes."

# Go back to the repository root
cd "${MDB_HOME}"

cmake -B build/Release -D CMAKE_BUILD_TYPE=Release
cmake --build build/Release/ -j "${N_CORES}"

echo "Build complete."
echo "----------------------------------------"


# 4. Verify the build
echo "Step 4: Verifying the build by running the help command..."
build/Release/bin/mdb help
echo "----------------------------------------"

echo ""
echo "✅ MillenniumDB setup and build completed successfully!"
echo ""
echo "IMPORTANT:"
# The export above only affects this script's own process, so the user has to
# repeat it in their interactive shell.
echo "To use 'mdb' from the command line in this terminal session, you must export MDB_HOME:"
echo "  export MDB_HOME=${MDB_HOME}"
echo ""
echo "To make this setting permanent, add that line to your shell's startup file (e.g., ~/.bashrc or ~/.zshrc)."