Difference between revisions of "Wikidata Import 2025-11-23"
Jump to navigation
Jump to search
(→Setup) |
|||
| Line 28: | Line 28: | ||
sudo apt update && sudo apt install git g++ cmake libssl-dev libncurses-dev less python3 python3-venv libicu-dev | sudo apt update && sudo apt install git g++ cmake libssl-dev libncurses-dev less python3 python3-venv libicu-dev | ||
</source> | </source> | ||
| − | === | + | === setup_milleniumdb.sh === |
<source lang='bash'> | <source lang='bash'> | ||
| − | mkdir -p source/cpp | + | #!/bin/bash |
| − | cd source/cpp | + | |
| − | git clone https://github.com/MillenniumDB/MillenniumDB | + | # Exit immediately if a command exits with a non-zero status. |
| − | + | set -e | |
| + | |||
| + | # --- Configuration --- | ||
| + | # You can change the base directory where 'source/cpp' will be created. | ||
| + | # Defaults to the current user's home directory. | ||
| + | BASE_DIR="${HOME}" | ||
| + | |||
| + | # Number of cores to use for compilation. | ||
| + | # Defaults to the number of available processing units. | ||
| + | # You can override this by passing a number as the first argument to the script. | ||
| + | # e.g., ./setup_millenniumdb.sh 8 | ||
| + | N_CORES=${1:-$(nproc)} | ||
| + | |||
| + | # --- Script Start --- | ||
| + | echo "Starting MillenniumDB setup..." | ||
| + | echo "Using base directory: ${BASE_DIR}" | ||
| + | echo "----------------------------------------" | ||
| + | |||
| + | # 1. Create directory structure and clone the repository | ||
| + | echo "Step 1: Cloning MillenniumDB repository..." | ||
| + | mkdir -p "${BASE_DIR}/source/cpp" | ||
| + | cd "${BASE_DIR}/source/cpp" | ||
| + | |||
| + | if [ -d "MillenniumDB" ]; then | ||
| + | echo "MillenniumDB directory already exists. Skipping clone." | ||
| + | else | ||
| + | git clone https://github.com/MillenniumDB/MillenniumDB.git | ||
| + | fi | ||
| + | |||
| + | cd MillenniumDB | ||
export MDB_HOME=$(pwd) | export MDB_HOME=$(pwd) | ||
| + | echo "Project path (MDB_HOME) set to: ${MDB_HOME}" | ||
| + | echo "----------------------------------------" | ||
| + | |||
| + | |||
| + | # 2. Download and install the required Boost version | ||
| + | echo "Step 2: Installing Boost 1.82.0 locally for the project..." | ||
| + | |||
| + | # Variables for Boost | ||
| + | BOOST_VERSION_URL="1.82.0" | ||
| + | BOOST_VERSION_UNDERSCORE="1_82_0" | ||
| + | BOOST_DIR_NAME="boost_${BOOST_VERSION_UNDERSCORE}" | ||
| + | BOOST_TARBALL="${BOOST_DIR_NAME}.tar.gz" | ||
| + | BOOST_URL="https://archives.boost.io/release/${BOOST_VERSION_URL}/source/${BOOST_TARBALL}" | ||
| + | BOOST_INSTALL_PATH="${MDB_HOME}/third_party/boost_1_82/include" | ||
| + | |||
| + | # Download in the MDB_HOME directory | ||
| + | cd "${MDB_HOME}" | ||
| + | |||
| + | echo "Downloading Boost from ${BOOST_URL}..." | ||
| + | wget -q --show-progress -O "${BOOST_TARBALL}" "${BOOST_URL}" | ||
| + | |||
| + | echo "Extracting ${BOOST_TARBALL}..." | ||
| + | tar -xf "${BOOST_TARBALL}" | ||
| + | |||
| + | echo "Creating target directory at ${BOOST_INSTALL_PATH}" | ||
| + | mkdir -p "${BOOST_INSTALL_PATH}" | ||
| + | |||
| + | echo "Moving Boost headers into place..." | ||
| + | mv "${BOOST_DIR_NAME}/boost" "${BOOST_INSTALL_PATH}/" | ||
| + | |||
| + | echo "Cleaning up downloaded files..." | ||
| + | rm -r "${BOOST_TARBALL}" "${BOOST_DIR_NAME}" | ||
| + | echo "Boost installation complete." | ||
| + | echo "----------------------------------------" | ||
| + | |||
| + | |||
| + | # 3. Build the project using CMake | ||
| + | echo "Step 3: Building MillenniumDB with CMake..." | ||
| + | echo "This will use ${N_CORES} core(s). This may take several minutes." | ||
| + | |||
| + | # Go back to the repository root | ||
| + | cd "${MDB_HOME}" | ||
| + | |||
| + | cmake -B build/Release -D CMAKE_BUILD_TYPE=Release | ||
| + | cmake --build build/Release/ -j "${N_CORES}" | ||
| + | |||
| + | echo "Build complete." | ||
| + | echo "----------------------------------------" | ||
| + | |||
| + | |||
| + | # 4. Verify the build | ||
| + | echo "Step 4: Verifying the build by running the help command..." | ||
| + | build/Release/bin/mdb help | ||
| + | echo "----------------------------------------" | ||
| + | |||
| + | echo "" | ||
| + | echo "✅ MillenniumDB setup and build completed successfully!" | ||
| + | echo "" | ||
| + | echo "IMPORTANT:" | ||
| + | echo "To use 'mdb' from the command line in this terminal session, you must export MDB_HOME:" | ||
| + | echo " export MDB_HOME=${MDB_HOME}" | ||
| + | echo "" | ||
| + | echo "To make this setting permanent, add that line to your shell's startup file (e.g., ~/.bashrc or ~/.zshrc)." | ||
</source> | </source> | ||
Revision as of 14:21, 23 November 2025
Import
| Import | |
|---|---|
| state | ? |
| url | https://wiki.bitplan.com/index.php/Wikidata_Import_2025-11-23 |
| target | MillenniumDB |
| start | 2025-11-23 |
| end | |
| days | 7 |
| os | Ubuntu 22.04.3 LTS |
| cpu | Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz (16 cores) |
| ram | 512 |
| triples | |
| comment | Try replicating https://www.wikidata.org/wiki/Wikidata:Scaling_Wikidata/Benchmarking/MilleniumDB |
MillenniumDB
Links
- https://github.com/MillenniumDB/MillenniumDB
- https://github.com/MillenniumDB/MillenniumDB/wiki
- https://github.com/MillenniumDB/MillenniumDB/wiki/Setup
- https://www.wikidata.org/wiki/Wikidata:Scaling_Wikidata/Benchmarking/MilleniumDB
- https://www.wikidata.org/wiki/Q118954995
Setup
Install prerequisite libraries
sudo apt update && sudo apt install git g++ cmake libssl-dev libncurses-dev less python3 python3-venv libicu-dev
setup_millenniumdb.sh
#!/bin/bash
#
# setup_millenniumdb.sh - clone MillenniumDB, install the Boost 1.82.0 headers
# into the checkout, build a Release binary and smoke-test it.
#
# Usage: ./setup_millenniumdb.sh [N_CORES]
#   N_CORES  number of parallel build jobs (default: output of nproc)
#
# The script can be re-run: an existing checkout is reused and an already
# installed Boost header tree is left in place instead of being downloaded
# and moved a second time.

# Abort on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# --- Configuration ---
# You can change the base directory where 'source/cpp' will be created.
# Defaults to the current user's home directory.
BASE_DIR="${HOME}"

# Number of cores to use for compilation.
# Defaults to the number of available processing units.
# You can override this by passing a number as the first argument to the script.
# e.g., ./setup_millenniumdb.sh 8
N_CORES=${1:-$(nproc)}

# Reject a bad job count now rather than after the download and configure
# steps have already run.
if ! [[ "${N_CORES}" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: number of cores must be a positive integer, got '${N_CORES}'." >&2
  exit 2
fi

# Scratch directory used while downloading/extracting Boost. It is removed on
# every exit path so a failed run does not leave a tarball or a half-extracted
# tree inside the checkout.
BOOST_WORK_DIR=""
cleanup() {
  if [[ -n "${BOOST_WORK_DIR}" && -d "${BOOST_WORK_DIR}" ]]; then
    rm -rf -- "${BOOST_WORK_DIR}"
  fi
}
trap cleanup EXIT

# --- Script Start ---
echo "Starting MillenniumDB setup..."
echo "Using base directory: ${BASE_DIR}"
echo "----------------------------------------"

# 1. Create directory structure and clone the repository
echo "Step 1: Cloning MillenniumDB repository..."
mkdir -p "${BASE_DIR}/source/cpp"
cd "${BASE_DIR}/source/cpp"

if [[ -d "MillenniumDB" ]]; then
  echo "MillenniumDB directory already exists. Skipping clone."
else
  git clone https://github.com/MillenniumDB/MillenniumDB.git
fi

cd MillenniumDB
# Assign and export separately so a failing $(pwd) is not masked by 'export'.
MDB_HOME=$(pwd)
export MDB_HOME
echo "Project path (MDB_HOME) set to: ${MDB_HOME}"
echo "----------------------------------------"

# 2. Download and install the required Boost version
echo "Step 2: Installing Boost 1.82.0 locally for the project..."

# Variables for Boost
BOOST_VERSION_URL="1.82.0"
BOOST_VERSION_UNDERSCORE="1_82_0"
BOOST_DIR_NAME="boost_${BOOST_VERSION_UNDERSCORE}"
BOOST_TARBALL="${BOOST_DIR_NAME}.tar.gz"
BOOST_URL="https://archives.boost.io/release/${BOOST_VERSION_URL}/source/${BOOST_TARBALL}"
BOOST_INSTALL_PATH="${MDB_HOME}/third_party/boost_1_82/include"

if [[ -d "${BOOST_INSTALL_PATH}/boost" ]]; then
  # A previous run already moved the headers into place; moving them again
  # would fail because the destination directory is not empty.
  echo "Boost headers already present at ${BOOST_INSTALL_PATH}/boost. Skipping download."
else
  # Work inside the checkout so the final 'mv' stays on one filesystem.
  BOOST_WORK_DIR=$(mktemp -d "${MDB_HOME}/boost-setup.XXXXXX")

  echo "Downloading Boost from ${BOOST_URL}..."
  wget -q --show-progress -O "${BOOST_WORK_DIR}/${BOOST_TARBALL}" "${BOOST_URL}"

  echo "Extracting ${BOOST_TARBALL}..."
  tar -xf "${BOOST_WORK_DIR}/${BOOST_TARBALL}" -C "${BOOST_WORK_DIR}"

  echo "Creating target directory at ${BOOST_INSTALL_PATH}"
  mkdir -p "${BOOST_INSTALL_PATH}"

  echo "Moving Boost headers into place..."
  mv "${BOOST_WORK_DIR}/${BOOST_DIR_NAME}/boost" "${BOOST_INSTALL_PATH}/"

  echo "Cleaning up downloaded files..."
  cleanup
  BOOST_WORK_DIR=""
fi
echo "Boost installation complete."
echo "----------------------------------------"

# 3. Build the project using CMake
echo "Step 3: Building MillenniumDB with CMake..."
echo "This will use ${N_CORES} core(s). This may take several minutes."

# Go back to the repository root
cd "${MDB_HOME}"

cmake -B build/Release -D CMAKE_BUILD_TYPE=Release
cmake --build build/Release/ -j "${N_CORES}"

echo "Build complete."
echo "----------------------------------------"

# 4. Verify the build
echo "Step 4: Verifying the build by running the help command..."
build/Release/bin/mdb help
echo "----------------------------------------"

echo ""
echo "✅ MillenniumDB setup and build completed successfully!"
echo ""
echo "IMPORTANT:"
echo "To use 'mdb' from the command line in this terminal session, you must export MDB_HOME:"
echo " export MDB_HOME=${MDB_HOME}"
echo ""
echo "To make this setting permanent, add that line to your shell's startup file (e.g., ~/.bashrc or ~/.zshrc)."