QLever/script

From BITPlan Wiki
Revision as of 08:17, 23 May 2022 by Wf (talk | contribs) (→‎qlever)
Jump to navigation Jump to search

The script below is deprecated. See https://github.com/ad-freiburg/qlever-control for the "official" replacement.

This is a script for getting started with QLever along the lines of the Quickstart description

see

usage

usage: ./qlever [-h|--help|...]
  -h|--help: show this usage
  -aw|--all_wikidata: run all steps for wikidata version,env,pull,download and index
  -b|--build: build qlever docker image
  -p|--pull: pull qlever docker image
  --port <port> port to serve the endpoint from, default: 7001
  -s|--server: start SPARQL server
  -c|--clone: clone qlever
  -e|--env: show, check and modify environment
  -v|--version: show version of this script
  -wd|--wikidata_download: download wikidata data dump
  -wi|--wikidata_index: build the index for the wikidata data dump

This helper script simplifies the access to the steps outlined in https://github.com/ad-freiburg/qlever/blob/master/docs/quickstart.md

qleverauto

#!/bin/bash
#
# a script for getting started with QLever and automatic tasks for
# it 
#
# see https://github.com/ad-freiburg/qlever/blob/master/docs/quickstart.md
# see https://wiki.bitplan.com/index.php/QLever
# see https://github.com/ad-freiburg/qlever-control for the
#   official qlever control script
#
#
# WF 2022-01-28
#

# we assume the script is started from the QLEVER_HOME directory
# (assignment and export are separate so that a failing pwd is not masked)
QLEVER_HOME=$(pwd)
export QLEVER_HOME
# name of the docker image to run and default port of the endpoint
dockerimage="qlever"
port=7001
# RCS/CVS style keyword strings - single quotes are required here:
# in double quotes the shell would expand $Revision and $Date as
# (empty) variables and mangle the version information
version='$Revision: 1.29 $'
versionDate='$Date: 2022/05/23 06:15:28 $'

# values of $SECONDS remembered by show_timing
startTime=0
finishTime=0

#ansi colors
#http://www.csc.uvic.ca/~sae/seng265/fall04/tips/s265s047-tips/bash-using-colors.html
blue='\033[0;34m'
red='\033[0;31m'
green='\033[0;32m' # '\e[1;32m' is too bright for white bg.
endColor='\033[0m'

#
# print a message in the given color
#
#   the message is framed by the given color sequence and the
#   global endColor sequence; backslash escapes are interpreted
#
#   params:
#     1: l_color - the color (escape sequence) of the message
#     2: l_msg - the message to display
#
color_msg() {
  local l_color l_msg l_text
  l_color="$1"
  l_msg="$2"
  # assemble the complete line first, then let echo interpret the escapes
  l_text="${l_color}${l_msg}${endColor}"
  echo -e "$l_text"
}

#
# error
#
#   show an error message in red on stderr and terminate the
#   script with exit code 1
#
#   params:
#     1: l_msg - the message to display
error() {
  local l_text="Error: $1"
  # ansi red marks errors - the colored line goes to stderr
  color_msg "$red" "$l_text" >&2
  exit 1
}

#
# show the usage
#
#   prints one line per supported command line option to stdout;
#   the default port shown is taken from the global port variable
#
usage() {
  printf '%s\n' \
    "usage: $0 [-h|--help|...]" \
    "  -h|--help: show this usage" \
    "  -aw|--all_wikidata: run all steps for wikidata version,env,pull,download and index" \
    "  -b|--build: build qlever docker image" \
    "  -p|--pull: pull qlever docker image" \
    "  --port <port> port to serve the endpoint from, default: $port" \
    "  -s|--server: start SPARQL server" \
    "  -c|--clone: clone qlever" \
    "  -e|--env: show, check and modify environment" \
    "  -k|--kill: kill a running qlever process" \
    "  -v|--version: show version of this script" \
    "  -wd|--wikidata_download: download wikidata data dump" \
    "  -wi|--wikidata_index: build the index for the wikidata data dump" \
    "" \
    "This helper script simplifies the access to the steps outlined in https://github.com/ad-freiburg/qlever/blob/master/docs/quickstart.md"
}

#
# report the start or the end of a timed action
#
#   prints "<action> <state> at <date>" in blue; for the state
#   "finished" the number of seconds since the last "started"
#   call is appended
#
#   params:
#     1: l_action - description of the action
#     2: l_state - "started" or "finished"
#
#   globals:
#     startTime - set from $SECONDS when l_state is "started"
#     finishTime - set from $SECONDS when l_state is "finished"
#
show_timing() {
  local l_action="$1"
  local l_state="$2"
  # keep the helper variables local: a global suffix would survive
  # between calls and show up again for any other state
  local l_now
  local l_after=""
  l_now=$(date)
  case "$l_state" in
    started)
      startTime=$SECONDS
      ;;
    finished)
      finishTime=$SECONDS
      local l_duration=$(( finishTime - startTime ))
      l_after=" after $l_duration seconds"
      ;;
  esac
  color_msg "$blue" "$l_action $l_state at $l_now$l_after"
}

#
# show the version of this script
#
#   prints "<script name> version <version> <versionDate>" in blue
#
show_version() {
  # strip the directory part of $0 with a parameter expansion -
  # this also works for script paths containing blanks
  local l_script="${0##*/}"
  color_msg "$blue" "$l_script version $version $versionDate"
}

#
# check whether program is installed
#
#   prints the program name, the path it was found at and a
#   check mark in green, or the program name and a cross in red
#   if the program is not on the PATH
#
#  #1: l_prog - the program to check
#
check_installed() {
  local l_prog="$1"
  local l_installed="✅"
  local l_color=$green
  local l_progbin
  # type -P is a bash builtin that searches the PATH only;
  # a single lookup delivers both the path and the exit status
  if l_progbin=$(type -P "$l_prog")
  then
    # separate the program name from its path in the message
    l_progbin=" $l_progbin"
  else
    l_progbin=""
    l_installed="❌"
    l_color=$red
  fi
  color_msg "$l_color" "$l_prog$l_progbin $l_installed"
}

#
# show and modify the environment
#
#   lists the needed programs, the operating system, the docker
#   version, the memory and the disk space and raises the soft
#   limit for open files of the running shell
#
show_env() {
  local l_progs="docker top df jq"
  local l_disks="/dev/s"
  local l_prog
  local l_os
  # ask for the kernel name once - it decides which tools are used below
  l_os=$(uname -s)
  case "$l_os" in
    Darwin*)
      l_progs="$l_progs sw_vers"
      l_disks="/dev/disk"
      ;;
    *)
      l_progs="$l_progs lsb_release free"
      ;;
  esac
  color_msg "$blue" "needed software"
  # l_progs is a blank separated list - the word splitting is intended
  for l_prog in $l_progs
  do
    check_installed "$l_prog"
  done
  color_msg "$blue" "operating system"
  case "$l_os" in
    Darwin*)
      sw_vers
      ;;
    *)
      lsb_release -a
      ;;
  esac
  color_msg "$blue" "docker version"
  docker --version
  color_msg "$blue" "memory"
  case "$l_os" in
    Darwin*)
      top -l 1 | grep PhysMem | cut -f1,2 -d" "
      ;;
    *)
      free -h
      ;;
  esac
  color_msg "$blue" "diskspace"
  # only show the lines of devices starting with the disk prefix
  df -h | grep -- "$l_disks"
  # try to raise the soft limit for open files; the limit that is
  # actually in effect is printed below
  ulimit -Sn 1048576
  color_msg "$blue" "soft ulimit for files"
  ulimit -Sn
}

#
# check whether the given process runs and kill it if yes
#
#   pgrep -fl lists the matching processes as a side effect, then
#   all matching processes are killed with pkill -f
#
# param #1: l_process  - the process (pattern) to check
#
# NOTE(review): the -f option matches against command lines - if the
# command line of this script contains the pattern it presumably
# matches (and kills) itself too; please confirm
#
killIfRunning() {
  local l_process="$1"
  if pgrep -fl "$l_process"
  then
    # color_msg expects the color as its first parameter
    color_msg "$blue" "$l_process already running"
    # comment out as you like
    # either stop here
    #echo "to kill the process you might want to use"
    #echo "pkill -f $l_process"
    #exit 1
    # or fully automatic kill
    color_msg "$red" "killing $l_process"
    pkill -f "$l_process"
  fi
}
#
# kill a running qlever process
#   delegates to killIfRunning with the process pattern "qlever"
#
qlever_kill() {
  local l_pattern="qlever"
  killIfRunning "$l_pattern"
}

#
# pull the qlever image
#
#   switches the global dockerimage to "adfreiburg/qlever" and
#   pulls that image; start and end of the pull are reported
#   with show_timing
#
qlever_pull() {
  local l_title="pulling qlever docker image"
  dockerimage="adfreiburg/qlever"
  show_timing "$l_title" "started"
  docker pull "$dockerimage"
  show_timing "$l_title" "finished"
}

#
# start the SPARQL server
#
#   runs the docker image given by the global dockerimage with
#   $QLEVER_HOME/qlever-indices/wikidata mounted as /index and
#   the container port 7001 published on the given host port
#
#   params:
#     1: l_port - the host port (default: the global port)
#
qlever_start() {
  # fall back to the global default when no port is given
  local l_port="${1:-$port}"
  # all expansions are quoted so that a QLEVER_HOME containing
  # blanks still yields a single volume argument
  docker run --rm \
    -v "$QLEVER_HOME/qlever-indices/wikidata:/index" \
    -p "$l_port:7001" \
    -e INDEX_PREFIX=wikidata \
    --name qlever.wikidata \
    "$dockerimage"
}

#
# clone the qlever code
#
#   clones https://github.com/ad-freiburg/qlever (with submodules)
#   to $QLEVER_HOME/qlever-code unless that directory exists already
#
#   returns 1 if QLEVER_HOME can not be entered
#
qlever_clone() {
  # quoted for directories containing blanks; give up if the
  # directory is not accessible instead of cloning somewhere else
  cd "$QLEVER_HOME" || return 1
  if [ ! -d qlever-code ]
  then
    color_msg "$blue" "cloning qlever - please wait typically 1 min ..."
    show_timing "cloning qlever" "started"
    git clone --recursive https://github.com/ad-freiburg/qlever qlever-code
    show_timing "cloning qlever" "finished"
  else
    color_msg "$green" "clone of qlever-code already available"
  fi
}

#
# build the docker image
#
#   builds the image "qlever" from the checkout in
#   $QLEVER_HOME/qlever-code using Dockerfiles/Dockerfile.Ubuntu20.04
#
#   returns 1 if the checkout directory can not be entered
#
#   a check whether the image is available already
#   (docker images | grep qlever) is currently commented out,
#   so every call runs docker build
#
qlever_build() {
  # without the checkout there is nothing to build - never run
  # docker build in some other directory
  cd "$QLEVER_HOME/qlever-code" || return 1
  color_msg "$blue" "building qlever - please wait typically 15 min ..."
  show_timing "docker build" "started"
  # docker build -t qlever .
  docker build --file Dockerfiles/Dockerfile.Ubuntu20.04 -t qlever .
  show_timing "docker build" "finished"
}

#
# generic download
#
#   changes to the target directory (which is created if needed)
#   and downloads the url with wget unless the expected file is
#   available already
#
# params
#   1: title of the download
#   2: expected time
#   3: target directory
#   4: file expected
#   5: url to download from
#
# returns
#   1 if the target directory can not be created or entered,
#   the exit status of wget if a download was started, else 0
#
download() {
  local l_title="$1"
  local l_expected="$2"
  local l_target="$3"
  local l_file="$4"
  local l_url="$5"
  local l_status=0
  # make sure the target directory exists and is really entered -
  # otherwise wget would store the file in the current directory
  mkdir -p "$l_target" || return 1
  cd "$l_target" || return 1
  # check if file already exists
  if [ -f "$l_file" ]
  then
    color_msg "$green" "$l_title:$l_file already downloaded"
  else
    color_msg "$blue" "downloading $l_title:$l_file ... please wait typically $l_expected ..."
    show_timing "$l_title download" "started"
    wget "$l_url" || l_status=$?
    show_timing "$l_title download" "finished"
  fi
  return "$l_status"
}

#
# wikidata config copy
#
#   creates $QLEVER_HOME/qlever-indices/wikidata if needed and
#   copies wikidata.settings.json from the examples directory of
#   the qlever-code checkout to it unless it is there already
#
#   returns 1 if one of the directories can not be created or entered
#
wikidata_copyconfig() {
  # local variables - other functions use a variable named target too
  local l_target=qlever-indices/wikidata
  local l_config=wikidata.settings.json
  local l_configpath="$QLEVER_HOME/qlever-code/examples/$l_config"
  cd "$QLEVER_HOME" || return 1
  if [ ! -d "$l_target" ]
  then
    color_msg "$blue" "creating $l_target"
    mkdir -p "$l_target" || return 1
  else
    color_msg "$green" "$l_target already exists"
  fi
  cd "$l_target" || return 1
  if [ ! -f "$l_config" ]
  then
    color_msg "$blue" "copying config file $l_configpath to $l_target"
    cp -p "$l_configpath" .
  else
    color_msg "$green" "$l_config already copied to $l_target"
  fi
}

#
# wikidata download
#
#   downloads the lexemes dump and the complete dump from
#   dumps.wikimedia.org to $QLEVER_HOME/wikidata
#
wikidata_download() {
  # no trailing slash here - the slash is added together with the file name
  local l_base=https://dumps.wikimedia.org/wikidatawiki/entities
  local l_dump=latest-all.ttl.bz2
  local l_lexemes=latest-lexemes.ttl.bz2
  local l_target="$QLEVER_HOME/wikidata"
  #wikidata_copyconfig
  download "wikidata lexemes" "3min" "$l_target" "$l_lexemes" "$l_base/$l_lexemes"
  download "wikidata dump" "6hours" "$l_target" "$l_dump" "$l_base/$l_dump"
}

#
# build the wikidata index
#
#   changes to $QLEVER_HOME/wikidata, sources the qlever control
#   script from $QLEVER_HOME/qlever-control/qlever and runs
#   "qlever index"
#
#   returns 1 if the directory can not be entered or the control
#   script is not readable
#
wikidata_index() {
  local l_control=../qlever-control/qlever
  # never continue in the wrong directory: the chmod below would
  # make whatever directory is current writable for others
  cd "$QLEVER_HOME/wikidata" || return 1
  # without the control script there is no qlever command to run
  if [ ! -r "$l_control" ]
  then
    color_msg "$red" "Error: $l_control not readable from $QLEVER_HOME/wikidata" 1>&2
    return 1
  fi
  # presumably needed so that the index builder may write to
  # this directory - TODO confirm
  chmod o+w .
  show_timing "creating wikidata index" "started"
#   docker run -i --rm -v $QLEVER_HOME/qlever-indices/wikidata:/index --entrypoint bash $dockerimage  -c "cd /index && bzcat latest-all.ttl.bz2 latest-lexemes.ttl.bz2 | IndexBuilderMain -F ttl -f - -l -i wikidata -s wikidata.settings.json | tee wikidata.index-log.txt"
  . "$l_control"
  check_installed IndexBuilderMain
  qlever index
  show_timing "creating wikidata index" "finished"
}

# commandline options according to usage
#
#   the options are processed from left to right and each action
#   is executed as soon as its option is seen - --port therefore
#   has to be given before -s|--server to take effect
#
#   a missing value for --port or an unknown option shows the
#   usage and terminates the script with exit code 1
#
#   params:
#     the command line arguments of the script
main() {
  local option
  while [ $# -gt 0 ]
  do
    option=$1
    shift
    case $option in
      -h|--help)
        usage
        ;;
      -aw|--all_wikidata)
        show_version
        show_env
        qlever_clone
        #qlever_build
        qlever_pull
        wikidata_download
        wikidata_index
        ;;
      -b|--build)
        qlever_build
        ;;
      -c|--clone)
        qlever_clone
        ;;
      -e|--env)
        show_env
        ;;
      -k|--kill)
        qlever_kill
        ;;
      -p|--pull)
        qlever_pull
        ;;
      --port)
        # the port value is mandatory - do not go on with an empty port
        if [ $# -lt 1 ]
        then
          usage
          exit 1
        fi
        port=$1
        shift
        ;;
      -s|--server)
        qlever_start "$port"
        ;;
      -wd|--wikidata_download)
        wikidata_download
        ;;
      -wi|--wikidata_index)
        wikidata_index
        ;;
      -v|--version)
        show_version
        ;;
      -t)
        # self test of the timing display
        show_timing "testing" "started"
        sleep 2
        show_timing "testing" "finished"
        ;;
      *)
        # do not silently ignore misspelled options
        echo "unknown option: $option" 1>&2
        usage 1>&2
        exit 1
        ;;
    esac
  done
}

main "$@"