Difference between revisions of "Wiki Family"

From BITPlan Wiki
Jump to navigation Jump to search
Line 295: Line 295:
 
*/1 * * * * /srv/www/mediawiki/code/runjobs
 
*/1 * * * * /srv/www/mediawiki/code/runjobs
 
</source>
 
</source>
==== runjobs bash script ===
+
==== runjobs bash script ====
 
<source lang='bash'>
 
<source lang='bash'>
 
#!/bin/bash
 
#!/bin/bash

Revision as of 13:11, 25 November 2020

Principle

https://www.mediawiki.org/wiki/Manual:Wiki_family

Examples

Example Setup

Virtual Host based selection

The principle of this Wikifarm setup is to use a virtual host based selection of the target Wiki in the Wikifamily. Therefore each Wiki needs its own virtual host. The simplest way to achieve this is by having separate site names per wiki. So if your domain is doe.com and you want three wikis you would add

  1. wiki1.doe.com
  2. wiki2.doe.com
  3. wiki3.doe.com

to your DNS entries and make sure your http server configuration is set up accordingly. Virtual hosts can also be configured per IP address or per port, so a different setup is feasible but not as straightforward.

Example Apache config

/etc/apache2/sites-available/wiki1.conf

#
# Apache site wiki1
# Virtualhost wiki1.doe.com
# created 2015-11-28 08:25:54 by createsite script
#
# plain http (port 80): every request is permanently redirected to the https site
<VirtualHost *:80>
  ServerAdmin webmaster@doe.com
  ServerName wiki1.doe.com 
  Redirect permanent / https://wiki1.doe.com/
</VirtualHost>
# https (port 443): the actual settings are pulled in from two include files,
# ssl.conf and the wiki specific wiki1.conf
<VirtualHost *:443>
  ServerAdmin webmaster@doe.com
  ServerName wiki1.doe.com 
  include ssl.conf
  include wiki1.conf
</VirtualHost>

/etc/apache2/wiki1.conf

Some of the settings here are optional like making font-awesome available. Just comment out things you might not need.

# Apache2 configuration wiki1 for wiki1.doe.com
# created 2015-11-28 09:13:10 by createsite script

# the document root is the per-site directory below /var/www/mediawiki
DocumentRoot /var/www/mediawiki/wiki1.doe.com
# further aliases are kept in a separate include file
include doe_aliases.conf

#  make the font-awesome directory available unconditionally
Alias /font-awesome "/var/www/font-awesome"

# the following aliases need to be access restricted later with
# a directory setting
# (/images/wiki1 and /images both map to the same images directory of the site)
Alias "/images/wiki1" "/var/www/mediawiki/sites/wiki1.doe.com/images"
Alias "/images" "/var/www/mediawiki/sites/wiki1.doe.com/images"
Alias "/videos" "/var/www/mediawiki/sites/wiki1.doe.com/videos"

# this is important for security  - the .htaccess files need to be activated
# for the images and videos directories so that access is controlled
# (AllowOverride All is what lets the .htaccess files take effect)
<Directory /var/www/mediawiki/sites/wiki1.doe.com>
  Options FollowSymLinks
  AllowOverride All
  Require all granted
</Directory>

# Possible values include: debug, info, notice, warn, error, crit,
# alert, emerg.
LogLevel warn
#LogLevel debug

# the log files to use - one error log and one access log per wiki
ErrorLog  ${APACHE_LOG_DIR}/wiki1_error.log
CustomLog ${APACHE_LOG_DIR}/wiki1.log combined

Directory Layout

See the following paragraphs for examples and explanation of the parts of this layout.

Overview

Details

code

The code directory holds the common MediaWiki code. Its Localsettings.php functions as a splitter that dynamically selects the LocalSettings of the virtual host that each http request targets. Please note that the virtual host environment already gives a security environment for the current request and might e.g. limit directory access to the necessary pair of code/site directories.

Localsettings.php (code)

<?php 
/**
 * MediaWiki wikifamily handling code
 *
 * Taken from Drupal code to tap multisite configuration.
 */
/**
 * Tell whether $needle occurs anywhere inside $haystack.
 *
 * @param string $needle   the text to look for
 * @param string $haystack the text to search in
 * @return bool true when $needle is found at any position (position 0 included)
 */
function contains($needle, $haystack)
{
    $position = strpos($haystack, $needle);
    if ($position === false) {
        return false;
    }
    return true;
}

/**
 * Find the appropriate configuration directory for the current request.
 *
 * The result is the requested server name with every ".q." infix collapsed
 * to "."; the caller uses it as a directory name below $confdir.
 *
 * @param string $confdir main configuration directory (only used for the debug output)
 * @param string $domain  farm domain (only used for the debug output)
 * @param bool   $debug   when true, print what is being looked up
 * @return string|null the configuration name, or NULL when there is no usable server name
 */
function findConfiguration($confdir,$domain,$debug) {
  // SERVER_NAME is not guaranteed to be set (e.g. when started from the command line)
  $host=isset($_SERVER['SERVER_NAME']) ? (string) $_SERVER['SERVER_NAME'] : "";
  $host=str_replace(".q.",".",$host);
  if ($debug) {
     // NOTE(review): depending on the web server setup the name may originate from
     // the request itself, so it is escaped before being echoed into the page
     print ("find Configuration for ".htmlspecialchars($host,ENT_QUOTES)." in ".$confdir." for domain ".$domain."<br>");
  }
  if (!$host) return NULL;
  // the result ends up in a require path - refuse anything that could point outside $confdir
  if (strpbrk($host,"/\\\0")!==false || strpos($host,"..")!==false) return NULL;
  return $host;
}

/**
 * Load the given settings file when it is present; do nothing otherwise.
 *
 * The file is pulled in with require_once from inside this function.
 *
 * @param string $settings path of the settings file to load
 */
function optionalRequireSettings($settings) {
  if (!file_exists($settings)) {
    return;
  }
  require_once $settings;
}
#
# Farm splitter: pick the configuration of the wiki that the current request
# targets and load its settings; stop with an error page if there is none.
#
# switch on for debugging
$debug=false;
# the main configuration directory in which all wiki farm member sites are configured
$confdir = '/var/www/mediawiki/sites';
# The Prefix to use for local/specialized configuration files
$prefix="BITPlan";
# find the configuration for the current request
$domain="bitplan.com";
$conf=findConfiguration($confdir,$domain,$debug);
# enable the next line to force the "no configuration found" branch
#$conf=NULL;
# if we found a configuration then use it
if ($conf) {
  # make sure all farm members use the same .smw.json file
  # https://www.semantic-mediawiki.org/wiki/Help:$smwgConfigFileDir
  global $smwgConfigFileDir;
  global $wgWikiFarmSite;
  # the site id is the configuration name without the ".<domain>" suffix
  $wgWikiFarmSite=str_replace(".".$domain,"",$conf);
  # with $debug switched on: show all PHP errors and exception details
  if ($debug) {
    ini_set('display_errors', '1');
    ini_set('display_startup_errors', '1');
    error_reporting(E_ALL);
    $wgShowExceptionDetails=true;
  }
  $smwgConfigFileDir = $confdir;
  #
  # first use the specific LocalSettings for the site
  # (mandatory: require_once stops the request if the file is missing)
  $lsettings="$confdir/$conf/LocalSettings.php";
  if ($debug) {
     print ("Localsettings from ".$lsettings);
  }
  require_once( $lsettings );
  # then use the global settings for the farm
  # if there are further specific settings for the site  use them
  # (both files are optional and loaded in this order: farm wide first, site specific second)
  $gsettings=array("$confdir/{$prefix}Settings.php","$confdir/$conf/{$prefix}Settings.php");
  foreach ($gsettings as $gsetting) {
     if (file_exists($gsetting)) {
        require_once($gsetting);		 
     }
  }
} else {
    # no configuration: fall back to the stock MediaWiki "no LocalSettings" handling
    # relative to the current directory, then stop
    $IP = '.';
    require_once( './includes/DefaultSettings.php' ); # used for printing the version
    require_once( './includes/NoLocalSettings.php' );
    # NOTE(review): $wgServer is not set in this file - presumably it comes from DefaultSettings.php
    die("wikifarm could not find configuration for $wgServer");
}
?>

worksite.php

<?php
// see https://www.mediawiki.org/wiki/Manual:Wiki_family#Updating_wikifarm_from_the_commandline
/**
 * Show an error and stop the script.
 *
 * The message is written to STDERR and the process ends with exit status 1,
 * so that a calling shell or cron job can tell that the run failed
 * (die() with a string would print to stdout and exit with status 0).
 *
 * @param string $msg the message to show
 */
function error($msg) {
  fwrite(STDERR, $msg.PHP_EOL);
  exit(1);
}

// check command line arguments: $argv[0] is the script itself,
// so the site name and the work to do are $argv[1] and $argv[2]
if (count($argv)<3) {
  error("usage: worksite [sitename] [update|runJobs|debug|updateEntityCountMap|updateNameSpaceCache]");
}

//$argv gets commandline arguments
$siteid= $argv[1]; // siteid e.g. test
$work  = $argv[2]; // work to do e.g. update

// set domain name - the given site name is used as is
#$site  = $siteid. '.bitplan.com';
$site  = $siteid;
// pretend the request came in for that site so that the farm's
// server name based configuration lookup selects it
$_SERVER['SERVER_NAME'] = $site;
$_SERVER['HTTP_HOST'] = $site;
$_MWCONFIGFILE="/var/www/mediawiki/sites/$site/FullLocalSettings.php";
define( 'MW_CONFIG_FILE', $_MWCONFIGFILE); 
echo '--------------------------------------
Running '.$work.'.php for ' . $site . '
--------------------------------------
';
switch ($work) {
case "debug":
  // only show what would be used
  echo "$work in progress ...".PHP_EOL;
  echo "MWCONFIGFILE: ".$_MWCONFIGFILE.PHP_EOL;
  #include($_MWCONFIGFILE);
  global $cargoavailable;
  echo "Cargo:".$cargoavailable.PHP_EOL;
  break;
case "updateEntityCountMap":
  // this script lives in the SemanticMediaWiki extension, not in maintenance/
  include("extensions/SemanticMediaWiki/maintenance/updateEntityCountMap.php");
  break;
case "updateNameSpaceCache":
  // not implemented yet
  #$language=
  #$namespaces=$language->getNamespaces();
  #foreach ($namespace ...)
  break;
default: 
  // any other work is taken as the name of a script in maintenance/;
  // include yields false when the file cannot be loaded - report that as a failure
  // instead of ending "successfully" after a mere warning
  if ((include "maintenance/$work.php") === false) {
    error("could not load maintenance/$work.php");
  }
}

?>

runjobs

crontab entry

# runjobs for multiple sites e.g. rq,wiki ... 
*/1 * * * * /srv/www/mediawiki/code/runjobs

runjobs bash script

#!/bin/bash
# WF 2015-06-08
# run jobs for all sites
#
# Reads "<hostname>.sites" from the code directory. Every line that is neither
# a comment nor blank holds "<siteid> <site>". For each entry worksite.php is
# started with the runJobs task and its output is appended to
# /var/log/mediawiki/runJobs_<siteid>.log

# do not pile up: give up when too many runjobs processes show up
# (the count includes this invocation)
jobs=$(pgrep -fla runjobs | wc -l)
if [ "$jobs" -gt 3 ]
then
  echo "$jobs runjobs already running ..."
  exit 1
fi
# the sites list is read relative to the code directory - stop if it is not reachable
cd /srv/www/mediawiki/code || exit 1
host=$(hostname)
# skip comment lines and blank lines
grep -Ev '^(#|[[:space:]]*$)' "$host.sites" | while read -r siteid site
do
  # stdin is taken from /dev/null so that php can never consume the rest of the sites list
  /usr/bin/php /srv/www/mediawiki/code/worksite.php "$site" runJobs \
  --conf "/srv/www/mediawiki/sites/$site/FullLocalSettings.php" >> "/var/log/mediawiki/runJobs_$siteid.log" 2>&1 < /dev/null
done

index.php

<?php
#
# Landing page for wikifarm servers e.g. merkur.bitplan.com
#
# displays wiki farm entries
# 
# WF 2020-10-01
#
#

  # settings of the landing page; $domain, $hostname and $isBackup are read
  # by siteLink() via "global"
  global $domain;
  # the farm domain - site directories are recognized by this name part
  $domain="bitplan.com";
  global $hostname;
  # name of the machine this page is served from
  $hostname=gethostname();
  global $isBackup;
  # when true siteLink() inserts the host name into the generated links
  $isBackup=true;
  # host part used when building the site urls for logo and statistics
  $server='q';
  # picture shown next to the Tux logo in the page header
  $serverLogo="https://upload.wikimedia.org/wikipedia/commons/thumb/6/65/Desmond_Llewelyn_01.jpg/330px-Desmond_Llewelyn_01.jpg";
  # icon shown for sites whose directory contains a .git entry
  $gitlogo="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png";

  /**
   * Build an HTML anchor element.
   *
   * Both arguments are inserted as they are (no escaping), so $ref may
   * itself contain markup such as an img element.
   *
   * @param string $pLink the link target
   * @param string $ref   the content of the anchor
   * @return string the anchor markup
   */
  function getLink($pLink,$ref) {
    $anchor="<a href='{$pLink}'>{$ref}</a>";
    return $anchor;
  }
  /**
   * Build a link to the given site of the farm.
   *
   * The target is http://<site>.<domain>; while the global $isBackup is set
   * the global $hostname is inserted between site and domain.
   *
   * @param string $site the site id, e.g. the first part of the site's host name
   * @param string $ref  the content of the link (text or markup)
   * @return string the anchor markup
   */
  function siteLink($site,$ref) {
    global $domain, $hostname, $isBackup;
    $hostPart=$isBackup ? ".".$hostname : "";
    $url="http://".$site.$hostPart.".".$domain;
    return getLink($url,$ref);
  }
  /**
   *  Apply a regular expression to the complete content of a file
   *  (approach taken from https://stackoverflow.com/a/10663661/1497139).
   *
   *  @param string $file    path of the file to search
   *  @param string $pattern PCRE pattern including delimiters
   *  @return array|string the preg_match_all result, or the text
   *                       "<file> does not exist" when there is no such file
   */
  function fileGrep($file,$pattern) {
     if (file_exists($file)) {
       $content = file_get_contents ($file);
       preg_match_all ($pattern, $content, $found);
       return $found;
     }
     return "$file does not exist";
  }

  /**
   * Pick the first hit of the capture group out of a preg_match_all result.
   *
   * @param mixed $matches a preg_match_all result for a pattern with exactly
   *                       one capture group (anything else yields "?")
   * @return string the first captured text, or "?" when there is none
   */
  function getMatch($matches) {
    if (!is_array($matches) || count($matches)!=2) {
      return "?";
    }
    $captured=$matches[1];
    return count($captured)>0 ? $captured[0] : "?";
  }

  /**
   * get the setting for the given variable
   *
   * Looks for an assignment of the form  $variable = "value"  in the given
   * settings file and returns the value of the first one found. Only double
   * quoted values are recognized.
   *
   * @param string $lsettings path of the settings file
   * @param string $variable  variable name without the leading $
   * @return string the value, or "?" when the file or the assignment is missing
   */
  function getSetting($lsettings,$variable) {
    # ^[^#\n]* with the m flag: everything in front of the variable has to be on
    # the same line and free of "#", otherwise commented-out assignments would match;
    # [^"]* stops at the first closing quote instead of the last one on the line
    $pattern='/^[^#\n]*\$'.preg_quote($variable,'/').'\s*=\s*"([^"]*)"/m';
    $matches=fileGrep($lsettings,$pattern);
    return getMatch($matches);
  }

  /**
   * check the webpage with the given url
   * for the given regexp pattern
   *
   * The page is only fetched when the request carries a "state" parameter;
   * otherwise just the link is returned.
   *
   * @param string $url     the page to check
   * @param string $pattern PCRE pattern with exactly one capture group
   * @return string "❌" when the page is not available, else a "stats" link
   *                followed by the captured text and "✅", or by "?" when
   *                the pattern did not match
   */
  function checkPage($url,$pattern) {
    $state="";
    # only do the check if the state option was requested
    if (isset($_GET['state'])) {
      $file_headers = @get_headers($url);
      #print_r($file_headers);
      # the first header is the status line, e.g. "HTTP/1.1 404 Not Found" -
      # accept any protocol version and reason phrase
      if (!$file_headers || preg_match('#^HTTP/\S+\s+404\b#',$file_headers[0])) {
        return "❌";
      }
      $page = file_get_contents($url);
      # the fetch itself may still fail (e.g. server error)
      if ($page === false) {
        return "❌";
      }
      preg_match_all ($pattern, $page, $matches);
      $pages=getMatch($matches);
      if ($pages=="?") {
        $state="?";
      } else {
        #var_dump($matches);
        $state=$pages." ✅";
      }
    }
    return getLink($url,"stats")."&nbsp;".$state;
  }

  /**
   * Wrap content into an HTML element.
   *
   * @param string $tag     the element name
   * @param mixed  $content the content placed between opening and closing tag
   * @param string $attr    attribute text appended to the element name as is
   *                        (has to start with a blank when not empty)
   * @return string the markup
   */
  function tag($tag,$content,$attr="") {
    $open="<".$tag.$attr.">";
    $close="</".$tag.">";
    return $open.$content.$close;
  }

  /**
   * Wrap content into a table cell.
   *
   * @param mixed  $content the cell content
   * @param string $attr    attribute text for the td element
   * @return string the td markup
   */
  function td($content,$attr="") {
    $cell=tag("td",$content,$attr);
    return $cell;
  }

  # set to true for debugging: PHP errors and startup errors are then
  # displayed in the page and all error levels are reported
  # NOTE(review): this is switched on here - consider false for a public server
  $debug=true;
  if ($debug) {
    ini_set('display_errors', '1');
    ini_set('display_startup_errors', '1');
    error_reporting(E_ALL);
    $wgShowExceptionDetails=true;
  }
?>
<html>
	<body>
    <h1>Welcome to <?= $hostname ?></h1>
    <img height='150' src='https://upload.wikimedia.org/wikipedia/commons/a/af/Tux.png'/>
    <img height='150' src='<?= $serverLogo ?>'/>
<br>
<?php
  print (getLink("?state=true","wikifarm with stats")."<br>");
  print (getLink("?nostats","wikifarm without stats"));
?>
	 <h2>Wiki Farm</h2>
   <table>
   <tr><th>#</th><th>site</th><th>logo</th><th>database</th><th>size MB</th><th>state</th></tr>
<?php 
  # print one table row per farm site found in the sites directory
  $sitedir="/var/www/mediawiki/sites";
  $index=0;
  # picks the number cell that follows the "Content pages" (en) / "Inhaltsseiten" (de)
  # label on Special:Statistics; the alternation has to be a (non capturing) group -
  # in square brackets it would only be a character class
  $statsPattern='/(?:Content pages|Inhaltsseiten)..a...td..td class="mw-statistics-numbers".([^<]*)/';
  $entries=scandir($sitedir);
  if ($entries===false) {
    # sites directory not readable: show an empty table
    $entries=array();
  }
  foreach ($entries as $fileName) {
    # only entries carrying the farm domain in their name are sites
    if (strpos($fileName,$domain) === false) {
      continue;
    }
    $site=$fileName;
    $currentSiteDir=$sitedir."/".$site;
    $lsettings=$currentSiteDir."/LocalSettings.php";
    $logo=getSetting($lsettings,'wgLogo');
    $database=getSetting($lsettings,'wgDBname');
    # size of today's full dump in MB - there may be no dump for this database
    $sqldump="/var/backup/sqlbackup/today/".$database."_full.sql";
    $dbsize=is_file($sqldump) ? round(filesize($sqldump)/1024/1024) : "?";
    $siteid=str_replace(".".$domain,"",$fileName);
    $index++;
    $siteurl="http://".$siteid.".$server.".$domain;
    $img="<img height='80px' src='".$siteurl.$logo."'>";
    $statsurl=$siteurl."/index.php/Special:Statistics";
    $webstate=checkPage($statsurl,$statsPattern);
    # git icon for sites whose directory is under version control
    $gitimage="";
    if (file_exists($currentSiteDir."/.git")) {
       $gitimage="<img width='32px' src='".$gitlogo."'/>";
    }
    # lock icon for sites whose images directory has an .htaccess file
    $lockstate="";
    if (file_exists($currentSiteDir."/images/.htaccess")) {
      $lockstate="🔒";
    }
    $webstate=$lockstate.$gitimage.$webstate;
    print ("<tr>".td($index).td(siteLink($siteid,$siteid)).td(siteLink($siteid,$img)).td($database).td($dbsize," align='right'").td($webstate)."</tr>");
  }
?>
	  </table>	  
	</body>
</html>