Commit a58d2faeafe570e74f906ff571f0afde5a837f17

Authored by nbm
1 parent bfb5992e

Script that detects inconsistencies between the KT repository and the stored
documents on disk (using KTOnDiskPathStorageManager only, for now).


git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@3606 c91229c3-7414-0410-bfa2-8a42b809f60b
Showing 1 changed file with 166 additions and 0 deletions
bin/cleanup.php 0 → 100644
  1 +<?php
  2 +
  3 +require_once('../config/dmsDefaults.php');
  4 +require_once(KT_LIB_DIR . '/config/config.inc.php');
  5 +require_once(KT_LIB_DIR . '/browse/browseutil.inc.php');
  6 +
  7 +$oConfig =& KTConfig::getSingleton();
  8 +$fsPath = $oConfig->get('urls/documentRoot');
  9 +
  10 +$aIgnore = array(
  11 + '.', '..',
  12 + 'CVS',
  13 + '.empty',
  14 + '.htaccess',
  15 + '.cvsignore',
  16 +);
  17 +
  18 +$aFoldersToRemove = array();
  19 +$aFilesToRemove = array();
  20 +$aRepoDocumentProblems = array();
  21 +$aRepoFolderProblems = array();
  22 +$aRepoVersionProblems = array();
  23 +
  24 +function checkFileVersion($path, $version) {
  25 + $fod = KTBrowseUtil::folderOrDocument($path);
  26 + if ($fod === false) {
  27 + // No document by that name, so no point checking version
  28 + // information.
  29 + return;
  30 + }
  31 + return true;
  32 +}
  33 +
  34 +function checkFile($path, $first = true) {
  35 + $pattern = "/^(.*)-((?:\d+)\.(?:\d+))$/";
  36 + if (preg_match($pattern, $path, $matches)) {
  37 + if (checkFileVersion($matches[1], $matches[2])) {
  38 + // If it's a version, then don't check for full path
  39 + // below...
  40 + return;
  41 + }
  42 + }
  43 + $fod = KTBrowseUtil::folderOrDocument($path);
  44 + if ($fod === false) {
  45 + $GLOBALS["aFilesToRemove"][] = $path;
  46 + return;
  47 + }
  48 +}
  49 +
  50 +function checkDirectory($path) {
  51 + global $fsPath, $aIgnore;
  52 + $fullpath = sprintf("%s/%s", $fsPath, $path);
  53 +
  54 + if (!is_dir($fullpath)) {
  55 + print "Not a directory: $fullpath\n";
  56 + }
  57 +
  58 + if ($path === '/Deleted') {
  59 + // Deleted files handled separately.
  60 + return;
  61 + }
  62 +
  63 + if (!empty($path)) {
  64 + $fod = KTBrowseUtil::folderOrDocument($path);
  65 + if ($fod === false) {
  66 + $GLOBALS["aFoldersToRemove"][] = $path;
  67 + return;
  68 + }
  69 + }
  70 +
  71 + $dh = @opendir($fullpath);
  72 + if ($dh === false) {
  73 + print "Could not open directory: $fullpath\n";
  74 + }
  75 + while (($filename = readdir($dh)) !== false) {
  76 + if (in_array($filename, $aIgnore)) { continue; }
  77 + $subrelpath = sprintf("%s/%s", $path, $filename);
  78 + $subfullpath = sprintf("%s/%s", $fsPath, $subrelpath);
  79 + if (is_dir($subfullpath)) {
  80 + checkDirectory($subrelpath);
  81 + }
  82 + if (is_file($subfullpath)) {
  83 + checkFile($subrelpath);
  84 + }
  85 + }
  86 +}
  87 +
  88 +function checkRepoFolder($oFolder) {
  89 + global $fsPath, $aRepoFolderProblems;
  90 + $sFolderPath = sprintf("%s/%s", $oFolder->getFullPath(), $oFolder->getName());
  91 + $sFullPath = sprintf("%s/%s", $fsPath, $sFolderPath);
  92 + if (!is_dir($sFullPath)) {
  93 + $aRepoFolderProblems[] = $sFolderPath;
  94 + }
  95 +}
  96 +
  97 +function checkRepoDocument($oDocument) {
  98 + global $fsPath, $aRepoDocumentProblems;
  99 + $sDocumentPath = $oDocument->getStoragePath();
  100 + $sFullPath = sprintf("%s/%s", $fsPath, $sDocumentPath);
  101 + if (!is_file($sFullPath)) {
  102 + $aRepoDocumentProblems[] = $sDocumentPath;
  103 + }
  104 + checkRepoVersions($oDocument);
  105 +}
  106 +
  107 +function checkRepoVersions($oDocument) {
  108 + global $fsPath, $aRepoVersionProblems;
  109 + $table = "document_transactions";
  110 + $aVersions = DBUtil::getResultArrayKey(array("SELECT DISTINCT version FROM $table WHERE document_id = ?", array($oDocument->getID())), "version");
  111 + foreach($aVersions as $sVersion) {
  112 + if ($sVersion == $oDocument->getVersion()) {
  113 + continue;
  114 + }
  115 + $sDocumentPath = $oDocument->getStoragePath();
  116 + $sFullPath = sprintf("%s/%s-%s", $fsPath, $sDocumentPath, $sVersion);
  117 + if (!is_file($sFullPath)) {
  118 + $aRepoVersionProblems[] = array($sDocumentPath, $sVersion);
  119 + continue;
  120 + }
  121 + }
  122 +}
  123 +
  124 +checkDirectory("");
  125 +
  126 +print "\n";
  127 +print "Would remove these folders (and all their contents):\n";
  128 +foreach ($aFoldersToRemove as $path) {
  129 + print "\t$path\n";
  130 +}
  131 +print "\n";
  132 +print "Would remove these files:\n";
  133 +foreach ($aFilesToRemove as $path) {
  134 + print "\t$path\n";
  135 +}
  136 +print "\n";
  137 +
  138 +$aFolders =& Folder::getList();
  139 +foreach ($aFolders as $oFolder) {
  140 + checkRepoFolder($oFolder);
  141 +}
  142 +
  143 +print "These folders are not on the filesystem:\n";
  144 +foreach ($aRepoFolderProblems as $path) {
  145 + print "\t$path\n";
  146 +}
  147 +
  148 +$aDocuments =& Document::getList(array("status_id = ?", array(LIVE)));
  149 +foreach ($aDocuments as $oDocument) {
  150 + checkRepoDocument($oDocument);
  151 +}
  152 +print "\n";
  153 +
  154 +print "These documents are not on the filesystem:\n";
  155 +foreach ($aRepoDocumentProblems as $path) {
  156 + print "\t$path\n";
  157 +}
  158 +print "\n";
  159 +
  160 +print "These documents have versions not on the filesystem:\n";
  161 +foreach ($aRepoVersionProblems as $path) {
  162 + list($path, $version) = $path;
  163 + print "\t$path - version $version\n";
  164 +}
  165 +print "\n";
  166 +