Posted by mikeytown2 on July 19, 2012 at 12:09am
We had an issue where our NFS drive ran out of space and nodes where being created with bad file data. This occurred on 2012/5/21. We needed a way to find all nodes that have bad file data. Below is that solution.
Note that this script will add indexes for the fid to CCK tables. If not doing this, the big query never finished.
Hope someone finds this useful :)
<?php
$joins = array();
$selects = array();
$indexes = array();
foreach (content_types() as $node_type => $info) {
foreach ($info['fields'] as $field_name => $field) {
// Skip if not a filefield.
if ($field['type'] != 'filefield') {
continue;
}
// Get db info from CCK.
$db_info = content_database_info($field);
$alias = $db_info['table'];
$column = $db_info['columns']['fid']['column'];
// Add a,b,c to the alias name if table already used.
$character = 97; // 97 = a.
while (isset($joins[$alias])) {
$alias .= chr($character);
$character++;
}
// Generate the joins and the selects.
$joins[$alias] = 'LEFT JOIN {' . $db_info['table'] . '} AS ' . $alias . ' ON ' . $alias . '.' . $column . ' = files.fid';
$selects[$alias] = $alias . '.nid';
// Generate the database index queries.
if (strpos($db_info['table'], 'content_field') === 0) {
$index = 'fid';
}
else {
$index = substr($column, 6);
}
$indexes[$alias] = 'ALTER TABLE {' . $db_info['table'] . '} ADD INDEX ' . $index . ' ( ' . $column . ' ) ';
}
}
// Add indexes to database so big query will run.
foreach ($indexes as $sql) {
db_query($sql);
}
// 2012/5/20 - 2012/5/22
$start = 1337472000;
$end = 1337644800;
// Generate large query.
$query = "
SELECT
COALESCE (" . implode(", ", $selects) . ") as nid,
files.filepath,
files.filemime
FROM files
" . implode("\n", $joins) . "
WHERE COALESCE (" . implode(", ", $selects) . ") IS NOT NULL
AND files.timestamp BETWEEN " . $start . " AND " . $end;
// Run query
$query = db_query($query);
$bad_nodes = array();
$count = 0;
while ($row = db_fetch_array($query)) {
if (!isset($bad_nodes[$row['nid']])) {
$count++;
}
// Make sure file exists and filesize if greater than 0.
if (file_exists($row['filepath']) && filesize($row['filepath']) > 0) {
// If file is an image, make sure it is not corrupted
if (strpos($row['filemime'], 'image/') !== FALSE) {
if (getimagesize($row['filepath'])) {
continue;
}
}
// If file is not an image assume it is good.
else {
continue;
}
}
// Add image to the bad nodes list.
$bad_nodes[$row['nid']][] = $row['filepath'];
}
// Output results.
echo count($bad_nodes) . ' out of ' . $count . ' nodes have bad image data ' . "<br />\n";
echo my_print($bad_nodes);
// Function taken from drupal.org/project/httprl
// httprl_pr()
/**
* Pretty print data.
*
* @param $data
* Data In.
* @return
* Human readable HTML version of the data.
*/
function my_print($data) {
// Get extra arguments passed in.
$data = func_get_args();
if (count($data) == 1) {
$data = array_pop($data);
}
// Remove non UTF-8 Characters, escape HTML markup, remove extra new lines.
$output = array_filter(explode("\n", htmlentities(iconv('utf-8', 'utf-8//IGNORE', print_r($data, TRUE)), ENT_QUOTES, 'UTF-8')));
// Whitespace compression.
foreach ($output as $key => $value) {
if (str_replace(' ', '', $value) == "(") {
$output[$key-1] .= ' (';
unset($output[$key]);
}
}
// Replace whitespace with html markup.
$output = str_replace(' ', ' ', nl2br(implode("\n", $output))) . '<br />';
return $output;
}
?>