#!/bin/sh
# Small script that walks through the month sub-directories of $base_dir,
# where gzipped backups of web server log files are kept, sorted by month.
# It extracts all the files, collects the log lines of the configured sites,
# sorts them by timestamp and appends them to a single $global_output_file,
# ready to be fed to a tool such as Webalizer to generate statistics.
# --- Configuration ----------------------------------------------------------
# Directory that contains one sub-directory of log backups per month.
base_dir="/root/httpd_log_backups"
# Sub-directories of $base_dir to visit, in this order (space separated).
dirs_to_scan='Jun Jul Aug Sep Oct Nov Dec'
# Patterns (matched case-insensitively by grep) selecting the log lines to keep.
grep_str="buildingoftheyear.bg"
grep_str1="sgradanagodinata.bg"
# Scratch files created inside each month directory: the raw grep matches
# and their sorted counterpart.
out_log="access_log_temp"
out_log_real="access_log_new"
# Merged log file, written inside $base_dir.
global_output_file="summed_access_log"
# decompress_logs DIR
#   Gunzip, in place, every regular *.gz file directly inside DIR.
#   A file that cannot be decompressed is reported on stderr and left as is.
#   The body runs in a subshell so its variables stay private.
decompress_logs() (
  for archive in "$1"/*.gz; do
    # An unmatched glob stays as the literal pattern; skip it.
    [ -f "$archive" ] || continue
    gzip -d -- "$archive" ||
      echo "warning: could not decompress $archive" >&2
  done
)

# recompress_logs DIR
#   Gzip (-9) every regular file directly inside DIR that is not already a
#   .gz archive. gzip replaces each file with its .gz counterpart and only
#   removes the original once the compressed copy has been written.
recompress_logs() (
  for log in "$1"/*; do
    [ -f "$log" ] || continue
    case "$log" in
      *.gz) continue ;;
    esac
    gzip -9 -- "$log" || echo "warning: could not compress $log" >&2
  done
)

# process_month_dir DIR
#   Decompress the logs in DIR, collect every line matching $grep_str or
#   $grep_str1 (case-insensitive), sort the matches by the timestamp in the
#   4th space-separated field and append them to
#   $base_dir/$global_output_file; finally gzip the logs again.
#   Reads the globals: grep_str, grep_str1, out_log, out_log_real, base_dir,
#   global_output_file.
#   Returns 0 on success, 1 if DIR is missing or the merge step failed.
process_month_dir() (
  dir=$1
  matches=$dir/$out_log
  sorted=$dir/$out_log_real
  rc=0

  if [ ! -d "$dir" ]; then
    echo "error: $dir is not a directory, skipping it" >&2
    return 1
  fi

  decompress_logs "$dir"

  # Start from an empty scratch file so leftovers of an aborted run are
  # never mixed into the result.
  if true > "$matches"; then
    for log in "$dir"/*; do
      [ -f "$log" ] || continue
      case "$log" in
        # Never read our own scratch files or archives that failed to unpack.
        "$matches" | "$sorted" | *.gz) continue ;;
      esac
      # A single grep with both patterns: a line mentioning both sites is
      # collected once, not twice. Exit status 1 (no match) is expected.
      grep -i -e "$grep_str" -e "$grep_str1" -- "$log" >> "$matches"
    done

    # Field 4 looks like "[10/Oct/2000:13:55:36"; sort by year, month name,
    # day, hour, minute, second. LC_ALL=C keeps the month-name ordering (M)
    # tied to the English abbreviations used in the logs.
    if LC_ALL=C sort -t ' ' -k 4.9,4.12n -k 4.5,4.7M -k 4.2,4.3n \
      -k 4.14,4.15n -k 4.17,4.18n -k 4.20,4.21n -- "$matches" > "$sorted"; then
      cat -- "$sorted" >> "$base_dir/$global_output_file" || rc=1
    else
      rc=1
    fi
    [ "$rc" -eq 0 ] || echo "error: could not merge the logs of $dir" >&2
  else
    echo "error: cannot write $matches" >&2
    rc=1
  fi

  # Drop the scratch files before recompressing so they are not archived
  # as if they were logs.
  rm -f -- "$matches" "$sorted"
  recompress_logs "$dir"
  return "$rc"
)

# Visit every configured month directory; $dirs_to_scan is intentionally
# left unquoted so it splits into one word per directory name.
for n in $dirs_to_scan; do
  echo "Extracting sorting and parsing files in -=$n=- directory"
  process_month_dir "$base_dir/$n" ||
    echo "warning: -=$n=- directory was not processed completely" >&2
done
