####################################################################################################
## Program_Name ## Sales_Channel_Merge.pl ##
## Written By ## Martin Roberts ##
## Date ## 5th January 2010 ##
## Parameters ## ##
## Description ## Checks the input directory for all Sales Channel files. Converts each ##
## ## file into a series of csv files for each worksheet, suffixed by a unique ##
## ## identifier. Concatenates each resulting file of the same class to ##
## ## produce a series of uniquely named files for each worksheet that ##
## ## represents a concatenation of all data from all Sales Channel workbooks ##
## ## Orgininal Excel files are moved to an Archive directory. ##
####################################################################################################
## Change Log ##
####################################################################################################
## Date ## Author ## Change Ref. ## Change Description ##
## ## ## ## ##
## ## ## ## ##
## ## ## ## ##
####################################################################################################
# INCLUDE NON_STANDARD PACKAGES
use strict "vars";
use Spreadsheet::ParseExcel;
use File::Copy;
# DEFINE DIRECTORIES AND FILES FOR USE IN PROCESS#
my $log_dir = "C:\Documents and Settings\459964\Desktop\Test";
my $log_file = "EXCEL_TO_CSV_Merge.log";
my $base_dir = "C:\Documents and Settings\459964\Desktop\Test";
# DECLARE GLOBAL VARIABLES
my $log_text = "";
my $file_text = "";
my $current_file = "";
my $script_name = "EXCEL_TO_CSV_Merge.pl";
my @file_contents;
my @worksheet_names;
my @file_list;
my $sheet_no = 0;
# RECORD DETAILS OF REQEST FOR LOGGING
my $date = &getDate();
$log_text = "$date\nExcel File List :\n";
# RETRIEVE AND VALIDATE INPUT PARAMETERS
if ($ARGV[0] eq null || length($ARGV[0]) == 0 || $ARGV[1] eq null || length($ARGV[1]) == 0
|| $ARGV[2] eq null || length($ARGV[2]) == 0 || $ARGV[3] eq null || length($ARGV[3]) == 0) {
error("Invalid Input Parameters to $script_name");
}
my $file_pattern = $ARGV[0];
my $source_dir = "$base_dir\\$ARGV[1]";
my $target_dir = "$base_dir\\$ARGV[2]";
my $delimiter = $ARGV[3];
$log_text = $log_text . "$script_name Called with Parameters\nFile Pattern - $ARGV[0]\nSource Directory - $ARGV[1]\nTarget Directory - $ARGV[2]\nDelimeter - $ARGV[3]\n";
my $temp_dir = $source_dir . "\\Temp";
my $archive_dir = $source_dir . "\\Archive";
# SCAN SOURCE DIRECTORY FOR FILES
chdir("$source_dir") or error("Unable to change to the Sales Channel Source Directory");
my @files_found = <$file_pattern*>;
# CHECK TEMP DIRECTORY EXISTS, OTHERWISE CREATE IT
if (! -d "$temp_dir") {
mkdir("Temp");
}
# CHECK WORKSHEET ARCHIVE DIRECTORY EXISTS, OTHERWISE CREATE IT
if (! -d "$archive_dir") {
mkdir("Archive");
}
# CONVERT EACH EXCEL FILE INTO ITS COMPONENT WORKSHEET CSV FILES
my $file_id = 1;
foreach my $file (@files_found) {
$log_text = $log_text . $file . "\n";
convertExcelToCsv($file, $file_id);
# MOVE EXCEL FILE FROM SOURCE TO ARCHIVE
move("$source_dir\\$file","$archive_dir\\$file") or error("Unable to move Excel File $archive_dir\\$file to Archive Directory");
$file_id++;
}
# MERGE WORKSHEETS OF SAME NAME
my $ws_no = 0;
foreach my $worksheet_name (@worksheet_names) {
open (OUTPUT, ">$target_dir\\$worksheet_name.csv") or error("Unable to open Output File - $target_dir\\$worksheet_name");
close OUTPUT;
my $file_string = @file_list[$ws_no];
my (@file_string_components) = split(/\|\|/,$file_string);
my $sheet_ind = 0;
foreach my $file_found (@file_string_components) {
concatenateFiles("$source_dir\\$worksheet_name.csv","$temp_dir\\$file_found",$sheet_ind);
#DELETE TEMP FILES
unlink("$temp_dir\\$file_found") or error("Unable to Delete Temp File, $temp_dir\\$file_found");
$sheet_ind++;
}
$ws_no++;
}
sub concatenateFiles {
my($target_file, $source_file, $sheet_id) = @_;
# CONCATENATE FILES
open (SOURCE,"<$source_file") or error("Unable to open Source File - $source_file");
open (TARGET, ">>$target_file") or error("Unable to open Target File - $target_file");
flock (TARGET,2);
my $line = "";
my $line_id = 0;
while ($line = <SOURCE>) {
# ONLY PRINT HEADER ROW FOR FIRST WORKSHEET FILE
if ($line_id != 0 ||($line_id == 0 && $sheet_id == 0)) {
print TARGET $line;
}
$line_id++;
}
close SOURCE;
close TARGET;
}
sub convertExcelToCsv {
my($excel_file, $file_id) = @_;
# OPEN EXCEL FILE
my $Excel = Spreadsheet::ParseExcel::Workbook->Parse("$source_dir/$excel_file") or error ("Unable to read File");
my $line = "";
# ITERATE THROUGH WORKSHEETS
foreach my $sheet (@{$Excel->{Worksheet}}) {
my $sheet_name = $sheet->{Name};
my $csv_file = $sheet_name . "_" . $file_id . ".csv";
if (!existsWorksheetName($sheet_name)) {
@worksheet_names[$sheet_no] = $sheet_name;
@file_list[$sheet_no] = $csv_file;
$sheet_no++;
}
else {
my $existing_sheet_no = getExistingSheetNo($sheet_name);
@file_list[$existing_sheet_no] = @file_list[$existing_sheet_no] . "||" . $csv_file;
}
# OPEN CSV FILE
open( fileS, ">$temp_dir/$csv_file");
flock(fileS, 2);
$sheet->{MaxRow} ||= $sheet->{MinRow};
# ITERATE THROUGH ROWS
my $is_blank_row = 1;
foreach my $row ($sheet->{MinRow} .. $sheet->{MaxRow}) {
$sheet->{MaxCol} ||= $sheet->{MinCol};
# ITERATE THROUGH COLUMNS
foreach my $col ($sheet->{MinCol} .. $sheet->{MaxCol}) {
# ADD DELIMITER BEFORE EACH VALUE, EXCEPT FIRST
if ($col > 0) {
$line = $line . $delimiter;
}
my $cell = $sheet->{Cells}[$row][$col];
# ENCLOSE THE CELL VALUE IN DOUBLE QUOTES AND ADD TO THE DELIMITED ROW
if ($cell) {
# INDICATE IF THE ROW CONTAINS NON BLANK CELLS
if (length($cell->{Val}) > 0) {
$is_blank_row = 0;
}
$line = $line . "\"" . $cell->{Val} . "\"";
}
}
# IF THE NON BLANK ROW INDICATOR IS SET THEN PRINT THE ROW
if ($is_blank_row == 0) {
print fileS "$line\n";
}
$line = "";
$is_blank_row = 1;
}
close(fileS);
}
}
writeLogFile();
sub existsWorksheetName {
my($ws_name) = @_;
my $found = 0;
foreach my $ws_id (@worksheet_names) {
if ($ws_id eq $ws_name) {
$found = 1;
}
}
return $found;
}
sub getExistingSheetNo {
my($ws_name) = @_;
my $id = 0;
my $sheet_id = 0;
foreach my $ws_id (@worksheet_names) {
if ($ws_id eq $ws_name) {
$sheet_id = $id;
}
$id++;
}
return $sheet_id;
}
sub getDate {
my @Weekdays = ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday');
my @Months = ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December');
my $Month = $Months[$Now[4]];
my $Weekday = $Weekdays[$Now[6]];
my $Hour = $Now[2];
my $AMPM = "";
if ($Hour > 12) {
$Hour = $Hour - 12;
$AMPM = "PM";
} else {
$Hour = 12 if $Hour == 0;
$AMPM ="AM";
}
my $Minute = $Now[1];
$Minute = "0$Minute" if $Minute < 10;
my $Year = $Now[5]+1900;
return "$Weekday, $Month $Now[3], $Year $Hour:$Minute $AMPM";
}
sub error {
my($err_id) = @_;
$file_text = "";
$log_text = $log_text . "Exception Found - $err_id\n";
writeLogFile();
}
sub writeLogFile {
open( fileLOG, ">>$log_dir/$log_file");
flock(fileLOG, 2);
print fileLOG "$log_text\n";
close(fileLOG);
}