#!/usr/bin/perl -w
# compare microarray expression data to expression signature
# (DINA)
# adapt code for web use.
# add command line arguments
# (NOAM)
# 20081106
# Use regular expression to replace illegal characters in supplied column names
# (NOAM)
# 20081118
# Added nstore for easy sharing across multiple platforms (NOAM)
# 20090224
use strict;
use Storable qw(nstore store retrieve);;
use Getopt::Long; # command-line arguments

#prototypes (tell the perl compiler the number and type of parameters)
sub getArgs();
sub openFile($);
sub readExp($$);
sub PrintProgress($$);

###########################GLOBAL###############################################
#globals (required for getArgs())
my %args = ();              # hash for storing the arguments.
                            # the switches are the keys for the hash
                            
# please fill in yes if to build  new signature,  the file name from which to build the signature,the filename which
#is supposed to be compared, and the name of hte coloumn to be compared
# in the file to compare ( the coloumn of the Gfold).

# If yes causes recomputation of the signatures.
# Recommended: no (use --rec switch to set to "yes" from command line)
my $recomputeSig = "no";

# Signature filename
my $filename_sig = "allAnovas.csv";

# Comparison file name
my $filename_to_compare = "./ANOVAinsects.csv";
my $exp_file = "./exp.csv";
my $output_file = "./out.csv";

# get and validate arguments
if (!getArgs()) {
  print "Use $0 --help to get help\n";
  exit;
};

# use HTML output (change to 0 when running from command line)
my $HTML = 0;

my @siggenes; #array of the genes that will comprise the signature

# Max. P-Value (as is or with FDR)
my $maxpval = 0.05;

# Sets the size of the signature
my $NUM_OF_GENES_IN_SIG = 1000;
# Cancels pvalue filtering in signature generation
my $DONT_CONSIDER_PVALUES_IN_SIG = 0;
# Cancels pvalue filtering in correlation computation
my $DONT_CONSIDER_PVALUES_IN_CALC = 0; 

#if you change this value, set $recomputeSig=yes
my $FDR_CORRECTION = 0; # Use FDR? - should be 0 if DONT_CONSIDER_PVALUES_IN_CALC is 1

# The names of GFOLD columns to be compared
my @cols_to_compare = ();
#my @cols_to_compare = ("F_MJ_30","F_MJ_60","F_MJ_180","F_ACC_30","F_ACC_60","F_ACC_180","F_ABA_30","F_ABA_60","F_ABA_180","F_IAA_30","F_IAA_60","F_IAA_180","F_GA3_30","F_GA3_60","F_GA3_180","F_zeatin_30","F_zeatin_60","F_zeatin_180","F_brassino_30","F_brassino_60","F_brassino_180","F_SA_180");
# Respective P-Value columns
my @pvalcols = ();
#my @pvalcols=("P_MJ_30","P_MJ_60","P_MJ_180","P_ACC_30","P_ACC_60","P_ACC_180","P_ABA_30","P_ABA_60","P_ABA_180","P_IAA_30","P_IAA_60","P_IAA_180","P_GA3_30","P_GA3_60","P_GA3_180","P_zeatin_30","P_zeatin_60","P_zeatin_180","P_brassino_30","P_brassino_60","P_brassino_180","P_SA_180");
#my @cols_to_compare =("GFoldChange(nahG vs. control)","GFoldChange(ein2 vs. control)","GFoldChange(coi1 vs. control)");
#my @pvalcols=("p-value(Abrassic vs. control)","p-value(Focciden vs. control)","p-value(Mpersica vs. control)","p-value(Prapae vs. control)","p-value(avrPstDC vs. control)");
#my @cols_to_compare =("GFoldChange(Abrassic vs. control)","GFoldChange(Focciden vs. control)","GFoldChange(Mpersica vs. control)","GFoldChange(Prapae vs. control)","GFoldChange(avrPstDC vs. control)");
#my @pvalcols=("p-value(wt * wounded vs wt * control)","p-value(mut * wounded vs wt * wounded)");

#my @pvalcols=("p-value(wt * wounded vs wt * control)");
#my @cols_to_compare =("GFoldChange(wt * wounded vs wt * control)");

# Names of the experiments respectively (will appear in the results and log files)
my @experiment_names = ();
#my @experiment_names=("MJ30", "MJ60", "MJ180","ACC30","ACC60","ACC180","ABA30","ABA60","ABA180","IAA30","IAA60","IAA180","GA3 3h","GA3 6h","GA3 9h","zeatin30","zeatin60","zeatin180","brassino30","brassino60","brassino180","SA180");
#my @experiment_names =("systemicWtVscontrolWt","systemicTlk1VsSystemicWt");
#my @experiment_names =("nahG","ein2","coi1");
#my @experiment_names =("Abrassic","Focciden","Mpersica","Prapae","avrPstDC");
#my @experiment_names =("WT_WOUNDED");
###########################GLOBAL###############################################

# detect line ending type
# store old input record separator
my $old_input_rec_sep = $/;
# read the first 2K of the experiments list file (it should be the smaller of
# the two uploaded files). To read the entire file use "local $/;"
(*EXP) = openFile($exp_file);
my $file = do { local $/ = \2048; <EXP> };
close(*EXP);
# search for end of line character
if($file =~ /\r\n/) {      # dos
  $/ = "\r\n";
} elsif($file =~ /\n\r/) { # some mac
  $/ = "\n\r";
} elsif($file =~ /\r/) {   # weird other option
  $/ = "\r";
} else {                   # unix
  $/ = "\n";
}

if (!(readExp($exp_file, $filename_to_compare))) {
  exit -1;
}

# create temp sorted file
my $sorted = $filename_to_compare . "sorted";
my ($srtEntry, @sortedCSV);
my $tmpEntry;
# create sorted file
(*SRT) = openFile(">$sorted");
# open original CSV file
(*CSV) = openFile($filename_to_compare);
# read and print first line
my $srtLine = <CSV>;
print SRT $srtLine;
# sort the rest of the entries
@sortedCSV = sort (<CSV>);
close(*CSV);
# print sorted entries
foreach $srtEntry (@sortedCSV) {
  # remove empty lines
  if (length($srtEntry) > 2) {
    $tmpEntry = $srtEntry;
    $tmpEntry =~ s/,+//;
    print SRT $srtEntry if (length($tmpEntry) > 2);
  }
}
close(*SRT);
$filename_to_compare = $sorted;

######for random#######
#my @cols_to_compare =();
#my @pvalcols=();
#for (my $k = 0;$k < 30; $k++)
#{
   # $cols_to_compare[$k] = "g_SHUFFLE_$k";
  #  $pvalcols[$k] = "p_SHUFFLE_$k";
 #   $experiment_names[$k] = "SHUFFLE_$k";
#}

###################################
my $col_to_compare_name;
my $pvalcol;
my $experiment_name;
my $i=0;

# create output file
(*OUT) = openFile(">$output_file");
# Print the headers
#print "--,MJ30, MJ60, MJ180, ACC30, ACC60, ACC180, ABA30, ABA60, ABA180, IAA30, IAA60, IAA180, gib3h, gib6h, gib9h,zeatin30, zeatin60, zeatin180, brassino30, brassino60, brassino180,SA180\n";
print OUT "--,KO_apx_0h,sig_size,KO_Apx_15m,sig_size,KO_Apx_30m,sig_size,KO_Apx_90m,sig_size,KO_Apx_3h,sig_size,KO_Apx_6h,sig_size,KO_Apx_24h,sig_size,flu_30m,sig_size,flu_1h,sig_size,flu_2h,sig_size,KD_Sod,sig_size,fnr1,sig_size,MV_30m,sig_size,MV_1h,sig_size,MV_3h,sig_size,MV_6h,sig_size,MV_12h,sig_size,MV_24h,sig_size,Rotenone_3h,sig_size,Rotenone_12h,sig_size,AS_Aox,sig_size,TDNA_Aox,sig_size,TDNA_Aox_MLD,sig_size,AT,sig_size,Cat_0,sig_size,Cat_3h,sig_size,Cat_8h,sig_size,Ozone,sig_size,H2O2,sig_size,\n";
print STDOUT "Starting\n" if !($HTML);

my $prog = 0.0;
my $total = (scalar @experiment_names) * 29.0; # no. of experiments * no. of sig
$|++; # immediate flush
for my $col (@cols_to_compare)
{
    $col_to_compare_name = $col;
    $pvalcol = $pvalcols[$i];
    $experiment_name = $experiment_names[$i];
    
    printf STDOUT "Progress: %3d %%\r", 0 if !($HTML);
    my @temparr = CheckHormone("F_KO_apx_0h", "P_KO_apx_0h", "KO_apx_0h", $recomputeSig);
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_KO_apx_15min", "P_KO_apx_15min", "KO_apx_15min", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_KO_apx_30min", "P_KO_apx_30min", "KO_apx_30min", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_KO_apx_90min", "P_KO_apx_90min", "KO_apx_90min", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_KO_apx_3h", "P_KO_apx_3h", "KO_apx_3h", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_KO_apx_6h", "P_KO_apx_6h", "KO_apx_6h", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_KO_apx_24h", "P_KO_apx_24h", "KO_apx_24h", $recomputeSig));       
    PrintProgress(++$prog, $total);
    
    push(@temparr, CheckHormone("F_flu_30min", "P_flu_30min", "flu30min", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_flu_1h", "P_flu_1h", "flu1h", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_flu_2h", "P_flu_2h", "flu2h", $recomputeSig));
    PrintProgress(++$prog, $total);
    
    push(@temparr, CheckHormone("F_KD_SOD", "P_KD_SOD", "KD_SOD", $recomputeSig));
    PrintProgress(++$prog, $total);
    
    push(@temparr, CheckHormone("F_FNR1", "P_FNR1", "FNR1", $recomputeSig));
    PrintProgress(++$prog, $total);
    
    push(@temparr, CheckHormone("F_MV_30min", "P_MV_30min", "MV30min", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_MV_1h", "P_MV_1h", "MV1h", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_MV_3h", "P_MV_3h", "MV3h", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_MV_6h", "P_MV_6h", "MV6h", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_MV_12h", "P_MV_12h", "MV12h", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_MV_24h", "P_MV_24h", "MV24h", $recomputeSig));
    PrintProgress(++$prog, $total);
    
    push(@temparr, CheckHormone("F_rotenone_3h", "P_rotenone_3h", "rotenone3h", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_rotenone_12h", "P_rotenone_12h", "rotenone12h", $recomputeSig));
    PrintProgress(++$prog, $total);
      
    push(@temparr, CheckHormone("F_AOXas", "P_AOXas", "AOXas", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_AOX_TDNA", "P_AOX_TDNA", "AOX_TDNA", $recomputeSig));
    PrintProgress(++$prog, $total);
    push(@temparr, CheckHormone("F_AOX_MLD", "P_AOX_MLD", "AOX_MLD", $recomputeSig));
    PrintProgress(++$prog, $total);
    
    push(@temparr, CheckHormone("F_AT", "P_AT", "AT", $recomputeSig));
    PrintProgress(++$prog, $total);   
    push (@temparr, CheckHormone("F_CAT", "P_CAT", "CAT0", $recomputeSig));
    PrintProgress(++$prog, $total);
    push (@temparr, CheckHormone("F_CAT_3h", "P_CAT_3h", "CAT3", $recomputeSig));
    PrintProgress(++$prog, $total);
    push (@temparr, CheckHormone("F_CAT_8h", "P_CAT_8h", "CAT8", $recomputeSig));
    PrintProgress(++$prog, $total);
    push (@temparr, CheckHormone("F_ozone", "P_ozone", "ozone", $recomputeSig));
    PrintProgress(++$prog, $total); 
    push(@temparr, CheckHormone("F_H2O2", "P_H2O2", "H2O2", $recomputeSig));
    PrintProgress(++$prog, $total);    
    print OUT $experiment_name . "," . join(",", @temparr) . "\n";
    $i++;
    
    $recomputeSig = "no";
}
print STDOUT "\nFinished\n" if !($HTML);
$|--;
# restore old input record separator
$/ = $old_input_rec_sep;
close(*OUT);
# selete sorted file
unlink($sorted);
exit;

sub PrintProgress($$)
{
    my($prg, $ttl) = @_;
    my $percent = (($prg)/$ttl)*100;
    if(!($HTML)) {
      printf STDOUT "Progress: %3d %%\r", $percent;
    } else {
      printf STDOUT "<div class='per'>%3d %%</div>\n", $percent;
      # Now, output a new 'bar', forcing its width
      # to 3 times the percent, since we have
      # defined the percent bar to be at
      # 300 pixels wide.
      print STDOUT "<div class='bar' style='width: ",$percent * 3, "px'></div>\n";
    }
}

##########################################################################
# readExp($exp_file, $filename_to_compare)
# gets and validates the user's parameters
# requires the global variables:
# @cols_to_compare, @pvalcols, and @experiment_names
sub readExp($$) {
  my ($exp_file, $input_file) =@_;
  my ($caption, @caption_items);
  my ($line, $f, $p, $e);
  my (@col_captions, @found);
  my $search;

  (*INP) = openFile($input_file);
  $caption = <INP>; # read first line of the input file. It should contain
                    # the fold change and p-values captions
  # strip off all trailing white spaces
  $caption =~ s/\s+$//;

  @caption_items = split(/,/,$caption);
  # the first 2 columns must be Probeset ID and locus
  # there has to be a 3rd column,  Gene Title, when recalculating the signature
   if(($caption_items[0] ne "Probeset ID") or ($caption_items[1] ne "locus")) {
    close(*INP);
    print STDERR "No caption\n";
    return 0;
  }
  close(*INP);
    
  (*EXP) = openFile($exp_file); # a csv file listing the experiments that
  # would be compared to the signatures. For example:
  # F,P,E
  # F_MJ_30,P_MJ_30,MJ_30
  # F_MJ_60,P_MJ_60,MJ_60
  # F_MJ_180,P_MJ_180,MJ_180
  # The first column (F) is the label of the fold change data
  # P lists the labels of the p-value columns
  # and E is a title given by the user to the compared experiment
    
  # read first line and check that it contains capital F, P and E
  $line = <EXP>;
  chomp($line);
  # strip off all trailing white spaces
  $line =~ s/\s+$//;
  ($f, $p, $e) = split(/,/, $line);
  if(($f ne "F") or ($p ne "P") or ($e ne "E")) {
    close(*EXP);
    print STDERR "No header\n";
    return 0;
  }
  while($line = <EXP>) {
    chomp($line);
    # strip off all trailing white spaces
    $line =~ s/\s+$//;
    @col_captions = split(/,/,$line);
    if(scalar(@col_captions) == 3) {
      # check that the fold change and p value captions are listed in
      # the input file
      $search = $col_captions[0];
      $search =~ s/([\\\|\(\)\[\{\^\$\*\+\?\.])/\\$1/g;
      if(@found = grep(/^$search$/,@caption_items)) {
        push(@cols_to_compare, $col_captions[0]);
      } else {
        close(*EXP);
        print STDERR "Column not found\n";
        return 0;
      }
      $search = $col_captions[1];
      $search =~ s/([\\\|\(\)\[\{\^\$\*\+\?\.])/\\$1/g;
      if(@found = grep(/^$search$/,@caption_items)) {
        push(@pvalcols, $col_captions[1]);
      } else {
        close(*EXP);
        print STDERR "2nd column not found\n";
        return 0;
      }
      push(@experiment_names,$col_captions[2]);
    } # end if
  } # end while
  close(*EXP);
}

#gives a grade between -1 to 1 according to the extent thah the hormone response
# is similar to the stress response
sub CheckHormone
{
    # name of the coloumn of the GFold of the relevant hormone
    # name of the coloumn of the pval of the relevant hormone
    # file extention used to save the computed signature
    # to calculate again?
    my($horcol, $horpvalcol, $fileext, $compute_sig) = @_; 
    
   
    if($compute_sig eq "yes")
    {
        my @data = ParseFile($filename_sig);
        @data = SortDataBy(\@data, $horcol);
        @siggenes = ChooseSigGenes(\@data, "Probeset ID", "Gene Title", "locus", $horpvalcol, $horcol);
        nstore(\@siggenes,'signatures/siggenes.' . $fileext . '.db');
    }
    else
    {
        my $siggenes_ref = retrieve('signatures/siggenes.' . $fileext . '.db') || die "Can't open signature file";
        @siggenes = @$siggenes_ref;
    }


    my @input = ParseFile($filename_to_compare);
    my @inputsig = ExtractSig(\@input,\@siggenes, "Probeset ID");
    my ($grade, $count) = CalcCorrelation(\@siggenes, \@inputsig, $horcol,$col_to_compare_name, "Probeset ID", $fileext);
    return ($grade, $count);
}

sub CalcCorrelation
{
    my ($siggenes_ref, $inputsig_ref, $sigcol, $inputcol, $idcol, $fileext) = @_;
    my @siggenes = @$siggenes_ref;
    my @inputsig = @$inputsig_ref;
    my $sigtest;
    my $ID = $idcol;
    my $GENE_TITLE = "Gene Title";
    my $LOCUS = "locus";
    
    my $hormongrade = 0;
    my $i =0;
    my $dotproduct=0;
    my $length =0;
    my $proj_length = 0;
    my $countsiggenes =0;
    my $countfinal=0;
    
    my $pval_index = 1;
    my $total_insig_size = $#inputsig;
    
    my $file_sig_log_name = "logs/${experiment_name}-${fileext}.csv";
    
    #start of FDR
    if($FDR_CORRECTION)
    {
        my @sorted_pval_input_sig = sort
            {
                my %a = %$a;
                my %b = %$b;
                $a{$pvalcol} <=> $b{$pvalcol}
            } @inputsig;
        
        # FDR correction for each p val of the input file
        for my $row (@sorted_pval_input_sig)
        {
            $$row{$pvalcol}*=($total_insig_size/$pval_index);
            $pval_index++;
        }
        
        @inputsig = sort
                {
                    my %a = %$a;
                    my %b = %$b;
                    $a{$idcol} cmp $b{$idcol}
                } @sorted_pval_input_sig;
    }
    #end of FDR
    open FILE, ">$file_sig_log_name" or die "Failed to open logfile";
    
    for my $row (@siggenes)
    {
      if (defined($inputsig[$i]->{$idcol})) {
        if($row->{$idcol} eq $inputsig[$i]->{$idcol})
        {
            my $sigval = $row->{$sigcol};
            my $inputval = $inputsig[$i]->{$inputcol};
            my $inputpval = $inputsig[$i]->{$pvalcol};
          
            $countsiggenes++;
    
    
            #throw away values from input sig. that have pvalue greater than max allowed
            if(($DONT_CONSIDER_PVALUES_IN_CALC)||$inputpval < $maxpval)
            {
               $dotproduct += $sigval * $inputval; # v*u (dot product)
               $length += $sigval**2; # |v|**2
               $proj_length += $inputval**2; # |u|**2
               $countfinal++;
               
               # Add taken genes to the log file
               $row->{$LOCUS}||="--";
               print FILE "$row->{$ID}, $row->{$LOCUS}, $row->{$GENE_TITLE}\n";
            }
            $i++;
        }
      } else {
        $i++;
      } # end if defined
    }
    close FILE;
 #  print "\n"."signature size ". "$countsiggenes";
    #print "\n"."final number of genes compared "."$countfinal";
    if (($countfinal!=0) && ($length!=0) && ($proj_length!=0))
    {
        $length=sqrt($length); # |v|
        $proj_length = sqrt($proj_length); # |u|
        
        #Compute cos of the angle between u and v
        $hormongrade = ($dotproduct/$length)/$proj_length; # (v*u)/(|v||u|)
        
        #To get the angle take acos($hormongrade)
    }
    else
    {
        $hormongrade = "N/A";
    }
    return ($hormongrade, $countfinal);
}

sub ExtractSig
{
    my($ref1, $ref2, $colname) = @_;
    my @data = @$ref1;
    my @sig = @$ref2;
    my @inputsig;
    
    my $i=0;
    while (my $item = shift(@sig))
    {
        for my $row (@data)
        {
            if ($item->{$colname} eq $row->{$colname})
            {
                $inputsig[$i] = $row;
                last;
            }
        }
        
        $i++;
    }
    return @inputsig;
}

#parse the file.
#we store the file in the following data structure:
# ARRAY[HASH{TITLE->VALUE}]
sub ParseFile
{
    my ($filename) = @_;
    open (ANOVAFILE, $filename) || die "\nFile not found $filename\n";
    my $caption;
    my @data;
    my @caption_items;
    my $line;
    my $line_num = 0;
    $caption =<ANOVAFILE>;
    @caption_items = split(/,/,$caption);
    chomp(@caption_items);
    while ($line = <ANOVAFILE>)
    {
        # strip off all trailing white spaces
        $line =~ s/\s+$//;
        my @line_items = split(/,/,$line);
        chomp (@line_items);
        my %rowItems;
        my $i = 0;
        foreach my $capt (@caption_items)
        {
            # strip off all trailing white spaces
            $line_items[$i] =~ s/\s+$//;
            $rowItems{$capt} = $line_items[$i];
            $i++;
        }
        $data[$line_num++] = \%rowItems;
    }
    close(ANOVAFILE);
    return @data;
}

sub SortDataBy
{
    my($dataref, $colname) = @_;
    my @arr = @$dataref;
    my @sortedarr;
    @sortedarr = sort
        {
            my %a = %$a;
            my %b = %$b;
            abs($a{$colname}) <=> abs($b{$colname})
        } @arr;
    @arr = reverse @sortedarr;
    return @arr;
}

sub ChooseSigGenes
{
    my($dataref, @cols) = @_;
    my @data = @$dataref;
    
    my $pvalcol = $cols[$#cols-1];
    my $idcol = $cols[0];
    

    my @arr;
    
    my $i=0;
    for my $row (@data)
    {
        if($i>=$NUM_OF_GENES_IN_SIG) #break if we have enough genes
        {
            last;
        }
        
        # Check pvalues only of defined.
        if(($DONT_CONSIDER_PVALUES_IN_SIG) || ($row->{$pvalcol} < $maxpval))
        {
            my %temphash;
            for my $col (@cols)
            {
                $temphash{$col} = $row->{$col};
            }
            $arr[$i] = \%temphash;
            $i++;
        }
    }
    if($FDR_CORRECTION)
    {
        return  sort
                {
                    my %a = %$a;
                    my %b = %$b;
                    $a{$idcol} cmp $b{$idcol}
                } @arr;
    }
    else
    {
        return @arr;
    }
}

##########################################################################
# getArgs()
# gets and validates the user's parameters
sub getArgs() {
  my $getopts;
  my $sHelpMsg =
    "patternanalyzer.pl --input file used for comparison \n" .
    "                   --out    output file\n" .
    "                   --exp    instruction file\n" .
    "                   --sig    signature file\n" .
    "                   --rec    recompute signature\n\n" .

    " --input,\n" .
    " -i \t Specifies the location of the file that contains the\n" .
      "\t genes data.\n" .
    " --out,\n" .
    " -o \t Specifies the output file name and path.\n" .
    " --exp,\n" .
    " -e \t Specifies the experiment list file name and path.\n" .
    " [--sig,\n" .
    " -s] \t Specifies the signature file name and path.\n" .
    " [--rec | --norec]\n" .
    " \t Recompute signature(default: NO).\n";


  # get command line arguments:
  # --input             array data file used for comparison
  # --out               output file name and path
  # [--rec | --norec]   recompute signature? YES or NO (default: NO)
  $getopts = GetOptions(\%args,
             "help",
             "rec!",
             "input=s",
             "sig=s",
             "exp=s",
             "out=s");

  if(!$getopts) {
    print "$sHelpMsg";
    return 0;
  }
  if($args{help}) {
    print "$sHelpMsg";
    exit(0);
  }
  if (defined $args{rec}) {
    $recomputeSig = $args{rec} ?  "yes" : "no";
  } else {
    $recomputeSig = "no";
  }

  if($args{input}) { #input
    $filename_to_compare = $args{input}; # array data
  } else {
    return 0;
  }
  if($args{out}) {            # output file
    $output_file = $args{out};
  } else {
    return 0;
  }
  if($args{exp}) {            # experiment file
    $exp_file = $args{exp};
  } else {
    return 0;
  }
  if($args{sig}) {
    $filename_sig = $args{sig};  # signature file
  } else {
    $filename_sig = "allAnovas.csv";
  }

  return 1;
}

##########################################################################
# openFile($fileName)
# open the file $fileName and return a file handle
sub openFile($) {
  my ($fileName) = @_;
  my $fh;

  unless(open($fh, $fileName)) {
    if($fileName =~ /^>/) {
      print STDERR "Failed to create file $fileName\n";
    } else {
      print STDERR "Failed to open file $fileName\n";
    }
    exit -1;
  }
  return $fh;
}