#!/usr/bin/perl
use strict;
use warnings;
use HTTP::Request::Common;
use LWP::UserAgent;
use Pod::Usage;
use Getopt::Long;
use Bio::SeqIO;

#run this script with --help to see the options

=pod

=head1 NAME

SMART_batch  -  submit sequences from a FASTA file to SMART

=head1 SYNOPSIS

B<SMART_batch.pl> I<options>

=head1 DESCRIPTION

Use B<SMART_batch.pl> to submit multiple protein sequences from a FASTA file into the SMART analysis queue. Results are saved into plain text files.

=head1 GENERAL OPTIONS


=over 4

=item B<--help>

display this message

=item B<--inputFile>

FASTA file with sequences to submit

=item B<--outputDirectory>

Directory which will be used to store the results. Will be created if it doesn't exist. Defaults to 'SMART_results'.


=back

=head1 ANALYSIS OPTIONS 

=over 4

=item B<--includePfam>

Include Pfam domains in the search. 

=item B<--includeSignalP>

Include signal peptide predictions.

=item B<--includeRepeats>

Include internal repeat predictions.


=item B<--includeSchnipsel>

Include predictions of outlier homologues and homologues of known structures.

=back


=head1 SEE ALSO

 SMART Home page : https://smart.embl.de
 SMART FAQ       : https://smart.embl.de/faq.cgi

=head1 AUTHOR

 Ivica Letunic <ivica@letunic.com>
 Contact me if you have any questions or comments.

=cut

# Endpoints of the SMART web service: sequences are POSTed to the submit URL,
# and queued jobs are polled through the job status URL.
my $submit_url     = "http://ismart.embl.de/smart/show_motifs.pl";
my $job_status_url = "http://ismart.embl.de/results.cgi";

# Delays (in seconds) used to avoid hammering the server.
my $first_poll_delay = 5;     # wait after a job was queued, before the first status check
my $poll_interval    = 10;    # wait between two status checks of a queued job
my $submission_pause = 5;     # wait between two sequence submissions

my ($show_help, $input_file, $output_directory);
my ($do_pfam, $do_signalp, $do_rep, $do_schnipsel);

# Getopt::Long reports unknown options as warnings; they are not treated as
# fatal here, so existing invocations keep working.
GetOptions(
  "help"              => \$show_help,
  "inputFile=s"       => \$input_file,
  "outputDirectory=s" => \$output_directory,
  "includePfam"       => \$do_pfam,
  "includeSignalP"    => \$do_signalp,
  "includeRepeats"    => \$do_rep,
  "includeSchnipsel"  => \$do_schnipsel,
);

# An input file is mandatory: without one (or with --help) show the manual.
$show_help = 1 unless (defined $input_file and length $input_file);
pod2usage(-verbose => 2) if ($show_help);

my $ua = LWP::UserAgent->new();
$ua->agent("SMARTbatch1.0");

print "\nSMART batch analysis\n======================\n";

# Validate the input before creating anything on disk, and signal the failure
# through a non-zero exit status.
unless (-e $input_file) {
  print STDERR "Input file does not exist.\n";
  exit 1;
}

$output_directory = 'SMART_results' unless (defined $output_directory);
unless (-d $output_directory) {
  mkdir $output_directory
    or die "Cannot create output directory '$output_directory': $!\n";
}

my $io = Bio::SeqIO->new(-format => 'fasta', -file => $input_file);

#process sequences one by one. ALWAYS wait for the results before submitting the next sequence.

while (my $seq = $io->next_seq) {
  my $seq_id = $seq->display_id;

  # A '/' inside a FASTA identifier would be read as a directory separator in
  # the result path, so it is replaced for the file name only.
  (my $file_stem = $seq_id) =~ s{/}{_}g;
  my $output_file = $output_directory . "/" . $file_stem . "_SMART_results.txt";

  if (-e $output_file) {
    if (-z $output_file) {
      # a zero-sized file is a leftover of an interrupted run; redo the sequence
      print "Removing empty results file $output_file.\n";
      unlink $output_file or warn "Could not remove $output_file: $!\n";
    }
    else {
      print "Skipping sequence $seq_id because the results file already exists.\n";
      next;
    }
  }

  print "Submitting sequence $seq_id...\n";

  #prepare the POST data: the sequence itself plus the requested optional analyses
  my @form_fields = (SEQUENCE => $seq->seq, TEXTONLY => 1);
  push @form_fields, DO_PFAM         => 1 if ($do_pfam);
  push @form_fields, INCLUDE_SIGNALP => 1 if ($do_signalp);
  push @form_fields, DO_PROSPERO     => 1 if ($do_rep);
  push @form_fields, INCLUDE_BLAST   => 1 if ($do_schnipsel);

  my $req = POST($submit_url, Content_Type => 'form-data', Content => \@form_fields);
  my $response = $ua->request($req);

  if ($response->is_redirect) {
    # LWP::UserAgent does not follow redirects of POST requests by default, so
    # a redirect here means the job was queued; its ID is in the Location header.
    my $loc = $response->header("Location");
    $loc = '' unless (defined $loc);
    my ($job_id) = $loc =~ /results\.cgi\?id=(\d+)/;
    unless (defined $job_id) {
      print "Could not get the job ID from the redirect header ($loc). Aborting further submissions.\n";
      exit 1;
    }

    #we have the job ID, poll the status page until it contains the results
    print "Job entered the queue with ID $job_id. Waiting for results.\n";
    my $job_status_req = GET("$job_status_url?id=$job_id");
    sleep $first_poll_delay;
    while (1) {
      my $job_status_response = $ua->request($job_status_req);
      unless ($job_status_response->is_success) {
        # report the failed status request itself, not the earlier submission response
        print "SMART returned a web server error. Full message follows:\n\n";
        print $job_status_response->as_string;
        die "Could not retrieve the status of job $job_id ("
          . $job_status_response->status_line . "). Aborting.\n";
      }
      if (has_smart_result_header($job_status_response->content)) {
        save_content($output_file, $job_status_response->content);
        print "Results saved to '$output_file'\n";
        last;
      }
      #still in queue
      sleep $poll_interval;
    }
  }
  elsif ($response->is_success) {
    #check if we got the results directly (precomputed results)
    if (has_smart_result_header($response->content)) {
      save_content($output_file, $response->content);
      print "Results saved to '$output_file'\n";
    }
    else {
      #there is no result header, so an error occurred
      my $error_file = "$output_directory/$file_stem\_SMART_error.html";
      save_content($error_file, $response->content);
      print "SMART returned an error page, which was saved into '$error_file'.\nPlease check the file for details. Aborting further submissions.\n";
      exit 1;
    }
  }
  else {
    # neither queued nor answered (HTTP error or connection failure): say so
    # instead of skipping the sequence silently, then continue with the next one
    print STDERR "Submission of sequence $seq_id failed (" . $response->status_line . "). No results were saved.\n";
  }

  #be nice to other users
  sleep $submission_pause;
}

# Returns 1 if the first or the second line of $content starts with the
# "-- SMART RESULT" marker (the marker may be preceded by one other line,
# e.g. an empty one), and 0 otherwise or when $content is undefined.
sub has_smart_result_header {
  my ($content) = @_;
  return 0 unless (defined $content);
  # limit 3: only the first two lines are inspected, the rest stays unsplit
  my @head = split(/\n/, $content, 3);
  for my $i (0, 1) {
    return 1 if (defined $head[$i] and $head[$i] =~ /^--\ SMART\ RESULT/);
  }
  return 0;
}

# Writes $content to the file $path, replacing any previous content.
# Dies if the file cannot be opened or if the write cannot be completed.
sub save_content {
  my ($path, $content) = @_;
  open(my $fh, '>', $path) or die "Cannot write to $path: $!\n";
  print {$fh} $content;
  # buffered write errors only surface when the handle is closed
  close($fh) or die "Cannot finish writing $path: $!\n";
  return;
}
