Monthly Archives: June 2011

Perl Smith Waterman script

The following is a small script to perform a Smith Waterman alignment, and to calculate the percent identity between two sequences.

#!/usr/bin/perl  use strict; #use warnings;   
@Constants::nucs = split(//, "ACGT"); 
@Constants::aas  = split(//, "ARNDCQEGHILKMFPSTWYV");             
%Constants::gencode =
      (TTT => "F", TTC => "F", TTA => "L", TTG => "L",
      TCT => "S", TCC => "S", TCA => "S", TCG => "S",
      TAT => "Y", TAC => "Y", TAA => "*", TAG => "*",
      TGT => "C", TGC => "C", TGA => "*", TGG => "W",
      CTT => "L", CTC => "L", CTA => "L", CTG => "L",
      CCT => "P", CCC => "P", CCA => "P", CCG => "P",
      CAT => "H", CAC => "H", CAA => "Q", CAG => "Q",
      CGT => "R", CGC => "R", CGA => "R", CGG => "R",
      ATT => "I", ATC => "I", ATA => "I", ATG => "M",
      ACT => "T", ACC => "T", ACA => "T", ACG => "T",
      AAT => "N", AAC => "N", AAA => "K", AAG => "K",
      AGT => "S", AGC => "S", AGA => "R", AGG => "R",
      GTT => "V", GTC => "V", GTA => "V", GTG => "V",
      GCT => "A", GCC => "A", GCA => "A", GCG => "A",
      GAT => "D", GAC => "D", GAA => "E", GAG => "E",
      GGT => "G", GGC => "G", GGA => "G", GGG => "G" );
# The BLOSUM45 amino acid substitution matrix 
@Constants::blosum45 =
     #  A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V
   ( [  5,-2,-1,-2,-1,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-2,-2, 0],  # A
     [ -2, 7, 0,-1,-3, 1, 0,-2, 0,-3,-2, 3,-1,-2,-2,-1,-1,-2,-1,-2],  # R
     [ -1, 0, 6, 2,-2, 0, 0, 0, 1,-2,-3, 0,-2,-2,-2, 1, 0,-4,-2,-3],  # N 
     [ -2,-1, 2, 7,-3, 0, 2,-1, 0,-4,-3, 0,-3,-4,-1, 0,-1,-4,-2,-3],  # D
     [ -1,-3,-2,-3,12,-3,-3,-3,-3,-3,-2,-3,-2,-2,-4,-1,-1,-5,-3,-1],  # C
     [ -1, 1, 0, 0,-3, 6, 2,-2, 1,-2,-2, 1, 0,-4,-1, 0,-1,-2,-1,-3],  # Q
     [ -1, 0, 0, 2,-3, 2, 6,-2, 0,-3,-2, 1,-2,-3, 0, 0,-1,-3,-2,-3],  # E
     [  0,-2, 0,-1,-3,-2,-2, 7,-2,-4,-3,-2,-2,-3,-2, 0,-2,-2,-3,-3],  # G
     [ -2, 0, 1, 0,-3, 1, 0,-2,10,-3,-2,-1, 0,-2,-2,-1,-2,-3, 2,-3],  # H
     [ -1,-3,-2,-4,-3,-2,-3,-4,-3, 5, 2,-3, 2, 0,-2,-2,-1,-2, 0, 3],  # I
     [ -1,-2,-3,-3,-2,-2,-2,-3,-2, 2, 5,-3, 2, 1,-3,-3,-1,-2, 0, 1],  # L
     [ -1, 3, 0, 0,-3, 1, 1,-2,-1,-3,-3, 5,-1,-3,-1,-1,-1,-2,-1,-2],  # K
     [ -1,-1,-2,-3,-2, 0,-2,-2, 0, 2, 2,-1, 6, 0,-2,-2,-1,-2, 0, 1],  # M
     [ -2,-2,-2,-4,-2,-4,-3,-3,-2, 0, 1,-3, 0, 8,-3,-2,-1, 1, 3, 0],  # F
     [ -1,-2,-2,-1,-4,-1, 0,-2,-2,-2,-3,-1,-2,-3, 9,-1,-1,-3,-3,-3],  # P
     [  1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-3,-1,-2,-2,-1, 4, 2,-4,-2,-1],  # S
     [  0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-1,-1, 2, 5,-3,-1, 0],  # T
     [ -2,-2,-4,-4,-5,-2,-3,-2,-3,-2,-2,-2,-2, 1,-3,-4,-3,15, 3,-3],  # W
     [ -2,-1,-2,-2,-3,-1,-2,-3, 2, 0, 0,-1, 0, 3,-3,-2,-1, 3, 8,-1],  # Y
     [  0,-2,-3,-3,-1,-3,-3,-3,-3, 3, 1,-2, 1, 0,-3,-1, 0,-3,-1, 5]   # V
     #  A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V      );
# The BLOSUM50 amino acid substitution matrix 
@Constants::blosum50 =
      #  A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V
   ( [  5,-2,-1,-2,-1,-1,-1, 0,-2,-1,-2,-1,-1,-3,-1, 1, 0,-3,-2, 0],  # A
     [ -2, 7,-1,-2,-4, 1, 0,-3, 0,-4,-3, 3,-2,-3,-3,-1,-1,-3,-1,-3],  # R
     [ -1,-1, 7, 2,-2, 0, 0, 0, 1,-3,-4, 0,-2,-4,-2, 1, 0,-4,-2,-3],  # N
     [ -2,-2, 2, 8,-4, 0, 2,-1,-1,-4,-4,-1,-4,-5,-1, 0,-1,-5,-3,-4],  # D
     [ -1,-4,-2,-4,13,-3,-3,-3,-3,-2,-2,-3,-2,-2,-4,-1,-1,-5,-3,-1],  # C
     [ -1, 1, 0, 0,-3, 7, 2,-2, 1,-3,-2, 2, 0,-4,-1, 0,-1,-1,-1,-3],  # Q
     [ -1, 0, 0, 2,-3, 2, 6,-3, 0,-4,-3, 1,-2,-3,-1,-1,-1,-3,-2,-3],  # E
     [  0,-3, 0,-1,-3,-2,-3, 8,-2,-4,-4,-2,-3,-4,-2, 0,-2,-3,-3,-4],  # G
     [ -2, 0, 1,-1,-3, 1, 0,-2,10,-4,-3, 0,-1,-1,-2,-1,-2,-3, 2,-4],  # H 
     [ -1,-4,-3,-4,-2,-3,-4,-4,-4, 5, 2,-3, 2, 0,-3,-3,-1,-3,-1, 4],  # I
     [ -2,-3,-4,-4,-2,-2,-3,-4,-3, 2, 5,-3, 3, 1,-4,-3,-1,-2,-1, 1],  # L
     [ -1, 3, 0,-1,-3, 2, 1,-2, 0,-3,-3, 6,-2,-4,-1, 0,-1,-3,-2,-3],  # K
     [ -1,-2,-2,-4,-2, 0,-2,-3,-1, 2, 3,-2, 7, 0,-3,-2,-1,-1, 0, 1],  # M
     [ -3,-3,-4,-5,-2,-4,-3,-4,-1, 0, 1,-4, 0, 8,-4,-3,-2, 1, 4,-1],  # F
     [ -1,-3,-2,-1,-4,-1,-1,-2,-2,-3,-4,-1,-3,-4,10,-1,-1,-4,-3,-3],  # P
     [  1,-1, 1, 0,-1, 0,-1, 0,-1,-3,-3, 0,-2,-3,-1, 5, 2,-4,-2,-2],  # S
     [  0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 2, 5,-3,-2, 0],  # T
     [ -3,-3,-4,-5,-5,-1,-3,-3,-3,-3,-2,-3,-1, 1,-4,-4,-3,15, 2,-3],  # W
     [ -2,-1,-2,-3,-3,-1,-2,-3, 2,-1,-1,-2, 0, 4,-3,-2,-2, 2, 8,-1],  # Y
     [  0,-3,-3,-4,-1,-3,-3,-4,-4, 4, 1,-3, 1,-1,-3,-2, 0,-3,-1, 5]   # V
     #  A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V    );
# The BLOSUM62 amino acid substitution matrix  
@Constants::blosum62 =
     #  A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V
   ( [  4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0],  # A
     [ -1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3],  # R
     [ -2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3],  # N
     [ -2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3],  # D
     [  0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1],  # C
     [ -1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2],  # Q
     [ -1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2],  # E
     [  0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3],  # G
     [ -2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3],  # H
     [ -1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3],  # I
     [ -1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1],  # L
     [ -1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2],  # K
     [ -1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1],  # M 
     [ -2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1],  # F
     [ -1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2],  # P
     [  1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2],  # S
     [  0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0],  # T
     [ -3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3],  # W
     [ -2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1],  # Y
     [  0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4]   # V
     #  A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V      );

# The amino acids, and a hashmap from amino acid to its index 0-19  
my %aaIndex;

for (my $i=0; $i<20; $i++) { $aaIndex{$Constants::aas[$i]} = $aaIndex{lc($Constants::aas[$i])} = $i; } # The score of a pair of amino acids in a given matrix sub score { my ($matrix, $aa1, $aa2) = @_; # By ref, val, val return $$matrix[$aaIndex{$aa1}][$aaIndex{$aa2}]; } # The maximum of a list of numbers sub max { my $res = -1E300; foreach (@_) { if ($_ > $res) {
      $res = $_;
    }
  }
  return $res;
} 

# Make random DNA sequence of specified length  
sub randomdna {
   my ($len) = @_;
   # length of sequence to generate
   my $dna = "";
   foreach (1..$len) {
     my $p = rand(4);
     $dna = $dna . substr("ACGT", $p, 1);
   }
   return $dna;
}   
use Data::Dumper; 
#use Constants;  
# Smith-Waterman  Algorithm  

my $seq1 = "MSEFRILSDREHVLKRAGMYIGSTTYEEHQRFLFGKWTKISYVPGLVKIIDEIIDNSVDEAIRTNFEFANVISVDIQNNIVTVTDNGRGIPQHMVTTPEGTQIPQPVAAWTRTKAGSNFDDTNRLTQGMNGVGSSLSNFFSDWFTGVTCDGETEMTVQCTNGAENITWSSKPSKLKGTNVTFCPDFDHFEGYMMDNSVLDIVHDRLQSLSVIFPKITFKFNGKRIVSKFKDYAKLYNDDPFVIEDKNFSLALVPAVEGEGFKSVSFANGLFTKNGGTHVDYVTDDLCEEIVKRIKKEYKVDVTKAAVKSGLTCILIVRELPNLRFDSQTKERLTNPTGDIKRHIDLDFKKLAKVIAKKENIIMPIIAVVLARKEAADKAAATKAAKAAKRAKVAKHVKANLIGTDAETTLFLTEGDSAINYLISVRDQDLHGGFPLRGKTLTTWEQPEAKIVKNAEIFNIMAITGLQFGVDALDVMQYKNIAIMTDADTDGIGSIKPSLISFFARWPELFEDGRIRFIKTPIIIAEPKKGDDVRWYYDLEDFENDRDNIkGYNIRYIKGLASLTESDYHRVINDPVMEIITLPENYKELFDLLYSEDADQRKIWMQS"; 
my $seq2 = "MSIRLLVHLNQIYTTYNYTNMSDLLLNVDNDYLDLAAALNIDINTITDNIDINTSKSSLNTYFTSLPKDVVVRRLNSASYQDSQEMRWPYPGQDNQKLFEQFEGVNGVIVQLQGVILQHQAQLDHSYWDEANSKYVRFCNSVGYKRTYPDGSIKLIQGLPENVCLKGVTEYGDPPNRPLPVIDKLGLVGKKGMTCSECIRAGLHSQEVEGKDRPVTCSPTGQLIFYVTGFTTRVLSNKGGKVTSTFNDYTVKELMDDTGFILIIPLKAKSTRRGIWDASTKQWTSVGYEAMVNNLIYKHSKDFANAPVGKRDTIAMKMSPYFQTIIISIVPPNPEDKNPKASLNFAVKEIPDLGAIKAARKYWQQINPAGEINVLNEDDFSNTKSVGLCAAEIVEEEPIEINKNPWAE";  
# scoring scheme 
my $GAP = -8;  
# initialization 
my @matrix; 
$matrix[0][0]{score}   = 0; 
$matrix[0][0]{pointer} = "none"; 
for(my $j = 1; $j <= length($seq1); $j++) {
     $matrix[0][$j]{score}   = 0;
     $matrix[0][$j]{pointer} = "none";
 } 
for (my $i = 1; $i <= length($seq2); $i++) {
     $matrix[$i][0]{score}   = 0;
     $matrix[$i][0]{pointer} = "none"; 
}  
# fill 
my $max_i     = 0; 
my $max_j     = 0; 
my $max_score = 0; 
my $scoring_matrix = @Constants::blosum50; 
for(my $i = 1; $i <= length($seq2); $i++) {
     for(my $j = 1; $j  $max_score) {
             $max_i     = $i;
             $max_j     = $j;
             $max_score = $matrix[$i][$j]{score};
         }
     }
}
# trace-back  
my $align1 = ""; 
my $align2 = "";  
my $j = $max_j; 
my $i = $max_i;  
while (1) {
     last if $matrix[$i][$j]{pointer} eq "none";
     if ($matrix[$i][$j]{pointer} eq "diagonal") {
         $align1 .= substr($seq1, $j-1, 1);
         $align2 .= substr($seq2, $i-1, 1);
         $i--; $j--;
     }elsif ($matrix[$i][$j]{pointer} eq "left") {
         $align1 .= substr($seq1, $j-1, 1);
         $align2 .= "-";
         $j--;
     }elsif($matrix[$i][$j]{pointer} eq "up"){
         $align1 .= "-";
         $align2 .= substr($seq2, $i-1, 1);
         $i--;
     }
}  
$align1 = reverse $align1; 
$align2 = reverse $align2; 
print "$align1n"; 
print "$align2n"; 
print &pid($align1,$align2);  

sub pid {
     my ($seq1, $seq2) = @_;
     my $matches = 0;
     my @seq1 = split(//, $seq1);
     my @seq2 = split(//, $seq2);
     for(my $i = 0; $i <= $#seq1 ; $i++) {
          if($seq1[$i] eq $seq2[$i]){
               $matches++;
          }
     }
     return ($matches/($#seq1+1));
}

How To Fix File Permissions for Joomla

Yesterday Geni came to me with an interesting problem: He needed to update user pictures, but was unable to place them inside the member_pics folder. This was because the member_pics folder was a folder that had been created by a user via ssh, not by using joomla. The permissions on the folder were 775, so a non-owner non-group user could only read and execute from the folder.

 

Continue reading

Invite people for the Journal Club

Step 1: Choose a paper. Have a different person choose the paper every week. Check the previous Journal Clubs here to see who chose one the longest time ago. Let new lab members join the club a few times so they can see what it is like before asking them to choose a paper themselves.
Step 2: Ask them to come up with papers that represent the Cutting Edge of Science. Think about:
– papers that present something new (e.g. a novel data set or approach)
– papers should preferentially be recent (so that we can use the new insights/data promptly)
– if the paper appeared in a lower-impact journal, this could avoid it having been noticed by competing groups 😉
– review papers are good to learn some background, but they usually do not represent the Cutting Edge of Science.
Step 3: Use the “Add content” link to post the paper on the Edwards Lab Site http://edwards.sdsu.edu/labsite/index.php/intranet/journal-club. Mention who chose the paper and when we are discussing it.
Step 4: Send out an email to the Edwards Lab mailing list telling them which paper to read and the date of the Journal Club. Attach a PDF of the paper. Also, tell everyone to really read the paper, or the Journal Club will lose its meaning.
Step 5: While reading, try to understand what the paper is about. If something is not clear, discuss it with the others.
Step 6: During the Journal Club discussion, remember the following points:
– Think about how we can use the information presented in the paper in our own research.
– Try to focus on positive things – trashing something is just too easy.

June 15th 2011: “Rapid pneumococcal evolution in response to clinical interventions”

Rob Edwards’ choice:

http://www.ncbi.nlm.nih.gov/pubmed/21273480 and http://www.ncbi.nlm.nih.gov/pubmed/21273474

Croucher NJ, Harris SR, Fraser C, Quail MA, Burton J, van der Linden M, McGee L, von Gottberg A, Song JH, Ko KS, Pichon B, Baker S, Parry CM, Lambertsen LM, Shahinas D, Pillai DR, Mitchell TJ, Dougan G, Tomasz A, Klugman KP, Parkhill J, Hanage WP, Bentley SD.
Rapid pneumococcal evolution in response to clinical interventions.
Science. 2011 Jan 28;331(6016):430-4.

Enright MC, Spratt BG. Genomics. The genomic view of bacterial diversification.
Science. 2011 Jan 28;331(6016):407-9.

Add Users and Make their Blog Pages

Step 1: Get administrator status within the system, specifically access to the User Manager.

Step 2: Log in to the administrator backend. http://edwards.sdsu.edu/labsite/administrator

Step 3: Go to the user manager, click New. Add in all the relevant information: full name, username, email address, password. Give them the status required to do what they need to, usually Registered or Author. Hit save, not apply.

Step 4: Go to the category manager. Make a new category attached to the lab blog section. Pretty simple. “Joe’s lab blog”, etc. Sort it alphabetically if you want. I did, just for ease of looking through to make sure everyone has a blog page that needs one.

Step 5: Go to Lab Blogs inside the Menus dropdown menu. Add a new one, a Category Blog Layout, and select your category from the menu on the right hand side. Apply then save. Make sure to sort the menu alphabetically. This is what users will see on the right hand side when browsing lab blogs, so this is not an option!

Step 6: Done.