The following Perl subroutine reads a FASTA-formatted file, parses it, and returns all of the sequences as a reference to a hash table.
# get_fasta($path)
#
# Read the FASTA-formatted file at $path and return a reference to a hash
# that maps each record's identifier to its sequence.
#
#   * The identifier is the header line without its leading '>' and without
#     anything from the first whitespace character onwards.
#   * All sequence lines that follow a header are concatenated, with their
#     line endings removed, into a single string.
#   * A header that is not followed by any sequence line produces no entry.
#   * If the same identifier occurs more than once, the sequences of those
#     records are appended to each other under that one key.
#
# Dies if the file cannot be opened, or if any line (even an empty one)
# appears before the first header line.
sub get_fasta {
    my ($path) = @_;

    # Three-argument open with a lexical handle: the file name can never be
    # misinterpreted as an open mode, and the handle is not a package global.
    open(my $fh, '<', $path)
        or die("Cannot open FASTA file '$path': $!\n");

    my %seqs;
    my $header;    # identifier of the current record; undef until the first '>'

    # Read line by line rather than slurping the whole file into memory.
    while (my $line = <$fh>) {
        chomp($line);

        if ($line =~ /^>/) {
            $header = $line;
            $header =~ s/^>//;      # drop the record marker
            $header =~ s/\s.*//;    # keep only the ID, drop the description
            next;
        }

        # Anything that precedes the first header is not valid FASTA.
        die("Not a standard FASTA file.\n") unless defined $header;

        # '.=' treats a missing entry as the empty string without warning.
        $seqs{$header} .= $line;
    }

    close($fh);
    return \%seqs;
}
Usage:
# Load every record from the file; get_fasta is used here as returning a
# hash reference keyed by sequence identifier.
my $seqs = get_fasta("filename.txt");

# Print each identifier immediately followed by its sequence, one record per
# line. Hash keys come back in no particular order.
foreach my $id (keys %$seqs) {
    print $id;
    print $$seqs{$id}, "\n";
}
