The following Perl subroutine reads a FASTA-formatted file, parses it, and returns all of the sequences as a reference to a hash table keyed by sequence identifier.
# get_fasta($file)
#
# Read a FASTA-formatted file and collect its sequences.
#
# Parameters:
#   $file - path of the FASTA file to read.
#
# Returns:
#   A reference to a hash that maps each sequence identifier to its sequence.
#   The identifier is the header line with the leading '>' removed and
#   everything from the first whitespace character onwards dropped. The
#   sequence is the concatenation of all lines that follow the header, with
#   line endings removed. Records sharing an identifier are concatenated, and
#   a header that is not followed by any sequence line produces no entry.
#
# Dies:
#   - if no file name is given or the file cannot be opened;
#   - if any line appears before the first '>' header line.
sub get_fasta {
    my ($file) = @_;

    # Three-argument open() with an undefined name would silently open an
    # anonymous temporary file, so reject a missing argument explicitly.
    defined $file
        or die("Cannot open FASTA file: no file name given.\n");

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and the handle is not a package global.
    open(my $fh, '<', $file)
        or die("Cannot open FASTA file '$file': $!\n");

    my %seqs;
    my $header;    # identifier of the record being read; undef until the first '>'

    # Read line by line instead of slurping the whole file into memory.
    while (my $line = <$fh>) {
        chomp($line);
        $line =~ s/\r\z//;    # tolerate CRLF line endings

        if ($line =~ /^>/) {
            $header = $line;
            $header =~ s/^>//;      # drop the FASTA header marker
            $header =~ s/\s.*//;    # keep only the identifier, not the description
            next;
        }

        # Any content before the first header means this is not FASTA.
        if (!defined $header) {
            close($fh);
            die("Not a standard FASTA file.\n");
        }

        # .= appends without first reading an undefined hash slot.
        $seqs{$header} .= $line;
    }

    close($fh);
    return \%seqs;
}
Usage:
# Load every sequence from the file; get_fasta is expected to hand back a
# hash reference keyed by sequence identifier.
my $seqs = get_fasta("filename.txt");

# Print each identifier immediately followed by its sequence and a newline.
# Hash keys come back in no particular order.
foreach my $id (keys %$seqs) {
    print $id;
    print $seqs->{$id}, "\n";
}