Monday, April 21, 2008

My First Perl Program, Hello Bioinformatics

"Write a subroutine that takes as arguments an amino acid; a position 1, 2 or 3; and a nucleotide. It then takes each codon that encodes that specified amino acid (there may be from one to size such codons), and mutates it a the specified position to the specified nucleotide. Finally it returns the set of amino acids that are encoded by the mutated codons."

The following is my answer. "amino_to_codon.txt" stores the following:

Isoleucine ATT, ATC, ATA
Phenylalanine TTT, TTC
Methionine ATG
Cysteine TGT, TGC
Alanine GCT, GCC, GCA, GCG
Glycine GGT, GGC, GGA, GGG
Proline CCT, CCC, CCA, CCG
Threonine ACT, ACC, ACA, ACG
Tyrosine TAT, TAC
Tryptophan TGG
Glutamine CAA, CAG
Asparagine AAT, AAC
Histidine CAT, CAC
Glutamic acid GAA, GAG
Aspartic acid GAT, GAC
Lysine AAA, AAG

# Terrence Pietrondi
# CIS667 Programming Assignment 1
use strict;
use warnings;

# map with codons as keys and amino acids as values
our %codon_to_amino_acid_map = ();
# map with amino acids as keys and codons as values
our %amino_acid_to_codon_map = ();

# load the command line parameters for this program
# no arguments expected
# returns amino acid, position and nucleotide
sub load_command_line{
# initalize the amino acid command line variable
my $amino_acid;
# initalize the position command line variable
my $position;
# initalized the nucleotide variable
my $nucleotide;
# the argument count
my $command_arguments = $#ARGV + 1;
# check for three command arguments
if($command_arguments == 3){
# assign amino acid
$amino_acid = $ARGV[0];
# assign position
$position = int($ARGV[1]) - 1;
# check the position
if($position < 0 or $position > 2){
print "Position must be either: 1,2, or 3!";
exit 1;

# assign the nucleotide
$nucleotide = $ARGV[2];
# return the amino acid, position and nucleotide
return $amino_acid,$position,$nucleotide;
} else {
# if we don't get three arguments

# show some help messages
print "Given $command_arguments command arguements, I need three!\n";
print " \n";
print "In the mean time, I'll load the driver....\n\n";
# set all the values as blank strings
$amino_acid = '';
$position = '';
$nucleotide = '';
# return the blank string
return $amino_acid,$position,$nucleotide;

# trim trailing and leading whitespace from a string
# expects a single string arguement
# returns the new string
sub trim_string{
my ($trim_string) = @_;
$trim_string =~ s/^\s+//;
$trim_string =~ s/\s+$//;
return $trim_string;

# load the amino acid & codon data from a file
# no arguments
# returns the raw data lines of the file as an array
sub load_data{
# load the codon map to aminos from a file
my $amino_to_codon_txt="amino_to_codon.txt";
open(DAT, $amino_to_codon_txt) || die("Could not open file: $amino_to_codon_txt!");
my @raw_data=;

return @raw_data;

# load the mappings in the data files into the data maps of this
# program
# expects one arguement, the raw data array of lines from the data file
# returns nothing, loads global data maps
sub load_maps{
my (@raw_data) = @_;
my $line;
foreach $line (@raw_data){
# clean the line terminator
my $loop_amino_acid;
my $loop_codon;
# split the line on the tab charater to seperate the amino acids from its codons
($loop_amino_acid,$loop_codon) = split(/\t/,$line);
# trim the codon string
$loop_codon = &trim_string($loop_codon);
# trim the amino acid string
$loop_amino_acid = &trim_string($loop_amino_acid);
# set the codons for the given amino acid in the data
$amino_acid_to_codon_map{$loop_amino_acid} = $loop_codon;
# split the codons on the comma
my @codon_array = split(/,/, $loop_codon);
my $loop_codon_inner;
# for each codon for this amino acid
foreach $loop_codon_inner (@codon_array){
# trim the codon
$loop_codon_inner = &trim_string($loop_codon_inner);
# set the amino acid for this codon in the map
$codon_to_amino_acid_map{$loop_codon_inner} = $loop_amino_acid;

# run the mutation steps
# expects three arguments, the amino acid, the position and the nucleotide
# returns an array of amino acids encoded by this mutation
sub mutate{
my($amino_acid,$position,$nucleotide) = @_;
my $three = 3;
# the length of the nucleotide
my $nucleotide_length = length $nucleotide;
# the first three characters of the nucleotide
my $loop_codon = substr $nucleotide,$position,$three;
# the codons for this amino acid
my $codon_string = $amino_acid_to_codon_map{$amino_acid};

# some help messages or progress
print "Your amino acid: '$amino_acid'\n";
print "Its codons: $codon_string\n";
print "Will mutate at position: $position\n";
print "On nucleotide: $nucleotide\n";

my %encoded_amino_acids = ();
# while the loop codon is not blank
while ($loop_codon ne '') {
# check that the position work will not be out of range
if($position + 3 <= $nucleotide_length){
my $loop_codon_inner;
# split the codon string on the comma
my @codon_array = split(/,/, $amino_acid_to_codon_map{$amino_acid});
foreach $loop_codon_inner (@codon_array){
# trim the codon
$loop_codon_inner = &trim_string($loop_codon_inner);
# compare the left side of the mutation to the right side
my($left,$right) = &compare_codons($loop_codon,$loop_codon_inner);
# get the acid that maps to this mutation
my $acid = $codon_to_amino_acid_map{$loop_codon};
# add the acid to the return array
$encoded_amino_acids{$acid} = $loop_codon;
# show the difference
print "$left ==> $right\n";

# move the position
$position = $position + 3;
# get the next loop codon
$loop_codon = substr $nucleotide,$position,3;

} else {
# if the position changes are out of range, set the break condition
$loop_codon = '';
# return the kets of the of amino acid map, we just want the amino acids encoded by
# this mutation. we use the map to model a set
return keys %encoded_amino_acids;


# compare two codons
# expects two arguments, each a codon
sub compare_codons{
my($left_codon,$right_codon) = @_;

my $new_left;
my $new_right;

my $three = 3;
my $index = 0;
# compare each position of the codon
while($index < $three){
my $left_char = substr $left_codon,$index,1;
my $right_char = substr $right_codon,$index,1;

# if the positions match, do nothing
if($left_char eq $right_char){
$new_left .= $left_char;
$new_right .= $right_char;
} else {
# if the positions do not change, show a base change
$new_left .= "($left_char)";
$new_right .= "($right_char)";
# increment the index
#return the mutation presentation string


# print the array of acids
# expects on argument, an array of acids
# returns nothing
sub print_acids{
my @acids = @_;
my $loop_acid;
foreach $loop_acid (@acids){
print "$loop_acid\n";

print "\n";

# the command line driver
# expects three arguments, the acid, the position and the nucleotide
# returns nothing
sub command_driver{
my($amino_acid,$position,$nucleotide) = @_;
# load the data
my @raw_data = &load_data();
# load the data maps with the data from the file
# perform the mutations
my @acids = &mutate($amino_acid,$position,$nucleotide);
# show what acids are encoded by the mutations

# hard coded driver, just runs the command line driver with pre-defined variables
# expects no arguments
# returns nothing
sub hard_coded_driver{
my $amino_acid = 'Valine';
my $position = 1;
my $nucleotide = 'GGGAAACCC';


$amino_acid = 'Valine';
$position = 3;
$nucleotide = 'GGGAAACCC';


$amino_acid = 'Serine';
$position = 2;
$nucleotide = 'GGGAAACCC';


# load the command line
my($amino_acid,$position,$nucleotide) = &load_command_line();
# if the variables are blank, then there were no arguments given, and so we will run the
# hard coded driver
if($amino_acid eq '' and $position eq '' and $nucleotide eq ''){
} else {
# otherwise, we will run on the given arguments

No comments:

Share on Twitter