Show functions.pl syntax highlighted
#!/usr/bin/perl
use strict vars;
use vars qw(
$GV
$GVportal
$PRM
@modes
$DBH
$zeroPos
$GENEMARK_speciesModel
$GENSCAN_speciesModel
);
########################################
### Source Data Functions
########################################
# source database
my %attr = (PrintError=>1,RaiseError=>1);
sub getEvidence_das{
do 'das/dasFunctions.pl';
do 'das/dasQuery.pl';
my $evidenceHasRef = &queryDASevidence_genomeSequence();
return $evidenceHasRef;
}
sub getGenomeSequence_das{
return $PRM->{GenomeSequence}; # already defined and set in getEvidence_das
}
sub init_das{
do 'das/dasFunctions.pl';
dasParam(); # function to set chr, start and stop from cookie
return;
}
sub init_plantgdb{
if ($PRM->{chr} =~ /Zm/){
$GV->{specieName} = "Zea mays";
}else{
$GV->{specieName} = "Sorghum bicolor";
}
}
sub init_xgdb{
# incitiazes global values for xgdb implementation, add from xgdb cookie
# my $tmp;
# my @sessionCookie = cookie($GV->{SessCookieName}); # reads session id from cookie and accesses php session variables
# my $id = $sessionCookie[0];
# if ($id ne ""){
# my $session = PHP::Session->new($id, { create => 1, auto_save => 1 });
# if ( param("altCONTEXT") && (!$session->is_registered("altCONTEXT") || $session->get("altCONTEXT") ne param("altCONTEXT") ) ){
# $session->set(altCONTEXT => param("altCONTEXT"));
# }
# $GV->{altCONTEXT} = $session->get("altCONTEXT");
#
# if ( param("dbid") ne "" && (!$session->is_registered("dbid") || $session->get("dbid") ne param("dbid")) ) {
# $session->set(dbid => param("dbid"));
# }
# $GV->{dbid} = $session->get("dbid");
#
# if ( param("blastDB") && (!$session->is_registered("blastDB") || $session->get("blastDB") ne param("blastDB")) ){
# $session->set(blastDB => param("blastDB"));
# }
# $GV->{blastDB} = $session->get("blastDB");
#
# if ( !$GV->{altCONTEXT} || $GV->{dbid} eq "" || !$GV->{blastDB} ){
# bailOut("Incomplete parameters: altCONTEXT, dbid, blastDB");
# }
# temporary fix to use length of genome segment
if (length($PRM->{chr}) >= 2 ){
$GV->{altCONTEXT} = "BAC";
$GV->{blastDB} = $GV->{altblastDB};
$GV->{CHR_SELECT_BOX} = 0;
}else{
$GV->{altCONTEXT} = "chr";
$GV->{blastDB} = "GENOME";
}
# }else{
# bailOut("Please login first.");
# }
return;
}
sub GenomeContextLink_xgdb{
my ($chr,$lp,$rp) = @_;
my $link = "$GV->{rootPATH}$GV->{ucaSSIpath}";
$link .= (length($chr) <= 2) ? "getRegion.pl?dbid=$GV->{dbid}&chr=$chr&l_pos=$lp&r_pos=$rp" : "getGSEG_Region.pl?dbid=$GV->{dbid}&gseg_gi=$chr&l_pos=$lp&r_pos=$rp" ;
return $link;
}
sub GenomeContextLink_plantgdb{
my ($chr,$lp,$rp) = @_;
my $link = "$GV->{rootPATH}search/display/data.php?Seq_ID=$chr";
return $link;
}
sub getEvidence_plantgdb{
my %evidenceHash;
#collects [database name],[method],[gi/name],[unique record id],[genome start position],[genome stop position],[score],[exon number],[link to alignment or exon source] for exons in current range
#should accurately reflect exons drawn in Genome Plot
my $protein_EvidenceSql = "select 'PlantGDB','GeneSeqer_protein_homologous',Protein_ID,Protein_ID,exon_start,exon_end,exon_score,1,concat(\"http://www.plantgdb.org/search/display/Align_Display.php?Display_Alignment=/DATA/PlantGDB/Alignment/GeneProtein_alignDir/ZM/GSStug/Protein/\",GSScontig_ID,\".txt\") from Protein_Exon_NEW where (exon_start <= $PRM->{end})&&(exon_end >= $PRM->{start})&&(GSScontig_ID = \"$PRM->{chr}\")";
#print STDERR "$cdna_EvidenceSql";
my $ref=$GV->{DBH}->selectall_arrayref($protein_EvidenceSql);
my $evidenceHashRef = getExons($ref,\%evidenceHash);
my $est_EvidenceSql = "select 'PlantGDB','GeneSeqer_EST_native',EST_ID,EST_ID,exon_start,exon_end,exon_score,1,concat(\"http://www.plantgdb.org/search/display/Align_Display.php?Display_Alignment=/DATA/PlantGDB/Alignment/GeneProtein_alignDir/ZM/GSStug/Protein/\",GSScontig_ID,\".txt\") from EST_Exon where (exon_start <= $PRM->{end})&&(exon_end >= $PRM->{start})&&(GSScontig_ID = \"$PRM->{chr}\")";
#print STDERR "\n$est_EvidenceSql\n";
my $ref2=$GV->{DBH}->selectall_arrayref($est_EvidenceSql);
$evidenceHashRef = getExons($ref2,$evidenceHashRef);
return $evidenceHashRef;
}
sub getEvidence_xgdb{
init_xgdb();
my %evidenceHash;
#collects [database name],[method],[gi/name],[unique record id],[genome start position],[genome stop position],[score],[exon number],[link to alignment or exon source] for exons in current range
#should accurately reflect exons drawn in Genome Plot
my $tablePrefix = ($GV->{altCONTEXT} eq "BAC") ? "gseg_" : "";
my $chrfieldName = ($GV->{altCONTEXT} eq "BAC") ? "gseg_gi" : "chr";
my $exPath = ($GV->{altCONTEXT} eq "BAC") ? "gsegSRC=$GV->{blastDB}&" : ""; # temp for xGDB
my $cdna_EvidenceSql = "select '$GV->{dbTitle}','GeneSeqer_cDNA_native',uid,gi,gseg_start,gseg_stop,score,num,concat(\"$GV->{rootPATH}$GV->{ucaSSIpath}getGSQ.pl?${exPath}dbid=$GV->{dbid}&resid=1&pgs_uid=\",uid) from ${tablePrefix}cdna_good_pgs as a, ${tablePrefix}cdna_good_pgs_exons as b where (a.uid=b.pgs_uid)&&(b.gseg_stop <= $PRM->{end})&&(b.gseg_stop>=$PRM->{start})&&(a.$chrfieldName='$PRM->{chr}')";
my $ref=$GV->{DBH}->selectall_arrayref($cdna_EvidenceSql);
my $evidenceHashRef = getExons($ref,\%evidenceHash);
my $est_EvidenceSql = "select '$GV->{dbTitle}','GeneSeqer_EST_native',uid,gi,gseg_start,gseg_stop,score,num,concat(\"$GV->{rootPATH}$GV->{ucaSSIpath}getGSQ.pl?${exPath}dbid=$GV->{dbid}&resid=0&pgs_uid=\",uid) from ${tablePrefix}est_good_pgs as a, ${tablePrefix}est_good_pgs_exons as b where (a.uid=b.pgs_uid)&&(b.gseg_stop <= $PRM->{end})&&(b.gseg_stop>=$PRM->{start})&&(a.$chrfieldName='$PRM->{chr}')";
my $ref2=$GV->{DBH}->selectall_arrayref($est_EvidenceSql);
$evidenceHashRef = getExons($ref2,$evidenceHashRef);
return $evidenceHashRef;
}
sub getGenomeSequence_plantgdb{
# uses html page for retrieving sequence
my $link = "http://www.plantgdb.org/PlantGDB-cgi/search/display.cgi?Action=FASTA&hit_names=$PRM->{chr}";
my $seq = get($link);
$seq =~ s/.+?<pre>>.+?\n//is; # remove all html up to defline
$seq =~ s/<\/pre>.+//is; # remove trailing html
$seq =~ s/\W//sg; # remove all line breaks
$seq = substr($seq,$PRM->{start}-1,$PRM->{end}-1); # takes substring of GSS sequence
return $seq;
}
sub getGenomeSequence_xgdb{
init_xgdb();
my $seq;
my $link = "$GV->{rootPATH}$GV->{ucaSSIpath}/returnFASTA.pl?db=$GV->{blastDB}&dbid=$GV->{dbid}&hits=$PRM->{chr}:$PRM->{start}:$PRM->{end}";
print STDERR $link;
my $seq = get($link);
$seq =~ s/.+?<pre>>.+?\n//is; # remove all html up to defline
$seq =~ s/<\/pre>.+//is; # remove trailing html
$seq =~ s/\W//sg; # remove all line breaks
#$seq = substr($seq,$PRM->{start}-1,$PRM->{end}-1);
return $seq;
#open(SR,$DBver[$dbid]->{seqFILE}) || die "UCAimage cannot open genome sequence file";
#seek(SR,($DBver[$dbid]->{genomeST}->[($pc - 1)] + $pl - 1),0);
#read(SR,$seqs,$pr - $pl + 1);
#close(SR);
# return $seq;
}
sub getImageMap_plantgdb{
# returns image html + image map + scale variable
my $link = "http://www.plantgdb.org/search/display/GSScontig_Display.php?UCA=0&Display_GSSmem=0&Display_EST=1&Display_ProteinAGS=0&Display_Protein=1&startPos=$PRM->{start}&endPos=$PRM->{end}&Seq_ID=$PRM->{chr}&imageWidth=$PRM->{imgWidth}&UCA=1&username=$PRM->{USERid}";
# needs to return $PRM->{GenomeSequence} and scale information
my $imagelinkALL = get("${link}&imagemapFlag=1");
$imagelinkALL .= "<img src=\"${link}&imageFlag=1\" border=0 useMap=\"#GSScontig_GSS\">";
return $imagelinkALL;
}
sub getImageMap_xgdb{
# returns image html + image map + scale variable
init_xgdb();
my $link = $GV->{rootPATH}.$GV->{ucaSSIpath}."UCAimage.pl?l_pos=$PRM->{start}&r_pos=$PRM->{end}&dbid=$GV->{dbid}&imgWidth=$PRM->{imgWidth}";
$link .= ($GV->{altCONTEXT} eq "BAC") ? "&gseg_gi=$PRM->{chr}&altCONTEXT=$GV->{altCONTEXT}" : "&chr=$PRM->{chr}";
print STDERR $link;
my $imagelinkALL = get("${link}");
return $imagelinkALL;
}
sub getImageMap_das{
do 'das/makeImage.pl';
my $imagelinkALL = &returnImageMap();
# add to global parameter
$imagelinkALL .= "<script>var GenomeSequence = '$PRM->{GenomeSequence}';</script>";
return $imagelinkALL;
}
sub getScale_das{
my $zeroPos = int($PRM->{start});
my $StartX=10;
my $Margin=10;
my $imgWidth = $PRM->{imgWidth};
#my $zeroPos = int($start); # change to local
my $seqLen = $PRM->{end} - $zeroPos + 1;
my $ratio=$seqLen/($PRM->{imgWidth}-2*$Margin);
my $rulerLen=$seqLen/$ratio;
my $scale=$seqLen/$rulerLen;
$scale = sprintf("%.2f", $scale);
return ($scale,$zeroPos,$StartX,$Margin); # [base to pixel scale] , [base position of start of graphic], [left padding white space in graphic
}
sub getScale_xgdb{
my $zeroPos = int($PRM->{start});
my $StartX=20;
my $Margin=10;
my $imgWidth = $PRM->{imgWidth};
my $unit=1000;
my $len=$PRM->{end}-$PRM->{start}+1;
if($len<1000){
$unit=10;
}elsif($len<10000){
$unit=100;
}
#my $zeroPos = int($start); # change to local
my $seqLen = $PRM->{end} - $zeroPos + 1;
my $ratio=$seqLen/($PRM->{imgWidth}-$StartX-2*$Margin);
my $rulerLen=$seqLen/$ratio;
my $scale=$seqLen/$rulerLen;
$scale = sprintf("%.2f", $scale);
return ($scale,$zeroPos,$StartX,$Margin); # [base to pixel scale] , [base position of start of graphic], [left padding white space in graphic]
}
sub getScale_plantgdb{
my $zeroPos = int($PRM->{start});
my $StartX=35;
my $Margin=35;
my $imgWidth = $PRM->{imgWidth};
my $zeroPos = $PRM->{start};
my $seqLen =$PRM->{end} - $zeroPos + 1;
my $scale=$seqLen/($PRM->{imgWidth}-$StartX-$Margin);
$scale = sprintf("%.2f", $scale);
return ($scale,$zeroPos,$StartX,$Margin);
}
########################################
### End Source Data Functions
########################################
########################################
### Database Regulatory Functions
########################################
sub getUserId_xgdb{
my @sessionCookie = cookie($GV->{SessCookieName}); # reads session id from cookie and accesses php session variables
my $id = $sessionCookie[0];
print STDERR "id=$id\n";
if ($id ne ""){
my $session = PHP::Session->new($id, { create => 1,save_path => $GV->{session_path} });
if ($session->is_registered($GV->{SessLoginParam})){
my $USERid = $session->get($GV->{SessLoginParam});
print STDERR "U=$USERid";
return $USERid;
}else{
$session->destroy;
return;
}
}
return;
}
sub getUserId_das{
return "anonymous";
}
sub getAdminOwnership{
# not checked
# find annotations currently checked out by administrator, returns hash{geneid} = admin who checked it out
my $sql = "select uid as uid, max(checked_out_date) as maxDate from admin_session where dbName='$GV->{dbTitle}' group by uid";
my $latestREF = $GV->{LDBH}->selectall_hashref($sql,'uid');
my $expire_secs = 60*30;
my %ownedHash = ();
for my $k (keys %$latestREF){
my $sql = "select uid,USERid,geneName from admin_session where uid = '$k' and checked_out_date = '$$latestREF{$k}{'maxDate'}' and (UNIX_TIMESTAMP(now()) - UNIX_TIMESTAMP(checked_out_date)) < $expire_secs and dbName = '$GV->{dbTitle}' and returned = 0";
my $oREF = $GV->{LDBH}->selectall_hashref($sql,'uid');
if (keys(%$oREF)){
$ownedHash{$k} = $$oREF{$k}{'USERid'};
}
}
return \%ownedHash;
}
sub adminAnnotation{
return;
}
sub blank{
}
sub getUserGroup{
# distinguish admin and other groups
my $sql = "select ACCgroup from users where USERid = '$PRM->{USERid}'";
my @ref = $GV->{LDBH}->selectrow_array($sql);
if ($ref[0] eq "ADMIN"){
return 1;
}else{
return 0;
}
}
sub addUserAnnotation{
my ($stat) = @_;
my $info = $PRM->{info};
my $strand = ($info =~ /^comp/)? 'r': 'f';
my ($UAstart) = $info =~ /^[^\d]+(\d+)/;
my ($UAend) = $info =~ /(\d+)\)+$/;
$PRM->{mRNAseq} =~ s/\s//g; # no whitespace for sequence entries
$PRM->{proteinseq} =~ s/\s//g;
# print STDERR "$PRM->{USERid}\n";
my $sql = "insert into user_gene_annotation (USERid,geneId,chr,strand,l_pos,r_pos,gene_structure,description,CDSstart,CDSstop,proteinId,geneAliases,proteinAliases,status,modDate,evidence,annotation_type,mRNAseq,proteinseq,GSeqEdits,organism)";
$sql .= " VALUES (\"$PRM->{USERid}\",\"$PRM->{UCAannid}\",\"$PRM->{chr}\",'$strand',$UAstart,$UAend,\"$PRM->{info}\",\"$PRM->{desc}\",'$PRM->{cds_start}','$PRM->{cds_end}',\"$PRM->{prod}\",\"$PRM->{geneAlias}\",\"$PRM->{protAlias}\",'$stat',NOW(),\"$PRM->{Esource}\",\"$PRM->{annotation_type}\",\"$PRM->{mRNAseq}\",\"$PRM->{proteinseq}\",\"$PRM->{GSeqEdits}\",\"$GV->{specieName}\");";
my $sth = $GV->{ADBH}->do($sql) or return 0; # returns 1 if successful update, 0 if unsuccessful, # geneId is unique index in database
$sql = "select last_insert_id();";
my ($id) = $GV->{ADBH}->selectrow_array($sql);
$PRM->{uid} = $id;
if ($stat eq "aREVIEW"){
AdminNotify();
}
return 1;
}
sub removeAnnotation{
my ($USERid, $uid) = @_;
if (($PRM->{owner} eq $PRM->{USERid})||( getUserGroup($PRM->{USERid}) )){ # if owned or isAdmin
my $sql = "delete from user_gene_annotation where uid = '$PRM->{uid}'";
my $sth = $GV->{ADBH}->do($sql);
return 1;
}
bailOut("Delete action not permitted");
}
sub updateAnnotation{
my ($stat) = @_;
my $strand = ($PRM->{info} =~ /^comp/)? 'r': 'f';
my $info = $PRM->{info};
my ($UAstart) = $info =~ /^[^\d]+(\d+)/;
my ($UAend) = $info =~ /(\d+)\)+$/;
$PRM->{mRNAseq} =~ s/\s//g; # no whitespace for sequence entries
$PRM->{proteinseq} =~ s/\s//g;
my $sql = "update user_gene_annotation set USERid = \"".(($PRM->{owner})?$PRM->{owner}:$PRM->{USERid})."\" ,geneId = '$PRM->{UCAannid}', chr =\"$PRM->{chr}\", strand = \"$strand\",l_pos = $UAstart, r_pos = $UAend, gene_structure = \"$PRM->{info}\", description = \"$PRM->{desc}\", CDSstart = \"$PRM->{cds_start}\", CDSstop = \"$PRM->{cds_end}\", proteinId = \"$PRM->{prod}\", geneAliases = \"$PRM->{geneAlias}\", proteinAliases = \"$PRM->{protAlias}\", status = \"$stat\", annotation_type = \"$PRM->{annotation_type}\", mRNAseq = \"$PRM->{mRNAseq}\", proteinseq =\"$PRM->{proteinseq}\", modDate = NOW(), GSeqEdits = \"$PRM->{GSeqEdits}\",evidence = \"$PRM->{Esource}\" where uid = $PRM->{uid}";
my $sth = $GV->{ADBH}->do($sql) or return 0;
if ($stat eq "aREVIEW"){
AdminNotify();
}
return 1; # returns 1 if successful update, 0 if unsuccessful, # geneId is unique index in database
}
sub loadUCA {
# loads UCA values from database
my $ucaQuery=qq{
select uid,USERid,geneId,proteinId,chr,l_pos,r_pos,gene_structure,description,geneAliases,proteinAliases,CDSstart,CDSstop,status,DATE_FORMAT(modDate,'\%W, \%M \%d, \%Y'),DATE_FORMAT(modDate,'\%r'),evidence,mRNAseq,proteinseq,GSeqEdits,organism
from user_gene_annotation
where uid = \"$PRM->{uid}\"
};
my @arr = $GV->{ADBH}->selectrow_array($ucaQuery);
if ( $#arr < 0 ){
return 0;
}
my ($uid,$USERid,$geneId,$proteinId,$chr,$l_pos,$r_pos,$gene_structure,$desc,$geneAliases,$proteinAliases,$cds_start,$cds_end,$status,$modDate,$modTime,$Esource,$mRNA,$protein,$GSeqEdits,$organism) = @arr;
$PRM->{owner} = $USERid;
$PRM->{uid} = $uid;
$PRM->{UCAannid} = $geneId;
$PRM->{chr} = $chr;
$PRM->{info} = $gene_structure;
$PRM->{desc} = $desc;
$PRM->{Esource} = $Esource;
$PRM->{geneAlias} = $geneAliases;
$PRM->{protAlias} = $proteinAliases;
$PRM->{prod} = $proteinId;
$PRM->{modifyState} = $status;
$PRM->{modDate} = $modDate;
$PRM->{modTime} = $modTime;
$PRM->{status} = $status;
($PRM->{start},$PRM->{end}) = $gene_structure =~ /\((\d+).+?(\d+)\)/;
$PRM->{cds_start} = $cds_start;
$PRM->{cds_end} = $cds_end;
$PRM->{mRNAseq} = $mRNA;
$PRM->{proteinseq} = $protein;
$PRM->{GSeqEdits} = $GSeqEdits;
$PRM->{specieName} = $organism;
return 1;
}
sub extraAdminSubmit{
# sends email to Annotator and Administrator
my $AdminEmail = $GV->{AdminEmail};
my $emailTXT = $PRM->{emailTXT};
my $sql = "select fullname,email from users where USERid = '$PRM->{owner}' ";
my $ref = $GV->{LDBH}->selectall_arrayref($sql);
my $fullname = $ref->[0]->[0];
my $userEmail = $ref->[0]->[1];
if ( ($userEmail =~ /(.+?)\@(.+)/) && ($AdminEmail =~ /(.+?)\@(.+)/ ) ){ # email address check, send will crash if email address is not in correct format
open(MAIL, "| /usr/sbin/sendmail -t");
my $mailTXT .= "To: $userEmail\n";
$mailTXT .= "CC: $AdminEmail\n";
$mailTXT .= "From: $AdminEmail\n";
$mailTXT .= "Subject: Accepted User Contributed Annotation at $GV->{dbTitle}\n\n";
$mailTXT .= "Dear $fullname, \n\n";
$mailTXT .= "Your annotation $PRM->{UCAannid} has been ";
$mailTXT .= ($PRM->{status} eq "ACCEPTED") ? "accepted.\n\n" : "rejected. \n\n";
$mailTXT .= "Administrator comments:\n-------------------------------\n $emailTXT \n-------------------------------\n";
# $mailTXT .= "Gene Annotation: <a href=''>$PRM->{UCAannid}</a>\n";
$mailTXT .= "Region: ".(&{$GV->{GenomeContextLinkFunction}}($PRM->{chr},$PRM->{start},$PRM->{end}))."\n";
$mailTXT .= "Thank you for submitting a gene annotation to the $GV->{specieName} at $GV->{dbTitle} community.\n\n\n".$GV->{HTMLpath};
print STDERR $mailTXT;
print MAIL $mailTXT;
close(MAIL);
} # end if valid email address
return;
}
sub AdminNotify{
# sends email to Annotator and Administrator
my $AdminEmail = $GV->{AdminEmail};
my $emailTXT = $PRM->{emailTXT};
print STDERR "open";
if ( ($AdminEmail =~ /(.+?)\@(.+)/ ) ){ # email address check, send will crash if email address is not in correct format
open(MAIL, "| /usr/sbin/sendmail -t");
my $mailTXT .= "To: $AdminEmail\n";
$mailTXT .= "From: $AdminEmail\n";
$mailTXT .= "Subject: New User Contributed Annotation at $GV->{dbTitle} to be Reviewed\n\n";
$mailTXT .= "Administrators,\n Please review this new User Contributed Annotation, $PRM->{UCAannid}, at $GV->{dbTitle}, submitted by $PRM->{USERid}.\n";
$mailTXT .= "Region: ".(&{$GV->{GenomeContextLinkFunction}}($PRM->{chr},$PRM->{start},$PRM->{end}))."\n";
$mailTXT .= "\n\nAdministration Checkout (login required):\n http://www.plantgdb.org$GV->{CGIPATH}AdminAnnotation.pl";
print STDERR "he ".$mailTXT;
print MAIL $mailTXT;
close(MAIL);
} # end if valid email address
return;
}
sub printEditLink{
# add init function to establish global log in parameters?
my $link = ($PRM->{USERid} eq $PRM->{owner}) ? "<a href='$GV->{CGIPATH}AnnotationTool.pl?uid=$PRM->{uid}$GV->{tmpLinkParam}' target=_blank>edit</a>" : "";
}
sub printAnnotation{
my $info = $PRM->{info};
my $strand = ($info =~ /^comp/)? 'r': 'f';
my ($UAstart) = $info =~ /^[^\d]+(\d+)/;
my ($UAend) = $info =~ /(\d+)\)+$/;
my $modifyDate = strftime( "%A, %B %d, %Y",localtime(time));
my $pcr = ($PRM->{cds_start}) ? "$PRM->{cds_start}-$PRM->{cds_end}" : "";
my $org = $PRM->{specieName} || $GV->{speciesName};
my $txt =<<END_TXT;
Annotation Id:
$PRM->{UCAannid}
USER Id:
$PRM->{USERid}
Modify Date:
$modifyDate
Organism:
$org
Chromosome/Genome Segment:
$PRM->{chr}
Strand:
$strand
Left Position:
$UAstart
Right Position:
$UAend
Gene Structure:
$PRM->{info}
Protein Coding Region:
$pcr
mRNA sequence:
$PRM->{mRNAseq}
Protein Sequence:
$PRM->{proteinseq}
Description:
$PRM->{desc}
Exon Origins:
[start][stop][source of exon][score][database name][parent unique Id][parent name][link to parent]
$PRM->{Esource}
Putative Protein Product:
$PRM->{prod}
Gene Aliases:
$PRM->{geneAliases}
Protein Aliases:
$PRM->{proteinAliases}
Genome Sequence Edits:
$PRM->{GSeqEdits}
END_TXT
return $txt;
}
sub printDetail{
my $info = $PRM->{info};
my $strand = ($info =~ /^comp/)? 'r': 'f';
my ($UAstart) = $info =~ /^[^\d]+(\d+)/;
my ($UAend) = $info =~ /(\d+)\)+$/;
my $modifyDate = ($PRM->{modDate}) ? "$PRM->{modDate} $PRM->{modTime}": strftime( "%A, %B %d, %Y",localtime(time));
my $pcr = ($PRM->{cds_start} > 0) ? "$PRM->{cds_start}-$PRM->{cds_end}" : "";
my $esource_links = $PRM->{Esource};
$esource_links =~ s/<newline>/\n/g;
# $esource_links =~ s/(http:.+?)([<\s])/<a href="$1">$1<\/a>$2/g;
$esource_links =~ s/(http:\S+)/<a href="$1">$1<\/a> /g;
my $org = $PRM->{specieName} || $GV->{speciesName};
my $txt =<<END_TXT;
<b>Annotation Id:</b>
$PRM->{UCAannid}
<b>USER Id:</b>
$PRM->{owner}
<b>Last Modified:</b>
$modifyDate
<b>Organism</b>
$org
<b>Chromosome / Genome Segment:</b>
$PRM->{chr}
<b>Strand:</b>
$strand
<b>Left Position:</b>
$UAstart
<b>Right Position:</b>
$UAend
<b>Gene Structure:</b>
<textarea id=info rows=2 cols=40>
$PRM->{info}
</textarea>
<b>Protein Coding Region:</b>
$pcr
<b>mRNA sequence:</b>
<textarea id="mRNAseq" rows=3 cols=45>
$PRM->{mRNAseq}
</textarea>
<b>Protein Sequence:</b>
<textarea id="proteinseq" rows=3 cols=45>
$PRM->{proteinseq}
</textarea>
<b>Description:</b>
$PRM->{desc}
<b>Exon Origins:</b>
[start][stop][source of exon][score][database name][parent unique Id][parent name][link to parent]
<div style="height=100px;width=500px;overflow:auto; border:thin black solid;text-align:left">
$esource_links
</div>
<b>Putative Protein Product:</b>
$PRM->{prod}
<b>Gene Aliases:</b>
$PRM->{geneAliases}
<b>Protein Aliases:</b>
$PRM->{proteinAliases}
<b>Genome Sequence Edits:</b>
$PRM->{GSeqEdits}
END_TXT
return $txt;
}
########################################
### end Database Regulatory Functions
########################################
####################################################################
# General Functions
####################################################################
my ($CHR_SELECT_BOX,$RangeChrFIELD);
sub getExons{
# adds array of evidence to evidence hash
my ($ref,$evidenceHashRef) = @_; #[reference to array of evidence], [current evidenceHash reference], [type of evidence]
my %eH = %$evidenceHashRef;
for (my $i=0;$i<scalar(@{$ref});$i++){
my ($dbName,$exon_method,$seqName,$seqId,$exon_start,$exon_stop,$exon_score,$exon_num,$exon_link) = @{$ref->[$i]};
if ((min($exon_start,$exon_stop) >= $zeroPos)&&(max($exon_start,$exon_stop) <= $PRM->{end} )){ # exons within window
my $eKey = "$exon_start $exon_stop"; # two spaces for exon key
if ($eH{$eKey}){
$eH{$eKey}->{maxScore} = ($exon_score > $eH{$eKey}->{maxScore}) ? $exon_score : $eH{$eKey}->{maxScore};
$eH{$eKey}->{members}->{"$seqId"} = {
name => $seqName,
score => $exon_score,
number => $exon_num, # not used so far
method => $exon_method,
link => $exon_link,
dbName => $dbName
};
}else{
$eH{$eKey} = {
maxScore => $exon_score,
members => {
"$seqId"=>{
name => $seqName,
score => $exon_score,
number => $exon_num,
method => $exon_method,
link => $exon_link,
dbName => $dbName
}
}
};
}
} # end if within window
} # end for
return \%eH;
}
sub getEvidenceTable{
my ($evidenceHashref) = @_;
my %evidenceHash = %$evidenceHashref;
my $groupcount = 1;
my @evidencelist = sort by_coord keys %evidenceHash;
my @cellWidths = ('10px','120px','45px','210px');
my $etableJscript = "";
my $eTable = "<table style='border:gray solid thin;font-family:Arial;font-size:10px;padding:0px;spacing:0px;empty-cells:hide' width='420px'>
<tr><td colspan=4><font class=s2>Evidence Table<\/font>    <input class=utButton type=button onclick='clickHideExons();' value='only display selected exons' id='hideButton'><\/td><\/tr>
<tr><td width='$cellWidths[0]'><\/td><td align=center width='$cellWidths[1]'>Exon Coordinates</td><td width='$cellWidths[2]'>Score<\/td><td width='$cellWidths[3]' align=center>Evidence supporting exon<\/td><\/tr>
<tr><td colspan=4>
<div id='eTable' style='position=relative;top:0;left:0;width:420px;height:$GV->{eTableHeight}px;overflow:auto'>
<table width='400px' !class=mainTable style='spacing:0px;padding:0px;border:0px;margin:0px;background:lightskyblue'>";
my $memberseqs = "";
my %evidenceHashC = %evidenceHash;
foreach my $k (@evidencelist){ # each group of evidence table
my ($groupMin, $groupMax);
if ($evidenceHash{$k}){
my @coord = split / /, $k;
my $rowcolor = ($groupcount % 2) ? "#ededb1":"#CCCC99";
$groupMin = min($coord[0],$coord[1]);
$groupMax = max($coord[0],$coord[1]);
$eTable .= "\n\n\n<tr style='padding:0px;spacing:0px;' id=\"frow".min($coord[0],$coord[1])." ".max($coord[0],$coord[1])."\"><td valign=middle style='width:$cellWidths[0];font-size:10px;'>$groupcount</td><td>
<table id=\"table".min($coord[0],$coord[1])." ".max($coord[0],$coord[1])."\" style=\"background:lavender;padding:0px;spacing:0px;text-align;left;font-family:Arial;font-size:10px;border:thin gray solid;width:".($cellWidths[1] + $cellWidths[2] + $cellWidths[3])."\"><tr id=\"".min($coord[0],$coord[1])." ".max($coord[0],$coord[1])."\"><td width='$cellWidths[1]'><input type=radio name='e$groupcount' value='".min($coord[0],$coord[1])." ".max($coord[0],$coord[1])."' onClick='selectradio($groupcount,\"".min($coord[0],$coord[1])." ".max($coord[0],$coord[1])."\",1);'";
$eTable .= ($PRM->{info} =~ /$coord[1]\.\.$coord[0]/ || $PRM->{info} =~ /$coord[0]\.\.$coord[1]/) ? " checked=checked " : "";
$eTable .= ">".min($coord[0],$coord[1])." ".max($coord[0],$coord[1])."<\/td><td width='$cellWidths[2]'>".$evidenceHash{$k}->{maxScore}.'</td><td width="$cellWidths[3]">'.&evidence_links($evidenceHash{$k}->{members})."<\/td><\/tr>";
#$etableJscript .= "eTableExons['".min($coord[0],$coord[1])." ".max($coord[0],$coord[1])."'] = '".&TypeList(\%evidenceHash,$k)."';\n";
$etableJscript .= &TypeList(\%evidenceHash,$k);
my $InmembersHR = $evidenceHash{$k}->{members};
delete $evidenceHash{$k};
delete $evidenceHashC{$k};
my ($InmembersHR2) ; # tmp variable should be deleted!
($groupMin,$groupMax,$InmembersHR2) = GroupExon(min($coord[0],$coord[1]),max($coord[0],$coord[1]),$InmembersHR2,\%evidenceHashC); # gets min and max coordinates for exon group
my @evidencelist2 = sort by_coord keys %evidenceHash;
foreach my $k2 (@evidencelist2){
if ($evidenceHash{$k2}){
my @coord2 = split / /, $k2;
if ( ($groupMin <= max($coord2[0],$coord2[1])) and ($groupMax >= min($coord2[0],$coord2[1])) and ( SeqShare($InmembersHR,$evidenceHash{$k2}->{members}) ) ){
$eTable .= "<tr id=\"".min($coord2[0],$coord2[1])." ".max($coord2[0],$coord2[1])."\"><td width='$cellWidths[1]'><input type=radio name='e$groupcount' value='".min($coord2[0],$coord2[1])." ".max($coord2[0],$coord2[1])."' onClick='selectradio($groupcount,\"".min($coord2[0],$coord2[1])." ".max($coord2[0],$coord2[1])."\",1);'";
$eTable .= ">".min($coord2[0],$coord2[1])." ".max($coord2[0],$coord2[1])."<\/td><td width='$cellWidths[2]'>".$evidenceHash{$k2}->{maxScore}.'</td><td width="$cellWidths[3]">'.&evidence_links($evidenceHash{$k2}->{members})."<\/td><\/tr>";
#test line for above # $eTable .= ">"."keys ".keys(%$InmembersHR)." $InmembersHR ".join(",",keys(%{$InmembersHR}))."<br> ".min($coord2[0],$coord2[1])." ".max($coord2[0],$coord2[1])."<\/td><td>".$evidenceHash{$k2}->{maxScore}.'</td><td>'.&evidence_links($evidenceHash{$k2}->{members})."<\/td><\/tr>";
AddSeqs($InmembersHR,$evidenceHash{$k2}->{members});
#$etableJscript .= "eTableExons['".min($coord2[0],$coord2[1])." ".max($coord2[0],$coord2[1])."'] = '".&TypeList(\%evidenceHash,$k2)."';\n";
$etableJscript .= &TypeList(\%evidenceHash,$k2);
delete $evidenceHash{$k2};
delete $evidenceHashC{$k2};
}
}
}
$eTable .= "<\/table><\/td><\/tr>";
$groupMin = $groupMax = "";
$groupcount++;
}
}
$eTable .= '</table></div></td></tr></table>';
$eTable .= "<script>var groupAmt = $groupcount;<\/script>";
return ($eTable,$etableJscript);
}
sub GroupExon{
# tiles out boundaries of exon group, returns members of group and boundaries
my ($groupMin, $groupMax, $membersHR, $eHashRefT) = @_; #,$evidenceHashRef) = @_;
my %eHashT = %{$eHashRefT};
for my $k (keys %eHashT){
my @coord = split / /, $k;
# if ( ($groupMin <= max($coord[0],$coord[1])) and ($groupMax >= min($coord[0],$coord[1])) and &SeqShare($membersHR,$eHashT{$k}->{members}) ){
if ( ($groupMin <= max($coord[0],$coord[1])) and ($groupMax >= min($coord[0],$coord[1])) and &SeqShare($membersHR,$eHashT{$k}->{members}) ){
# $membersHR = &SeqShare($membersHR,$eHashT{$k}->{members});
AddSeqs($membersHR,$eHashT{$k}->{members});
$groupMin = min($groupMin,min($coord[0],$coord[1]));
$groupMax = max($groupMax,max($coord[0],$coord[1]));
delete $eHashT{$k};
}
}
if ( keys(%eHashT) == keys(%$eHashRefT) ){ # return if range gets all overlapping exons
return ($groupMin,$groupMax,$membersHR);
}else{ # recurse to expand range
($groupMin,$groupMax,$membersHR) = GroupExon($groupMin,$groupMax,$membersHR,\%eHashT);
}
}
sub evidence_links{
# links for evidence, in evidence table
my ($membersHR) = @_;
my $link_str;
for my $k (keys %$membersHR){
$link_str .= "<a href='$membersHR->{$k}->{link}' target='_blank'>$k</a> \n";
}
return $link_str;
}
sub SeqShare2{
# checks for member duplicity within an exon variant group, else returns list of merged members
my ($membersHR,$newmembersHR) = @_;
for my $k (keys %$newmembersHR){
if ($membersHR->{$k}){
#print STDERR "SeqShare $k\n";
my %tmp = ();
return \%tmp;
}
$membersHR->{$k} = $newmembersHR->{$k};
}
return $membersHR;
}
sub SeqShare{
# checks for member duplicity within an exon variant group, else returns list of merged members
my ($membersHR,$newmembersHR) = @_;
for my $k (keys %$newmembersHR){
if ($membersHR->{$k}){
my %tmp = ();
return 0;
}
}
return 1;
}
sub AddSeqs{
my ($membersHR,$newmembersHR) = @_;
for my $k (keys %$newmembersHR){
$membersHR->{$k} = $newmembersHR->{$k};
}
return;
}
sub MemSeq(){
my ($memberseqs, $newlist) = @_;
my @members = $newlist =~ /[^,]+/g;
my $c = 0;
for (my $i=0;$i<scalar(@members);$i++){
my $pat = $members[$i];
$pat =~ s/[\.\-]/\./g;
if ($memberseqs =~ /$pat/){
#print STDERR "$memberseqs $pat\n";
return 0;
}
}
return 1;
}
sub TypeList(){
# format of Esource; returns Esource line for an exon
my ($evidenceHR,$exon) = @_;
my @coord = split / /,$exon;
my $linkstr;
my $membersHR = $evidenceHR->{$exon}->{members};
for my $k (keys %{$membersHR}){
$linkstr .= ( ($linkstr ne "") ? "<newline>" : "").min($coord[0],$coord[1])." ".max($coord[0],$coord[1])." $membersHR->{$k}->{method} $membersHR->{$k}->{score} $membersHR->{$k}->{dbName} $k $membersHR->{$k}->{name} $membersHR->{$k}->{link}";
}
$linkstr = "eTableExons['".min($coord[0],$coord[1])." ".max($coord[0],$coord[1])."'] = \"$linkstr\";\n";
return $linkstr;
}
sub bailOut{
my ($txt) = @_;
print header();
$txt = ($txt eq "") ? "Improper parameters. Please enter the Gene Annotation Tool from a legitimate link." : $txt;
print $txt;
exit();
}
sub min{
my ($a,$b) = @_;
if ($a > $b){
return $b;
}else{
return $a;
}
}
sub max{
my ($a,$b) = @_;
if ($a < $b){
return $b;
}else{
return $a;
}
}
sub by_coord{
my @s1 = split / /,$a;
my @s2 = split / /,$b;
$s1[0] <=> $s2[0]
||
$s1[1] <=> $s2[1]
}
return 1;
See more files for this project here