diff -Nru libgo-perl-0.13/GO/Handlers/go_xref.pm libgo-perl-0.15/GO/Handlers/go_xref.pm --- libgo-perl-0.13/GO/Handlers/go_xref.pm 2010-05-12 19:16:49.000000000 +0000 +++ libgo-perl-0.15/GO/Handlers/go_xref.pm 2011-08-01 22:10:56.000000000 +0000 @@ -33,10 +33,11 @@ sub e_term { my $self = shift; my $t = shift; - my ($name, $id) = $t->lget(qw(name id)); - foreach ($t->get_dbxref) { - $self->printf("%s:%s > $name ; $acc\n", - $_->sget_db, $_->sget_acc); + my $name = $t->sget_name; + my $id = $t->sget_id; + foreach ($t->get_xref_analog) { + $self->printf("%s:%s > $name ; $id\n", + $_->sget_dbname, $_->sget_acc); } return; } diff -Nru libgo-perl-0.13/GO/Handlers/obj.pm libgo-perl-0.15/GO/Handlers/obj.pm --- libgo-perl-0.13/GO/Handlers/obj.pm 2010-05-31 19:18:37.000000000 +0000 +++ libgo-perl-0.15/GO/Handlers/obj.pm 2012-10-29 20:20:54.000000000 +0000 @@ -67,7 +67,6 @@ $parser = GO::Parser->new({handler=>'obj'}); $parser->parse(@files); my $graph = $parser->graph; - =cut @@ -80,7 +79,6 @@ *graph = \&g; *ontology = \&g; - sub apph { my $self = shift; $self->{apph} = shift if @_; @@ -280,13 +278,14 @@ $ldef->add_intersection($isect); } elsif ($k eq UNION_OF) { - $term->add_equivalent_to_union_of_term($v); + my $obj = stag_get($sn, TO); + $term->add_equivalent_to_union_of_term($obj); } elsif ($k eq DISJOINT_FROM) { $term->add_disjoint_from_term($v); } else { - warn("add method for $k"); +# warn("add method for $k"); $term->stag->add($k, $v); # $self->throw("don't know what to do with $k"); diff -Nru libgo-perl-0.13/GO/Handlers/obo_godb_flat.pm libgo-perl-0.15/GO/Handlers/obo_godb_flat.pm --- libgo-perl-0.13/GO/Handlers/obo_godb_flat.pm 2010-05-12 19:16:49.000000000 +0000 +++ libgo-perl-0.15/GO/Handlers/obo_godb_flat.pm 2011-08-01 22:10:56.000000000 +0000 @@ -150,12 +150,16 @@ # now evidence and evidence dbxref for my $ev ($assoc->get_evidence) { - + # prioritize PMIDs + my ($ref) = grep {$_ =~ /^PMID/} $ev->get_ref(); + if (!$ref) { + $ref = $ev->sget_ref; + } $self->dump_table('evidence', [ ++$self->{pk}{evidence}, $ev->sget_evcode, $self->{pk}{association}, - $self->get_dbxref_id($ev->sget_ref), # only the first one here + $self->get_dbxref_id($ref), # only the first one here $ev->sget_with || "", # put only the first one here, I dunno why ]); diff -Nru libgo-perl-0.13/GO/Handlers/prolog.pm libgo-perl-0.15/GO/Handlers/prolog.pm --- libgo-perl-0.13/GO/Handlers/prolog.pm 2010-08-02 01:45:00.000000000 +0000 +++ libgo-perl-0.15/GO/Handlers/prolog.pm 2013-05-24 05:46:32.000000000 +0000 @@ -22,7 +22,7 @@ $self->factq('metadata_db:idspace_uri'=>[$1,$2]); } if ($hdr->get_ontology) { - $self->factq('ontology'=>[$hdr->get_ontology]); + $self->factq('ontology'=>[$hdr->sget_ontology]); } foreach ($hdr->get_subsetdef) { my $id = $_->sget_id; @@ -39,6 +39,14 @@ foreach ($hdr->get_import) { $self->factq('ontol_db:import_directive'=>[$_]); } + foreach ($hdr->subnodes) { + my $n = $_->name; + if ($n =~ /^treat/) { + $n =~ s/\-/_/g; + my @vals = split(' ',$_->data); + $self->factq("ontol_db:$n"=>[@vals]); + } + } $self->nl; return; } @@ -60,7 +68,23 @@ my $name = $typedef->sget_name; $self->factq('metadata_db:entity_label', [$id, $name]) if $name; my @is_as = $typedef->get_is_a; - $self->rfactq($_, 'subclass', [$id, $_]) foreach @is_as; + foreach my $is_a (@is_as) { + if (ref($is_a)) { + my $gci_rel = $is_a->sget('@/gci_relation'); + if ($gci_rel) { + $self->rfactq($_, + 'gci_subclass', + [$id,$is_a->data, $gci_rel, $is_a->sget('@/gci_filler')]); + } + else { + $self->rfactq($_, 'subclass', [$id, $is_a->data]); + } + } + else { + $self->rfactq($_, 'subclass', [$id, $is_a]); + } + } + if ($ont) { $self->factq('metadata_db:entity_resource', [$id, $ont]); } @@ -175,10 +199,27 @@ #my @is_as = $term->findval_is_a; my @is_as = $term->get_is_a; - $self->rfactq($_,'subclass', [$id, ref($_) ? $_->get('.') : $_], $name_h->{$_}) foreach @is_as; + foreach my $is_a (@is_as) { + if (ref($is_a)) { + my $gci_rel = $is_a->sget('@/gci_relation'); + if ($gci_rel) { + $self->rfactq($_, + 'gci_subclass', + [$id,$is_a->get('.'), $gci_rel, $is_a->sget('@/gci_filler')]); + } + else { + #$self->rfactq($_,'subclass', [$id, ref($is_a) ? $_->get('.') : $_], $name_h->{$_}); + $self->rfactq($_, 'subclass', [$id, $is_a->get('.')], $name_h->{$_}); + } + } + else { + $self->rfactq($_,'subclass', [$id, $is_a], $name_h->{$is_a}); + } + } + my @equivs = $term->get_equivalent_to; - $self->rfactq($_, 'equivalent_class', [$id, $_]) foreach @equivs; + $self->rfactq($_, 'equivalent_class', [$id, ref($_) ? $_->get('.') : $_]) foreach @equivs; my @xp = $term->get_intersection_of; if (scalar(@xp) == 1) { @@ -232,10 +273,19 @@ foreach (@rels) { my @args = ($id, $_->get_type, convert_to_ref($_->get_to), map { convert_to_ref($_) } $_->get_additional_argument); - $self->rfactq($_, - 'restriction', - [@args], - $name_h->{$_->get_to}); + my $gci_rel = $_->sget('@/gci_relation'); + if ($gci_rel) { + $self->rfactq($_, + 'gci_restriction', + [@args, $gci_rel, $_->sget('@/gci_filler')], + $name_h->{$_->get_to}); + } + else { + $self->rfactq($_, + 'restriction', + [@args], + $name_h->{$_->get_to}); + } foreach my $cardp (qw(cardinality minCardinality maxCardinality)) { my $card = $_->sget('@/'.$cardp); if ($card) { @@ -420,6 +470,31 @@ } $self->fact($pred, [$aid,$id,'has_role',$term_acc]); + + my $aspect = $assoc->sget_aspect; + if ($aspect) { + if (!$self->{_written_aspect}) { + $self->{_written_aspect} = {}; + } + if (!$self->{_written_aspect}->{$term_acc}) { + $self->{_written_aspect}->{$term_acc} = 1; + my $ont = ''; + if ($aspect eq 'F') { + $ont = 'molecular_function'; + } + elsif ($aspect eq 'P') { + $ont = 'biological_process'; + } + elsif ($aspect eq 'C') { + $ont = 'cellular_component'; + } + if ($ont) { + $self->fact('metadata_db:entity_resource',[$term_acc,$ont]); + } + } + + } + my @evs = $assoc->get_evidence; my $ne=0; foreach my $ev (@evs) { @@ -525,9 +600,9 @@ sub e_instance { my ($self, $inst) = @_; my $id = $inst->get_id; - my $class = $inst->get_instance_of; $self->factq('inst', [$id]); - $self->factq('inst_of',[$id,$class]); + $self->factq('inst_of',[$id,$_]) + foreach $inst->get_instance_of; my $name = $inst->sget_name; $self->factq('metadata_db:entity_label', [$id, $name]) if $name; $self->export_tags($inst); diff -Nru libgo-perl-0.13/GO/IO/RDFXML.pm libgo-perl-0.15/GO/IO/RDFXML.pm --- libgo-perl-0.13/GO/IO/RDFXML.pm 2010-05-12 19:16:50.000000000 +0000 +++ libgo-perl-0.15/GO/IO/RDFXML.pm 2011-08-01 22:10:56.000000000 +0000 @@ -415,12 +415,13 @@ my $tag = shift; my $content = shift; - $self->{writer}->dataElement($tag, - $self->__strip_non_ascii($content)); - + $self->{writer}->dataElement($tag, $self->__strip_non_ascii($content)); } -sub __strip_non_ascii { + +## WARNING: it is though that this code does not correctly get all +## problem encoded characters; deprecated for __strip_non_ascii. +sub __strip_non_ascii_old { my $self = shift; my $string = shift; @@ -429,6 +430,40 @@ return $string; } + +## NOTE: taken from GO::Parsers::go_assoc_parser +## Meant to solve slippery encoding issues, see: +## "Re: [Software-group] RDF XML go full" +## As demonstrated in the code below, this does not actually remove unicode +## characters, but rather seems to force them down. +# push @INC, "/home/sjcarbon/local/src/svn/geneontology/go-dev/trunk/go-perl"; +# use GO::IO::RDFXML; +# use FileHandle; +# $out = new FileHandle(">-"); +# $x = GO::IO::RDFXML->new(-output=>$out); +# $foo = "asdf" +# $bar = "okテストay" +# print $x->__strip_non_ascii($foo); +# print $x->__strip_non_ascii($bar); +sub __strip_non_ascii { + my $self = shift; + $_ = shift; + + # UNICODE causes problems for XML and DB + # delete 8th bit + tr [\200-\377] + [\000-\177]; # see 'man perlop', section on tr/ + # weird ascii characters should be excluded + tr/\0-\10//d; # remove weird characters; ascii 0-8 + # preserve \11 (9 - tab) and \12 (10-linefeed) + tr/\13\14//d; # remove weird characters; 11,12 + # preserve \15 (13 - carriage return) + tr/\16-\37//d; # remove 14-31 (all rest before space) + tr/\177//d; # remove DEL character + + return $_; +} + sub __make_go_from_acc { my $self = shift; my $acc = shift; diff -Nru libgo-perl-0.13/GO/Metadata/Panther.pm libgo-perl-0.15/GO/Metadata/Panther.pm --- libgo-perl-0.13/GO/Metadata/Panther.pm 2010-07-08 16:58:58.000000000 +0000 +++ libgo-perl-0.15/GO/Metadata/Panther.pm 2011-08-01 22:10:56.000000000 +0000 @@ -2,12 +2,14 @@ use strict; use warnings; use Exporter; -use base qw/Exporter/; use Memoize; use List::Util qw/sum first/; use Data::Dumper; +use Carp; + +use base qw/GO::Metadata::UniProt::Species Exporter/; +our @EXPORT_OK = qw/panther_codes panther_all valid_panther_codes/; -our @EXPORT_OK = qw/@species/; =head1 NAME @@ -15,462 +17,315 @@ =head1 SYNOPSIS - use GO::Metadata::Panther qw/@species/; - - for my $species (@species) { - # do something - } - - -Or - use GO::Metadata::Panther; my $s = GO::Metadata::Panther->code('YEAST'); =head1 DESCRIPTION +Inherits functions from L. + Accesses information related to species in the Panther F file. This file can be fetched from: L -Each item in the exportable C<@species> array contains a hash -reference for each species. The items in that hash are: - -=over - -=item code - -A scalar or the UniProt species code. - -=item ncbi_taxa_id - -A scalar reference of NCBI taxa ids that items in the GO database -match. This should only be one id, but sometimes it's useful to scan -multiple. - -=back - -For a complete list of every UniProt species matched to a NCBI taxa -L - - - =cut -# These need to be in the order you wish to view them on the AmiGO -# dist png. -# -# For reference genomes, the first number is the ncbi_taxa_id list -# needs to be the reference in AmiGO::Aid::ReferenceGenome - +# Information needed but not provided by UniProt's speclist.txt file. -our @species = +our %species = ( # # A # - { # Anopheles gambiae - code => 'ANOGA', - ncbi_taxa_id => [ 7165 ], - prefer => [ 'Gene' ], - }, - - { # Arabidopsis thaliana - code => 'ARATH', - ncbi_taxa_id => [ 3702 ], - }, - - { # Aquifex aeolicus - code => 'AQUAE', - ncbi_taxa_id => [ 63363 ], - }, - - { # Ashbya gossypii ATCC 10895 - code => 'ASHGO', - ncbi_taxa_id => [ 33169 ], - }, + ANOGA => { prefer => [ qw/ENSEMBL UniProtKB/ ] }, + ARATH => { id_filter => sub { + if ($_[0] eq 'gene') { + return ('TAIR', "locus:$_[1]"); + } + return @_; + } + }, + AQUAE => {}, + ASHGO => { also_node => [ 284811 ] }, # # B # - { # Bacillus subtilis, - code => 'BACSU', - ncbi_taxa_id => [ 1423 ], - }, - - { # Bacteroides thetaiotaomicron - code => 'BACTN', - ncbi_taxa_id => [ 818 ], - }, - - { # Bos taurus - code => 'BOVIN', - ncbi_taxa_id => [ 9913 ], - }, - - { # Bradyrhizobium japonicum - code => 'BRAJA', - # matches the two UniProtKB items in GO - ncbi_taxa_id => [ 375 ], - }, + BACSU => {}, + BACTN => {}, + BOVIN => { prefer => [ 'UniProtKB', 'ENSEMBL' ] }, + BRAJA => {}, # # C # - { # Caenorhabditis briggsae - code => 'CAEBR', - ncbi_taxa_id => [ 6238 ], - }, - - { # Caenorhabditis elegans - code => 'CAEEL', - ncbi_taxa_id => [ 6239 ], - prefer => [ 'WB' ], - }, - - { # Canis lupus familiaris - code => 'CANFA', - ncbi_taxa_id => [ 9615 ], - prefer => [ 'UniProtKB' ], - }, - - { # Chlamydia trachomatis - code => 'CHLTA', - ncbi_taxa_id => [ 315277 ], - - }, - - { # Chlamydomonas reinhardtii - code => 'CHLRE', - ncbi_taxa_id => [ 3055 ], - # found 11 of them - }, - - { - code => 'CHLAA', - ncbi_taxa_id => [ 324602 ], - }, - - { # Ciona intestinalis - code => 'CIOIN', - ncbi_taxa_id => [ 7719 ], - }, + CAEBR => {}, + CAEEL => { prefer => [ 'WB' ], + id_filter => sub { + $_[0] = 'WB' if ($_[1] =~ m/^WB/); + return @_; + } + }, + CANFA => { prefer => [ 'ENSEMBL' ] }, + CHLTA => {}, + CHLRE => {}, + CHLAA => {}, + CIOIN => { prefer => [ 'ENSEMBL' ] }, # # D # - { # Danio rerio - code => 'DANRE', - ncbi_taxa_id => [ 7955 ], - prefer => [ 'ZFIN' ], - }, - - { # Deinococcus radiodurans - code => 'DEIRA', - ncbi_taxa_id => [ 1299 ], - }, - - { # Dictyostelium discoideum - code => 'DICDI', - ncbi_taxa_id => [ 44689 ], - }, - - { # Drosophila melanogaster - code => 'DROME', - ncbi_taxa_id => [ 7227 ], - prefer => [ 'FB' ], - }, + DANRE => { prefer => [ 'ZFIN', 'ENSEMBL', 'UniProtKB' ] }, + DEIRA => {}, + DICDI => {}, + DROME => { prefer => [ 'FB' ], + id_filter => sub { + $_[0] = 'FB' if ($_[1] =~ m/^FB/); + return @_; + } + }, # # E # - { # Emericella nidulans - code => 'EMENI', - ncbi_taxa_id => [ 162425 ], - }, - - { # Entamoeba histolytica - code => 'ENTHI', - ncbi_taxa_id => [ 5759 ] - }, - - { # Escherichia coli - code => 'ECOLI', - ncbi_taxa_id => [ 83333, 511145 ], - prefer => [ 'EcoCyc' ], - }, + EMENI => {}, + ENTHI => {}, + ECOLI => { also_node => [ 562, 511145 ], + prefer => [ 'EcoCyc', 'UniProtKB' ] }, # # G # - { # Gallus gallus - code => 'CHICK', - ncbi_taxa_id => [ 9031 ], - prefer => [ 'UniProtKB' ], - }, - - { # Geobacter sulfurreducens - code => 'GEOSL', - ncbi_taxa_id => [ 35554 ], - }, - - { # Gloeobacter violaceus - code => 'GLOVI', - ncbi_taxa_id => [ 33072, 251221 ], - # 251221 is only here to match GLOVI|ENTREZ:2601616|UniProtKB - prefer => [ 'UniProtKB' ], - }, + CHICK => { prefer => [ 'UniProtKB', 'ENSEMBL', 'NCBI' ] }, + GEOSL => {}, + GLOVI => { also_node => [ 251221 ] }, + # # H # - { # Homo sapiens - code => 'HUMAN', - ncbi_taxa_id => [ 9606 ], - prefer => [ 'ENSEMBL', 'UniProtKB' ], - }, + HUMAN => { prefer => [ 'UniProtKB', 'ENSEMBL' ] }, # # L # - { # Leishmania major - code => 'LEIMA', - ncbi_taxa_id => [ 5664, 347515 ], - }, - - { # Leptospira interrogans - code => 'LEPIN', - # only gets one - ncbi_taxa_id => [ 173 ], - }, + LEIMA => { also_node => [ 347515 ] }, + LEPIN => {}, # # M # - { # Macaca mulatta - code => 'MACMU', - ncbi_taxa_id => [ 9544 ], - }, - - { # Methanosarcina acetivorans - code => 'METAC', - ncbi_taxa_id => [ 2214 ], - }, - - { # Monodelphis domestica - code => 'MONDO', - ncbi_taxa_id => [ 13616 ], - }, - - { # Mus musculus - code => 'MOUSE', - ncbi_taxa_id => [ 10090 ], - prefer => [ 'MGI' ], - }, + MACMU => { prefer => [ 'UniProtKB', 'ENSEMBL' ] }, + METAC => {}, + MONDO => { prefer => [ 'ENSEMBL' ] }, + MOUSE => { prefer => [ 'MGI', 'UniProtKB', 'ENSEMBL' ], + id_filter => sub { + if (($_[0] eq 'MGI') and ($_[1] !~ m/^MGI:/)) { + return ('MGI', "MGI:$_[1]"); + } + return @_; + } + }, # # N # - { # Neurospora crassa - code => 'NEUCR', - ncbi_taxa_id => [ 5141 ], - }, + NEUCR => {}, # # O # - { # Ornithorhynchus anatinus - code => 'ORNAN', - ncbi_taxa_id => [ 9258 ], - }, - - { # Oryza sativa - code => 'ORYSJ', - ncbi_taxa_id => [ 39947 ], - }, + ORNAN => { prefer => [ 'ENSEMBL' ] }, + ORYSJ => {}, # # P # - { # Pan troglodytes - code => 'PANTR', - ncbi_taxa_id => [ 9598 ], - }, - - { # Plasmodium yoelii - code => 'PLAYO', - ncbi_taxa_id => [ 73239 ], - }, - - { # Pseudomonas aeruginosa - code => 'PSEA7', - ncbi_taxa_id => [ 381754 ], - }, + PANTR => { prefer => [ 'ENSEMBL', 'UniProtKB' ] }, + PLAYO => {}, + PSEA7 => {}, # # R # - { # Rattus norvegicus - code => 'RAT', - ncbi_taxa_id => [ 10116 ], - prefer => [ 'RGD', 'UniProtKB' ], - }, + RAT => { prefer => [ 'RGD', 'UniProtKB', 'ENSEMBL' ] }, # # S # - { # Saccharomyces cerevisiae - code => 'YEAST', - ncbi_taxa_id => [ 4932 ], - }, - - { # Schizosaccharomyces pombe - code => 'SCHPO', - ncbi_taxa_id => [ 4896 ], - }, - - { # Streptomyces coelicolor - code => 'STRCO', - ncbi_taxa_id => [ 1902 ], - }, - - { # Strongylocentrotus purpuratus - code => 'STRPU', - ncbi_taxa_id => [ 7668 ], - }, - - { # Sulfolobus solfataricus - code => 'SULSO', - ncbi_taxa_id => [ 2287 ], - }, + YEAST => {}, + SCHPO => {}, + STRCO => {}, + STRPU => {}, + SULSO => {}, # # T # - { # Takifugu rubripes - code => 'FUGRU', # UniProt calls this: TAKRU - ncbi_taxa_id => [ 31033 ], - }, - - { # Tetrahymena thermophila - code => 'TETTH', - ncbi_taxa_id => [ 5911, 312017 ], - }, - - { # Thermotoga maritima - code => 'THEMA', - ncbi_taxa_id => [ 2336 ], - }, + FUGRU => { is => 'TAKRU' }, + TAKRU => { was => 'FUGRU', + prefer => [ 'ENSEMBL' ], + }, + TETTH => { also_node => [ 312017 ] }, + THEMA => {}, # # X # - { # Xenopus', '(Silurana) tropicalis - code => 'XENTR', - ncbi_taxa_id => [ 8364 ], - }, - + XENTR => { prefer => [ 'UniProtKB', 'ENSEMBL' ] }, ); +=head2 Exportable Subroutines +=over -=head2 Constructors +=item panther_codes() -The constructors scans C<@species> for the requested data and returns -the object that matches the data. Otherwise it returns a false false. +Returns the list of UniProt species codes that are used in Panther clusters. + +=cut +sub panther_codes{ + return map { + defined $species{$_}->{is} ? () : $_; + } keys %species; +} +sub codes{ + carp "Please use panther_codes() instead of codes()"; + panther_codes(@_); +} -=over -=item my $s = GO::Metadata::Panther->code(I) +=item GO::Metadata::Panther->panther_all() -Return an object filled with the species reference from the UniProtKB -species code. +Returns a list of C objects that are used in Panther clusters. =cut -memoize('code'); -sub code{ - my $class = shift; - my $code = shift; - - for my $species (@species) { - if ($species->{code} eq $code) { - return bless $species, $class; - } - } - return undef; +sub panther_all{ + my $c = shift; + return $c->new(panther_codes()); +} +sub all { + carp 'Please panther_all() instead if all()'; + return shift()->panther_all(@_); } -=item my $s = GO::Metadata::Panther->ncbi(I) +=item valid_codes(...) -Greate an object from the I. +Returns a true value in every argument is a UniProt species code used +in Panther cluster. Otherwise returns false. =cut -sub ncbi{ - my $class = shift; - my $ncbi = shift; - - for my $species (@species) { - if (first { - $ncbi == $_; - } @{ $species->{ncbi_taxa_id} }) { - return bless $species, $class; - } +sub valid_panther_codes{ + for my $code (@_) { + return undef if (!exists $species{$code}); } - return undef + return '1'; } -=back -=head2 Function +=back -Functions that can be used outside of the OO interface. +=head2 OO Function =over -=item GO::Metadata::Panther::codes() +=item GO::Metadata::Panther-Enew(...); -Returns a list of all UniProt species codes in C<@species>. +This basically hands things off to L's +new function. Populates that with other Panther/GO specific +information, and does some error correction. =cut -sub codes{ - return map { $_->{code} } @species; -} - -=item GO::Metadata::Panther::valid_codes(I) +our %_new_cache; +sub new{ + my $c = shift; + + my @have; + my @all = map { + if ($_new_cache{$_}) { + push @have, $_new_cache{$_}; + (); + } else { + $_; + } + } @_; -Send it a list of panther Unicode codes, returns true if they are all -present in C<@species>. Othewise returns false. + ########## + # Fix up also_node entries (see ECOLI) + @all = map { + my $all = $_; + my $out = $all; + if ($all =~ m/^\d+$/) { + BLA: + for my $code (keys %species) { + for my $node (@{ $species{$code}->{also_node} }) { + if ($all eq $node) { + $out = $code; + last BLA; + } + } + } + } + $out; + } @all; + # This bugs me + ########## + + @all = map { + if (!$_->ncbi_taxon_id()) { + warn 'Skipping unknown NCBI taxon ID, check: SELECT * FROM species WHERE ncbi_taxa_id=0'; + (); + } else { + $_; + } + } $c->SUPER::new(map { + if ($species{$_} && $species{$_}->{is}) { + warn "$_ -> $species{$_}->{is}"; + $species{$_}->{is}; + } else { + $_; + } + } @all) if (scalar @all); -=cut -sub valid_codes{ - return scalar(@_) == sum(map { - __PACKAGE__->code($_) ? 1 : 0; - } @_); -} + for (@all) { + if ($species{$_->code()}) { + while (my ($k,$v) = each %{ $species{$_->code} }) { + $_->{$k} = $v; + } + } else { + warn $_->code . ' Not a Panther family.'; + } + } -=back + for my $all (@all) { + $_new_cache{$all->{node}} = $all; + $_new_cache{$all->{code}} = $all; + } + push @all, @have; -=head2 OO Function + return undef if (0 == scalar @all); + return $all[0] if (1 == scalar @all); + return @all; +} -=over -=item $s->ncbi_ids() +=item $s->ncbi_taxa_ids() Returns the list of NCBI taxa identifiers associated with the UniProt species code. In a perfect word this will only every return one @@ -480,11 +335,62 @@ =cut sub ncbi_ids{ my $s = shift; - return @{ $s->{ncbi_taxa_id} }; + my @out = ($s->{node}); + push @out, @{ $s->{also_node} } if ($s->{also_node}); + return @out; } +=item $s->prefers() + +Returns a list of id types (generally to be populated in +C) in order of preference of use. If a null list, +we have never encountered a conflict that needed resolving. + +=cut +sub prefers{ + my $s = shift; + + if ($s->{prefer}) { + return @{ $s->{prefer} }; + } + return qw/UniProtKB/; +} + +# this is not fully in use. +sub reject{ + my $s = shift; + + if ($s->{reject}) { + return @{ $s->{reject} }; + } + return qw/GeneID/; +} + +# sub prefered{ +# my $s = shift; +# my $v = shift + +# return first { $v eq $_ } $s->preferes(); +# } + +sub id_filter{ + my $s = shift; + my ($k, $v) = (shift, shift); + $k = 'UniProtKB' if ($k =~ m/UniProt/i); + + if ($s->{id_filter}) { + return &{ $s->{id_filter} }($k, $v); + } + return ($k, $v); +} + + =back +=head2 SEE ALSO + +L + =head2 AUTHOR Sven Heinicke Esven@genomics.princeton.edu diff -Nru libgo-perl-0.13/GO/Parsers/go_assoc_parser.pm libgo-perl-0.15/GO/Parsers/go_assoc_parser.pm --- libgo-perl-0.13/GO/Parsers/go_assoc_parser.pm 2010-05-31 19:11:06.000000000 +0000 +++ libgo-perl-0.15/GO/Parsers/go_assoc_parser.pm 2011-08-01 22:10:55.000000000 +0000 @@ -405,9 +405,10 @@ # no longer checks for cardinality errors } + my @refs = split(/\|/, $ref); map { $self->event(REF, $_) - } split(/\|/, $ref); + } @refs; $self->end_event(EVIDENCE); #@last = @vals; @last = diff -Nru libgo-perl-0.13/GO/Parsers/ncbi_taxonomy_parser.pm libgo-perl-0.15/GO/Parsers/ncbi_taxonomy_parser.pm --- libgo-perl-0.13/GO/Parsers/ncbi_taxonomy_parser.pm 2010-05-12 19:16:49.000000000 +0000 +++ libgo-perl-0.15/GO/Parsers/ncbi_taxonomy_parser.pm 2011-12-13 06:55:08.000000000 +0000 @@ -78,6 +78,7 @@ $self->event(header=> [ ['default-namespace'=>'ncbi_taxonomy'], + ['ontology'=>'ncbitaxon'], [remark=>'autogenerated via GO::Parsers::ncbi_taxonomy_parser'], (map {[synonymtypedef=>[[id=>syn($_)],[name=>$_],[scope=>$synonymtypes{$_}->[0]]]]} keys %synonymtypes), ]); @@ -168,6 +169,8 @@ $s = $1; $xref =~ s/\s+/_/g; $xref =~ tr/\(\)//d; + $xref =~ tr/\[\]//d; + $xref =~ s@,@\\,@g; push(@xrefs, [dbxref=>[[acc=>$xref],[dbname=>"NCBITaxonRef"]]]); } $self->event(SYNONYM,[ diff -Nru libgo-perl-0.13/GO/Parsers/obo_text_parser.pm libgo-perl-0.15/GO/Parsers/obo_text_parser.pm --- libgo-perl-0.13/GO/Parsers/obo_text_parser.pm 2010-05-31 19:20:53.000000000 +0000 +++ libgo-perl-0.15/GO/Parsers/obo_text_parser.pm 2012-11-20 17:12:05.000000000 +0000 @@ -674,7 +674,7 @@ $self->parse_err("expected ) at end of genus. Got: $next_c followed by $diff_expr"); } } - elsif ($expr =~ /^([\w\:]+)\^(.*)/) { + elsif ($expr =~ /^([\w\:\.\-]+)\^(.*)/) { my $genus = $1; my $diff_expr = $2; my ($diffs,$rest) = $self->parse_differentia_with_rest($diff_expr); @@ -683,7 +683,7 @@ @$diffs]]; return ($stag,$rest); } - elsif ($expr =~ /^([\w\:]+)(.*)/) { + elsif ($expr =~ /^([\w\:\.\-]+)(.*)/) { return ($1,$2); } else { @@ -718,6 +718,10 @@ $next_c = substr($rest,0,1); if ($next_c eq '^' || $next_c eq ',') { my ($next_diffs,$next_rest) = $self->parse_differentia_with_rest(substr($rest,1)); + if (!$next_diffs) { + $self->parse_err("problem parsing differentia: $rest. Expr: $term_expr"); + return ([$diff],$rest); + } return ([$diff,@$next_diffs],$next_rest); } elsif ($next_c eq '') { @@ -727,19 +731,19 @@ return ([$diff],$rest); } else { - $self->parse_err("expected ^ or ). Got: $next_c followed_by: $rest"); + $self->parse_err("expected ^ or ) in differentium. Got: $next_c followed_by: $rest. Expr: $term_expr"); } } else { - $self->parse_err("exprected ). Got: $next_c followed by: $rest"); + $self->parse_err("expected ) to close differentium. Got: $next_c followed by: $rest. Expr: $term_expr"); } } else { - $self->parse_err("expected ). Got: \"\""); + $self->parse_err("expected ). Got: \"\". Expr: $term_expr"); } } else { - $self->parse_err("expect relation(...). Got: $expr"); + $self->parse_err("expect relation(...). Got: $expr. "); } } diff -Nru libgo-perl-0.13/MANIFEST libgo-perl-0.15/MANIFEST --- libgo-perl-0.13/MANIFEST 2010-08-10 17:08:13.000000000 +0000 +++ libgo-perl-0.15/MANIFEST 2013-06-12 16:20:56.000000000 +0000 @@ -198,4 +198,5 @@ GO/xsl/oboxml_to_summary_table.xsl GO/xsl/owl_to_oboxml.xsl GO/xsl/text_html.xsl -META.yml Module meta-data (added by MakeMaker) +META.yml Module YAML meta-data (added by MakeMaker) +META.json Module JSON meta-data (added by MakeMaker) diff -Nru libgo-perl-0.13/META.json libgo-perl-0.15/META.json --- libgo-perl-0.13/META.json 1970-01-01 00:00:00.000000000 +0000 +++ libgo-perl-0.15/META.json 2013-06-12 16:20:56.000000000 +0000 @@ -0,0 +1,42 @@ +{ + "abstract" : "GO Perl", + "author" : [ + "Chris Mungall " + ], + "dynamic_config" : 1, + "generated_by" : "ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.120921", + "license" : [ + "unknown" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "go-perl", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "Data::Dumper" : "0", + "Data::Stag" : "0.07" + } + } + }, + "release_status" : "stable", + "version" : "0.15" +} diff -Nru libgo-perl-0.13/META.yml libgo-perl-0.15/META.yml --- libgo-perl-0.13/META.yml 2010-08-10 17:08:12.000000000 +0000 +++ libgo-perl-0.15/META.yml 2013-06-12 16:20:56.000000000 +0000 @@ -1,23 +1,23 @@ ---- #YAML:1.0 -name: go-perl -version: 0.13 -abstract: GO Perl +--- +abstract: 'GO Perl' author: - - Chris Mungall -license: unknown -distribution_type: module -configure_requires: - ExtUtils::MakeMaker: 0 + - 'Chris Mungall ' build_requires: - ExtUtils::MakeMaker: 0 -requires: - Data::Dumper: 0 - Data::Stag: 0.07 -no_index: - directory: - - t - - inc -generated_by: ExtUtils::MakeMaker version 6.50 + ExtUtils::MakeMaker: 0 +configure_requires: + ExtUtils::MakeMaker: 0 +dynamic_config: 1 +generated_by: 'ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.120921' +license: unknown meta-spec: - url: http://module-build.sourceforge.net/META-spec-v1.4.html - version: 1.4 + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: 1.4 +name: go-perl +no_index: + directory: + - t + - inc +requires: + Data::Dumper: 0 + Data::Stag: 0.07 +version: 0.15 diff -Nru libgo-perl-0.13/Makefile.PL libgo-perl-0.15/Makefile.PL --- libgo-perl-0.13/Makefile.PL 2010-05-12 19:16:50.000000000 +0000 +++ libgo-perl-0.15/Makefile.PL 2013-06-12 16:17:40.000000000 +0000 @@ -144,7 +144,8 @@ # directory for source XSLs; this only makes sense if go-perl # is a subdir of go-dev -XSL_SRC_DIR = ../xml/xsl +#XSL_SRC_DIR = ../xml/xsl +XSL_SRC_DIR = xsl # xsls which go-perl can use XSL = chadoxml_to_oboxml oboxml_filter oboxml_to_chadoxml oboxml_to_dig oboxml_to_godb_prestore oboxml_to_obotext oboxml_to_owl oboxml_to_simple_owl oboxml_to_racer owl_to_oboxml oboxml_to_summary_table ipr_to_oboxml oboxml_to_obd_prestore text_html diff -Nru libgo-perl-0.13/debian/README.Debian libgo-perl-0.15/debian/README.Debian --- libgo-perl-0.13/debian/README.Debian 2014-02-01 17:25:15.000000000 +0000 +++ libgo-perl-0.15/debian/README.Debian 1970-01-01 00:00:00.000000000 +0000 @@ -1,10 +0,0 @@ -Notes on how this package is tested. -──────────────────────────────────── - -The maintainer relies on the package's regression tests that are executed at -build time on his computer. - -In the future, build logs for this package will be published in the Debian -website so that you can inspect the results… - - -- Charles Plessy Wed, 30 Jun 2010 13:02:47 +0900 diff -Nru libgo-perl-0.13/debian/README.test libgo-perl-0.15/debian/README.test --- libgo-perl-0.13/debian/README.test 1970-01-01 00:00:00.000000000 +0000 +++ libgo-perl-0.15/debian/README.test 2014-02-01 11:08:00.000000000 +0000 @@ -0,0 +1,10 @@ +Notes on how this package is tested. +──────────────────────────────────── + +The maintainer relies on the package's regression tests that are executed at +build time on his computer. + +In the future, build logs for this package will be published in the Debian +website so that you can inspect the results… + + -- Charles Plessy Wed, 30 Jun 2010 13:02:47 +0900 diff -Nru libgo-perl-0.13/debian/changelog libgo-perl-0.15/debian/changelog --- libgo-perl-0.13/debian/changelog 2014-02-01 17:25:15.000000000 +0000 +++ libgo-perl-0.15/debian/changelog 2014-02-01 11:59:08.000000000 +0000 @@ -1,3 +1,22 @@ +libgo-perl (0.15-1) unstable; urgency=medium + + * New upstream release. + * Syntax corrections in machine-readable debian/copyright file. + * Normalised debian/control with the command “cme fix”. + * Using Debhelper 9. + * Renamed README.Debian README.test as it is all about tests. + * Patch through the “3.0 (quilt)” source format. + * Remove dependances on “perl-modules”, not needed anymore. + * Normalised VCS URLs. + * Conforms with Policy 3.9.5. + * Build-depend on BioPerl, xsltproc and XML::Parser::PerlSAX + for the regression tests. + + [Olivier Sallou] + * d/patches/fix_test_xsltproc: fix xsl files to be spec compliant + + -- Charles Plessy Fri, 31 Jan 2014 21:58:06 +0900 + libgo-perl (0.13-3) unstable; urgency=low * Clean upload without the t/data/* that got into 0.13-2 by mistake. diff -Nru libgo-perl-0.13/debian/compat libgo-perl-0.15/debian/compat --- libgo-perl-0.13/debian/compat 2014-02-01 17:25:15.000000000 +0000 +++ libgo-perl-0.15/debian/compat 2014-02-01 11:08:00.000000000 +0000 @@ -1 +1 @@ -7 +9 diff -Nru libgo-perl-0.13/debian/control libgo-perl-0.15/debian/control --- libgo-perl-0.13/debian/control 2014-02-01 17:25:15.000000000 +0000 +++ libgo-perl-0.15/debian/control 2014-02-01 11:59:14.000000000 +0000 @@ -1,21 +1,33 @@ Source: libgo-perl +Maintainer: Debian Med Packaging Team +Uploaders: Charles Plessy , + Laszlo Kajan Section: perl Priority: optional -Build-Depends: debhelper (>= 7.3), quilt (>= 0.46-7~) -Build-Depends-Indep: perl, perl-modules, libdata-stag-perl -Maintainer: Debian Med Packaging Team -DM-Upload-Allowed: yes -Uploaders: Charles Plessy , Laszlo Kajan -Standards-Version: 3.9.1 -Vcs-Browser: http://svn.debian.org/wsvn/debian-med/trunk/packages/libgo-perl/trunk/ -Vcs-Svn: svn://svn.debian.org/debian-med/trunk/packages/libgo-perl/trunk/ +Build-Depends: debhelper (>= 9) +Build-Depends-Indep: perl, + libdata-stag-perl, + libxml-libxslt-perl, + libxml-writer-perl, + libxml-checker-perl, + xsltproc, + libbio-perl-perl +Standards-Version: 3.9.5 +Vcs-Browser: http://anonscm.debian.org/viewvc/debian-med/trunk/packages/libgo-perl/trunk/ +Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/libgo-perl/trunk/ Homepage: http://geneontology.sourceforge.net/ Package: libgo-perl Architecture: all -Depends: ${perl:Depends}, ${misc:Depends}, perl-modules, - libdata-stag-perl, libgraphviz-perl -Recommends: libxml-libxml-perl, libxml-libxslt-perl, libxml-writer-perl, libxml-checker-perl, xsltproc +Depends: ${perl:Depends}, + ${misc:Depends}, + libdata-stag-perl, + libgraphviz-perl +Recommends: libxml-libxml-perl, + libxml-libxslt-perl, + libxml-writer-perl, + libxml-checker-perl, + xsltproc Suggests: bioperl Description: perl modules for GO and other OBO ontologies This is a collection of perl code for dealing with Gene Ontologies (GO) and diff -Nru libgo-perl-0.13/debian/copyright libgo-perl-0.15/debian/copyright --- libgo-perl-0.13/debian/copyright 2014-02-01 17:25:15.000000000 +0000 +++ libgo-perl-0.15/debian/copyright 2014-02-01 11:08:00.000000000 +0000 @@ -1,5 +1,5 @@ -Format: http://dep.debian.net/deps/dep5/ -Source: http://search.cpan.org/CPAN/authors/id/C/CM/CMUNGALL/ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Source: http://search.cpan.org/CPAN/authors/id/C/CM/CMUNGALL/go-perl-0.15.tar.gz Files: * Copyright: 2000-2010 Chris Mungall @@ -16,6 +16,6 @@ . b) the "Artistic License" which comes with Perl. Comment: On Debian systems, the complete text of the latest version of the GNU -General Public License version 1 can be found in -‘/usr/share/common-licenses/GPL-1’, while the complete text of the Artistic -License can be found in ‘/usr/share/common-licenses/Artistic’. + General Public License version 1 can be found in + ‘/usr/share/common-licenses/GPL-1’, while the complete text of the Artistic + License can be found in ‘/usr/share/common-licenses/Artistic’. diff -Nru libgo-perl-0.13/debian/patches/fix_test_xsltproc libgo-perl-0.15/debian/patches/fix_test_xsltproc --- libgo-perl-0.13/debian/patches/fix_test_xsltproc 1970-01-01 00:00:00.000000000 +0000 +++ libgo-perl-0.15/debian/patches/fix_test_xsltproc 2014-02-01 11:49:35.000000000 +0000 @@ -0,0 +1,27 @@ +Subject: xsl is not compliant +Description: XSLT spec does not allow a name with "xmlns", + so apply a patch to use a different name +Author: Olivier Sallou +Last-Updated: 2014-02-01 +Forwarded: yes +Bug: https://rt.cpan.org/Public/Bug/Display.html?id=92636 +--- a/GO/xsl/oboxml_to_owl.xsl ++++ b/GO/xsl/oboxml_to_owl.xsl +@@ -815,7 +815,7 @@ + + + +- ++ + + + +@@ -860,7 +860,7 @@ + + + +- ++ + + + diff -Nru libgo-perl-0.13/debian/patches/series libgo-perl-0.15/debian/patches/series --- libgo-perl-0.13/debian/patches/series 2014-02-01 17:25:15.000000000 +0000 +++ libgo-perl-0.15/debian/patches/series 2014-02-01 11:28:58.000000000 +0000 @@ -1,3 +1,4 @@ fix-whatis-entries.patch fix-missing-manpages fix-pod.patch +fix_test_xsltproc diff -Nru libgo-perl-0.13/debian/rules libgo-perl-0.15/debian/rules --- libgo-perl-0.13/debian/rules 2014-02-01 17:25:15.000000000 +0000 +++ libgo-perl-0.15/debian/rules 2014-02-01 11:08:00.000000000 +0000 @@ -1,7 +1,7 @@ #!/usr/bin/make -f %: - dh --with quilt $@ + dh $@ override_dh_install: dh_install diff -Nru libgo-perl-0.13/debian/source/format libgo-perl-0.15/debian/source/format --- libgo-perl-0.13/debian/source/format 1970-01-01 00:00:00.000000000 +0000 +++ libgo-perl-0.15/debian/source/format 2014-02-01 17:25:16.140401383 +0000 @@ -0,0 +1 @@ +3.0 (quilt) diff -Nru libgo-perl-0.13/go-perl.pod libgo-perl-0.15/go-perl.pod --- libgo-perl-0.13/go-perl.pod 2010-08-10 17:07:16.000000000 +0000 +++ libgo-perl-0.15/go-perl.pod 2013-06-12 16:18:27.000000000 +0000 @@ -298,5 +298,5 @@ =cut package go-perl; -$VERSION='0.13'; +$VERSION='0.15'; 1;