@article{pmid37829294,
  author   = {Meyer, R and Appeltans, W and Duncan, WD and Dimitrova, M and Gan, YM and Stjernegaard Jeppesen, T and Mungall, C and Paul, DL and Provoost, P and Robertson, T and Schriml, L and Suominen, S and Walls, R and Sweetlove, M and Ung, V and {Van de Putte}, A and Wallis, E and Wieczorek, J and Buttigieg, PL},
  title    = {Aligning Standards Communities for Omics Biodiversity Data: Sustainable {Darwin Core}-{MIxS} Interoperability},
  journal  = {Biodiversity data journal},
  year     = {2023},
  volume   = {11},
  pages    = {e112420},
  pmid     = {37829294},
  issn     = {1314-2828},
  abstract = {The standardization of data, encompassing both primary and contextual information (metadata), plays a pivotal role in facilitating data (re-)use, integration, and knowledge generation. However, the biodiversity and omics communities, converging on omics biodiversity data, have historically developed and adopted their own distinct standards, hindering effective (meta)data integration and collaboration. In response to this challenge, the Task Group (TG) for Sustainable DwC-MIxS Interoperability was established. Convening experts from the Biodiversity Information Standards (TDWG) and the Genomic Standards Consortium (GSC) alongside external stakeholders, the TG aimed to promote sustainable interoperability between the Minimum Information about any (x) Sequence (MIxS) and Darwin Core (DwC) specifications. To achieve this goal, the TG utilized the Simple Standard for Sharing Ontology Mappings (SSSOM) to create a comprehensive mapping of DwC keys to MIxS keys. This mapping, combined with the development of the MIxS-DwC extension, enables the incorporation of MIxS core terms into DwC-compliant metadata records, facilitating seamless data exchange between MIxS and DwC user communities.
Through the implementation of this translation layer, data produced in either MIxS- or DwC-compliant formats can now be efficiently brokered, breaking down silos and fostering closer collaboration between the biodiversity and omics communities. To ensure its sustainability and lasting impact, TDWG and GSC have both signed a Memorandum of Understanding (MoU) on creating a continuous model to synchronize their standards. These achievements mark a significant step forward in enhancing data sharing and utilization across domains, thereby unlocking new opportunities for scientific discovery and advancement.},
}

@article{pmid30612540,
  author   = {Singh, I and Kuscuoglu, M and Harkins, DM and Sutton, G and Fouts, DE and Nelson, KE},
  title    = {{OMeta}: an ontology-based, data-driven metadata tracking system},
  journal  = {BMC bioinformatics},
  year     = {2019},
  volume   = {20},
  number   = {1},
  pages    = {8},
  pmid     = {30612540},
  issn     = {1471-2105},
  support  = {HHSN272200900007C/AI/NIAID NIH HHS/United States ; U19 AI110819/AI/NIAID NIH HHS/United States ; HHSN272200900007C,U19AI110819//National Institute of Allergy and Infectious Diseases/ ; },
  mesh     = {*Biological Ontologies ; Databases, Factual ; *Metadata ; Metagenomics ; Phylogeny ; *Software ; User-Computer Interface ; Whole Genome Sequencing ; },
  abstract = {BACKGROUND: The development of high-throughput sequencing and analysis has accelerated multi-omics studies of thousands of microbial species, metagenomes, and infectious disease pathogens. Omics studies are enabling genotype-phenotype association studies which identify genetic determinants of pathogen virulence and drug resistance, as well as phylogenetic studies designed to track the origin and spread of disease outbreaks. These omics studies are complex and often employ multiple assay technologies including genomics, metagenomics, transcriptomics, proteomics, and metabolomics.
To maximize the impact of omics studies, it is essential that data be accompanied by detailed contextual metadata (e.g., specimen, spatial-temporal, phenotypic characteristics) in clear, organized, and consistent formats. Over the years, many metadata standards developed by various metadata standards initiatives have arisen; the Genomic Standards Consortium's minimal information standards (MIxS), the GSCID/BRC Project and Sample Application Standard. Some tools exist for tracking metadata, but they do not provide event based capabilities to configure, collect, validate, and distribute metadata. To address this gap in the scientific community, an event based data-driven application, OMeta, was created that allows users to quickly configure, collect, validate, distribute, and integrate metadata.

RESULTS: A data-driven web application, OMeta, has been developed for use by researchers consisting of a browser-based interface, a command-line interface (CLI), and server-side components that provide an intuitive platform for configuring, capturing, viewing, and sharing metadata. Project and sample metadata can be set based on existing standards or based on projects goals. Recorded information includes details on the biological samples, procedures, protocols, and experimental technologies, etc. This information can be organized based on events, including sample collection, sample quantification, sequencing assay, and analysis results. OMeta enables configuration in various presentation types: checkbox, file, drop-box, ontology, and fields can be configured to use the National Center for Biomedical Ontology (NCBO), a biomedical ontology server. Furthermore, OMeta maintains a complete audit trail of all changes made by users and allows metadata export in comma separated value (CSV) format for convenient deposition of data into public databases.

CONCLUSIONS: We present, OMeta, a web-based software application that is built on data-driven principles for configuring and customizing data standards, capturing, curating, and sharing metadata.},
}

@article{pmid30357420,
  author   = {Mukherjee, S and Stamatis, D and Bertsch, J and Ovchinnikova, G and Katta, HY and Mojica, A and Chen, IA and Kyrpides, NC and Reddy, T},
  title    = {{Genomes OnLine Database} ({GOLD}) v.7: updates and new features},
  journal  = {Nucleic acids research},
  year     = {2019},
  volume   = {47},
  number   = {D1},
  pages    = {D649--D659},
  pmid     = {30357420},
  issn     = {1362-4962},
  mesh     = {Databases, Genetic/*standards ; Gene Ontology ; Genomics/*methods ; Software/*standards ; },
  abstract = {The Genomes Online Database (GOLD) (https://gold.jgi.doe.gov) is an open online resource, which maintains an up-to-date catalog of genome and metagenome projects in the context of a comprehensive list of associated metadata. Information in GOLD is organized into four levels: Study, Biosample/Organism, Sequencing Project and Analysis Project. Currently GOLD hosts information on 33 415 Studies, 49 826 Biosamples, 313 324 Organisms, 215 881 Sequencing Projects and 174 454 Analysis Projects with a total of 541 metadata fields, of which 80 are based on controlled vocabulary (CV) terms. GOLD provides a user-friendly web interface to browse sequencing projects and launch advanced search tools across four classification levels. Users submit metadata on a wide range of Sequencing and Analysis Projects in GOLD before depositing sequence data to the Integrated Microbial Genomes (IMG) system for analysis. GOLD conforms with and supports the rules set by the Genomic Standards Consortium (GSC) Minimum Information standards. The current version of GOLD (v.7) has seen the number of projects and associated metadata increase exponentially over the years.
This paper provides an update on the current status of GOLD and highlights the new features added over the last two years.},
}

@comment{pmid28787424: the PubMed export contained an empty author slot ("and , and") after "Rinke, C"; it was dropped because it yields an empty name in the formatted author list. NOTE(review): the slot may stand for a group author; confirm against the publisher record.}
@article{pmid28787424,
  author   = {Bowers, RM and Kyrpides, NC and Stepanauskas, R and Harmon-Smith, M and Doud, D and Reddy, TBK and Schulz, F and Jarett, J and Rivers, AR and Eloe-Fadrosh, EA and Tringe, SG and Ivanova, NN and Copeland, A and Clum, A and Becraft, ED and Malmstrom, RR and Birren, B and Podar, M and Bork, P and Weinstock, GM and Garrity, GM and Dodsworth, JA and Yooseph, S and Sutton, G and Glöckner, FO and Gilbert, JA and Nelson, WC and Hallam, SJ and Jungbluth, SP and Ettema, TJG and Tighe, S and Konstantinidis, KT and Liu, WT and Baker, BJ and Rattei, T and Eisen, JA and Hedlund, B and McMahon, KD and Fierer, N and Knight, R and Finn, R and Cochrane, G and Karsch-Mizrachi, I and Tyson, GW and Rinke, C and Lapidus, A and Meyer, F and Yilmaz, P and Parks, DH and Eren, AM and Schriml, L and Banfield, JF and Hugenholtz, P and Woyke, T},
  title    = {Minimum information about a single amplified genome ({MISAG}) and a metagenome-assembled genome ({MIMAG}) of bacteria and archaea},
  journal  = {Nature biotechnology},
  year     = {2017},
  volume   = {35},
  number   = {8},
  pages    = {725--731},
  pmid     = {28787424},
  issn     = {1546-1696},
  support  = {310039/ERC_/European Research Council/International ; R01 AI123037/AI/NIAID NIH HHS/United States ; R01 DE024463/DE/NIDCR NIH HHS/United States ; R01 HG004857/HG/NHGRI NIH HHS/United States ; },
  mesh     = {Genome, Archaeal/*genetics ; Genome, Bacterial/genetics ; Genomics/*methods/standards ; Metagenomics/*methods/standards ; Sequence Analysis, DNA ; },
  abstract = {We present two standards developed by the Genomic Standards Consortium (GSC) for reporting bacterial and archaeal genome sequences. Both are extensions of the Minimum Information about Any (x) Sequence (MIxS).
The standards are the Minimum Information about a Single Amplified Genome (MISAG) and the Minimum Information about a Metagenome-Assembled Genome (MIMAG), including, but not limited to, assembly quality, and estimates of genome completeness and contamination. These standards can be used in combination with other GSC checklists, including the Minimum Information about a Genome Sequence (MIGS), Minimum Information about a Metagenomic Sequence (MIMS), and Minimum Information about a Marker Gene Sequence (MIMARKS). Community-wide adoption of MISAG and MIMAG will facilitate more robust comparative genomic analyses of bacterial and archaeal diversity.},
}

@article{pmid27794040,
  author   = {Mukherjee, S and Stamatis, D and Bertsch, J and Ovchinnikova, G and Verezemska, O and Isbandi, M and Thomas, AD and Ali, R and Sharma, K and Kyrpides, NC and Reddy, TB},
  title    = {{Genomes OnLine Database} ({GOLD}) v.6: data updates and feature enhancements},
  journal  = {Nucleic acids research},
  year     = {2017},
  volume   = {45},
  number   = {D1},
  pages    = {D446--D456},
  pmid     = {27794040},
  issn     = {1362-4962},
  mesh     = {Computational Biology/*methods ; Data Mining ; *Databases, Nucleic Acid ; *Genome ; Genomics/*methods ; Metagenome ; Metagenomics/methods ; Software ; User-Computer Interface ; },
  abstract = {The Genomes Online Database (GOLD) (https://gold.jgi.doe.gov) is a manually curated data management system that catalogs sequencing projects with associated metadata from around the world. In the current version of GOLD (v.6), all projects are organized based on a four level classification system in the form of a Study, Organism (for isolates) or Biosample (for environmental samples), Sequencing Project and Analysis Project. Currently, GOLD provides information for 26 117 Studies, 239 100 Organisms, 15 887 Biosamples, 97 212 Sequencing Projects and 78 579 Analysis Projects. These are integrated with over 312 metadata fields from which 58 are controlled vocabularies with 2067 terms.
The web interface facilitates submission of a diverse range of Sequencing Projects (such as isolate genome, single-cell genome, metagenome, metatranscriptome) and complex Analysis Projects (such as genome from metagenome, or combined assembly from multiple Sequencing Projects). GOLD provides a seamless interface with the Integrated Microbial Genomes (IMG) system and supports and promotes the Genomic Standards Consortium (GSC) Minimum Information standards. This paper describes the data updates and additional features added during the last two years.},
}

@article{pmid27668169,
  author   = {Endrullat, C and Glökler, J and Franke, P and Frohme, M},
  title    = {Standardization and quality management in next-generation sequencing},
  journal  = {Applied \& translational genomics},
  year     = {2016},
  volume   = {10},
  pages    = {2--9},
  pmid     = {27668169},
  issn     = {2212-0661},
  abstract = {DNA sequencing continues to evolve quickly even after > 30 years. Many new platforms suddenly appeared and former established systems have vanished in almost the same manner. Since establishment of next-generation sequencing devices, this progress gains momentum due to the continually growing demand for higher throughput, lower costs and better quality of data. In consequence of this rapid development, standardized procedures and data formats as well as comprehensive quality management considerations are still scarce. Here, we listed and summarized current standardization efforts and quality management initiatives from companies, organizations and societies in form of published studies and ongoing projects. These comprise on the one hand quality documentation issues like technical notes, accreditation checklists and guidelines for validation of sequencing workflows. On the other hand, general standard proposals and quality metrics are developed and applied to the sequencing workflow steps with the main focus on upstream processes.
Finally, certain standard developments for downstream pipeline data handling, processing and storage are discussed in brief. These standardization approaches represent a first basis for continuing work in order to prospectively implement next-generation sequencing in important areas such as clinical diagnostics, where reliable results and fast processing is crucial. Additionally, these efforts will exert a decisive influence on traceability and reproducibility of sequence data.},
}

@article{pmid25348402,
  author   = {Reddy, TB and Thomas, AD and Stamatis, D and Bertsch, J and Isbandi, M and Jansson, J and Mallajosyula, J and Pagani, I and Lobos, EA and Kyrpides, NC},
  title    = {The {Genomes OnLine Database} ({GOLD}) v.5: a metadata management system based on a four level (meta)genome project classification},
  journal  = {Nucleic acids research},
  year     = {2015},
  volume   = {43},
  number   = {Database issue},
  pages    = {D1099--D1106},
  pmid     = {25348402},
  issn     = {1362-4962},
  mesh     = {*Databases, Nucleic Acid ; *Genomics ; Internet ; *Metagenomics ; },
  abstract = {The Genomes OnLine Database (GOLD; http://www.genomesonline.org) is a comprehensive online resource to catalog and monitor genetic studies worldwide. GOLD provides up-to-date status on complete and ongoing sequencing projects along with a broad array of curated metadata. Here we report version 5 (v.5) of the database. The newly designed database schema and web user interface supports several new features including the implementation of a four level (meta)genome project classification system and a simplified intuitive web interface to access reports and launch search tools. The database currently hosts information for about 19,200 studies, 56,000 Biosamples, 56,000 sequencing projects and 39,400 analysis projects. More than just a catalog of worldwide genome projects, GOLD is a manually curated, quality-controlled metadata warehouse.
The problems encountered in integrating disparate and varying quality data into GOLD are briefly highlighted. GOLD fully supports and follows the Genomic Standards Consortium (GSC) Minimum Information standards.},
}

@article{pmid25197446,
  author   = {Field, D and Sterk, P and Kottmann, R and De Smet, JW and Amaral-Zettler, L and Cochrane, G and Cole, JR and Davies, N and Dawyndt, P and Garrity, GM and Gilbert, JA and Glöckner, FO and Hirschman, L and Klenk, HP and Knight, R and Kyrpides, N and Meyer, F and Karsch-Mizrachi, I and Morrison, N and Robbins, R and San Gil, I and Sansone, S and Schriml, L and Tatusova, T and Ussery, D and Yilmaz, P and White, O and Wooley, J and Caporaso, G},
  title    = {{Genomic Standards Consortium} projects},
  journal  = {Standards in genomic sciences},
  year     = {2014},
  volume   = {9},
  number   = {3},
  pages    = {599--601},
  pmid     = {25197446},
  issn     = {1944-3277},
  abstract = {The Genomic Standards Consortium (GSC) is an open-membership community that was founded in 2005 to work towards the development, implementation and harmonization of standards in the field of genomics. Starting with the defined task of establishing a minimal set of descriptions the GSC has evolved into an active standards-setting body that currently has 18 ongoing projects, with additional projects regularly proposed from within and outside the GSC.
Here we describe our recently enacted policy for proposing new activities that are intended to be taken on by the GSC, along with the template for proposing such new activities.},
}

@article{pmid23451295,
  author   = {Tuama, EÓ and Deck, J and Dröge, G and Döring, M and Field, D and Kottmann, R and Ma, J and Mori, H and Morrison, N and Sterk, P and Sugawara, H and Wieczorek, J and Wu, L and Yilmaz, P},
  title    = {Meeting Report: Hackathon-Workshop on {Darwin Core} and {MIxS} Standards Alignment ({February} 2012)},
  journal  = {Standards in genomic sciences},
  year     = {2012},
  volume   = {7},
  number   = {1},
  pages    = {166--170},
  pmid     = {23451295},
  issn     = {1944-3277},
  abstract = {The Global Biodiversity Information Facility and the Genomic Standards Consortium convened a joint workshop at the University of Oxford, 27-29 February 2012, with a small group of experts from Europe, USA, China and Japan, to continue the alignment of the Darwin Core with the MIxS and related genomics standards. Several reference mappings were produced as well as test expressions of MIxS in RDF. The use and management of controlled vocabulary terms was considered in relation to both GBIF and the GSC, and tools for working with terms were reviewed. Extensions for publishing genomic biodiversity data to the GBIF network via a Darwin Core Archive were prototyped and work begun on preparing translations of the Darwin Core to Japanese and Chinese.
Five genomic repositories were identified for engagement to begin the process of testing the publishing of genomic data to the GBIF network commencing with the SILVA rRNA database.},
}

@article{pmid23451294,
  author   = {Robbins, RJ and Amaral-Zettler, L and Bik, H and Blum, S and Edwards, J and Field, D and Garrity, G and Gilbert, JA and Kottmann, R and Krishtalka, L and Lapp, H and Lawrence, C and Morrison, N and Tuama, EÓ and Parr, C and San Gil, I and Schindel, D and Schriml, L and Vieglas, D and Wooley, J},
  title    = {{RCN4GSC} Workshop Report: Managing Data at the Interface of Biodiversity and (Meta)Genomics, {March} 2011},
  journal  = {Standards in genomic sciences},
  year     = {2012},
  volume   = {7},
  number   = {1},
  pages    = {159--165},
  pmid     = {23451294},
  issn     = {1944-3277},
  abstract = {Building on the planning efforts of the RCN4GSC project, a workshop was convened in San Diego to bring together experts from genomics and metagenomics, biodiversity, ecology, and bioinformatics with the charge to identify potential for positive interactions and progress, especially building on successes at establishing data standards by the GSC and by the biodiversity and ecological communities. Until recently, the contribution of microbial life to the biomass and biodiversity of the biosphere was largely overlooked (because it was resistant to systematic study). Now, emerging genomic and metagenomic tools are making investigation possible. Initial research findings suggest that major advances are in the offing. Although different research communities share some overlapping concepts and traditions, they differ significantly in sampling approaches, vocabularies and workflows. Likewise, their definitions of 'fitness for use' for data differ significantly, as this concept stems from the specific research questions of most importance in the different fields. Nevertheless, there is little doubt that there is much to be gained from greater coordination and integration.
As a first step toward interoperability of the information systems used by the different communities, participants agreed to conduct a case study on two of the leading data standards from the two formerly disparate fields: (a) GSC's standard checklists for genomics and metagenomics and (b) TDWG's Darwin Core standard, used primarily in taxonomy and systematic biology.},
}

@article{pmid23451293,
  author   = {Robbins, RJ and Cochrane, G and Davies, N and Dawyndt, P and Kottmann, R and Krishtalka, LK and Morrison, N and Tuama, EÓ and San Gil, I and Wooley, J},
  title    = {{RCN4GSC} Workshop Report: Modeling a Testbed for Managing Data at the Interface of Biodiversity and (Meta)Genomics, {April} 2011},
  journal  = {Standards in genomic sciences},
  year     = {2012},
  volume   = {7},
  number   = {1},
  pages    = {153--158},
  pmid     = {23451293},
  issn     = {1944-3277},
  abstract = {At the GSC11 meeting (4-6 April 2011, Hinxton, England, the GSC's genomic biodiversity working group (GBWG) developed an initial model for a data management testbed at the interface of biodiversity with genomics and metagenomics. With representatives of the Global Biodiversity Information Facility (GBIF) participating, it was agreed that the most useful course of action would be for GBIF to collaborate with the GSC in its ongoing GBWG workshops to achieve common goals around interoperability/data integration across (meta)-genomic and species level data. It was determined that a quick comparison should be made of the contents of the Darwin Core (DwC) and the GSC data checklists, with a goal of determining their degree of overlap and compatibility. An ad-hoc task group lead by Renzo Kottman and Peter Dawyndt undertook an initial comparison between the Darwin Core (DwC) standard used by the Global Biodiversity Information Facility (GBIF) and the MIxS checklists put forward by the Genomic Standards Consortium (GSC).
A term-by-term comparison showed that DwC and GSC concepts complement each other far more than they compete with each other. Because the preliminary analysis done at this meeting was based on expertise with GSC standards, but not with DwC standards, the group recommended that a joint meeting of DwC and GSC experts be convened as soon as possible to continue this joint assessment and to propose additional work going forward.},
}

@article{pmid23409219,
  author   = {Robbins, RJ and Beach, J and Blum, S and Dawyndt, P and Deck, J and Kottmann, R and Morrison, N and Tuama, EÓ and San Gil, I and Vieglas, D and Wieczorek, J and Wooley, J},
  title    = {{RCN4GSC} Meeting Report: Initiating a Testbed for Managing Data at the Interface of Biodiversity and Genomics/Metagenomics, {May} 2011},
  journal  = {Standards in genomic sciences},
  year     = {2012},
  volume   = {7},
  number   = {1},
  pages    = {171--174},
  pmid     = {23409219},
  issn     = {1944-3277},
  abstract = {Following up on efforts from two earlier workshops, a meeting was convened in San Diego to (a) establish working connections between experts in the use of the Darwin Core and the GSC MIxS standards, (b) conduct mutual briefings to promote knowledge exchange and to increase the understanding of the two communities' approaches, constraints, community goals, subtleties, etc., (c) perform an element-by-element comparison of the two standards, assessing the compatibility and complementarity of the two approaches, (d) propose and consider possible use cases and test beds in which a joint annotation approach might be tried, to useful scientific effect, and (e) propose additional action items necessary to continue the development of this joint effort.
Several focused working teams were identified to continue the work after the meeting ended.},
}

@article{pmid22675605,
  author   = {Gilbert, JA and Catlett, C and Desai, N and Knight, R and White, O and Robbins, R and Sankaran, R and Sansone, SA and Field, D and Meyer, F},
  title    = {Conceptualizing a {Genomics Software Institute} ({GSI})},
  journal  = {Standards in genomic sciences},
  year     = {2012},
  volume   = {6},
  number   = {1},
  pages    = {136--144},
  pmid     = {22675605},
  issn     = {1944-3277},
  abstract = {Microbial ecology has been enhanced greatly by the ongoing 'omics revolution, bringing half the world's biomass and most of its biodiversity into analytical view for the first time; indeed, it feels almost like the invention of the microscope and the discovery of the new world at the same time. With major microbial ecology research efforts accumulating prodigious quantities of sequence, protein, and metabolite data, we are now poised to address environmental microbial research at macro scales, and to begin to characterize and understand the dimensions of microbial biodiversity on the planet. What is currently impeding progress is the need for a framework within which the research community can develop, exchange and discuss predictive ecosystem models that describe the biodiversity and functional interactions. Such a framework must encompass data and metadata transparency and interoperation; data and results validation, curation, and search; application programming interfaces for modeling and analysis tools; and human and technical processes and services necessary to ensure broad adoption.
Here we discuss the need for focused community interaction to augment and deepen established community efforts, beginning with the Genomic Standards Consortium (GSC), to create a science-driven strategic plan for a Genomic Software Institute (GSI).},
}

@article{pmid21935468,
  author   = {Hankeln, W and Wendel, NJ and Gerken, J and Waldmann, J and Buttigieg, PL and Kostadinov, I and Kottmann, R and Yilmaz, P and Glöckner, FO},
  title    = {{CDinFusion}--submission-ready, on-line integration of sequence and contextual data},
  journal  = {PLoS one},
  year     = {2011},
  volume   = {6},
  number   = {9},
  pages    = {e24797},
  pmid     = {21935468},
  issn     = {1932-6203},
  mesh     = {Computational Biology/*methods ; Databases, Genetic ; Genomics ; *Software ; },
  abstract = {State of the art (DNA) sequencing methods applied in "Omics" studies grant insight into the 'blueprints' of organisms from all domains of life. Sequencing is carried out around the globe and the data is submitted to the public repositories of the International Nucleotide Sequence Database Collaboration. However, the context in which these studies are conducted often gets lost, because experimental data, as well as information about the environment are rarely submitted along with the sequence data. If these contextual or metadata are missing, key opportunities of comparison and analysis across studies and habitats are hampered or even impossible. To address this problem, the Genomic Standards Consortium (GSC) promotes checklists and standards to better describe our sequence data collection and to promote the capturing, exchange and integration of sequence data with contextual data. In a recent community effort the GSC has developed a series of recommendations for contextual data that should be submitted along with sequence data. To support the scientific community to significantly enhance the quality and quantity of contextual data in the public sequence data repositories, specialized software tools are needed.
In this work we present CDinFusion, a web-based tool to integrate contextual and sequence data in (Multi)FASTA format prior to submission. The tool is open source and available under the Lesser GNU Public License 3. A public installation is hosted and maintained at the Max Planck Institute for Marine Microbiology at http://www.megx.net/cdinfusion. The tool may also be installed locally using the open source code available at http://code.google.com/p/cdinfusion.},
}

@article{pmid21713030,
  author   = {Field, D and Amaral-Zettler, L and Cochrane, G and Cole, JR and Dawyndt, P and Garrity, GM and Gilbert, J and Glöckner, FO and Hirschman, L and Karsch-Mizrachi, I and Klenk, HP and Knight, R and Kottmann, R and Kyrpides, N and Meyer, F and San Gil, I and Sansone, SA and Schriml, LM and Sterk, P and Tatusova, T and Ussery, DW and White, O and Wooley, J},
  title    = {The {Genomic Standards Consortium}},
  journal  = {PLoS biology},
  year     = {2011},
  volume   = {9},
  number   = {6},
  pages    = {e1001088},
  pmid     = {21713030},
  issn     = {1545-7885},
  mesh     = {*Databases, Genetic ; Genomics/*standards ; *International Cooperation ; Metagenome ; },
  abstract = {A vast and rich body of information has grown up as a result of the world's enthusiasm for 'omics technologies. Finding ways to describe and make available this information that maximise its usefulness has become a major effort across the 'omics world.
At the heart of this effort is the Genomic Standards Consortium (GSC), an open-membership organization that drives community-based standardization activities, Here we provide a short history of the GSC, provide an overview of its range of current activities, and make a call for the scientific community to join forces to improve the quality and quantity of contextual information about our public collections of genomes, metagenomes, and marker gene sequences.},
}

@article{pmid21677865,
  author   = {Morrison, N and Hancock, D and Hirschman, L and Dawyndt, P and Verslyppe, B and Kyrpides, N and Kottmann, R and Yilmaz, P and Glöckner, FO and Grethe, J and Booth, T and Sterk, P and Nenadic, G and Field, D},
  title    = {Data shopping in an open marketplace: Introducing the {Ontogrator} web application for marking up data using ontologies and browsing using facets},
  journal  = {Standards in genomic sciences},
  year     = {2011},
  volume   = {4},
  number   = {2},
  pages    = {286--292},
  pmid     = {21677865},
  issn     = {1944-3277},
  abstract = {In the future, we hope to see an open and thriving data market in which users can find and select data from a wide range of data providers. In such an open access market, data are products that must be packaged accordingly. Increasingly, eCommerce sellers present heterogeneous product lines to buyers using faceted browsing. Using this approach we have developed the Ontogrator platform, which allows for rapid retrieval of data in a way that would be familiar to any online shopper. Using Knowledge Organization Systems (KOS), especially ontologies, Ontogrator uses text mining to mark up data and faceted browsing to help users navigate, query and retrieve data. Ontogrator offers the potential to impact scientific research in two major ways: 1) by significantly improving the retrieval of relevant information; and 2) by significantly reducing the time required to compose standard database queries and assemble information for further research.
Here we present a pilot implementation developed in collaboration with the Genomic Standards Consortium (GSC) that includes content from the StrainInfo, GOLD, CAMERA, Silva and Pubmed databases. This implementation demonstrates the power of ontogration and highlights that the usefulness of this approach is fully dependent on both the quality of data and the KOS (ontologies) used. Ideally, the use and further expansion of this collaborative system will help to surface issues associated with the underlying quality of annotation and could lead to a systematic means for accessing integrated data resources.},
}

@article{pmid21677864,
  author   = {Duhaime, MB and Kottmann, R and Field, D and Glöckner, FO},
  title    = {Enriching public descriptions of marine phages using the {Genomic Standards Consortium} {MIGS} standard},
  journal  = {Standards in genomic sciences},
  year     = {2011},
  volume   = {4},
  number   = {2},
  pages    = {271--285},
  pmid     = {21677864},
  issn     = {1944-3277},
  abstract = {In any sequencing project, the possible depth of comparative analysis is determined largely by the amount and quality of the accompanying contextual data. The structure, content, and storage of this contextual data should be standardized to ensure consistent coverage of all sequenced entities and facilitate comparisons. The Genomic Standards Consortium (GSC) has developed the "Minimum Information about Genome/Metagenome Sequences (MIGS/MIMS)" checklist for the description of genomes and here we annotate all 30 publicly available marine bacteriophage sequences to the MIGS standard. These annotations build on existing International Nucleotide Sequence Database Collaboration (INSDC) records, and confirm, as expected that current submissions lack most MIGS fields. MIGS fields were manually curated from the literature and placed in XML format as specified by the Genomic Contextual Data Markup Language (GCDML).
These "machine-readable" reports were then analyzed to highlight patterns describing this collection of genomes. Completed reports are provided in GCDML. This work represents one step towards the annotation of our complete collection of genome sequences and shows the utility of capturing richer metadata along with raw sequences.}, } @article {pmid21552244, year = {2011}, author = {Yilmaz, P and Kottmann, R and Field, D and Knight, R and Cole, JR and Amaral-Zettler, L and Gilbert, JA and Karsch-Mizrachi, I and Johnston, A and Cochrane, G and Vaughan, R and Hunter, C and Park, J and Morrison, N and Rocca-Serra, P and Sterk, P and Arumugam, M and Bailey, M and Baumgartner, L and Birren, BW and Blaser, MJ and Bonazzi, V and Booth, T and Bork, P and Bushman, FD and Buttigieg, PL and Chain, PS and Charlson, E and Costello, EK and Huot-Creasy, H and Dawyndt, P and DeSantis, T and Fierer, N and Fuhrman, JA and Gallery, RE and Gevers, D and Gibbs, RA and San Gil, I and Gonzalez, A and Gordon, JI and Guralnick, R and Hankeln, W and Highlander, S and Hugenholtz, P and Jansson, J and Kau, AL and Kelley, ST and Kennedy, J and Knights, D and Koren, O and Kuczynski, J and Kyrpides, N and Larsen, R and Lauber, CL and Legg, T and Ley, RE and Lozupone, CA and Ludwig, W and Lyons, D and Maguire, E and Methé, BA and Meyer, F and Muegge, B and Nakielny, S and Nelson, KE and Nemergut, D and Neufeld, JD and Newbold, LK and Oliver, AE and Pace, NR and Palanisamy, G and Peplies, J and Petrosino, J and Proctor, L and Pruesse, E and Quast, C and Raes, J and Ratnasingham, S and Ravel, J and Relman, DA and Assunta-Sansone, S and Schloss, PD and Schriml, L and Sinha, R and Smith, MI and Sodergren, E and Spo, A and Stombaugh, J and Tiedje, JM and Ward, DV and Weinstock, GM and Wendel, D and White, O and Whiteley, A and Wilke, A and Wortman, JR and Yatsunenko, T and Glöckner, FO}, title = {Minimum information about a marker gene sequence (MIMARKS) and minimum information about any (x) sequence 
(MIxS) specifications.}, journal = {Nature biotechnology}, volume = {29}, number = {5}, pages = {415-420}, pmid = {21552244}, issn = {1546-1696}, support = {UH2 DK083981/DK/NIDDK NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; U54 HG003273/HG/NHGRI NIH HHS/United States ; K01 DK090285/DK/NIDDK NIH HHS/United States ; U01 HL098957/HL/NHLBI NIH HHS/United States ; R01 HG005975-02/HG/NHGRI NIH HHS/United States ; R01 HG005975/HG/NHGRI NIH HHS/United States ; P30 AI045008/AI/NIAID NIH HHS/United States ; }, mesh = {*Biomarkers ; Checklist ; Databases, Genetic ; *Environment ; Genes, rRNA ; Genetic Variation ; Humans ; Information Storage and Retrieval/standards ; Internet ; Metagenomics/*standards ; Programming Languages ; Sequence Analysis, DNA/*standards ; Software ; }, abstract = {Here we present a standard developed by the Genomic Standards Consortium (GSC) for reporting marker gene sequences--the minimum information about a marker gene sequence (MIMARKS). We also introduce a system for describing the environment from which a biological sample originates. The 'environmental packages' apply to any genome sequence of known origin and can be used in combination with MIMARKS and other GSC checklists. Finally, to establish a unified standard for describing sequence data and to provide a single point of entry for the scientific community to access and learn about GSC checklists, we present the minimum information about any (x) sequence (MIxS). 
Adoption of MIxS will enhance our ability to analyze natural genetic diversity documented by massive DNA sequencing efforts from myriad ecosystems in our ever-changing biosphere.}, } @article {pmid21304725, year = {2010}, author = {Gilbert, JA and Meyer, F and Knight, R and Field, D and Kyrpides, N and Yilmaz, P and Wooley, J}, title = {Meeting report: GSC M5 roundtable at the 13th International Society for Microbial Ecology meeting in Seattle, WA, USA August 22-27, 2010.}, journal = {Standards in genomic sciences}, volume = {3}, number = {3}, pages = {235-239}, pmid = {21304725}, issn = {1944-3277}, abstract = {This report summarizes the proceedings of the Metagenomics, Metadata, Metaanalysis, Models and Metainfrastructure (M5) Roundtable at the 13th International Society for Microbial Ecology Meeting in Seattle, WA, USA August 22-27, 2010. The Genomic Standards Consortium (GSC) hosted this meeting as a community engagement exercise to describe the GSC to the microbial ecology community during this important international meeting. The roundtable included five talks given by members of the GSC, and was followed by audience participation in the form of a roundtable discussion. This report summarizes this event. Further information on the GSC and its range of activities can be found at http://www.gensc.org.}, } @article {pmid21304724, year = {2010}, author = {Field, D and Sansone, S and Delong, EF and Sterk, P and Friedberg, I and Kottmann, R and Hirschman, L and Garrity, G and Cochrane, G and Wooley, J and Meyer, F and Hunter, S and White, O}, title = {Meeting Report: Metagenomics, Metadata and MetaAnalysis (M3) at ISMB 2010.}, journal = {Standards in genomic sciences}, volume = {3}, number = {3}, pages = {232-234}, pmid = {21304724}, issn = {1944-3277}, abstract = {This report summarizes the proceedings of the first day of the Metagenomics, Metadata and MetaAnalysis (M3) workshop held at the Intelligent Systems for Molecular Biology 2010 conference. 
The second day, which was dedicated to the inaugural meeting of the BioSharing initiative is presented in a separate report. The Genomic Standards Consortium (GSC) hosted the first day of this Special Interest Group (SIG) at ISMB to continue exploring the bottlenecks and emerging solutions for obtaining biological insights through large-scale comparative analysis of metagenomic datasets. The M3 SIG included invited and selected talks and a panel discussion at the end of the day involving the plenary speakers. Further information about the GSC and its range of activities can be found at http://gensc.org. Information about the newly established BioSharing effort can be found at http://biosharing.org/.}, } @article {pmid21304723, year = {2010}, author = {Glass, E and Meyer, F and Gilbert, JA and Field, D and Hunter, S and Kottmann, R and Kyrpides, N and Sansone, S and Schriml, L and Sterk, P and White, O and Wooley, J}, title = {Meeting Report from the Genomic Standards Consortium (GSC) Workshop 10.}, journal = {Standards in genomic sciences}, volume = {3}, number = {3}, pages = {225-231}, pmid = {21304723}, issn = {1944-3277}, abstract = {This report summarizes the proceedings of the 10th workshop of the Genomic Standards Consortium (GSC), held at Argonne National Laboratory, IL, USA. It was the second GSC workshop to have open registration and attracted over 60 participants who worked together to progress the full range of projects ongoing within the GSC. Overall, the primary focus of the workshop was on advancing the M5 platform for next-generation collaborative computational infrastructures. Other key outcomes included the formation of a GSC working group focused on MIGS/MIMS/MIENS compliance using the ISA software suite and the formal launch of the GSC Developer Working Group. 
Further information about the GSC and its range of activities can be found at http://gensc.org/.}, } @article {pmid21304722, year = {2010}, author = {Davidsen, T and Madupu, R and Sterk, P and Field, D and Garrity, G and Gilbert, J and Glöckner, FO and Hirschman, L and Kolker, E and Kottmann, R and Kyrpides, N and Meyer, F and Morrison, N and Schriml, L and Tatusova, T and Wooley, J}, title = {Meeting Report from the Genomic Standards Consortium (GSC) Workshop 9.}, journal = {Standards in genomic sciences}, volume = {3}, number = {3}, pages = {216-224}, pmid = {21304722}, issn = {1944-3277}, abstract = {This report summarizes the proceedings of the 9th workshop of the Genomic Standards Consortium (GSC), held at the J. Craig Venter Institute, Rockville, MD, USA. It was the first GSC workshop to have open registration and attracted over 90 participants. This workshop featured sessions that provided overviews of the full range of ongoing GSC projects. It included sessions on Standards in Genomic Sciences, the open access journal of the GSC, building standards for genome annotation, the M5 platform for next-generation collaborative computational infrastructures, building ties with the biodiversity research community and two discussion panels with government and industry participants. 
Progress was made on all fronts, and major outcomes included the completion of the MIENS specification for publication and the formation of the Biodiversity working group.}, } @article {pmid21304719, year = {2010}, author = {Hirschman, L and Sterk, P and Field, D and Wooley, J and Cochrane, G and Gilbert, J and Kolker, E and Kyrpides, N and Meyer, F and Mizrachi, I and Nakamura, Y and Sansone, SA and Schriml, L and Tatusova, T and White, O and Yilmaz, P}, title = {Meeting Report: "Metagenomics, Metadata and Meta-analysis" (M3) Workshop at the Pacific Symposium on Biocomputing 2010.}, journal = {Standards in genomic sciences}, volume = {2}, number = {3}, pages = {357-360}, pmid = {21304719}, issn = {1944-3277}, abstract = {This report summarizes the M3 Workshop held at the January 2010 Pacific Symposium on Biocomputing. The workshop, organized by Genomic Standards Consortium members, included five contributed talks, a series of short presentations from stakeholders in the genomics standards community, a poster session, and, in the evening, an open discussion session to review current projects and examine future directions for the GSC and its stakeholders.}, } @article {pmid21304696, year = {2010}, author = {Kyrpides, N and Field, D and Sterk, P and Kottmann, R and Glöckner, FO and Hirschman, L and Garrity, GM and Cochrane, G and Wooley, J}, title = {Meeting Report from the Genomic Standards Consortium (GSC) Workshop 8.}, journal = {Standards in genomic sciences}, volume = {3}, number = {1}, pages = {93-96}, pmid = {21304696}, issn = {1944-3277}, abstract = {This report summarizes the proceedings of the 8th meeting of the Genomic Standards Consortium held at the Department of Energy Joint Genome Institute in Walnut Creek, CA, USA on September 9-11, 2009. 
This three-day workshop marked the maturing of the Genomic Standards Consortium from an informal gathering of researchers interested in developing standards in the field of genomics and metagenomics to an established community with a defined governance mechanism, its own open access journal, and a family of established standards for describing genomes, metagenomes and marker studies (i.e. ribosomal RNA gene surveys). There will be increased efforts within the GSC to reach out to the wider scientific community via a range of new projects. Further information about the GSC and its activities can be found at http://gensc.org/.}, } @article {pmid21045053, year = {2011}, author = {Sun, S and Chen, J and Li, W and Altintas, I and Lin, A and Peltier, S and Stocks, K and Allen, EE and Ellisman, M and Grethe, J and Wooley, J}, title = {Community cyberinfrastructure for Advanced Microbial Ecology Research and Analysis: the CAMERA resource.}, journal = {Nucleic acids research}, volume = {39}, number = {Database issue}, pages = {D546-D551}, pmid = {21045053}, issn = {1362-4962}, support = {R01 RR025030-02/RR/NCRR NIH HHS/United States ; R01 RR025030-03/RR/NCRR NIH HHS/United States ; R01 RR025030/RR/NCRR NIH HHS/United States ; R01 RR025030-01/RR/NCRR NIH HHS/United States ; R01RR025030/RR/NCRR NIH HHS/United States ; }, mesh = {*Databases, Genetic ; Environment ; *Metagenome ; Metagenomics ; Software ; }, abstract = {The Community Cyberinfrastructure for Advanced Microbial Ecology Research and Analysis (CAMERA, http://camera.calit2.net/) is a database and associated computational infrastructure that provides a single system for depositing, locating, analyzing, visualizing and sharing data about microbial biology through an advanced web-based analysis portal. CAMERA collects and links metadata relevant to environmental metagenome data sets with annotation in a semantically-aware environment allowing users to write expressive semantic queries against the database.
To meet the needs of the research community, users are able to query metadata categories such as habitat, sample type, time, location and other environmental physicochemical parameters. CAMERA is compliant with the standards promulgated by the Genomic Standards Consortium (GSC), and sustains a role within the GSC in extending standards for content and format of the metagenomic data and metadata and its submission to the CAMERA repository. To ensure wide, ready access to data and annotation, CAMERA also provides data submission tools to allow researchers to share and forward data to other metagenomics sites and community data archives such as GenBank. It has multiple interfaces for easy submission of large or complex data sets, and supports pre-registration of samples for sequencing. CAMERA integrates a growing list of tools and viewers for querying, analyzing, annotating and comparing metagenome and genome data.}, } @article {pmid21304668, year = {2009}, author = {Field, D and Friedberg, I and Sterk, P and Kottmann, R and Glöckner, FO and Hirschman, L and Garrity, GM and Cochrane, G and Wooley, J and Gilbert, J}, title = {Meeting Report: "Metagenomics, Metadata and Meta-analysis" (M3) Special Interest Group at ISMB 2009.}, journal = {Standards in genomic sciences}, volume = {1}, number = {3}, pages = {278-282}, pmid = {21304668}, issn = {1944-3277}, abstract = {This report summarizes the proceedings of the "Metagenomics, Metadata and Meta-analysis" (M3) Special Interest Group (SIG) meeting held at the Intelligent Systems for Molecular Biology 2009 conference. The Genomic Standards Consortium (GSC) hosted this meeting to explore the bottlenecks and emerging solutions for obtaining biological insights through large-scale comparative analysis of metagenomic datasets. The M3 SIG included 16 talks, half of which were selected from submitted abstracts, a poster session and a panel discussion involving members of the GSC Board. 
This report summarizes this one-day SIG, attempts to identify shared themes and recapitulates community recommendations for the future of this field. The GSC will also host an M3 workshop at the Pacific Symposium on Biocomputing (PSB) in January 2010. Further information about the GSC and its range of activities can be found at http://gensc.org/.}, } @article {pmid21304642, year = {2009}, author = {Wooley, JC and Field, D and Glöckner, FO}, title = {Extending Standards for Genomics and Metagenomics Data: A Research Coordination Network for the Genomic Standards Consortium (RCN4GSC).}, journal = {Standards in genomic sciences}, volume = {1}, number = {1}, pages = {87-90}, pmid = {21304642}, issn = {1944-3277}, abstract = {Through a newly established Research Coordination Network for the Genomic Standards Consortium (RCN4GSC), the GSC will continue its leadership in establishing and integrating genomic standards through community-based efforts. These efforts, undertaken in the context of genomic and metagenomic research aim to ensure the electronic capture of all genomic data and to facilitate the achievement of a community consensus around collecting and managing relevant contextual information connected to the sequence data. The GSC operates as an open, inclusive organization, welcoming inspired biologists with a commitment to community service. Within the collaborative framework of the ongoing, international activities of the GSC, the RCN will expand the range of research domains engaged in these standardization efforts and sustain scientific networking to encourage active participation by the broader community. The RCN4GSC, funded for five years by the US National Science Foundation, will primarily support outcome-focused working meetings and the exchange of early-career scientists between GSC research groups in order to advance key standards contributions such as GCDML. 
Focusing on the timely delivery of the extant GSC core projects, the RCN will also extend the pioneering efforts of the GSC to engage researchers active in developing ecological, environmental and biodiversity data standards. As the initial goals of the GSC are increasingly achieved, promoting the comprehensive use of effective standards will be essential to ensure the effective use of sequence and associated data, to provide access for all biologists to all of the information, and to create interdisciplinary opportunities for discovery. The RCN will facilitate these implementation activities through participation in major scientific conferences and presentations on scientific advances enabled by community usage of genomic standards.}, } @article {pmid21304639, year = {2009}, author = {Field, D and Sterk, P and Kyrpides, N and Kottmann, R and Glöckner, FO and Hirschman, L and Garrity, GM and Wooley, J and Gilna, P}, title = {Meeting Report from the Genomic Standards Consortium (GSC) Workshops 6 and 7.}, journal = {Standards in genomic sciences}, volume = {1}, number = {1}, pages = {68-71}, pmid = {21304639}, issn = {1944-3277}, abstract = {This report summarizes the proceedings of the 6th and 7th workshops of the Genomic Standards Consortium (GSC), held back-to-back in 2008. GSC 6 focused on furthering the activities of GSC working groups, GSC 7 focused on outreach to the wider community. GSC 6 was held October 10-14, 2008 at the European Bioinformatics Institute, Cambridge, United Kingdom and included a two-day workshop focused on the refinement of the Genomic Contextual Data Markup Language (GCDML). GSC 7 was held as the opening day of the International Congress on Metagenomics 2008 in San Diego California. 
Major achievements of these combined meetings included an agreement from the International Nucleotide Sequence Database Consortium (INSDC) to create a "MIGS" keyword for capturing "Minimum Information about a Genome Sequence" compliant information within INSDC (DDBJ/EMBL/GenBank) records, launch of GCDML 1.0, MIGS compliance of the first set of "Genomic Encyclopedia of Bacteria and Archaea" project genomes, approval of a proposal to extend MIGS to 16S rRNA sequences within a "Minimum Information about an Environmental Sequence", finalization of plans for the GSC eJournal, "Standards in Genomic Sciences" (SIGS), and the formation of a GSC Board. Subsequently, the GSC has been awarded a Research Co-ordination Network (RCN4GSC) grant from the National Science Foundation, held the first SIGS workshop and launched the journal. The GSC will also be hosting outreach workshops at both ISMB 2009 and PSB 2010 focused on "Metagenomics, Metadata and MetaAnalysis" (M3). Further information about the GSC and its range of activities can be found at http://gensc.org, including videos of all the presentations at GSC 7.}, } @article {pmid18564916, year = {2008}, author = {Garrity, GM and Field, D and Kyrpides, N and Hirschman, L and Sansone, SA and Angiuoli, S and Cole, JR and Glöckner, FO and Kolker, E and Kowalchuk, G and Moran, MA and Ussery, D and White, O}, title = {Toward a standards-compliant genomic and metagenomic publication record.}, journal = {Omics : a journal of integrative biology}, volume = {12}, number = {2}, pages = {157-160}, doi = {10.1089/omi.2008.A2B2}, pmid = {18564916}, issn = {1536-2310}, support = {BB/E025080/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Genomics/*standards ; *Guideline Adherence ; *Publications ; }, abstract = {Increasingly, we are aware as a community of the growing need to manage the avalanche of genomic and metagenomic data, in addition to related data types like ribosomal RNA and barcode
sequences, in a way that tightly integrates contextual data with traditional literature in a machine-readable way. It is for this reason that the Genomic Standards Consortium (GSC) formed in 2005. Here we suggest that we move beyond the development of standards and tackle standards compliance and improved data capture at the level of the scientific publication. We are supported in this goal by the fact that the scientific community is in the midst of a publishing revolution. This revolution is marked by a growing shift away from a traditional dichotomy between "journal articles" and "database entries" and an increasing adoption of hybrid models of collecting and disseminating scientific information. With respect to genomes and metagenomes and related data types, we feel the scientific community would be best served by the immediate launch of a central repository of short, highly structured "Genome Notes" that must be standards compliant. This could be done in the context of an existing journal, but we also suggest the more radical solution of launching a new journal. Such a journal could be designed to cater to a wide range of standards-related content types that are not currently centralized in the published literature. It could also support the demand for centralizing aspects of the "gray literature" (documents developed by institutions or communities) such as the call by the GSC for a central repository of Standard Operating Procedures describing the genomic annotation pipelines of the major sequencing centers. 
We argue that such an "eJournal," published under the Open Access paradigm by the GSC, could be an attractive publishing forum for a broader range of standardization initiatives within, and beyond, the GSC and thereby fill an unoccupied yet increasingly important niche within the current research landscape.}, } @article {pmid18564915, year = {2008}, author = {Field, D and Garrity, GM and Sansone, SA and Sterk, P and Gray, T and Kyrpides, N and Hirschman, L and Glöckner, FO and Kottmann, R and Angiuoli, S and White, O and Dawyndt, P and Thomson, N and Gil, IS and Morrison, N and Tatusova, T and Mizrachi, I and Vaughan, R and Cochrane, G and Kagan, L and Murphy, S and Schriml, L}, title = {Meeting report: the fifth Genomic Standards Consortium (GSC) workshop.}, journal = {Omics : a journal of integrative biology}, volume = {12}, number = {2}, pages = {109-113}, doi = {10.1089/omi.2008.A3B3}, pmid = {18564915}, issn = {1536-2310}, support = {BB/E025080/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Education ; *Genomics ; Reference Standards ; }, abstract = {This meeting report summarizes the proceedings of the fifth Genomic Standards Consortium (GSC) workshop held December 12-14, 2007, at the European Bioinformatics Institute (EBI), Cambridge, UK. This fifth workshop served as a milestone event in the evolution of the GSC (launched in September 2005); the key outcome of the workshop was the finalization of a stable version of the MIGS specification (v2.0) for publication. This accomplishment enables, and also in some cases necessitates, downstream activities, which are described in the multiauthor, consensus-driven articles in this special issue of OMICS produced as a direct result of the workshop. This report briefly summarizes the workshop and overviews the special issue.
In particular, it aims to explain how the various GSC-led projects are working together to help this community achieve its stated mission of further standardizing the descriptions of genomes and metagenomes and implementing improved mechanisms of data exchange and integration to enable more accurate comparative analyses. Further information about the GSC and its range of activities can be found at http://gensc.org.}, } @article {pmid18564914, year = {2008}, author = {Field, D and Glöckner, FO and Garrity, GM and Gray, T and Sterk, P and Cochrane, G and Vaughan, R and Kolker, E and Kottmann, R and Kyrpides, N and Angiuoli, S and Dawyndt, P and Guralnick, R and Goldstein, P and Hall, N and Hirschman, L and Kravitz, S and Lister, AL and Markowitz, V and Thomson, N and Whetzel, T}, title = {Meeting report: the fourth Genomic Standards Consortium (GSC) workshop.}, journal = {Omics : a journal of integrative biology}, volume = {12}, number = {2}, pages = {101-108}, doi = {10.1089/omi.2008.0014}, pmid = {18564914}, issn = {1536-2310}, mesh = {*Databases, Genetic ; Education ; *Genomics ; Programming Languages ; Reference Standards ; }, abstract = {This meeting report summarizes the proceedings of the "eGenomics: Cataloguing our Complete Genome Collection IV" workshop held June 6-8, 2007, at the National Institute for Environmental eScience (NIEeS), Cambridge, United Kingdom. This fourth workshop of the Genomic Standards Consortium (GSC) was a mix of short presentations, strategy discussions, and technical sessions. Speakers provided progress reports on the development of the "Minimum Information about a Genome Sequence" (MIGS) specification and the closely integrated "Minimum Information about a Metagenome Sequence" (MIMS) specification. The key outcome of the workshop was consensus on the next version of the MIGS/MIMS specification (v1.2). This drove further definition and restructuring of the MIGS/MIMS XML schema (syntax). 
With respect to semantics, a term vetting group was established to ensure that terms are properly defined and submitted to the appropriate ontology projects. Perhaps the single most important outcome of the workshop was a proposal to move beyond the concept of "minimum" to create a far richer XML schema that would define a "Genomic Contextual Data Markup Language" (GCDML) suitable for wider semantic integration across databases. GCDML will contain not only curated information (e.g., compliant with MIGS/MIMS), but also be extended to include a variety of data processing and calculations. Further information about the Genomic Standards Consortium and its range of activities can be found at http://gensc.org.}, } @article {pmid18479205, year = {2008}, author = {Van Brabant, B and Gray, T and Verslyppe, B and Kyrpides, N and Dietrich, K and Glöckner, FO and Cole, J and Farris, R and Schriml, LM and De Vos, P and De Baets, B and Field, D and Dawyndt, P}, title = {Laying the foundation for a Genomic Rosetta Stone: creating information hubs through the use of consensus identifiers.}, journal = {Omics : a journal of integrative biology}, volume = {12}, number = {2}, pages = {123-127}, doi = {10.1089/omi.2008.0020}, pmid = {18479205}, issn = {1536-2310}, mesh = {Computational Biology ; *Databases, Genetic ; *Genomics ; }, abstract = {Given the growing wealth of downstream information, the integration of molecular and non-molecular data on a given organism has become a major challenge. For micro-organisms, this information now includes a growing collection of sequenced genes and complete genomes, and for communities of organisms it includes metagenomes. Integration of the data is facilitated by the existence of authoritative, community-recognized, consensus identifiers that may form the heart of so-called information knuckles.
The Genomic Standards Consortium (GSC) is building a mapping of identifiers across a group of federated databases with the aim to improve navigation across these resources and to enable the integration of their information in the near future. In particular, this is possible because of the existence of INSDC Genome Project Identifiers (GPIDs) and accession numbers, and the ability of the community to define new consensus identifiers such as the culture identifiers used in the StrainInfo.net bioportal. Here we outline (1) the general design of the Genomic Rosetta Stone project, (2) introduce example linkages between key databases (that cover information about genomes, 16S rRNA gene sequences, and microbial biological resource centers), and (3) make an open call for participation in this project providing a vision for its future use.}, } @article {pmid18479204, year = {2008}, author = {Kottmann, R and Gray, T and Murphy, S and Kagan, L and Kravitz, S and Lombardot, T and Field, D and Glöckner, FO}, title = {A standard MIGS/MIMS compliant XML Schema: toward the development of the Genomic Contextual Data Markup Language (GCDML).}, journal = {Omics : a journal of integrative biology}, volume = {12}, number = {2}, pages = {115-121}, doi = {10.1089/omi.2008.0A10}, pmid = {18479204}, issn = {1536-2310}, mesh = {*Databases, Genetic ; *Genomics ; *Programming Languages ; }, abstract = {The Genomic Contextual Data Markup Language (GCDML) is a core project of the Genomic Standards Consortium (GSC) that implements the "Minimum Information about a Genome Sequence" (MIGS) specification and its extension, the "Minimum Information about a Metagenome Sequence" (MIMS). GCDML is an XML Schema for generating MIGS/MIMS compliant reports for data entry, exchange, and storage.
When mature, this sample-centric, strongly-typed schema will provide a diverse set of descriptors for describing the exact origin and processing of a biological sample, from sampling to sequencing, and subsequent analysis. Here we describe the need for such a project, outline design principles required to support the project, and make an open call for participation in defining the future content of GCDML. GCDML is freely available, and can be downloaded, along with documentation, from the GSC Web site (http://gensc.org).}, } @article {pmid18464787, year = {2008}, author = {Field, D and Garrity, G and Gray, T and Morrison, N and Selengut, J and Sterk, P and Tatusova, T and Thomson, N and Allen, MJ and Angiuoli, SV and Ashburner, M and Axelrod, N and Baldauf, S and Ballard, S and Boore, J and Cochrane, G and Cole, J and Dawyndt, P and De Vos, P and DePamphilis, C and Edwards, R and Faruque, N and Feldman, R and Gilbert, J and Gilna, P and Glöckner, FO and Goldstein, P and Guralnick, R and Haft, D and Hancock, D and Hermjakob, H and Hertz-Fowler, C and Hugenholtz, P and Joint, I and Kagan, L and Kane, M and Kennedy, J and Kowalchuk, G and Kottmann, R and Kolker, E and Kravitz, S and Kyrpides, N and Leebens-Mack, J and Lewis, SE and Li, K and Lister, AL and Lord, P and Maltsev, N and Markowitz, V and Martiny, J and Methe, B and Mizrachi, I and Moxon, R and Nelson, K and Parkhill, J and Proctor, L and White, O and Sansone, SA and Spiers, A and Stevens, R and Swift, P and Taylor, C and Tateno, Y and Tett, A and Turner, S and Ussery, D and Vaughan, B and Ward, N and Whetzel, T and San Gil, I and Wilson, G and Wipat, A}, title = {The minimum information about a genome sequence (MIGS) specification.}, journal = {Nature biotechnology}, volume = {26}, number = {5}, pages = {541-547}, pmid = {18464787}, issn = {1546-1696}, support = {BB/E025080/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; G8225539/MRC_/Medical Research Council/United Kingdom ; 
U54 HG004028/HG/NHGRI NIH HHS/United States ; Z99 LM999999/ImNIH/Intramural NIH HHS/United States ; }, mesh = {Chromosome Mapping/*methods/*standards ; Databases, Factual/*standards ; Information Dissemination/*methods ; Information Storage and Retrieval/*standards ; *Information Theory ; Internationality ; }, abstract = {With the quantity of genomic data increasing at an exponential rate, it is imperative that these data be captured electronically, in a standard format. Standardization activities must proceed within the auspices of open-access and international working bodies. To tackle the issues surrounding the development of better descriptions of genomic investigations, we have formed the Genomic Standards Consortium (GSC). Here, we introduce the minimum information about a genome sequence (MIGS) specification with the intent of promoting participation in its development and discussing the resources that will be required to develop improved mechanisms of metadata capture and exchange. As part of its wider goals, the GSC also supports improving the 'transparency' of the information contained in existing genomic databases.}, } @article {pmid18416669, year = {2008}, author = {Hirschman, L and Clark, C and Cohen, KB and Mardis, S and Luciano, J and Kottmann, R and Cole, J and Markowitz, V and Kyrpides, N and Morrison, N and Schriml, LM and Field, D}, title = {Habitat-Lite: a GSC case study based on free text terms for environmental metadata.}, journal = {Omics : a journal of integrative biology}, volume = {12}, number = {2}, pages = {129-136}, doi = {10.1089/omi.2008.0016}, pmid = {18416669}, issn = {1536-2310}, mesh = {Databases, Genetic ; *Genomics ; Reference Standards ; }, abstract = {There is an urgent need to capture metadata on the rapidly growing number of genomic, metagenomic and related sequences, such as 16S ribosomal genes.
This need is a major focus within the Genomic Standards Consortium (GSC), and Habitat is a key metadata descriptor in the proposed "Minimum Information about a Genome Sequence" (MIGS) specification. The goal of the work described here is to provide a light-weight, easy-to-use (small) set of terms ("Habitat-Lite") that captures high-level information about habitat while preserving a mapping to the recently launched Environment Ontology (EnvO). Our motivation for building Habitat-Lite is to meet the needs of multiple users, such as annotators curating these data, database providers hosting the data, and biologists and bioinformaticians alike who need to search and employ such data in comparative analyses. Here, we report a case study based on semiautomated identification of terms from GenBank and GOLD. We estimate that the terms in the initial version of Habitat-Lite would provide useful labels for over 60% of the kinds of information found in the GenBank isolation_source field, and around 85% of the terms in the GOLD habitat field. We present a revised version of Habitat-Lite defined within the EnvO Environmental Ontology through a new category, EnvO-Lite-GSC. 
We invite the community's feedback on its further development to provide a minimum list of terms to capture high-level habitat information and to provide classification bins needed for future studies.}, } @article {pmid18407745, year = {2008}, author = {Gil, IS and Sheldon, W and Schmidt, T and Servilla, M and Aguilar, R and Gries, C and Gray, T and Field, D and Cole, J and Pan, JY and Palanisamy, G and Henshaw, D and O'Brien, M and Kinkel, L and McMahon, K and Kottmann, R and Amaral-Zettler, L and Hobbie, J and Goldstein, P and Guralnick, RP and Brunt, J and Michener, WK}, title = {Defining linkages between the GSC and NSF's LTER program: how the Ecological Metadata Language (EML) relates to GCDML and other outcomes.}, journal = {Omics : a journal of integrative biology}, volume = {12}, number = {2}, pages = {151-156}, doi = {10.1089/omi.2008.0015}, pmid = {18407745}, issn = {1536-2310}, mesh = {*Databases, Genetic ; Genome ; *Programming Languages ; }, abstract = {The Genomic Standards Consortium (GSC) invited a representative of the Long-Term Ecological Research (LTER) to its fifth workshop to present the Ecological Metadata Language (EML) metadata standard and its relationship to the Minimum Information about a Genome/Metagenome Sequence (MIGS/MIMS) and its implementation, the Genomic Contextual Data Markup Language (GCDML). The LTER is one of the top National Science Foundation (NSF) programs in biology since 1980, representing diverse ecosystems and creating long-term, interdisciplinary research, synthesis of information, and theory. The adoption of EML as the LTER network standard has been key to build network synthesis architectures based on high-quality standardized metadata. EML is the NSF-recognized metadata standard for LTER, and EML is a criteria used to review the LTER program progress. At the workshop, a potential crosswalk between the GCDML and EML was explored. 
Also, collaboration between the LTER and GSC developers was proposed to join efforts toward a common metadata cataloging designer's tool. The community adoption success of a metadata standard depends, among other factors, on the tools and trainings developed to use the standard. LTER's experience in embracing EML may help GSC to achieve similar success. A possible collaboration between LTER and GSC to provide training opportunities for GCDML and the associated tools is being explored. Finally, LTER is investigating EML enhancements to better accommodate genomics data, possibly integrating the GCDML schema into EML. All these action items have been accepted by the LTER contingent, and further collaboration between the GSC and LTER is expected.}, } @article {pmid17436031, year = {2007}, author = {Field, D and Kyrpides, N}, title = {The positive role of the ecological community in the genomic revolution.}, journal = {Microbial ecology}, volume = {53}, number = {3}, pages = {507-511}, pmid = {17436031}, issn = {0095-3628}, mesh = {Base Sequence ; Computational Biology/*trends ; *Ecology/methods/trends ; Environment ; Environmental Microbiology ; Genome/genetics ; *Genomics/trends ; }, abstract = {The exponential increase of genomic and metagenomic data, fueled in part by recent advancements in sequencing technology, are greatly expanding our understanding of the phylogenetic diversity and metabolic capacity present in the environment. Two of the central challenges that bioinformaticians and ecologists alike must face are the design of bioinformatic resources that facilitate the analysis of genomic and metagenomic data in a comparative context and the efficient capture and organization of the plethora of descriptive information required to usefully describe these data sets. In this commentary, we review three initiatives presented in the "new frontiers" session of the second SCOPE meeting on Microbial Environmental Genomics (MicroEnGen-II, Shanghai, June 12-15, 2006). 
These are (1) the Integrated Microbial Genomes Resources (IMG), (2) the Genomic Standards Consortium (GSC), and (3) the Natural Environment Research Council (NERC) Environmental Bioinformatics Centre (NEBC). These integrative bioinformatics and data management initiatives underscore the increasingly important role ecologists have to play in the genomic (metagenomic) revolution.}, } @article {pmid16901217, year = {2006}, author = {Morrison, N and Cochrane, G and Faruque, N and Tatusova, T and Tateno, Y and Hancock, D and Field, D}, title = {Concept of sample in OMICS technology.}, journal = {Omics : a journal of integrative biology}, volume = {10}, number = {2}, pages = {127-137}, doi = {10.1089/omi.2006.10.127}, pmid = {16901217}, issn = {1536-2310}, mesh = {Animals ; Databases, Nucleic Acid/*standards ; *Genome ; *Genome, Human ; Genomics/*standards ; Humans ; Oligonucleotide Array Sequence Analysis/standards ; Proteome/*genetics ; Proteomics/*standards ; }, abstract = {Fundamental biological processes can now be studied by applying the full range of OMICS technologies (genomics, transcriptomics, proteomics, metabolomics, and beyond) to the same biological sample. Clearly, it would be desirable if the concept of sample were shared among these technologies, especially as up until the time a biological sample is prepared for use in a specific OMICS assay, its description is inherently technology independent. Sharing a common informatic representation would encourage data sharing (rather than data replication), thereby reducing redundant data capture and the potential for error. This would result in a significant degree of harmonization across different OMICS data standardization activities, a task that is critical if we are to integrate data from these different data sources. 
Here, we review the current concept of sample in OMICS technologies as it is being dealt with by different OMICS standardization initiatives and discuss the special role that the newly formed Genomic Standards Consortium (GSC) might have to play in this domain.}, } @article {pmid16901213, year = {2006}, author = {Field, D and Morrison, N and Selengut, J and Sterk, P}, title = {Meeting report: eGenomics: Cataloguing our Complete Genome Collection II.}, journal = {Omics : a journal of integrative biology}, volume = {10}, number = {2}, pages = {100-104}, doi = {10.1089/omi.2006.10.100}, pmid = {16901213}, issn = {1536-2310}, mesh = {Animals ; Databases as Topic/*standards ; *Genome ; *Genome, Human ; Genomics/*standards ; Humans ; }, abstract = {This article summarizes the proceedings of the "eGenomics: Cataloguing our Complete Genome Collection II" workshop held November 10-11, 2005, at the European Bioinformatics Institute. This exploratory workshop, organized by members of the Genomic Standards Consortium (GSC), brought together researchers from the genomic, functional OMICS, and computational biology communities to discuss standardization activities across a range of projects. The workshop proceedings and outcomes are set to help guide the development of the GSC's Minimal Information about a Genome Sequence (MIGS) specification.}, }