{"dcterms:modified":"2023-07-19","dcterms:creator":"DR-NTU (Data)","@type":"ore:ResourceMap","@id":"https://researchdata.ntu.edu.sg/api/datasets/export?exporter=OAI_ORE&persistentId=doi:10.21979/N9/4YDZED","ore:describes":{"citation:depositor":"Zhou Xinrui","title":"Code and data for CFreeEnS","dateOfDeposit":"2019-04-03","kindOfData":".zip","subject":["Computer and Information Science","Medicine, Health and Life Sciences"],"author":{"citation:authorName":"Zhou, Xinrui","citation:authorAffiliation":"Nanyang Technological University"},"citation:dsDescription":{"citation:dsDescriptionValue":"A method called Context-Free Encoding Scheme (CFreeEnS) was proposed to encode protein sequence pairs into a numeric matrix. CFreeEnS takes advantage of rich information about the physicochemical and structural properties of amino acids. This encoding scheme keeps information about conserved properties of amino acids, which makes it possible for learning methods (e.g. random forest) to capture the cross-subtype antigenic pattern of influenza viruses. \r\nBesides, the CFreeEnS, free from dependence on carefully designed features, should be applicable to other applications in bioinformatics measuring the phenotype similarity from sequences. We have tested the method on four more datasets, namely the iAMP-2L dataset classifying antimicrobial peptides from non-antimicrobial peptides [5]; the tumor homing peptides dataset (TumorHPD); the HemoPI including hemolytic, non-hemolytic and semi-hemolytic peptides and the phage virion proteins. The prediction accuracy of 10-fold cross-validation is compared with two reported methods. 
Results show that the CFreeEnS outperforms or at least is competitive with the traditional method using handcrafted features and a state-of-the-art method named m-NGSG."},"software":{"citation:softwareName":"python","citation:softwareVersion":"3.5"},"citation:datasetContact":{"citation:datasetContactName":"Zhou Xinrui","citation:datasetContactAffiliation":"Nanyang Technological University"},"publication":[{"publicationCitation":"Zhou, X., Yin, R., Zheng, J., & Kwoh, C. K. (2019). An Encoding Scheme Capturing Generic Priors and Properties of Amino Acids Improves Protein Classification. IEEE Access, 7, 7348-7356.","publicationIDType":"doi","publicationIDNumber":"10.1109/ACCESS.2018.2890096","publicationURL":"https://ieeexplore.ieee.org/document/8594660"},{"publicationCitation":"Zhou, X., Yin, R., Zheng, J., & Kwoh, C.-K. (2019). An encoding scheme capturing generic priors and properties of amino acids improves protein classification. IEEE Access, 7, 7348-7356.","publicationIDType":"handle","publicationIDNumber":"10356/105937","publicationURL":"https://hdl.handle.net/10356/105937"}],"citation:keyword":[{"citation:keywordValue":"encoding scheme"},{"citation:keywordValue":"protein classification"},{"citation:keywordValue":"antigenicity prediction"}],"@id":"doi:10.21979/N9/4YDZED","@type":["ore:Aggregation","schema:Dataset"],"schema:version":"1.1","schema:name":"Code and data for CFreeEnS","schema:dateModified":"Wed Jun 19 10:34:41 SGT 2019","schema:datePublished":"2019-04-03","schema:license":"http://creativecommons.org/licenses/by-nc/4.0","dvcore:fileTermsOfAccess":{"dvcore:fileRequestAccess":false},"schema:includedInDataCatalog":"DR-NTU (Data)","ore:aggregates":[{"schema:description":"code for CFreeEnS and datasets for 
testing.","schema:name":"code-data.zip","dvcore:restricted":false,"schema:version":1,"dvcore:datasetVersionId":875,"@id":"doi:10.21979/N9/4YDZED/XZJFYH","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/:persistentId?persistentId=doi:10.21979/N9/4YDZED/XZJFYH","@type":"ore:AggregatedResource","schema:fileFormat":"application/zip","dvcore:filesize":84201919,"dvcore:storageIdentifier":"file://169e1209406-9d7dd5593bed","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"c31ae8f272b49954def356f35979cd43"}}],"schema:hasPart":["doi:10.21979/N9/4YDZED/XZJFYH"]},"@context":{"author":"http://purl.org/dc/terms/creator","citation":"https://dataverse.org/schema/citation/","dateOfDeposit":"http://purl.org/dc/terms/dateSubmitted","dcterms":"http://purl.org/dc/terms/","dvcore":"https://dataverse.org/schema/core#","kindOfData":"http://rdf-vocabulary.ddialliance.org/discovery#kindOfData","ore":"http://www.openarchives.org/ore/terms/","publication":"http://purl.org/dc/terms/isReferencedBy","publicationCitation":"http://purl.org/dc/terms/bibliographicCitation","publicationIDNumber":"http://purl.org/spar/datacite/ResourceIdentifier","publicationIDType":"http://purl.org/spar/datacite/ResourceIdentifierScheme","publicationURL":"https://schema.org/distribution","schema":"http://schema.org/","software":"https://www.w3.org/TR/prov-o/#wasGeneratedBy","subject":"http://purl.org/dc/terms/subject","title":"http://purl.org/dc/terms/title"}}