{"dcterms:modified":"2023-08-11","dcterms:creator":"DR-NTU (Data)","@type":"ore:ResourceMap","@id":"https://researchdata.ntu.edu.sg/api/datasets/export?exporter=OAI_ORE&persistentId=doi:10.21979/N9/4RHC3D","ore:describes":{"citation:relatedMaterial":"GitHub page: \"merlion-ccs-2023\" in MERLIon-Challenge, GitHub, https://github.com/MERLIon-Challenge/merlion-ccs-2023, last updated 21 February 2023.","grantNumber":[{"citation:grantNumberAgency":"Nanyang Technological University","citation:grantNumberValue":"NAP Start Up M4081215.100"},{"citation:grantNumberAgency":"Nanyang Technological University","citation:grantNumberValue":"CRADLE@NTU JHU IO 90071537"},{"citation:grantNumberAgency":"National Research Foundation (NRF)","citation:grantNumberValue":"NRF2016-SOL002-011"}],"kindOfData":"Description text data","dateOfDeposit":"2023-07-20","title":"MERLIon CCS Challenge Development and Evaluation Datasets Open Preview (Documentation)","citation:depositor":"Chua, Victoria Yi Han","subject":["Engineering","Social Sciences"],"author":[{"citation:authorName":"Chua, Victoria Yi Han","citation:authorAffiliation":"Nanyang Technological University","authorIdentifierScheme":"ORCID","authorIdentifier":"0000-0002-0755-3148"},{"citation:authorName":"Styles, Suzy J","citation:authorAffiliation":"Nanyang Technological University","authorIdentifierScheme":"ORCID","authorIdentifier":"0000-0003-3517-9680"}],"citation:keyword":[{"citation:keywordValue":"Speech Processing"},{"citation:keywordValue":"Engineering Challenge"},{"citation:keywordValue":"Codeswitched Speech"},{"citation:keywordValue":"Accented Speech"},{"citation:keywordValue":"Language Identification"},{"citation:keywordValue":"Language Diarization"},{"citation:keywordValue":"Child-directed Speech"}],"citation:dsDescription":{"citation:dsDescriptionValue":"The inaugural Multilingual Everyday Recordings - Language Identification on Code-Switched Child-Directed Speech (MERLIon CCS) Challenge focuses on developing robust language 
identification and language diarization systems that are reliable for non-standard, accented, spontaneous code-switched, child-directed speech collected via Zoom. \n<br><br>\nThe inaugural MERLIon CCS Challenge is a special session at INTERSPEECH 2023. \n\nThis repository is an open preview containing documentation about the files that can be downloaded in the <a href =\"https://doi.org/10.21979/N9/ANXS8Z\"> development and evaluation sets for two Tasks in the 2023 MERLIon CCS Challenge. </a>\n<br><br>\nIn work arising from this corpus, please cite the dataset: <br>\nChua, Victoria Yi Han; Garcia Perera, Leibny Paola; Khudanpur, Sanjeev; Khong, Andy W. H.; Dauwels, Justin; Woon, Fei Ting; Styles, Suzy J, 2023, \"Development and Evaluation data for Multilingual Everyday Recordings - Language Identification on Code-Switched Child-Directed Speech (MERLIon CCS) Challenge\", <a href =\"https://doi.org/10.21979/N9/ANXS8Z\"> https://doi.org/10.21979/N9/ANXS8Z</a>, DR-NTU (Data), V1"},"citation:datasetContact":{"citation:datasetContactName":"Suzy J Styles","citation:datasetContactAffiliation":"Nanyang Technological University"},"publication":[{"publicationCitation":"Woon, F. T., Yogarrajah, E. C., Fong, S., Salleh, N. S. M., Sundaray, S., & Styles, S. J. (2021). Creating a corpus of multilingual parent-child speech remotely: Lessons learned in a large-scale onscreen picturebook sharing task. Frontiers in Psychology, 12, 734936.","publicationIDType":"doi","publicationIDNumber":"10.3389/fpsyg.2021.734936","publicationURL":"https://www.frontiersin.org/articles/10.3389/fpsyg.2021.734936/full"},{"publicationCitation":"Chua, Y. H. V., Liu, H., Garcia Perera, L. P., Woon, F. T., Wong, J., Zhang, X., Khudanpur, S., Khong, A. W. H., Dauwels, J. & Styles, S. J. (2023). MERLIon CCS Challenge: A English-Mandarin code-switching child-directed speech corpus for language identification and diarization. Accepted for Proc. 
Interspeech 2023.","publicationIDType":"arXiv","publicationIDNumber":"arxiv:2305.18881","publicationURL":"http://arxiv.org/abs/2305.18881"},{"publicationCitation":"Styles, S. J. , Chua, Y. H. V., Woon, F. T., Liu, H., Garcia Perera, L. P., Khudanpur, S., Khong, A. W. H., & Dauwels, J. (2023). Investigating model performance in language identification: beyond simple error statistics. Accepted for Interspeech 2023. http://arxiv.org/abs/2305.18925","publicationIDType":"arXiv","publicationIDNumber":"arxiv:2305.18925","publicationURL":"http://arxiv.org/abs/2305.18925"},{"publicationCitation":"Garcia Perera, L. P., Chua, Y. H. V., Liu, H., Woon, F. T., Khong, A. W. H., Dauwels, J. & Styles, S. J., \"MERLIon CCS Challenge Evaluation Plan Version 1.2\". ArXiv https://doi.org/10.48550/arXiv.2305.19493","publicationIDType":"arXiv","publicationIDNumber":"arxiv:2305.19493","publicationURL":"https://arxiv.org/abs/2305.19493"}],"software":[{"citation:softwareName":".csv"},{"citation:softwareName":".txt"}],"@id":"doi:10.21979/N9/4RHC3D","@type":["ore:Aggregation","schema:Dataset"],"schema:version":"1.0","schema:name":"MERLIon CCS Challenge Development and Evaluation Datasets Open Preview (Documentation)","schema:dateModified":"2023-08-11 18:53:56.481","schema:datePublished":"2023-08-11","schema:license":"http://creativecommons.org/licenses/by-nc/4.0","dvcore:fileTermsOfAccess":{"dvcore:fileRequestAccess":true},"schema:includedInDataCatalog":"DR-NTU (Data)","ore:aggregates":[{"schema:description":"Contains the filenames of all audio recordings in the MERLIon CCS Challenge development 
set.","schema:name":"MERLIon-CCS-Challenge-2023_Development-Set_v001_File-List.tab","dvcore:restricted":false,"schema:version":5,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=117006","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/117006","@type":"ore:AggregatedResource","schema:fileFormat":"text/tab-separated-values","dvcore:filesize":10721,"dvcore:storageIdentifier":"file://1899b6a9c44-01008ece8597","dvcore:originalFileFormat":"text/csv","dvcore:originalFormatLabel":"Comma Separated Values","dvcore:UNF":"UNF:6:DBZ0LJYDQuBp+JpHC4e2wQ==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"2bf13c03d7a06433eb126c2103e45f49"}},{"schema:description":"Contains metadata of the MERLIon CCS Challenge development set.  ","schema:name":"MERLIon-CCS-Challenge-2023_Development-Set_v001_METADATA.txt","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=116998","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/116998","@type":"ore:AggregatedResource","schema:fileFormat":"text/plain","dvcore:filesize":46723,"dvcore:storageIdentifier":"file://1899b6a9d25-962e6ca1f62d","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"5729c747c57f7719d16ab554173daf1f"}},{"schema:description":"Contains the release notes and dataset description of the MERLIon CCS Challenge development set. 
","schema:name":"MERLIon-CCS-Challenge-2023_Development-Set_v001_RELEASE-NOTES.txt","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=116997","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/116997","@type":"ore:AggregatedResource","schema:fileFormat":"text/plain","dvcore:filesize":4159,"dvcore:storageIdentifier":"file://1899b6a9e22-efc89aa31692","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"2a4512e853e7ffdd47d4bb24550c55ca"}},{"schema:description":"Contains total lengths of English and Mandarin speech in milliseconds and number of English and Mandarin segments in each audio recording in the MERLIon CCS Challenge development set.","schema:name":"MERLIon-CCS-Challenge-2023_Development-Set_v001_Segment-Lengths-Counts.tab","dvcore:restricted":false,"schema:version":5,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=116999","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/116999","@type":"ore:AggregatedResource","schema:fileFormat":"text/tab-separated-values","dvcore:filesize":13528,"dvcore:storageIdentifier":"file://1899b6a9f12-8bbbd99b5324","dvcore:originalFileFormat":"text/csv","dvcore:originalFormatLabel":"Comma Separated Values","dvcore:UNF":"UNF:6:cor05m8DAi1IsQux4AFq2w==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"99ab7ebc1258ebd102bf055f9e907fd6"}},{"schema:description":"Contains the filenames of all audio recordings in the MERLIon CCS Challenge evaluation set for Task 1 (Language 
Identification).","schema:name":"MERLIon-CCS-Challenge_Task-1_Evaluation-Set_v001_File-List.tab","dvcore:restricted":false,"schema:version":5,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=117002","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/117002","@type":"ore:AggregatedResource","schema:fileFormat":"text/tab-separated-values","dvcore:filesize":10935,"dvcore:storageIdentifier":"file://1899b6a950d-c8fa4f13e4b3","dvcore:originalFileFormat":"text/csv","dvcore:originalFormatLabel":"Comma Separated Values","dvcore:UNF":"UNF:6:N/bgz9BG4KC27/vv64K5Cg==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"044092aeb13ec6861ee4eab00e9673b5"}},{"schema:description":"Contains metadata of the MERLIon CCS Challenge evaluation set for Task 1 (Language Identification).","schema:name":"MERLIon-CCS-Challenge_Task-1_Evaluation-Set_v001_METADATA.txt","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=117004","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/117004","@type":"ore:AggregatedResource","schema:fileFormat":"text/plain","dvcore:filesize":46891,"dvcore:storageIdentifier":"file://1899b6a95ef-df0089f606fb","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"559c7445f4503f8651a078aa7c9d8e6d"}},{"schema:description":"Contains the release notes and dataset description of the MERLIon CCS Challenge evaluation set for Task 1 (Language 
Identification).","schema:name":"MERLIon-CCS-Challenge_Task-1_Evaluation-Set_v001_RELEASE-NOTES.txt","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=117003","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/117003","@type":"ore:AggregatedResource","schema:fileFormat":"text/plain","dvcore:filesize":4504,"dvcore:storageIdentifier":"file://1899b6a96a1-e87746db9765","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"bb0d412c085b0af392e8e1fcd988afb1"}},{"schema:description":"Contains total lengths of English and Mandarin speech in milliseconds and number of English and Mandarin segments in each audio recording in the MERLIon CCS Challenge evaluation set for Task 1 (Language Identification).\n","schema:name":"MERLIon-CCS-Challenge_Task-1_Evaluation-Set_v001_Segment-Lengths-Counts.tab","dvcore:restricted":false,"schema:version":5,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=117001","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/117001","@type":"ore:AggregatedResource","schema:fileFormat":"text/tab-separated-values","dvcore:filesize":13765,"dvcore:storageIdentifier":"file://1899b6a978c-b5818e3ca09b","dvcore:originalFileFormat":"text/csv","dvcore:originalFormatLabel":"Comma Separated Values","dvcore:UNF":"UNF:6:LG65AZfWQeL+tY4JqhxASg==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"392c4185a187512c3900a6fd00060f51"}},{"schema:description":"Contains the filenames of all audio recordings in the MERLIon CCS Challenge evaluation set for Task 2 (Language Diarization). 
","schema:name":"MERLIon-CCS-Challenge_Task-2_Evaluation-Set_v001_File-List.tab","dvcore:restricted":false,"schema:version":5,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=117000","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/117000","@type":"ore:AggregatedResource","schema:fileFormat":"text/tab-separated-values","dvcore:filesize":10935,"dvcore:storageIdentifier":"file://1899b6a98bf-6b4747cdcb78","dvcore:originalFileFormat":"text/csv","dvcore:originalFormatLabel":"Comma Separated Values","dvcore:UNF":"UNF:6:DWd9g07ws3tko2NOoPzfHw==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"085494abed8938a7e719cdfde9cfa069"}},{"schema:description":"Contains metadata of the MERLIon CCS Challenge evaluation set for Task 2 (Language Diarization).  ","schema:name":"MERLIon-CCS-Challenge_Task-2_Evaluation-Set_v001_METADATA.txt","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=116995","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/116995","@type":"ore:AggregatedResource","schema:fileFormat":"text/plain","dvcore:filesize":47410,"dvcore:storageIdentifier":"file://1899b6a99d3-6f26e73003c6","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"549467899677f8d2fdb55484680ca1cb"}},{"schema:description":"Contains the release notes and dataset description of the MERLIon CCS Challenge evaluation set for Task 2 (Language Diarization). 
","schema:name":"MERLIon-CCS-Challenge_Task-2_Evaluation-Set_v001_RELEASE-NOTES.txt","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=116996","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/116996","@type":"ore:AggregatedResource","schema:fileFormat":"text/plain","dvcore:filesize":4347,"dvcore:storageIdentifier":"file://1899b6a9aad-2600c58f49a9","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"8d98a0af57777d8c0927fb0abcbcc224"}},{"schema:description":"Contains total lengths of English and Mandarin speech in milliseconds and number of English and Mandarin segments in each audio recording in the MERLIon CCS Challenge evaluation set for Task 2 (Language Diarization). ","schema:name":"MERLIon-CCS-Challenge_Task-2_Evaluation-Set_v001_Segment-Lengths-Counts.tab","dvcore:restricted":false,"schema:version":5,"dvcore:datasetVersionId":7140,"@id":"https://researchdata.ntu.edu.sg/file.xhtml?fileId=117005","schema:sameAs":"https://researchdata.ntu.edu.sg/api/access/datafile/117005","@type":"ore:AggregatedResource","schema:fileFormat":"text/tab-separated-values","dvcore:filesize":13765,"dvcore:storageIdentifier":"file://1899b6a9b88-5ec97291a5f7","dvcore:originalFileFormat":"text/csv","dvcore:originalFormatLabel":"Comma Separated 
Values","dvcore:UNF":"UNF:6:IG8foF6DZkWbVP4Mp5OAsA==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"2d174ec55f22ac1c7a81ca3cebe1c94a"}}],"schema:hasPart":["https://researchdata.ntu.edu.sg/file.xhtml?fileId=117006","https://researchdata.ntu.edu.sg/file.xhtml?fileId=116998","https://researchdata.ntu.edu.sg/file.xhtml?fileId=116997","https://researchdata.ntu.edu.sg/file.xhtml?fileId=116999","https://researchdata.ntu.edu.sg/file.xhtml?fileId=117002","https://researchdata.ntu.edu.sg/file.xhtml?fileId=117004","https://researchdata.ntu.edu.sg/file.xhtml?fileId=117003","https://researchdata.ntu.edu.sg/file.xhtml?fileId=117001","https://researchdata.ntu.edu.sg/file.xhtml?fileId=117000","https://researchdata.ntu.edu.sg/file.xhtml?fileId=116995","https://researchdata.ntu.edu.sg/file.xhtml?fileId=116996","https://researchdata.ntu.edu.sg/file.xhtml?fileId=117005"]},"@context":{"author":"http://purl.org/dc/terms/creator","authorIdentifier":"http://purl.org/spar/datacite/AgentIdentifier","authorIdentifierScheme":"http://purl.org/spar/datacite/AgentIdentifierScheme","citation":"https://dataverse.org/schema/citation/","dateOfDeposit":"http://purl.org/dc/terms/dateSubmitted","dcterms":"http://purl.org/dc/terms/","dvcore":"https://dataverse.org/schema/core#","grantNumber":"https://schema.org/sponsor","kindOfData":"http://rdf-vocabulary.ddialliance.org/discovery#kindOfData","ore":"http://www.openarchives.org/ore/terms/","publication":"http://purl.org/dc/terms/isReferencedBy","publicationCitation":"http://purl.org/dc/terms/bibliographicCitation","publicationIDNumber":"http://purl.org/spar/datacite/ResourceIdentifier","publicationIDType":"http://purl.org/spar/datacite/ResourceIdentifierScheme","publicationURL":"https://schema.org/distribution","schema":"http://schema.org/","software":"https://www.w3.org/TR/prov-o/#wasGeneratedBy","subject":"http://purl.org/dc/terms/subject","title":"http://purl.org/dc/terms/title"}}