% Journal article: speech dereverberation with frequency-domain autoregressive modeling.
% The empty `number` field was dropped and the page range uses an en-dash (`--`).
@article{10261213,
  author   = {Purushothaman, Anurenjan and Dutta, Debottam and Kumar, Rohit and Ganapathy, Sriram},
  title    = {Speech Dereverberation With Frequency Domain Autoregressive Modeling},
  journal  = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume   = {32},
  pages    = {29--38},
  year     = {2024},
  doi      = {10.1109/TASLP.2023.3317570},
  keywords = {Frequency-domain analysis;Reverberation;Task analysis;Convolution;Predictive models;Mirrors;Analytical models;Dereverberation;end-to-end ASR;frequency domain auto-regressive modeling;joint modeling},
}
Neurips
Multi-Source Music Generation with Latent Diffusion
Zhongweiyang Xu, Debottam Dutta, Yu-Lin Wei, and Romit Roy Choudhury
In Audio Imagination: NeurIPS 2024 Workshop AI-Driven Speech, Music, and Sound Generation 2024
% Workshop paper: multi-source music generation with latent diffusion.
@inproceedings{xu2024multisource,
  author    = {Xu, Zhongweiyang and Dutta, Debottam and Wei, Yu-Lin and Choudhury, Romit Roy},
  title     = {Multi-Source Music Generation with Latent Diffusion},
  booktitle = {Audio Imagination: NeurIPS 2024 Workshop AI-Driven Speech, Music, and Sound Generation},
  year      = {2024},
  url       = {https://openreview.net/forum?id=bxzUnfWrgk},
}
2023
Nature Sci. Data
Coswara: A respiratory sounds and symptoms dataset for remote screening of SARS-CoV-2 infection
Debarpan Bhattacharya, Neeraj Kumar Sharma, Debottam Dutta, Srikanth Raj Chetupalli, Pravin Mote, Sriram Ganapathy, C Chandrakiran, Sahiti Nori, K K Suhail, Sadhana Gonuguntla, and Murali Alagesan
This paper presents the Coswara dataset, a dataset containing a diverse set of respiratory sounds and rich metadata, recorded between April 2020 and February 2022 from 2635 individuals (1819 SARS-CoV-2 negative, 674 positive, and 142 recovered subjects). The respiratory sounds contained nine sound categories associated with variants of breathing, cough and speech. The rich metadata contained demographic information associated with age, gender and geographic location, as well as the health information relating to the symptoms, pre-existing respiratory ailments, comorbidity and SARS-CoV-2 test status. Our study is the first of its kind to manually annotate the audio quality of the entire dataset (amounting to 65 hours) through manual listening. The paper summarizes the data collection procedure, demographic, symptoms and audio data information. A COVID-19 classifier based on a bi-directional long short-term memory (BLSTM) architecture is trained and evaluated on the different population sub-groups contained in the dataset to understand the bias/fairness of the model. This enabled the analysis of the impact of gender, geographic location, date of recording, and language proficiency on the COVID-19 detection performance.
% Journal article describing the Coswara respiratory sounds and symptoms dataset.
% The title is joined onto one line and the journal name is stored in full
% (the entry previously used the abbreviation "Sci. Data").
@article{Bhattacharya2023-ps,
  author   = {Bhattacharya, Debarpan and Sharma, Neeraj Kumar and Dutta, Debottam and Chetupalli, Srikanth Raj and Mote, Pravin and Ganapathy, Sriram and Chandrakiran, C and Nori, Sahiti and Suhail, K K and Gonuguntla, Sadhana and Alagesan, Murali},
  title    = {Coswara: A respiratory sounds and symptoms dataset for remote screening of {SARS-CoV-2} infection},
  journal  = {Scientific Data},
  volume   = {10},
  number   = {1},
  pages    = {397},
  month    = jun,
  year     = {2023},
  language = {en},
}
2022
ICASSP
The Second Dicova Challenge: Dataset and Performance Analysis for Diagnosis of Covid-19 Using Acoustics
% ICASSP 2022 paper on the Second Dicova Challenge (dataset and performance analysis).
% Empty `volume`/`number` fields were dropped, the page range uses `--`, and the
% proper nouns in the title are brace-protected against style recasing.
@inproceedings{9747188,
  author    = {Sharma, Neeraj Kumar and Chetupalli, Srikanth Raj and Bhattacharya, Debarpan and Dutta, Debottam and Mote, Pravin and Ganapathy, Sriram},
  title     = {The Second {Dicova} Challenge: Dataset and Performance Analysis for Diagnosis of {Covid-19} Using Acoustics},
  booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {556--560},
  year      = {2022},
  doi       = {10.1109/ICASSP43922.2022.9747188},
}
Interspeech
Coswara: A website application enabling COVID-19 screening by analysing respiratory sound samples and health symptoms
% WASPAA 2021 paper: multi-head relevance weighting for raw-waveform audio representations.
% Empty `volume`/`number` fields were dropped and the page range uses `--`.
@inproceedings{9632708,
  author    = {Dutta, Debottam and Agrawal, Purvi and Ganapathy, Sriram},
  title     = {A Multi-Head Relevance Weighting Framework for Learning Raw Waveform Audio Representations},
  booktitle = {2021 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
  pages     = {191--195},
  year      = {2021},
  doi       = {10.1109/WASPAA52581.2021.9632708},
}
Svadhyaya system for the Second Diagnosing COVID-19 using Acoustics Challenge 2021
Deepak Mittal, Amir H. Poorjam, Debottam Dutta, Debarpan Bhattacharya, Zemin Yu, Sriram Ganapathy, and Maneesh Singh
% arXiv preprint 2206.05462 (Svadhyaya system, Second DiCOVA Challenge).
% Stored as a misc entry with eprint fields instead of placing the arXiv
% identifier in `journal`.
% NOTE(review): year changed from 2021 to 2022 because the arXiv identifier
% prefix 2206 denotes June 2022 -- confirm against the arXiv record.
@misc{dicova2FB,
  author        = {Mittal, Deepak and Poorjam, Amir H. and Dutta, Debottam and Bhattacharya, Debarpan and Yu, Zemin and Ganapathy, Sriram and Singh, Maneesh},
  title         = {Svadhyaya system for the Second Diagnosing {COVID-19} using Acoustics Challenge 2021},
  year          = {2022},
  eprint        = {2206.05462},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2206.05462},
  publisher     = {arXiv},
  copyright     = {Creative Commons Attribution 4.0 International},
}