% ICASSP 2025 conference paper. Empty volume/number fields are omitted, the
% page range uses a double hyphen, and case-sensitive words are brace-protected.
@inproceedings{10888413,
  author    = {Basu, Sattwik and Dutta, Debottam and Wei, Yu-Lin and Roy Choudhury, Romit},
  title     = {Estimating Multi-chirp Parameters using Curvature-guided {Langevin} {Monte} {Carlo}},
  booktitle = {{ICASSP} 2025 - 2025 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  year      = {2025},
  pages     = {1--5},
  keywords  = {Monte Carlo methods;Chirp;Signal processing algorithms;Linear programming;Polynomials;Reliability;Noise measurement;Speech processing;Optimization;Signal to noise ratio;Chirps;Parameter Estimation;Langevin Monte Carlo (LMC);Gaussian smoothing;Curvature},
  doi       = {10.1109/ICASSP49660.2025.10888413},
}
2024
TASLP
Speech Dereverberation With Frequency Domain Autoregressive Modeling
Anurenjan Purushothaman,
Debottam Dutta, Rohit Kumar, and Sriram Ganapathy
IEEE/ACM Transactions on Audio, Speech, and Language Processing 2024
% IEEE/ACM TASLP journal article. The empty number field is omitted and the
% page range uses a double hyphen.
@article{10261213,
  author   = {Purushothaman, Anurenjan and Dutta, Debottam and Kumar, Rohit and Ganapathy, Sriram},
  title    = {Speech Dereverberation With Frequency Domain Autoregressive Modeling},
  journal  = {{IEEE/ACM} Transactions on Audio, Speech, and Language Processing},
  year     = {2024},
  volume   = {32},
  pages    = {29--38},
  keywords = {Frequency-domain analysis;Reverberation;Task analysis;Convolution;Predictive models;Mirrors;Analytical models;Dereverberation;end-to-end ASR;frequency domain auto-regressive modeling;joint modeling},
  doi      = {10.1109/TASLP.2023.3317570},
}
NeurIPS
Multi-Source Music Generation with Latent Diffusion
Zhongweiyang Xu,
Debottam Dutta, Yu-Lin Wei, and Romit Roy Choudhury
In Audio Imagination: NeurIPS 2024 Workshop AI-Driven Speech, Music, and Sound Generation 2024
% NeurIPS 2024 workshop paper. The last author's compound surname is entered
% as "Roy Choudhury" so it matches the other entry by the same author in this file.
@inproceedings{xu2024multisource,
  author    = {Xu, Zhongweiyang and Dutta, Debottam and Wei, Yu-Lin and Roy Choudhury, Romit},
  title     = {Multi-Source Music Generation with Latent Diffusion},
  booktitle = {Audio Imagination: {NeurIPS} 2024 Workshop {AI}-Driven Speech, Music, and Sound Generation},
  year      = {2024},
  url       = {https://openreview.net/forum?id=bxzUnfWrgk},
}
2023
Nature Sci. Data
Coswara: A respiratory sounds and symptoms dataset for remote
screening of SARS-CoV-2 infection
Debarpan Bhattacharya, Neeraj Kumar Sharma,
Debottam Dutta, Srikanth Raj Chetupalli, Pravin Mote, Sriram Ganapathy, C Chandrakiran, Sahiti Nori, K K Suhail, Sadhana Gonuguntla, and Murali Alagesan
This paper presents the Coswara dataset, a dataset containing
a diverse set of respiratory sounds and rich metadata, recorded
between April-2020 and February-2022 from 2635 individuals (1819
SARS-CoV-2 negative, 674 positive, and 142 recovered subjects).
The respiratory sounds contained nine sound categories associated
with variants of breathing, cough and speech. The rich metadata
contained demographic information associated with age, gender and
geographic location, as well as the health information relating
to the symptoms, pre-existing respiratory ailments, comorbidity
and SARS-CoV-2 test status. Our study is the first of its kind to
manually annotate the audio quality of the entire dataset
(amounting to 65 hours) through manual listening. The paper
summarizes the data collection procedure, demographic, symptoms
and audio data information. A COVID-19 classifier based on
bi-directional long short-term memory (BLSTM) architecture is trained
and evaluated on the different population sub-groups contained in
the dataset to understand the bias/fairness of the model. This
enabled the analysis of the impact of gender, geographic
location, date of recording, and language proficiency on the
COVID-19 detection performance.
% Scientific Data article describing the Coswara dataset. The journal name is
% stored in full; the pages field holds the article number.
@article{Bhattacharya2023-ps,
  author   = {Bhattacharya, Debarpan and Sharma, Neeraj Kumar and Dutta, Debottam and Chetupalli, Srikanth Raj and Mote, Pravin and Ganapathy, Sriram and Chandrakiran, C and Nori, Sahiti and Suhail, K K and Gonuguntla, Sadhana and Alagesan, Murali},
  title    = {{Coswara}: A respiratory sounds and symptoms dataset for remote screening of {SARS-CoV-2} infection},
  journal  = {Scientific Data},
  volume   = {10},
  number   = {1},
  pages    = {397},
  month    = jun,
  year     = {2023},
  language = {en},
}
2022
ICASSP
The Second Dicova Challenge: Dataset and Performance Analysis for Diagnosis of Covid-19 Using Acoustics
% ICASSP 2022 conference paper. Empty volume/number fields are omitted, the
% page range uses a double hyphen, and case-sensitive words are brace-protected.
@inproceedings{9747188,
  author    = {Sharma, Neeraj Kumar and Chetupalli, Srikanth Raj and Bhattacharya, Debarpan and Dutta, Debottam and Mote, Pravin and Ganapathy, Sriram},
  title     = {The Second {Dicova} Challenge: Dataset and Performance Analysis for Diagnosis of {Covid-19} Using Acoustics},
  booktitle = {{ICASSP} 2022 - 2022 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  year      = {2022},
  pages     = {556--560},
  doi       = {10.1109/ICASSP43922.2022.9747188},
}
Interspeech
Coswara: A website application enabling COVID-19 screening by analysing respiratory sound samples and health symptoms
% WASPAA 2021 workshop paper. Empty volume/number fields are omitted and the
% page range uses a double hyphen.
@inproceedings{9632708,
  author    = {Dutta, Debottam and Agrawal, Purvi and Ganapathy, Sriram},
  title     = {A Multi-Head Relevance Weighting Framework for Learning Raw Waveform Audio Representations},
  booktitle = {2021 {IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics ({WASPAA})},
  year      = {2021},
  pages     = {191--195},
  doi       = {10.1109/WASPAA52581.2021.9632708},
}
Svadhyaya system for the Second Diagnosing COVID-19 using Acoustics Challenge 2021
Deepak Mittal, Amir H. Poorjam,
Debottam Dutta, Debarpan Bhattacharya, Zemin Yu, Sriram Ganapathy, and Maneesh Singh
% arXiv preprint. The arXiv identifier is stored in the eprint fields rather
% than in a journal field, so the entry type is misc. The year follows the
% identifier 2206.05462 (June 2022); the "2021" in the title is the challenge name.
@misc{dicova2FB,
  author        = {Mittal, Deepak and Poorjam, Amir H. and Dutta, Debottam and Bhattacharya, Debarpan and Yu, Zemin and Ganapathy, Sriram and Singh, Maneesh},
  title         = {{Svadhyaya} system for the {Second Diagnosing COVID-19 using Acoustics Challenge 2021}},
  year          = {2022},
  eprint        = {2206.05462},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2206.05462},
  url           = {https://arxiv.org/abs/2206.05462},
  publisher     = {arXiv},
  copyright     = {Creative Commons Attribution 4.0 International},
}