Commit 22cbd96

…tion_page

siddhu001 committed Nov 10, 2023
2 parents 99fd9d2 + 2ba3846

Showing 24 changed files with 537 additions and 44 deletions.
35 changes: 35 additions & 0 deletions _includes/figure.html
@@ -0,0 +1,35 @@
{%- assign img_path = include.path | remove: ".jpg" | remove: ".jpeg" | remove: ".png" | remove: ".tiff" -%}

<figure>

<picture>
{% if site.imagemagick.enabled %}
{% for i in site.imagemagick.widths -%}
<source
class="responsive-img-srcset"
media="(max-width: {{ i }}px)"
srcset="{{ img_path | relative_url }}-{{ i }}.webp"
/>
{% endfor -%}
{% endif %}

<!-- Fallback to the original file -->
<img
src="{% if include.cache_bust %}{{ include.path | relative_url | bust_file_cache }}{% else %}{{ include.path | relative_url }}{% endif %}"
{% if include.class %}class="{{ include.class }}"{% endif %}
{% if include.width %}width="{{ include.width }}"{% else %}width="auto"{% endif %}
{% if include.height %}height="{{ include.height }}"{% else %}height="auto"{% endif %}
{% if include.min-width %}min-width="{{ include.min-width }}"{% endif %}
{% if include.min-height %}min-height="{{ include.min-height }}"{% endif %}
{% if include.max-width %}max-width="{{ include.max-width }}"{% endif %}
{% if include.max-height %}max-height="{{ include.max-height }}"{% endif %}
{% if include.alt %}alt="{{ include.alt }}"{% endif %}
{% if include.title %}title="{{ include.title }}"{% endif %}
{% if include.zoomable %}data-zoomable{% endif %}
onerror="this.onerror=null; $('.responsive-img-srcset').remove();"
/>
</picture>

{%- if include.caption -%}<figcaption class="caption">{{ include.caption }}</figcaption>{%- endif %}

</figure>
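For reference, a minimal sketch of how this include might be invoked from a page or post; the path, class, and caption are illustrative, not taken from this commit:

```liquid
{% include figure.html path="assets/img/example.jpg" class="img-fluid rounded" alt="Example figure" caption="A hypothetical caption rendered below the image" zoomable=true %}
```

The responsive WebP `<source>` tags are emitted only when ImageMagick resizing is enabled in `_config.yml`; a plausible configuration, assuming the `site.imagemagick` keys referenced above:

```yaml
imagemagick:
  enabled: true
  widths: [480, 800, 1400] # one pre-generated .webp variant expected per width
```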
18 changes: 18 additions & 0 deletions _includes/scripts/analytics.html
@@ -0,0 +1,18 @@
{%- if site.enable_google_analytics -%}
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id={{ site.google_analytics }}"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){ window.dataLayer.push(arguments); }
gtag('js', new Date());
gtag('config', '{{ site.google_analytics }}');
</script>
{%- endif -%}
{%- if site.enable_cronitor_analytics -%}
<!-- Cronitor RUM -->
<script async src="https://rum.cronitor.io/script.js"></script>
<script>
window.cronitor = window.cronitor || function() { (window.cronitor.q = window.cronitor.q || []).push(arguments); };
cronitor('config', { clientKey: '{{site.cronitor_analytics}}' });
</script>
{%- endif -%}
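Both branches are gated on site-level settings; a sketch of the corresponding `_config.yml` entries, with placeholder IDs rather than real ones:

```yaml
enable_google_analytics: true
google_analytics: G-XXXXXXXXXX # placeholder GA4 measurement ID
enable_cronitor_analytics: false
cronitor_analytics: "" # Cronitor RUM client key, if enabled
```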
80 changes: 80 additions & 0 deletions _includes/scripts/progressBar.html
@@ -0,0 +1,80 @@
{% if site.enable_progressbar %}

<!-- Scrolling Progress Bar -->
<script type="text/javascript">
/*
* This JavaScript code has been adapted from the article
* https://css-tricks.com/reading-position-indicator/ authored by Pankaj Parashar,
* published on the website https://css-tricks.com on the 7th of May, 2014.
* A couple of changes were made to the original code to make it compatible
* with the `al-folio` theme.
*/
const progressBar = $("#progress");
/*
* We set up the bar after all elements are done loading.
* In some cases, if the images in the page are larger than the intended
* size they'll have on the page, they'll be resized via CSS to accommodate
* the desired size. This mistake, however, breaks the computations as the
* scroll size is computed as soon as the elements finish loading.
* To account for this, a minimal delay was introduced before computing the
* values.
*/
window.onload = function () {
setTimeout(progressBarSetup, 50);
};
/*
* We set up the bar according to the browser.
* If the browser supports the progress element we use that.
* Otherwise, we resize the bar through CSS styling.
*/
function progressBarSetup() {
if ("max" in document.createElement("progress")) {
initializeProgressElement();
$(document).on("scroll", function() {
progressBar.attr({ value: getCurrentScrollPosition() });
});
$(window).on("resize", initializeProgressElement);
} else {
resizeProgressBar();
$(document).on("scroll", resizeProgressBar);
$(window).on("resize", resizeProgressBar);
}
}
/*
* The vertical scroll position is the same as the number of pixels that
* are hidden from view above the scrollable area. Thus, a value > 0 is
* how much the user has scrolled from the top
*/
function getCurrentScrollPosition() {
return $(window).scrollTop();
}

function initializeProgressElement() {
let navbarHeight = $("#navbar").outerHeight(true);
$("body").css({ "padding-top": navbarHeight });
$("progress-container").css({ "padding-top": navbarHeight });
progressBar.css({ top: navbarHeight });
progressBar.attr({
max: getDistanceToScroll(),
value: getCurrentScrollPosition(),
});
}
/*
* The offset between the html document height and the browser viewport
* height will be greater than zero if vertical scroll is possible.
* This is the distance the user can scroll
*/
function getDistanceToScroll() {
return $(document).height() - $(window).height();
}

function resizeProgressBar() {
progressBar.css({ width: getWidthPercentage() + "%" });
}
// The scroll ratio equals the percentage to resize the bar
function getWidthPercentage() {
return (getCurrentScrollPosition() / getDistanceToScroll()) * 100;
}
</script>

{%- endif %}
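The script expects a `<progress>` element with id `progress` somewhere in the layout (the `#navbar` offset suggests it sits under the fixed navbar). A minimal sketch of that markup, with the fallback structure taken from the credited CSS-Tricks article rather than from this commit:

```html
<!-- gated by `enable_progressbar: true` in _config.yml -->
<progress id="progress" value="0">
  <!-- fallback bar, resized via CSS when <progress> is unsupported -->
  <div class="progress-container">
    <span class="progress-bar"></span>
  </div>
</progress>
```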
55 changes: 42 additions & 13 deletions _layouts/distill.html
@@ -1,15 +1,21 @@
<!DOCTYPE html>
<!-- _layouts/distill.html -->
<html>
<head>
{% include head.html %}
{%- include head.html %}

{% include scripts/jquery.html %}
{% include scripts/mathjax.html %}
<!-- Distill js -->
<script src="{{ '/assets/js/distillpub/template.v2.js' | relative_url }}"></script>
<script src="{{ '/assets/js/distillpub/transforms.v2.js' | relative_url }}"></script>
<script src="{{ '/assets/js/distillpub/overrides.js' | relative_url }}"></script>
{% if page._styles %}
<!-- Page/Post style -->
<style type="text/css">
{{ page._styles }}
</style>
{% endif %}
{%- endif %}
</head>

<d-front-matter>
@@ -18,7 +24,7 @@
"description": "{{ page.description }}",
"published": "{{ page.date | date: '%B %-d, %Y' }}",
"authors": [
{% for author in page.authors %}
{% for author in page.authors -%}
{
"author": "{{ author.name }}",
"authorURL": "{{ author.url }}",
@@ -48,14 +54,12 @@
}</script>
</d-front-matter>

<body class="{% if site.navbar_fixed %}fixed-top-nav{% endif %} {% unless site.footer_fixed %}sticky-bottom-footer{% endunless %}">
<body class="{%- if site.navbar_fixed -%}fixed-top-nav{%- endif -%} {%- unless site.footer_fixed -%}sticky-bottom-footer{%- endunless -%}">

<!-- Header -->

{% include header.html %}
{%- include header.html %}

<!-- Content -->

<div class="post distill">

<d-title>
@@ -66,6 +70,24 @@ <h1>{{ page.title }}</h1>
<d-byline></d-byline>

<d-article>
{% if page.toc -%}
<d-contents>
<nav class="l-text figcaption">
<h3>Contents</h3>
{% for section in page.toc -%}
<div><a href="#{{ section.name | slugify }}">{{ section.name }}</a></div>
{% if section.subsections -%}
<ul>
{% for subsection in section.subsections -%}
<li><a href="#{{ subsection.name | slugify }}">{{ subsection.name }}</a></li>
{% endfor %}
</ul>
{%- endif -%}
{%- endfor %}
</nav>
</d-contents>
{%- endif %}

{{ content }}
</d-article>

@@ -74,15 +96,22 @@ <h1>{{ page.title }}</h1>
<d-citation-list></d-citation-list>
</d-appendix>

<d-bibliography src="{{ page.bibliography | prepend: '/assets/bibliography/' | relative_url }}"></d-bibliography>

{%- if site.disqus_shortname and page.disqus_comments -%}
{% include disqus.html %}
{%- endif %}
{%- if site.giscus.repo and page.giscus_comments -%}
{% include giscus.html %}
{%- endif -%}

</div>

<!-- Footer -->
{%- include footer.html %}

{% include footer.html %}

{% include scripts/bootstrap.html %}
{% include scripts/analytics.html %}
{% include scripts/progressBar.html %}
</body>

<d-bibliography src="{{ page.bibliography | prepend: '/assets/bibliography/' | relative_url }}">
</d-bibliography>

</html>
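The new `d-contents` block is driven by a `toc` list in the post's front matter, one entry per section with optional subsections; a hypothetical distill post using it:

```yaml
---
layout: distill
title: An example post
toc:
  - name: Introduction
  - name: Method
    subsections:
      - name: Architecture
      - name: Results
---
```

Each `name` is slugified into an in-page anchor, so section headings in the body should match these names for the links to resolve.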
11 changes: 11 additions & 0 deletions _pages/members.md
@@ -206,6 +206,17 @@ order: 1
</div>
<div class="col-sm mt-3 mt-md-0">
</div>
<div class="col-sm mt-3 mt-md-1">
<div class="square">
<a href="https://github.com/popcornell" target="_blank" rel="noopener noreferrer">
<img class="img-fluid rounded z-depth-1" src="{{ site.baseurl }}/assets/img/samuele.jpg">
</a></div>
<div class="caption">
Samuele Cornell
</div>
</div>
<div class="col-sm mt-3 mt-md-0">
</div>
</div>
<hr />

25 changes: 22 additions & 3 deletions _pages/sphinx-lunch.md
@@ -25,15 +25,34 @@ A tentative schedule can be found [here](https://docs.google.com/spreadsheets/d/

## Future Talks (tentative schedule)

- October 5, 2023
- November 9, 2023
- Title: Universal Speech Enhancement: What Can We Do With Real Data?
- Speaker: Wangyou Zhang
  - Abstract: Speech enhancement (SE) methods based on deep learning have shown impressive performance under many simulated conditions (TIMIT/WSJ/Librispeech/... + noise), whereas generalization to a wider range of real conditions has not been addressed. In fact, many high-performing SE methods tend to overfit the simulated training conditions, whose inductive bias may be easily violated in real conditions. In the era of large-scale pre-training, it is natural to ask whether we can make use of large-scale real recording data to train a truly universal SE model that can be used for all speech-as-input tasks in real-world conditions. In this talk, I try to answer the following two questions by summarizing existing work in these directions: 1) what can we do to utilize real data for SE training? 2) what models can be used to achieve universal SE? Finally, I will close the talk by proposing new problems in related topics.

- November 16, 2023
- Title: TBD
- Speaker: Grant Strimel (Amazon)
- Speaker: Zhong-Qiu Wang
- Abstract: TBD

## Previous Talks

- November 2, 2023
- Title: Music generation with precise control
- Speakers: Chris Donahue and Shih-Lun Wu
  - Abstract: In the first half of the session, Chris will discuss some recent work on generating music with precise control and composable outputs. Music audio generation has seen an explosion of activity: we now have the ability to generate music in broad styles with natural language control. However, despite the impressive breadth of these models, they have not yet had a salient impact on music in the real world. Instead, music AI models with more narrow capabilities have had disproportionate impact (e.g., source separation, voice cloning). In this talk, Chris will argue that current narrow models are more appealing to creators because they offer more creative potential for two reasons: (i) they offer precise and familiar forms of control, and (ii) their outputs are composable and integrate with conventional workflows. Chris will discuss two of his recent papers, SingSong (Donahue+ 23) and the Anticipatory Music Transformer (Thickstun+ 23), which seek to bring more creative potential to broadly capable music generative models. In the second half of the session, Shih-Lun will introduce his recent work, Music ControlNet (Wu+ 23, unpublished), which imbues diffusion-based text-to-music generation models with precise melody, dynamics, and rhythm controls. Music ControlNet builds upon the ControlNet line of research in image generation and adapts its framework to accept time-varying controls in the audio domain. Shih-Lun will demonstrate that Music ControlNet can respond precisely to any composition of the controls it has been trained on, and can also generalize to out-of-distribution control signals that creators may realistically provide.

- October 12, 2023
  - Title: Computational Audition through Imprecise Labels
- Speaker: Ankit Shah
  - Abstract: In this talk, we delve into computational auditory processing to mimic how humans and animals interpret sounds to interact with their surroundings effectively. The journey begins with the machine's challenge of recognizing a vast array of sounds while limited to the known sounds in our datasets. This limitation becomes glaring as current models require large labeled datasets for accuracy, which often isn't feasible in real-world settings due to data scarcity. We then spotlight a core issue: the strength of sound labels within available datasets. The quandary is that even with a fraction of known sounds and limited data, inaccuracies in sound labeling lead to suboptimal models. Our focus shifts to devising strategies for sound modeling amidst inaccurate, weak, or incomplete labels, termed working with imprecisely labeled data. Our exploration includes enhancing existing annotations, understanding the effects of label noise and corruption, and innovating a co-training approach for learning sound events from web data without human intervention. We venture into exploiting additional cues like event counts and durations with negligible extra effort, introducing the concept of semi-weak labels. Lastly, the talk describes a unified framework encapsulating all our approaches, yielding a robust model capable of handling various labeling scenarios and paving a solid foundation for future endeavors in understanding and modeling the world of images (transferable to sounds), irrespective of label availability. Through this, we aspire to bridge the gap between the human brain's natural sound-processing ability and machines, opening doors to a more harmonious interaction with the acoustic world around us.
  - Bio: Ankit Shah is a Ph.D. student in the Language Technologies Institute in the School of Computer Science at Carnegie Mellon University. Ankit earned his master's in Language Technologies at Carnegie Mellon University in 2019 and his bachelor's in electronics and communication engineering from the National Institute of Technology Karnataka, Surathkal. He worked in industry for over four years, as a verification engineer and project lead at ARM and as a deep learning research scientist at ReviveMed, before joining the Ph.D. program. His areas of interest are audio understanding, machine learning, and deep learning. His thesis focuses on learning in the presence of weak, uncertain, and incomplete labels, where he has made several key contributions, including setting up DCASE challenges on the topic. He won the Gandhian Young Technological Innovator (GYTI) award in India for his contribution to building a never-ending learner of sound systems. His team recently won the NYC AI Hackathon challenge on LLMs (large language models) and generative AI. He enjoys reading, listening to music, and traveling, and is keenly interested in economics, startups, and entrepreneurship. Website: https://ankitshah009.github.io

- October 5, 2023
- Title: Adaptive Non-Causality for Speech Recognition
- Speaker: Grant Strimel (Amazon)
  - Abstract: Streaming speech recognition architectures are employed for low-latency, real-time applications. Such architectures are often characterized by their causality – how much forward context is consumed before making a prediction on an individual frame. In this talk we will review prior approaches to balancing the competing objectives of low latency and the accuracy benefit derived from “look ahead” information. We will then discuss an approach we proposed called the Adaptive Non-Causal Attention Transducer (ANCAT). The architecture is non-causal in the traditional sense, but executes in a low-latency, streaming manner by dynamically choosing when to rely on future context and to what degree within the audio stream. The resulting mechanism, when coupled with novel regularization algorithms (which we will dive into), delivers comparable accuracy to non-causal configurations while improving significantly upon latency, closing the gap with fully-causal model counterparts.
  - Bio: Grant Strimel is a Principal Scientist at Amazon AGI and part of the Alexa Speech Recognition and Deep Learning groups. He joined Alexa Pittsburgh in 2018, and the organization has since grown to over fifty scientists and engineers working on natural language processing experiences through both edge-first and cloud-centric solutions. His primary focus at Amazon has been on low-latency, real-time ML design for speech applications.

- September 28, 2023
- Title: Towards robust speech generation
24 changes: 24 additions & 0 deletions _plugins/details.rb
@@ -0,0 +1,24 @@
# Code from http://movb.de/jekyll-details-support.html

module Jekyll
module Tags
class DetailsTag < Liquid::Block

def initialize(tag_name, markup, tokens)
super
@caption = markup
end

def render(context)
site = context.registers[:site]
converter = site.find_converter_instance(::Jekyll::Converters::Markdown)
caption = converter.convert(@caption).gsub(/<\/?p[^>]*>/, '').chomp
body = converter.convert(super(context))
"<details><summary>#{caption}</summary>#{body}</details>"
end

end
end
end

Liquid::Template.register_tag('details', Jekyll::Tags::DetailsTag)
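Once registered, the tag should be usable as a Liquid block from any Markdown page; a hypothetical example (the text after the tag name becomes the clickable summary, and both summary and body are run through the Markdown converter):

```liquid
{% details Click to expand the details %}
The hidden body, rendered as **Markdown** inside the `<details>` element.
{% enddetails %}
```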
2 changes: 1 addition & 1 deletion _posts/2022-01-16-11692-2023s.md
@@ -33,7 +33,7 @@ Description here

- We will use [gradescope](https://www.gradescope.com/courses/496788)

### Syllable
### Syllabus
- This is a tentative schedule.
- The slides will be uploaded right before the lecture (in piazza).
- The videos will be uploaded irregularly after the lecture due to the editing process (in piazza).
2 changes: 1 addition & 1 deletion _posts/2022-08-29-11751-2022f.md
@@ -34,7 +34,7 @@ Description here

- We will use [gradescope](https://www.gradescope.com/courses/412024)

### Syllable
### Syllabus
- This is a tentative schedule.
- The slides will be uploaded right before the lecture.
- The videos will be uploaded irregularly after the lecture due to the editing process.
2 changes: 1 addition & 1 deletion _posts/2023-08-27-11751-2023f.md
@@ -34,7 +34,7 @@ Description here

- We will use [gradescope](https://www.gradescope.com/courses/564396)

### Syllable
### Syllabus
- This is a tentative schedule.
- The slides will be uploaded right before the lecture.
- The videos will be uploaded irregularly after the lecture due to the editing process.