Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data collection carousel #75

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions frontend/components/AI4BContainer.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"use client";

import PropTypes from "prop-types";
import { ReactNode } from "react";
import styles from "../src/app/AI4BContainer.module.css"; // Import custom CSS for responsive container
// Removed incorrect import

/** Props accepted by {@link AI4BContainer}. */
interface AI4BContainerProps {
  /** Content rendered inside the container's wrapper `<div>`. */
  children: ReactNode;
  /**
   * Optional title.
   * NOTE(review): declared here but not destructured or rendered by
   * `AI4BContainer` — confirm whether it is still needed.
   */
  title?: string;
  /** Value passed to the wrapper's inline `padding` style (component default: 6). */
  p?: number;
  /** Value passed to the wrapper's inline `backgroundColor` (component default: "#fff"). */
  bgColor?: string;
  /** Value passed to the wrapper's inline `color` (component default: "#000"). */
  color?: string;
}

/**
 * Page-level wrapper that renders its children inside a `<div>` styled by the
 * `container` class of the imported CSS module, with padding, background
 * colour and text colour supplied as inline styles.
 *
 * @param props - See {@link AI4BContainerProps}; `p`, `bgColor` and `color`
 *   fall back to 6, "#fff" and "#000" when omitted.
 */
export default function AI4BContainer(props: AI4BContainerProps) {
  const { children, p = 6, bgColor = "#fff", color = "#000" } = props;

  // Inline overrides layered on top of the CSS-module class.
  const inlineStyle = {
    padding: p,
    backgroundColor: bgColor,
    color,
  };

  return (
    <div className={styles.container} style={inlineStyle}>
      {children}
    </div>
  );
}

// Runtime prop validation mirroring the optional/required split of
// AI4BContainerProps (`title` is not validated here).
AI4BContainer.propTypes = {
  // Required content.
  children: PropTypes.node.isRequired,

  // Optional presentation overrides.
  p: PropTypes.number,
  bgColor: PropTypes.string,
  color: PropTypes.string,
};
130 changes: 130 additions & 0 deletions frontend/components/DataCollectionCarousel.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
"use client";

import React, { useEffect, useState } from "react";
import dynamic from "next/dynamic";
import "owl.carousel/dist/assets/owl.carousel.css";
import "owl.carousel/dist/assets/owl.theme.default.css";
import $ from "jquery";
import { Box, Text, Heading, Stack, Flex, Image } from "@chakra-ui/react";
import { domainToASCII } from "url";
import { imagePrefix } from "@/app/config";

// Load react-owl-carousel lazily through next/dynamic with server-side
// rendering disabled, so the carousel module is only used in the browser.
const OwlCarousel = dynamic(() => import("react-owl-carousel"), { ssr: false });

/** Props for a single carousel slide. */
interface CardProps {
  /** Image file name, resolved under `${imagePrefix}/assets/data-collection/`. */
  image: string;
  /** Caption shown beneath the image; also used to build the image's alt text. */
  text: string;
}

/**
 * One carousel slide: a photo with a centred caption underneath.
 *
 * Declared at module scope rather than inside `DataCollectionCarousel` so the
 * component identity is stable across renders (a component defined inside
 * another component is a new type on every render and forces a remount).
 */
const Card = ({ image, text }: CardProps) => (
  <Box>
    <Image
      boxSize={80}
      objectFit="cover"
      src={`${imagePrefix}/assets/data-collection/${image}`}
      // Interpolate the caption: a quoted JSX attribute is a plain string, so
      // the previous "{lang} - {location} Image" was emitted literally.
      alt={`${text} Image`}
    />
    <Text align={"center"} fontSize={"sm"}>
      {text}
    </Text>
  </Box>
);

/** Options spread onto `<OwlCarousel>`; static, so built once per module. */
const CAROUSEL_OPTIONS = {
  loop: true,
  center: true,
  margin: 2,
  autoplay: true,
  dots: true,
  autoplayTimeout: 3000,
  smartSpeed: 800,
  items: 1,
};

/** Slides shown by the carousel: language, collection location and image file. */
const DATA_COLLECTION_SLIDES = [
  {
    lang: "Bengali",
    location: "Birbhum, West Bengal",
    image: "bengali-birbhum-west-bengal.jpg",
  },
  {
    lang: "Bodo",
    location: "Kokrajhar, Assam",
    image: "bodo-kokrajhar-assam.jpg",
  },
  { lang: "Dogri", location: "Jammu", image: "dogri-jammu.jpg" },
  { lang: "Kashmiri", location: "Srinagar", image: "kashmiri-srinagar.jpg" },
  {
    lang: "Manipuri",
    location: "Imphal, Manipur",
    image: "manipuri-imphal-manipur.jpg",
  },
  {
    lang: "Nepali",
    location: "Kalimpong, West Bengal",
    image: "nepali-kalimpong-west-bengal.jpg",
  },
  {
    lang: "Odia",
    location: "Sambalpur, Odisha",
    image: "odia-sambalpur-odisha.jpg",
  },
  {
    lang: "Santali",
    location: "Bolpur, West Bengal",
    image: "santali-bolpur-west-bengal.jpg",
  },
  {
    lang: "Sindhi",
    location: "Thane, Maharashtra",
    image: "sindhi-thane-maharashtra.jpg",
  },
  {
    lang: "Tamil",
    location: "Madurai, Tamil Nadu",
    image: "tamil-madurai-tamil-nadu.jpg",
  },
  {
    lang: "Konkani",
    location: "Tiswadi Taluka, Goa",
    // NOTE(review): file name spells "tiswari" while the caption says
    // "Tiswadi" — left as-is because it must match the asset on disk; confirm.
    image: "konkani-tiswari-taluka-goa.jpg",
  },
];

/**
 * Auto-playing, looping carousel of data-collection photos, one slide per
 * language/location pair.
 *
 * Renders nothing until the component has mounted; only then is the
 * dynamically imported `OwlCarousel` rendered.
 */
const DataCollectionCarousel = () => {
  const [isMounted, setIsMounted] = useState(false);

  useEffect(() => {
    // Publish jQuery on `window` before the carousel is rendered — presumably
    // because the Owl Carousel plugin looks it up there (confirm).
    if (typeof window !== "undefined") {
      window.$ = window.jQuery = $;
    }
    setIsMounted(true);
  }, []);

  if (!isMounted) {
    return null;
  }

  return (
    <OwlCarousel className="owl-carousel owl-theme" {...CAROUSEL_OPTIONS}>
      {DATA_COLLECTION_SLIDES.map((slide) => (
        <Card
          // The image file name is unique per slide, unlike a positional index.
          key={slide.image}
          image={slide.image}
          text={`${slide.lang} - ${slide.location}`}
        />
      ))}
    </OwlCarousel>
  );
};

export default DataCollectionCarousel;
85 changes: 30 additions & 55 deletions frontend/components/Datasets.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ import {
useBreakpointValue,
Wrap,
Divider,
Grid,
} from "@chakra-ui/react";
import Image from "next/image";
import axios from "axios";
import { useQuery } from "react-query";
import { API_URL } from "@/app/config";
import { imagePrefix } from "@/app/config";
import DataCollectionCarousel from "./DataCollectionCarousel";
import {
FaFileAudio,
FaFileAlt,
Expand All @@ -34,6 +36,7 @@ import {
FaLanguage,
FaKeyboard,
} from "react-icons/fa";
import AI4BContainer from "./AI4BContainer";

const datasetIcons: { [key: string]: React.ReactElement } = {
llm: <FaFileAlt color="#ff6600" size={50} />,
Expand Down Expand Up @@ -99,18 +102,19 @@ export default function Datasets() {
}, [error, data, isLoading]);

return (
<Container maxW={"7xl"}>
<Stack
align={"center"}
spacing={{ base: 8, md: 10 }}
p={10}
direction={{ base: "column", md: "row" }}
>
<Stack flex={1} spacing={{ base: 5, md: 10 }}>
<AI4BContainer>
<DataCollectionCarousel />


<Flex direction={{ base: "column", md: "row" }} gap={6}>


<Box flex={1}>
<Heading
lineHeight={1.1}
fontWeight={600}
fontSize={{ base: "3xl", sm: "4xl", lg: "6xl" }}
py={5}
>
<Text as={"span"} color={"a4borange"} position={"relative"}>
Pioneering
Expand All @@ -120,53 +124,20 @@ export default function Datasets() {
Data Collection!
</Text>
</Heading>
<Flex
flex={2}
justify={"center"}
align={"center"}
position={"relative"}
w={"full"}
>
<Box
position={"relative"}
rounded={"2xl"}
boxShadow={"2xl"}
width={"full"}
overflow={"hidden"}
>
<ChakraImage
alt={"Hero Image"}
src={`${imagePrefix}/assets/data-collection.png`}
/>
</Box>
</Flex>
<Text>
Early on in our journey, we recognized that advancing Indian
technology necessitates large-scale datasets. Thus, building and
collecting extensive datasets across multiple verticals has become a
critical endeavor at AI4Bharat. Thanks to generous grants from
MeitY, we are spearheading pioneering efforts in data collection as
part of the Data Management Unit of Bhashini. Our nationwide
initiative aims to gather 15,000 hours of transcribed data from over
400 districts, encompassing all 22 scheduled languages of India. In
parallel, our in-house team of over 100 translators is diligently
creating a parallel corpus with 2.2 million translation pairs across
22 languages. To produce studio-quality data for expressive TTS
systems, we have established recording studios in our lab, where
professional voice artists contribute their expertise. Additionally,
our annotators are meticulously labeling pages for Document Layout
Parsing, accommodating the diverse scripts of India. To accelerate
the development of Indic Large Language Models (LLMs), we are
focused on building pipelines for curating and synthetically
generating pre-training data, collecting contextually grounded
prompts, and creating evaluation datasets that reflect India’s rich
linguistic tapestry. Collecting and annotating data at this scale
demands standardization of processes and tools. To meet this
challenge, AI4Bharat has invested in developing various open-source
data collection and annotation tools, aiming to enhance these
efforts not only within India but also in multilingual regions
across the globe.

<Text color={"black"}>
Early on in our journey, we recognized that advancing Indian technology necessitates large-scale datasets. Thus, building and collecting extensive datasets across multiple verticals has become a critical endeavor at AI4Bharat. Thanks to generous grants from MeitY, we are spearheading pioneering efforts in data collection as part of the Data Management Unit of Bhashini. Our nationwide initiative aims to gather 15,000 hours of transcribed data from over 400 districts, encompassing all 22 scheduled languages of India. In parallel, our in-house team of over 100 translators is diligently creating a parallel corpus with 2.2 million translation pairs across 22 languages. To produce studio-quality data for expressive TTS systems, we have established recording studios in our lab, where professional voice artists contribute their expertise. Additionally, our annotators are meticulously labeling pages for Document Layout Parsing, accommodating the diverse scripts of India. To accelerate the development of Indic Large Language Models (LLMs), we are focused on building pipelines for curating and synthetically generating pre-training data, collecting contextually grounded prompts, and creating evaluation datasets that reflect India’s rich linguistic tapestry. Collecting and annotating data at this scale demands standardization of processes and tools. To meet this challenge, AI4Bharat has invested in developing various open-source data collection and annotation tools, aiming to enhance these efforts not only within India but also in multilingual regions across the globe.
</Text>
</Box>
</Flex>

<Stack
align={"center"}
spacing={{ base: 8, md: 10 }}
p={10}
direction={{ base: "column", md: "row" }}
>
<Stack flex={1} spacing={{ base: 5, md: 10 }}>
<HStack p={5}>
<HStack>
<FaMicrophone color="#ff6600" size={25} />
Expand Down Expand Up @@ -230,6 +201,10 @@ export default function Datasets() {
)}
</Stack>
</Stack>
</Container>
</AI4BContainer>
);
}

// TODO: Data Collection Carousel

// TODO: Redesign datasets with tabs for each area
8 changes: 3 additions & 5 deletions frontend/components/Dynamic/Article.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,9 @@ export default function ArticleComponent({ slug }: { slug: string }) {
/>
<Box m={3} borderWidth={2} borderRadius={15} p={7}>
{article.markdown_content !== "" ? (
<ReactMarkdown
components={ChakraUIRenderer(newTheme)}
children={article.markdown_content}
skipHtml
/>
<ReactMarkdown components={ChakraUIRenderer(newTheme)} skipHtml>
{article.markdown_content}
</ReactMarkdown>
) : (
<></>
)}
Expand Down
Loading
Loading