-
-
Notifications
You must be signed in to change notification settings - Fork 144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added logic to fetch README files, documentation, commit messages, an… #2919
base: main
Are you sure you want to change the base?
Changes from 42 commits
34add4f
71e2baf
d7da328
8e1f7db
d147d9f
fb4c6c8
80c1cbe
ba7adcd
fa6ff9c
fa8501d
0c56498
8d5b239
56b7f62
ff445d0
968e7da
d7b7c09
2028e01
94a03b9
95bbb84
07f8863
58642e8
c2b67e6
e21c0ef
2d0dc50
1cb10c4
5188373
5e5f5ec
3cefcf4
35a8af6
a3ac55b
927c761
576de5e
105bca5
aeb3c29
e67ec81
0d93000
0c4e813
405de94
8487384
108f592
7359570
9fb42b0
431c60b
8400c87
21e71bd
54a2f4f
03f05f1
e997511
465e344
6626ab7
c79a0e4
d2af8c5
64dcd28
28dbda2
a55fd71
26c0e80
a6a569c
481363c
8b494da
ab3e879
a8fb7fd
0211ea6
9b191e9
4ae6485
8eeb391
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import os | ||
|
||
import openai | ||
|
||
openai.api_key = os.getenv("OPENAI_API_KEY") | ||
|
||
|
||
def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if it has one.

    Text that does not start with a fence (after trimming whitespace) and
    non-string values are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    stripped = text.strip()
    if not stripped.startswith("```"):
        return text
    # Drop the opening fence line, which may carry a language tag such as "json".
    body_lines = stripped.splitlines()[1:]
    if body_lines and body_lines[-1].strip() == "```":
        body_lines = body_lines[:-1]
    return "\n".join(body_lines).strip()


def generate_labels(readme_content, github_topics):
    """Ask the OpenAI chat model for categorized labels describing a project.

    Args:
        readme_content: README text interpolated into the prompt.
        github_topics: GitHub topics interpolated into the prompt.

    Returns:
        The content of the first choice in the chat response, with any
        surrounding Markdown code fence removed. The prompt asks for a JSON
        object with the keys "tech-stack", "project-type", "owasp-relevance"
        and "other", but the reply is returned as text and is not validated
        here.
    """
    prompt = f"""
    You are an AI that assigns relevant labels to GitHub projects based on their readme content and github topics.

    ### Input:
    - **README Content:** {readme_content}
    - **GitHub Topics:** {github_topics}

    ### Task:
    Analyze the input and assign appropriate labels.
    Labels should include:
    1. **Technology Stacks** (e.g., Python, JavaScript, Java).
    2. **Project Type** (e.g., Web Application, CLI Tool, Library).
    3. **OWASP Relevance** (e.g., Security Testing, Secure Coding).
    4. Any other relevant labels.

    ### Output:
    Provide the labels in JSON format like this:
    {{
        "tech-stack": ["Python", "JavaScript"],
        "project-type": ["Web Application"],
        "owasp-relevance": ["Secure Coding"],
        "other": ["Machine Learning", "Data Processing"]
    }}
    """

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant for labeling projects."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.7,
    )

    content = response["choices"][0]["message"]["content"]
    # Chat models often wrap JSON answers in a ```json ... ``` fence; a caller
    # that feeds the result to json.loads would fail on the fence markers.
    return _strip_code_fence(content)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,15 @@ | ||
import base64 | ||
import json | ||
|
||
import requests | ||
from django.conf import settings | ||
from django.core.management.base import BaseCommand | ||
from django.utils.dateparse import parse_datetime | ||
|
||
from website.label_generation import generate_labels | ||
from website.models import Project | ||
from website.summarization import ai_summary | ||
from website.utils import markdown_to_text | ||
|
||
|
||
class Command(BaseCommand): | ||
|
@@ -50,6 +56,52 @@ def handle(self, *args, **kwargs): | |
project.updated_at = parse_datetime(repo_data.get("updated_at")) | ||
project.size = repo_data.get("size", 0) | ||
project.last_commit_date = parse_datetime(repo_data.get("pushed_at")) | ||
project.topics = repo_data.get("topics", []) | ||
|
||
# Fetch README | ||
url = f"https://api.github.com/repos/{repo_name}/readme" | ||
DonnieBLT marked this conversation as resolved.
Show resolved
Hide resolved
|
||
response = requests.get(url, headers=headers) | ||
if response.status_code == 200: | ||
readme_data = response.json() | ||
readme_content_encoded = readme_data.get("content", "") | ||
|
||
# Decode the Base64 content | ||
try: | ||
readme_content = base64.b64decode(readme_content_encoded).decode("utf-8") | ||
project.readme_content = readme_content | ||
readme_text = markdown_to_text(readme_content) | ||
project.ai_summary = ai_summary(readme_text, project.topics) | ||
project.ai_labels = json.loads(generate_labels(readme_text, project.topics)) | ||
Review comment: Should we just add the labels verbatim from the topics? Reply: I found the AI-generated labels to be more accurate and effective, but we can certainly use the topics directly. I'll modify it.
DonnieBLT marked this conversation as resolved.
Show resolved
Hide resolved
|
||
except (base64.binascii.Error, UnicodeDecodeError) as e: | ||
self.stdout.write( | ||
self.style.WARNING(f"Failed to decode README for {repo_name}: {e}") | ||
) | ||
project.readme_content = "" | ||
project.ai_summary = "" | ||
project.ai_labels = {} | ||
else: | ||
self.stdout.write( | ||
self.style.WARNING( | ||
f"Failed to fetch README for {repo_name}: {response.status_code}" | ||
) | ||
) | ||
|
||
# Check for Documentation URL (homepage) | ||
project.documentation_url = repo_data.get("homepage") | ||
Review comment: I believe we already have a homepage field. |
||
|
||
# Fetch Recent Commit Messages | ||
url = f"https://api.github.com/repos/{repo_name}/commits" | ||
response = requests.get(url, headers=headers) | ||
if response.status_code == 200: | ||
commits_data = response.json() | ||
commit_messages = [commit["commit"]["message"] for commit in commits_data[:5]] | ||
project.recent_commit_messages = "\n".join(commit_messages) | ||
else: | ||
self.stdout.write( | ||
self.style.WARNING( | ||
f"Failed to fetch recent commits for {repo_name}: {response.status_code}" | ||
) | ||
) | ||
|
||
# Fetch counts of issues and pull requests using the Search API | ||
def get_issue_count(repo_name, query, headers): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Generated by Django 5.1.3 on 2024-11-19 08:40 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add README, documentation, commit-message and AI metadata fields to Project."""

    dependencies = [("website", "0153_delete_contributorstats")]

    # Every operation is an AddField on the "project" model, so the list is
    # built from (field name, field definition) pairs in the original order.
    operations = [
        migrations.AddField(model_name="project", name=field_name, field=field_definition)
        for field_name, field_definition in (
            ("ai_labels", models.CharField(blank=True, default="", max_length=255)),
            ("ai_summary", models.TextField(blank=True, null=True)),
            ("documentation_url", models.URLField(blank=True, null=True)),
            ("issue_tracker_url", models.URLField(blank=True, null=True)),
            ("readme_content", models.TextField(blank=True, null=True)),
            ("recent_commit_messages", models.TextField(blank=True, null=True)),
        )
    ]
Review comment: Can these migrations be merged? |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Generated by Django 5.1.3 on 2024-11-20 09:34 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add Project.topics and make Project.ai_labels a JSON field."""

    dependencies = [
        ("website", "0154_project_ai_labels_project_ai_summary_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="project",
            name="topics",
            field=models.JSONField(blank=True, null=True),
        ),
        # ai_labels was introduced by the previous migration as a CharField
        # with default "", so existing rows hold an empty string. Altering the
        # column in place makes PostgreSQL cast each stored value to the JSON
        # type, and "" is not valid JSON, so the column is dropped and
        # recreated instead. The discarded values are only those placeholder
        # strings.
        migrations.RemoveField(
            model_name="project",
            name="ai_labels",
        ),
        migrations.AddField(
            model_name="project",
            name="ai_labels",
            field=models.JSONField(blank=True, null=True),
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -754,6 +754,13 @@ class Project(models.Model): | |
closed_issues = models.IntegerField(default=0) | ||
size = models.IntegerField(default=0) | ||
commit_count = models.IntegerField(default=0) | ||
readme_content = models.TextField(null=True, blank=True) | ||
documentation_url = models.URLField(null=True, blank=True) | ||
Review comment: Use homepage_url. Reply: Done. |
||
recent_commit_messages = models.TextField(null=True, blank=True) | ||
issue_tracker_url = models.URLField(null=True, blank=True) | ||
Review comment: We don't need this because it's always /issues. |
||
topics = models.JSONField(null=True, blank=True) | ||
ai_summary = models.TextField(null=True, blank=True) | ||
ai_labels = models.JSONField(null=True, blank=True) | ||
|
||
def __str__(self): | ||
return self.name | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import os | ||
|
||
import openai | ||
|
||
openai.api_key = os.getenv("OPENAI_API_KEY") | ||
|
||
|
||
def ai_summary(text, topics=None):
    """Generate an AI-driven summary using OpenAI's GPT, including GitHub topics.

    Args:
        text: The text to summarize; it is appended to the prompt verbatim.
        topics: Optional iterable of GitHub topics added to the prompt as
            context. When empty or None, the prompt says no topics were given.

    Returns:
        The stripped text of the first completion choice, or a string starting
        with "Error generating summary: " if anything in the request fails.
        This function does not raise.
    """
    # NOTE(review): a reviewer asked for this function to be moved to utils.py.
    try:
        # Coerce each topic to str so a non-string entry cannot break the join.
        topics_str = ", ".join(str(topic) for topic in topics) if topics else "No topics provided."
        prompt = (
            "Generate a brief summary of the following text, focusing on key aspects "
            "such as purpose, features, technologies used, and current status. "
            "Consider the following GitHub topics to enhance the context: "
            f"{topics_str}\n\n{text}"
        )
        response = openai.Completion.create(
            # text-davinci-003 has been retired by OpenAI; gpt-3.5-turbo-instruct
            # is its documented replacement on the same Completions endpoint.
            model="gpt-3.5-turbo-instruct",
            prompt=prompt,
            max_tokens=150,
            temperature=0.5,
        )
        return response.choices[0].text.strip()
    except Exception as e:
        # Best-effort by design: callers receive the failure as text rather
        # than an exception, so one failed request does not abort their work.
        return f"Error generating summary: {str(e)}"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
{% load humanize %} | ||
<ul class="project-list"> | ||
{% for project in projects %} | ||
<a href="{% url 'project_view' project.slug %}" class="project-link"> | ||
<li class="project-item"> | ||
{% if project.logo_url %} | ||
<img src="{{ project.logo_url }}" | ||
alt="{{ project.name }} logo" | ||
class="project-logo" | ||
height="100" | ||
width="100"> | ||
{% else %} | ||
<div class="project-logo placeholder"> | ||
<span>No Logo</span> | ||
</div> | ||
{% endif %} | ||
<div class="project-details"> | ||
<h3>{{ project.name }}</h3> | ||
<!-- Stats Icons Under Project Title --> | ||
<div class="stats-icons"> | ||
<ul> | ||
<li> | ||
<a href="{{ project.github_url }}" target="_blank" title="GitHub"> | ||
<i class="fab fa-github"></i> | ||
</a> | ||
</li> | ||
{% if project.wiki_url %} | ||
<li> | ||
<a href="{{ project.wiki_url }}" target="_blank" title="Wiki"> | ||
<i class="fas fa-book"></i> | ||
</a> | ||
</li> | ||
{% endif %} | ||
{% if project.homepage_url %} | ||
<li> | ||
<a href="{{ project.homepage_url }}" target="_blank" title="Homepage"> | ||
<i class="fas fa-home"></i> | ||
</a> | ||
</li> | ||
{% endif %} | ||
<li> | ||
<a href="{{ project.github_url }}/stargazers" | ||
target="_blank" | ||
title="Stars"> | ||
<i class="fas fa-star"></i> {{ project.stars|intcomma }} | ||
</a> | ||
</li> | ||
<li> | ||
<a href="{{ project.github_url }}/network/members" | ||
target="_blank" | ||
title="Forks"> | ||
<i class="fas fa-code-branch"></i> {{ project.forks|intcomma }} | ||
</a> | ||
</li> | ||
<li> | ||
<a href="{{ project.github_url }}/issues" | ||
target="_blank" | ||
title="Total Issues"> | ||
<i class="fas fa-exclamation-circle"></i> {{ project.total_issues|intcomma }} | ||
</a> | ||
</li> | ||
{% if project.release_name %} | ||
<li> | ||
<i class="fas fa-tag"></i> | ||
<span title="{{ project.release_datetime|date:'Y-m-d H:i' }}">{{ project.release_name }}</span> | ||
</li> | ||
{% endif %} | ||
<li> | ||
<i class="fas fa-clock"></i> | ||
<span title="{{ project.last_updated|date:'Y-m-d H:i' }}">{{ project.last_updated|timesince }}</span> | ||
</li> | ||
</ul> | ||
</div> | ||
<p>{{ project.description }}</p> | ||
<p class="black-text">{{ project.ai_summary }}</p> | ||
<div class="label-group"> | ||
{% for category, labels in project.ai_labels.items %} | ||
{% for label in labels %}<span class="label {{ category }}">{{ label }}</span>{% endfor %} | ||
{% endfor %} | ||
</div> | ||
<!-- Contributors Section --> | ||
<div class="project-stats"> | ||
<p> | ||
<i class="fas fa-users"></i> Contributors: {{ project.contributor_count }} | ||
</p> | ||
<div class="contributors"> | ||
{% for contributor in project.get_top_contributors %} | ||
<a href="https://github.com/search?q=repo:{{ project.github_url|slice:'19:' }}+author:{{ contributor.name }}&type=issues" | ||
target="_blank"> | ||
<img src="{{ contributor.avatar_url }}" | ||
alt="{{ contributor.name }}" | ||
class="contributor-avatar" | ||
title="{{ contributor.name }}" | ||
height="40" | ||
width="40"> | ||
</a> | ||
{% empty %} | ||
<div class="contributor-avatar placeholder"> | ||
<span>No Avatar</span> | ||
</div> | ||
{% endfor %} | ||
</div> | ||
</div> | ||
<div class="additional-metadata"> | ||
<div class="external-links"> | ||
<h4> | ||
<i class="fas fa-external-link-alt"></i> Links | ||
</h4> | ||
<ul> | ||
{% for link in project.external_links %} | ||
<li> | ||
<a href="{{ link.url }}" target="_blank"><i class="fas fa-link"></i> {{ link.name }}</a> | ||
</li> | ||
{% endfor %} | ||
</ul> | ||
</div> | ||
</div> | ||
<!-- Tags Section --> | ||
<div class="project-tags"> | ||
<h4> | ||
<i class="fas fa-tags"></i>Tags | ||
</h4> | ||
<ul> | ||
{% for tag in project.tags.all %}<li>{{ tag.name }}</li>{% endfor %} | ||
</ul> | ||
</div> | ||
</div> | ||
</li> | ||
</a> | ||
{% endfor %} | ||
</ul> |
Review comment: Can you move this to utils.py too, please?