SEO_file_update_as_in_production

OpenPecha · Jul 16, 2024 · 5d1772f · 5d1772f
1 parent f9fc8ad
commit 5d1772f
Show file tree

Hide file tree

Showing 5 changed files with 205 additions and 82 deletions.
diff --git a/reader/views.py b/reader/views.py
@@ -20,7 +20,7 @@
 from rest_framework.permissions import IsAuthenticated
 from django.template.loader import render_to_string
 from django.shortcuts import render, redirect
-from django.http import Http404, QueryDict
+from django.http import Http404, QueryDict, HttpResponse
 from django.contrib.auth.decorators import login_required
 from django.contrib.admin.views.decorators import staff_member_required
 from django.utils.encoding import iri_to_uri
@@ -120,6 +120,27 @@
     server_coordinator.connect()
 #    #    #
 
+def sitemap(request):
+    """Serve the sitemap.xml found in the parent of this module's directory; plain-text 404 if absent."""
+    filepath = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'sitemap.xml')
+    try:
+        with open(filepath, 'r', encoding='utf-8') as f:  # explicit codec: do not depend on the server locale
+            sitemap_content = f.read()
+        return HttpResponse(sitemap_content, content_type='application/xml')
+    except FileNotFoundError:
+        return HttpResponse("Sitemap not found", status=404, content_type='text/plain')
+
+
+def robot(request):
+    """Serve the robots.txt found in the parent of this module's directory; plain-text 404 if absent."""
+    filepath = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'robots.txt')
+    try:
+        with open(filepath, 'r', encoding='utf-8') as f:  # explicit codec: do not depend on the server locale
+            robots_content = f.read()
+        return HttpResponse(robots_content, content_type='text/plain')
+    except FileNotFoundError:
+        return HttpResponse("robots.txt not found", status=404, content_type='text/plain')
+
 
 def render_template(request, template_name='base.html', app_props=None, template_context=None, content_type=None, status=None, using=None):
     """
@@ -1029,10 +1050,9 @@ def _get_user_calendar_params(request):
 
 
 def texts_list(request):
-    title = _("Sefaria: a Living Library of Jewish Texts Online")
-    desc  = _("The largest free library of Jewish texts available to read online in Hebrew and English including Torah, Tanakh, Talmud, Mishnah, Midrash, commentaries and more.")
-    props = get_user_history_props(request)
-    return menu_page(request, page="navigation", title=title, desc=desc, props=props)
+    title = _("Pecha - Buddhism in your own words")
+    desc  = _("The largest free library of Buddhist texts available to read online in Tibetan, English and Chinese")
+    return menu_page(request, page="navigation", title=title, desc=desc)
 
 
 def calendars(request):
@@ -1050,16 +1070,13 @@ def saved(request):
     return menu_page(request, props, page="saved", title=title, desc=desc)
 
 
-def get_user_history_props(request):
+def user_history(request):
     if request.user.is_authenticated:
         profile = UserProfile(user_obj=request.user)
         uhistory =  profile.get_history(secondary=False, serialized=True, annotate=True, limit=20) if profile.settings.get("reading_history", True) else []
     else:
         uhistory = _get_anonymous_user_history(request)
-    return {"userHistory": {"loaded": True, "items": uhistory}}
-
-def user_history(request):
-    props = get_user_history_props(request)
+    props = {"userHistory": {"loaded": True, "items": uhistory}}
     title = _("My User History")
     desc = _("See your user history on Sefaria")
     return menu_page(request, props, page="history", title=title, desc=desc)
@@ -1649,10 +1666,10 @@ def index_api(request, title, raw=False):
     API for manipulating text index records (aka "Text Info")
     """
     if request.method == "GET":
-        with_content_counts = bool(int(request.GET.get("with_content_counts", False)))
+        with_content_counts = bool(request.GET.get("with_content_counts", False))
         i = library.get_index(title).contents(raw=raw, with_content_counts=with_content_counts)
 
-        if bool(int(request.GET.get("with_related_topics", False))):
+        if request.GET.get("with_related_topics", False):
             i["relatedTopics"] = get_topics_for_book(title, annotate=True)
 
         return jsonResponse(i, callback=request.GET.get("callback", None))
@@ -1684,7 +1701,7 @@ def index_api(request, title, raw=False):
                 library.get_index(title)  # getting the index just to tell if it exists
                 # Only allow staff and the person who submitted a text to edit
                 if not request.user.is_staff and not user_started_text(request.user.id, title):
-                   return jsonResponse({"error": "{} is protected from change.<br/><br/>See a mistake?<br/>Email [email protected].".format(title)})
+                    return jsonResponse({"error": "{} is protected from change.<br/><br/>See a mistake?<br/>Email [email protected].".format(title)})
             except BookNameError:
                 pass  # if this is a new text, allow any logged in user to submit
         @csrf_protect
@@ -1866,7 +1883,7 @@ def _collapse_book_leaf_shapes(leaf_shapes):
         else:
             cat_list = title.split("/")
             depth = request.GET.get("depth", 2)
-            include_dependents = bool(int(request.GET.get("dependents", False)))
+            include_dependents = request.GET.get("dependents", False)
             indexes = []
             if len(cat_list) == 1:
                 # try as corpus
@@ -2071,7 +2088,7 @@ def notes_api(request, note_id_or_ref):
             raise Http404
         oref = Ref(note_id_or_ref)
         cb = request.GET.get("callback", None)
-        private = bool(int(request.GET.get("private", False)))
+        private = request.GET.get("private", False)
         res = get_notes(oref, uid=creds["user_id"], public=(not private))
         return jsonResponse(res, cb)
 
@@ -2145,7 +2162,7 @@ def protected_note_post(req):
 @catch_error_as_json
 def all_notes_api(request):
 
-    private = bool(int(request.GET.get("private", False)))
+    private = request.GET.get("private", False)
     if private:
         if not request.user.is_authenticated:
             res = {"error": "You must be logged in to access you notes."}
@@ -2161,17 +2178,17 @@ def related_api(request, tref):
     """
     Single API to bundle available content related to `tref`.
     """
-    if bool(int(request.GET.get("private", False))) and request.user.is_authenticated:
+    if request.GET.get("private", False) and request.user.is_authenticated:
         oref = Ref(tref)
         response = {
             "sheets": get_sheets_for_ref(tref, uid=request.user.id),
             "notes": get_notes(oref, uid=request.user.id, public=False)
         }
-    elif bool(int(request.GET.get("private", False))) and not request.user.is_authenticated:
+    elif request.GET.get("private", False) and not request.user.is_authenticated:
         response = {"error": "You must be logged in to access private content."}
     else:
         response = {
-            "links": get_links(tref, with_text=False, with_sheet_links=bool(int(request.GET.get("with_sheet_links", False)))),
+            "links": get_links(tref, with_text=False, with_sheet_links=request.GET.get("with_sheet_links", False)),
             "sheets": get_sheets_for_ref(tref),
             "notes": [],  # get_notes(oref, public=True) # Hiding public notes for now
             "webpages": get_webpages_for_ref(tref),
@@ -2664,7 +2681,7 @@ def name_api(request, name):
     name = name[1:] if topic_override else name
     # Number of results to return.  0 indicates no limit
     LIMIT = int(request.GET.get("limit", 10))
-    ref_only = bool(int(request.GET.get("ref_only", False)))
+    ref_only = request.GET.get("ref_only", False)
     completions_dict = get_name_completions(name, LIMIT, ref_only, topic_override)
     ref = completions_dict["ref"]
     topic = completions_dict["topic"]
@@ -2768,7 +2785,7 @@ def user_stats_api(request, uid):
     assert request.method == "GET", "Unsupported Method"
     u = request.user
     assert (u.is_active and u.is_staff) or (int(uid) == u.id)
-    quick = bool(int(request.GET.get("quick", False)))
+    quick = bool(request.GET.get("quick", False))
     if quick:
         return jsonResponse(public_user_data(uid))
     return jsonResponse(user_stats_data(uid))
@@ -3093,29 +3110,13 @@ def topics_list_api(request):
     return response
 
 
-@staff_member_required
-def generate_topic_prompts_api(request, slug: str):
-    if request.method == "POST":
-        from sefaria.helper.llm.tasks import generate_and_save_topic_prompts
-        from sefaria.helper.llm.topic_prompt import get_ref_context_hints_by_lang
-        topic = Topic.init(slug)
-        post_body = json.loads(request.body)
-        ref_topic_links = post_body.get('ref_topic_links')
-        for lang, ref__context_hints in get_ref_context_hints_by_lang(ref_topic_links).items():
-            orefs, context_hints = zip(*ref__context_hints)
-            generate_and_save_topic_prompts(lang, topic, orefs, context_hints)
-        return jsonResponse({"acknowledged": True}, status=202)
-    return jsonResponse({"error": "This API only accepts POST requests."})
-
-
 @staff_member_required
 def add_new_topic_api(request):
     if request.method == "POST":
         data = json.loads(request.POST["json"])
         isTopLevelDisplay = data["category"] == Topic.ROOT
         t = Topic({'slug': "", "isTopLevelDisplay": isTopLevelDisplay, "data_source": "sefaria", "numSources": 0})
         update_topic_titles(t, **data)
-        t.set_slug_to_primary_title()
         if not isTopLevelDisplay:  # not Top Level so create an IntraTopicLink to category
             new_link = IntraTopicLink({"toTopic": data["category"], "fromTopic": t.slug, "linkType": "displays-under", "dataSource": "sefaria"})
             new_link.save()
@@ -3221,37 +3222,13 @@ def reorder_topics(request):
         results.append(topic.contents())
     return jsonResponse({"topics": results})
 
-@staff_member_required()
-def topic_ref_bulk_api(request):
-    """
-    API to bulk edit RefTopicLinks
-    """
-    topic_links = json.loads(request.body)
-    all_links_touched = []
-    for link in topic_links:
-        tref = link.get('ref')
-        tref = Ref(tref).normal()
-        slug = link.get("toTopic")
-        linkType = _CAT_REF_LINK_TYPE_FILTER_MAP['authors'][0] if AuthorTopic.init(slug) else 'about'
-        descriptions = link.get("descriptions", link.get("description"))
-        languages = descriptions.keys()
-        for language in languages:
-            ref_topic_dict = edit_topic_source(slug, orig_tref=tref, new_tref=tref,
-                                               linkType=linkType, description=descriptions[language], interface_lang=language)
-        all_links_touched.append(ref_topic_dict)
-    return jsonResponse(all_links_touched)
-
-
-
 @catch_error_as_json
 def topic_ref_api(request, tref):
     """
     API to get RefTopicLinks, as well as creating, editing, and deleting of RefTopicLinks
     """
-    try:
-        data = request.GET if request.method in ["DELETE", "GET"] else json.loads(request.POST.get('json'))
-    except Exception as e:
-        data = json.loads(request.body)
+
+    data = request.GET if request.method in ["DELETE", "GET"] else json.loads(request.POST.get('json'))
     slug = data.get('topic')
     interface_lang = 'en' if data.get('interface_lang') == 'english' else 'he'
     tref = Ref(tref).normal()  # normalize input
@@ -3353,7 +3330,7 @@ def global_activity(request, page=1):
     if page > 40:
         return render_template(request,'static/generic.html', None, {
             "title": "Activity Unavailable",
-            "content": "You have requested a page deep in Sefaria's history.<br><br>For performance reasons, this page is unavailable. If you need access to this information, please <a href='mailto:hello@sefaria.org'>email us</a>."
+            "content": "You have requested a page deep in Sefaria's history.<br><br>For performance reasons, this page is unavailable. If you need access to this information, please <a href='mailto:dev@sefaria.org'>email us</a>."
         })
 
     if "api" in request.GET:
@@ -3396,7 +3373,7 @@ def user_activity(request, slug, page=1):
     if page > 40:
         return render_template(request,'static/generic.html', None, {
             "title": "Activity Unavailable",
-            "content": "You have requested a page deep in Sefaria's history.<br><br>For performance reasons, this page is unavailable. If you need access to this information, please <a href='mailto:hello@sefaria.org'>email us</a>."
+            "content": "You have requested a page deep in Sefaria's history.<br><br>For performance reasons, this page is unavailable. If you need access to this information, please <a href='mailto:dev@sefaria.org'>email us</a>."
         })
 
     q              = {"user": profile.id}
@@ -4297,7 +4274,7 @@ def search_wrapper_api(request, es6_compat=False):
         search_obj = get_query_obj(search_obj=search_obj, **j)
         response = search_obj.execute()
         if response.success():
-            response_json = response.to_dict().body
+            response_json = getattr(response.to_dict(), 'body', response.to_dict())
             if es6_compat and isinstance(response_json['hits']['total'], dict):
                 response_json['hits']['total'] = response_json['hits']['total']['value']
             return jsonResponse(response_json, callback=request.GET.get("callback", None))
@@ -4330,18 +4307,13 @@ def serve_static_by_lang(request, page):
     return render_template(request,'static/{}/{}.html'.format(request.LANGUAGE_CODE, page), None, {})
 
 
-# TODO: This really should be handled by a CMS :)
 def annual_report(request, report_year):
     pdfs = {
         '2020': STATIC_URL + 'files/Sefaria 2020 Annual Report.pdf',
         '2021': 'https://indd.adobe.com/embed/98a016a2-c4d1-4f06-97fa-ed8876de88cf?startpage=1&allowFullscreen=true',
         '2022': STATIC_URL + 'files/Sefaria_AnnualImpactReport_R14.pdf',
-        '2023': 'https://issuu.com/sefariaimpact/docs/sefaria_2023_impact_report?fr=sMmRkNTcyMzMyNTk',
     }
-    # Assume the most recent year as default when one is not provided
-    if not report_year:
-        report_year = max(pdfs.keys()) # Earlier versions of Python do not preserve insertion order in dictionaries :(
-    elif report_year not in pdfs:
+    if report_year not in pdfs:
         raise Http404
     # Renders a simple template, does not extend base.html
     return render(request, template_name='static/annualreport.html', context={'reportYear': report_year, 'pdfURL': pdfs[report_year]})
@@ -4648,7 +4620,7 @@ def isNodeJsReachable():
         except Exception as e:
             logger.warn(f"Failed node healthcheck. Error: {e}")
             return False
-
+        
     def is_database_reachable():
         try:
             from sefaria.system.database import db
@@ -4676,4 +4648,4 @@ def is_database_reachable():
         statusCode = 503
         logger.warn("Failed rollout healthcheck. Healthcheck Response: {}".format(resp))
 
-    return http.JsonResponse(resp, status=statusCode)
+    return http.JsonResponse(resp, status=statusCode)
diff --git a/robots.txt b/robots.txt
@@ -0,0 +1,18 @@
+User-agent: *
+Disallow: /admin/
+Disallow: /accounts/
+Disallow: /api/
+Disallow: /settings/
+Disallow: /login/
+Disallow: /logout/
+Disallow: /register/
+Disallow: /reset/
+Disallow: /password_change/
+Disallow: /password_reset/
+Disallow: /search/
+
+Allow: /static/
+Allow: /media/
+
+Sitemap: https://pecha.org/sitemap.xml
+
diff --git a/sefaria/urls.py b/sefaria/urls.py
@@ -25,6 +25,8 @@
 
 # App Pages
 urlpatterns = [
+    url(r'^robots\.txt$', reader_views.robot, name='robots_txt'),  # dot escaped: match the literal filename only
+    url(r'^sitemap\.xml$', reader_views.sitemap, name='sitemap'),
     url(r'^$', reader_views.home, name="home"),
     url(r'^texts/?$', reader_views.texts_list, name="table_of_contents"),
     url(r'^texts/saved/?$', reader_views.saved),
@@ -262,9 +264,7 @@
 # Topics API
 urlpatterns += [
     url(r'^api/topics$', reader_views.topics_list_api),
-    url(r'^api/topics/generate-prompts/(?P<slug>.+)$', reader_views.generate_topic_prompts_api),
     url(r'^api/topics-graph/(?P<topic>.+)$', reader_views.topic_graph_api),
-    url(r'^api/ref-topic-links/bulk$', reader_views.topic_ref_bulk_api),
     url(r'^api/ref-topic-links/(?P<tref>.+)$', reader_views.topic_ref_api),
     url(r'^api/v2/topics/(?P<topic>.+)$', reader_views.topics_api, {'v2': True}),
     url(r'^api/topics/(?P<topic>.+)$', reader_views.topics_api),
@@ -484,4 +484,4 @@
     # Everything else gets maintenance message
     urlpatterns += [
         url(r'.*', sefaria_views.maintenance_message)
-    ]
+    ]