From 2c59052851332cde18521318b50f0d1c3aae9c71 Mon Sep 17 00:00:00 2001
From: Dylan-Harden3 <dylanharden33@gmail.com>
Date: Mon, 9 Dec 2024 18:17:58 -0600
Subject: [PATCH 1/3] add typehints everywhere

---
 pydetectgpt/detect.py  | 35 +++++++++++++++------------
 pydetectgpt/methods.py | 46 ++++++++++++++++++++---------------
 pydetectgpt/utils.py   |  8 +++---
 tests/test_detector.py | 55 ++++++++++++++++++++++++------------------
 tests/test_utils.py    | 26 +++++++++++---------
 5 files changed, 98 insertions(+), 72 deletions(-)

diff --git a/pydetectgpt/detect.py b/pydetectgpt/detect.py
index d5fe6de..b88ff38 100644
--- a/pydetectgpt/detect.py
+++ b/pydetectgpt/detect.py
@@ -1,38 +1,39 @@
 """Implementations of detection algorithms."""
 
-from typing import Literal
+from typing import Literal, Dict, Callable
 from .utils import load_model
 from .methods import log_likelihood, log_rank, likelihood_logrank_ratio, fast_detect_gpt
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
-DETECTION_FUNCS = {
+DETECTION_FUNCS: Dict[str, Callable[[torch.Tensor, torch.Tensor], float]] = {
     "loglikelihood": log_likelihood,
     "logrank": log_rank,
     "detectllm": likelihood_logrank_ratio,
     "fastdetectgpt": fast_detect_gpt,
 }
-THRESHOLDS = {
+THRESHOLDS: Dict[str, float] = {
     "loglikelihood": -1.8,
     "logrank": -0.8,
     "detectllm": 2.14,
     "fastdetectgpt": 1.9,
 }
 
+DetectionMethod = Literal["loglikelihood", "logrank", "detectllm", "fastdetectgpt"]
+
 
 def detect_ai_text(
     text: str,
-    method: Literal[
-        "loglikelihood", "logrank", "detectllm", "fastdetectgpt"
-    ] = "fastdetectgpt",
-    threshold: float = None,
+    method: DetectionMethod = "fastdetectgpt",
+    threshold: float | None = None,
     detection_model: str = "Qwen/Qwen2.5-1.5B",
 ) -> int:
     """Detect if `text` is written by human or ai.
 
     Args:
         text (str): The text to check.
-        method (str, optional), default='fastdetectgpt': Detection method to use, must be one of ['loglikelihood', 'logrank', 'detectllm', 'fastdetectgpt'].
-        threshold (float, optional), default=None: Decision threshold for `method` to use. If not provided, a default value will be used based on `method`.
+        method (DetectionMethod, optional), default='fastdetectgpt': Detection method to use, must be one of ['loglikelihood', 'logrank', 'detectllm', 'fastdetectgpt'].
+        threshold (float | None, optional), default=None: Decision threshold for `method` to use. If not provided, a default value will be used based on `method`.
         detection_model (str, optional), default=Qwen/Qwen2.5-1.5B: Huggingface Repo name for the model that `method` will use to generate logits.
 
     Returns:
@@ -44,10 +45,12 @@ def detect_ai_text(
     if not text:
         return 0
 
-    device = "cuda" if torch.cuda.is_available() else "cpu"
+    device: str = "cuda" if torch.cuda.is_available() else "cpu"
+    model: AutoModelForCausalLM
+    tokenizer: AutoTokenizer
     model, tokenizer = load_model(detection_model)
 
-    tokens = tokenizer(
+    tokens: torch.Tensor = tokenizer(
         text,
         return_tensors="pt",
         padding=True,
@@ -59,13 +62,15 @@ def detect_ai_text(
             f"In detect_ai_text `method` must be one of ['loglikelihood', 'logrank', 'detectllm', 'fastdetectgpt'], but got {method}"
         )
 
-    method_func = DETECTION_FUNCS[method]
+    method_func: Callable[[torch.Tensor, torch.Tensor], float] = DETECTION_FUNCS[method]
     if threshold is None:
         threshold = THRESHOLDS[method]
 
-    labels = tokens.input_ids[:, 1:]  # remove bos token
+    labels: torch.Tensor = tokens.input_ids[:, 1:]  # remove bos token
     with torch.no_grad():
-        logits = model(**tokens).logits[:, :-1]  # remove next token logits
-    pred = method_func(labels, logits)
+        logits: torch.Tensor = model(**tokens).logits[
+            :, :-1
+        ]  # remove next token logits
+    pred: float = method_func(labels, logits)
 
     return 0 if pred < threshold else 1
diff --git a/pydetectgpt/methods.py b/pydetectgpt/methods.py
index e48230d..515bf45 100644
--- a/pydetectgpt/methods.py
+++ b/pydetectgpt/methods.py
@@ -20,13 +20,13 @@ def log_likelihood(labels: torch.Tensor, logits: torch.Tensor) -> float:
     """
     validate_tensor_shapes(labels, logits)
 
-    logits = logits.view(-1, logits.shape[-1])
-    labels = labels.view(-1)
+    logits: torch.Tensor = logits.view(-1, logits.shape[-1])
+    labels: torch.Tensor = labels.view(-1)
 
-    log_probs = F.log_softmax(logits, dim=-1)
-    actual_token_probs = log_probs.gather(dim=-1, index=labels.unsqueeze(-1)).squeeze(
-        -1
-    )
+    log_probs: torch.Tensor = F.log_softmax(logits, dim=-1)
+    actual_token_probs: torch.Tensor = log_probs.gather(
+        dim=-1, index=labels.unsqueeze(-1)
+    ).squeeze(-1)
     return actual_token_probs.mean().item()
 
 
@@ -45,10 +45,12 @@ def log_rank(labels: torch.Tensor, logits: torch.Tensor) -> float:
     """
     validate_tensor_shapes(labels, logits)
 
-    matches = (logits.argsort(-1, descending=True) == labels.unsqueeze(-1)).nonzero()
-    ranks = matches[:, -1]
+    matches: torch.Tensor = (
+        logits.argsort(-1, descending=True) == labels.unsqueeze(-1)
+    ).nonzero()
+    ranks: torch.Tensor = matches[:, -1]
 
-    log_ranks = torch.log(ranks.float() + 1)
+    log_ranks: torch.Tensor = torch.log(ranks.float() + 1)
 
     return -log_ranks.mean().item()
 
@@ -68,8 +70,8 @@ def likelihood_logrank_ratio(labels: torch.Tensor, logits: torch.Tensor) -> floa
     """
     validate_tensor_shapes(labels, logits)
 
-    _log_likelihood = log_likelihood(labels, logits)
-    _log_rank = log_rank(labels, logits)
+    _log_likelihood: float = log_likelihood(labels, logits)
+    _log_rank: float = log_rank(labels, logits)
 
     return _log_likelihood / _log_rank
 
@@ -90,18 +92,24 @@ def fast_detect_gpt(labels: torch.Tensor, logits: torch.Tensor) -> float:
     validate_tensor_shapes(labels, logits)
 
     # conditional sampling
-    log_probs = F.log_softmax(logits, dim=-1)
-    distribution = torch.distributions.categorical.Categorical(logits=log_probs)
-    x_tilde = distribution.sample([10000]).permute([1, 2, 0])
+    log_probs: torch.Tensor = F.log_softmax(logits, dim=-1)
+    distribution: torch.distributions.categorical.Categorical = (
+        torch.distributions.categorical.Categorical(logits=log_probs)
+    )
+    x_tilde: torch.Tensor = distribution.sample([10000]).permute([1, 2, 0])
 
-    log_likelihood_x = log_probs.gather(dim=-1, index=labels.unsqueeze(-1)).mean(dim=1)
-    log_likelihood_x_tilde = log_probs.gather(dim=-1, index=x_tilde).mean(dim=1)
+    log_likelihood_x: torch.Tensor = log_probs.gather(
+        dim=-1, index=labels.unsqueeze(-1)
+    ).mean(dim=1)
+    log_likelihood_x_tilde: torch.Tensor = log_probs.gather(dim=-1, index=x_tilde).mean(
+        dim=1
+    )
 
     # estimate the mean/variance
-    mu_tilde = log_likelihood_x_tilde.mean(dim=-1)
-    sigma_tilde = log_likelihood_x_tilde.std(dim=-1)
+    mu_tilde: torch.Tensor = log_likelihood_x_tilde.mean(dim=-1)
+    sigma_tilde: torch.Tensor = log_likelihood_x_tilde.std(dim=-1)
 
     # estimate conditional probability curvature
-    dhat = (log_likelihood_x - mu_tilde) / sigma_tilde
+    dhat: torch.Tensor = (log_likelihood_x - mu_tilde) / sigma_tilde
 
     return dhat.item()
diff --git a/pydetectgpt/utils.py b/pydetectgpt/utils.py
index 078354a..4eb1677 100644
--- a/pydetectgpt/utils.py
+++ b/pydetectgpt/utils.py
@@ -17,9 +17,11 @@ def load_model(hf_repo: str) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
     Raises:
         ValueError: If there is an issue loading the model or tokenizer from HuggingFace.
     """
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    tokenizer = AutoTokenizer.from_pretrained(hf_repo)
-    model = AutoModelForCausalLM.from_pretrained(hf_repo).to(device)
+    device: str = "cuda" if torch.cuda.is_available() else "cpu"
+    tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained(hf_repo)
+    model: AutoModelForCausalLM = AutoModelForCausalLM.from_pretrained(hf_repo).to(
+        device
+    )
 
     if tokenizer.pad_token_id is None:
         tokenizer.pad_token_id = tokenizer.eos_token_id
diff --git a/tests/test_detector.py b/tests/test_detector.py
index 8d3a1fc..159257e 100644
--- a/tests/test_detector.py
+++ b/tests/test_detector.py
@@ -1,13 +1,18 @@
 from pydetectgpt import detect_ai_text
 import pytest
+from pydetectgpt.detect import DetectionMethod
 
 # I asked chatgpt "Where is Texas A&M?"
-AI_TEXT = "Texas A&M University is located in College Station, Texas, in the southeastern part of the state. It's about 90 miles northwest of Houston and around 150 miles south of Dallas. The university's full name is Texas Agricultural and Mechanical University, and it is one of the largest public universities in the United States."
+AI_TEXT: str = (
+    "Texas A&M University is located in College Station, Texas, in the southeastern part of the state. It's about 90 miles northwest of Houston and around 150 miles south of Dallas. The university's full name is Texas Agricultural and Mechanical University, and it is one of the largest public universities in the United States."
+)
 # random paragraph from one of my assignments (written by human)
-HUMAN_TEXT = "The main problem the authors are trying to address is that Large Language Models require large computational resources to use. This means that as a common setup we see companies deploying GPU clusters which act as a cloud server to generate responses when a user presents a query. Aside from the vast resources needed to set up a GPU cluster this approach has 2 main downsides: sending queries over the internet via an API exposes users’ private data and results in additional latency when generating responses"
+HUMAN_TEXT: str = (
+    "The main problem the authors are trying to address is that Large Language Models require large computational resources to use. This means that as a common setup we see companies deploying GPU clusters which act as a cloud server to generate responses when a user presents a query. Aside from the vast resources needed to set up a GPU cluster this approach has 2 main downsides: sending queries over the internet via an API exposes users’ private data and results in additional latency when generating responses"
+)
 
 
-def test_detect_ai_text():
+def test_detect_ai_text() -> None:
     # invalid method name
     with pytest.raises(ValueError, match="must be one of"):
         detect_ai_text(AI_TEXT, method="notvalidmethodname")
@@ -22,45 +27,49 @@ def test_detect_ai_text():
     assert detect_ai_text(HUMAN_TEXT, threshold=-99999.9) == 1
 
 
-def test_detect_ai_text_loglikelihood():
+def test_detect_ai_text_loglikelihood() -> None:
+    method: DetectionMethod = "loglikelihood"
 
-    assert detect_ai_text(AI_TEXT, method="loglikelihood") == 1
+    assert detect_ai_text(AI_TEXT, method=method) == 1
 
-    assert detect_ai_text(HUMAN_TEXT, method="loglikelihood") == 0
+    assert detect_ai_text(HUMAN_TEXT, method=method) == 0
 
-    assert detect_ai_text(AI_TEXT, method="loglikelihood", threshold=99999.9) == 0
+    assert detect_ai_text(AI_TEXT, method=method, threshold=99999.9) == 0
 
-    assert detect_ai_text(HUMAN_TEXT, method="loglikelihood", threshold=-99999.9) == 1
+    assert detect_ai_text(HUMAN_TEXT, method=method, threshold=-99999.9) == 1
 
 
-def test_detect_ai_text_logrank():
+def test_detect_ai_text_logrank() -> None:
+    method: DetectionMethod = "logrank"
 
-    assert detect_ai_text(AI_TEXT, method="logrank") == 1
+    assert detect_ai_text(AI_TEXT, method=method) == 1
 
-    assert detect_ai_text(HUMAN_TEXT, method="logrank") == 0
+    assert detect_ai_text(HUMAN_TEXT, method=method) == 0
 
-    assert detect_ai_text(AI_TEXT, method="logrank", threshold=99999.9) == 0
+    assert detect_ai_text(AI_TEXT, method=method, threshold=99999.9) == 0
 
-    assert detect_ai_text(HUMAN_TEXT, method="logrank", threshold=-99999.9) == 1
+    assert detect_ai_text(HUMAN_TEXT, method=method, threshold=-99999.9) == 1
 
 
-def test_detect_ai_text_detectllm():
+def test_detect_ai_text_detectllm() -> None:
+    method: DetectionMethod = "detectllm"
 
-    assert detect_ai_text(AI_TEXT, method="detectllm") == 1
+    assert detect_ai_text(AI_TEXT, method=method) == 1
 
-    assert detect_ai_text(HUMAN_TEXT, method="detectllm") == 0
+    assert detect_ai_text(HUMAN_TEXT, method=method) == 0
 
-    assert detect_ai_text(AI_TEXT, method="detectllm", threshold=99999.9) == 0
+    assert detect_ai_text(AI_TEXT, method=method, threshold=99999.9) == 0
 
-    assert detect_ai_text(HUMAN_TEXT, method="detectllm", threshold=-99999.9) == 1
+    assert detect_ai_text(HUMAN_TEXT, method=method, threshold=-99999.9) == 1
 
 
-def test_detect_ai_text_fastdetectgpt():
+def test_detect_ai_text_fastdetectgpt() -> None:
+    method: DetectionMethod = "fastdetectgpt"
 
-    assert detect_ai_text(AI_TEXT, method="fastdetectgpt") == 1
+    assert detect_ai_text(AI_TEXT, method=method) == 1
 
-    assert detect_ai_text(HUMAN_TEXT, method="fastdetectgpt") == 0
+    assert detect_ai_text(HUMAN_TEXT, method=method) == 0
 
-    assert detect_ai_text(AI_TEXT, method="fastdetectgpt", threshold=99999.9) == 0
+    assert detect_ai_text(AI_TEXT, method=method, threshold=99999.9) == 0
 
-    assert detect_ai_text(HUMAN_TEXT, method="fastdetectgpt", threshold=-99999.9) == 1
+    assert detect_ai_text(HUMAN_TEXT, method=method, threshold=-99999.9) == 1
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 619851a..92a3176 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,5 +1,7 @@
 import torch
 import pytest
+from torch import Tensor
+
 from pydetectgpt import (
     log_likelihood,
     log_rank,
@@ -8,10 +10,10 @@
 )
 
 
-def test_log_likelihood():
+def test_log_likelihood() -> None:
     # shape mismatch
-    logits = torch.randn(1, 5, 10)
-    labels = torch.randint(0, 9, (1, 6))
+    logits: Tensor = torch.randn(1, 5, 10)
+    labels: Tensor = torch.randint(0, 9, (1, 6))
 
     with pytest.raises(
         ValueError, match="Labels and logits must have compatible shapes"
@@ -44,10 +46,10 @@ def test_log_likelihood():
     assert abs(log_likelihood(labels, logits) - -1.05657327) < 1e-5
 
 
-def test_log_rank():
+def test_log_rank() -> None:
     # shape mismatch
-    logits = torch.randn(1, 5, 10)
-    labels = torch.randint(0, 9, (1, 6))
+    logits: Tensor = torch.randn(1, 5, 10)
+    labels: Tensor = torch.randint(0, 9, (1, 6))
 
     with pytest.raises(
         ValueError, match="Labels and logits must have compatible shapes"
@@ -80,10 +82,10 @@ def test_log_rank():
     assert abs(log_rank(labels, logits) - -0.3662) < 1e-5
 
 
-def test_likelihood_logrank_ratio():
+def test_likelihood_logrank_ratio() -> None:
     # shape mismatch
-    logits = torch.randn(1, 5, 10)
-    labels = torch.randint(0, 9, (1, 6))
+    logits: Tensor = torch.randn(1, 5, 10)
+    labels: Tensor = torch.randint(0, 9, (1, 6))
 
     with pytest.raises(
         ValueError, match="Labels and logits must have compatible shapes"
@@ -116,10 +118,10 @@ def test_likelihood_logrank_ratio():
     assert abs(likelihood_logrank_ratio(labels, logits) - 2.8852) < 1e-5
 
 
-def test_fast_detect_gpt():
+def test_fast_detect_gpt() -> None:
     # shape mismatch
-    logits = torch.randn(1, 5, 10)
-    labels = torch.randint(0, 9, (1, 6))
+    logits: Tensor = torch.randn(1, 5, 10)
+    labels: Tensor = torch.randint(0, 9, (1, 6))
 
     with pytest.raises(
         ValueError, match="Labels and logits must have compatible shapes"

From 432483a3f2541177853cb2e69b717485aef73340 Mon Sep 17 00:00:00 2001
From: Dylan-Harden3 <dylanharden33@gmail.com>
Date: Mon, 9 Dec 2024 18:56:25 -0600
Subject: [PATCH 2/3] remove return type annotations from tests

---
 tests/test_detector.py | 10 +++++-----
 tests/test_utils.py    |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/test_detector.py b/tests/test_detector.py
index 159257e..5ac25e8 100644
--- a/tests/test_detector.py
+++ b/tests/test_detector.py
@@ -12,7 +12,7 @@
 )
 
 
-def test_detect_ai_text() -> None:
+def test_detect_ai_text():
     # invalid method name
     with pytest.raises(ValueError, match="must be one of"):
         detect_ai_text(AI_TEXT, method="notvalidmethodname")
@@ -27,7 +27,7 @@ def test_detect_ai_text() -> None:
     assert detect_ai_text(HUMAN_TEXT, threshold=-99999.9) == 1
 
 
-def test_detect_ai_text_loglikelihood() -> None:
+def test_detect_ai_text_loglikelihood():
     method: DetectionMethod = "loglikelihood"
 
     assert detect_ai_text(AI_TEXT, method=method) == 1
@@ -39,7 +39,7 @@ def test_detect_ai_text_loglikelihood() -> None:
     assert detect_ai_text(HUMAN_TEXT, method=method, threshold=-99999.9) == 1
 
 
-def test_detect_ai_text_logrank() -> None:
+def test_detect_ai_text_logrank():
     method: DetectionMethod = "logrank"
 
     assert detect_ai_text(AI_TEXT, method=method) == 1
@@ -51,7 +51,7 @@ def test_detect_ai_text_logrank() -> None:
     assert detect_ai_text(HUMAN_TEXT, method=method, threshold=-99999.9) == 1
 
 
-def test_detect_ai_text_detectllm() -> None:
+def test_detect_ai_text_detectllm():
     method: DetectionMethod = "detectllm"
 
     assert detect_ai_text(AI_TEXT, method=method) == 1
@@ -63,7 +63,7 @@ def test_detect_ai_text_detectllm() -> None:
     assert detect_ai_text(HUMAN_TEXT, method=method, threshold=-99999.9) == 1
 
 
-def test_detect_ai_text_fastdetectgpt() -> None:
+def test_detect_ai_text_fastdetectgpt():
     method: DetectionMethod = "fastdetectgpt"
 
     assert detect_ai_text(AI_TEXT, method=method) == 1
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 92a3176..0832d79 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -10,7 +10,7 @@
 )
 
 
-def test_log_likelihood() -> None:
+def test_log_likelihood():
     # shape mismatch
     logits: Tensor = torch.randn(1, 5, 10)
     labels: Tensor = torch.randint(0, 9, (1, 6))
@@ -46,7 +46,7 @@ def test_log_likelihood() -> None:
     assert abs(log_likelihood(labels, logits) - -1.05657327) < 1e-5
 
 
-def test_log_rank() -> None:
+def test_log_rank():
     # shape mismatch
     logits: Tensor = torch.randn(1, 5, 10)
     labels: Tensor = torch.randint(0, 9, (1, 6))
@@ -82,7 +82,7 @@ def test_log_rank() -> None:
     assert abs(log_rank(labels, logits) - -0.3662) < 1e-5
 
 
-def test_likelihood_logrank_ratio() -> None:
+def test_likelihood_logrank_ratio():
     # shape mismatch
     logits: Tensor = torch.randn(1, 5, 10)
     labels: Tensor = torch.randint(0, 9, (1, 6))
@@ -118,7 +118,7 @@ def test_likelihood_logrank_ratio() -> None:
     assert abs(likelihood_logrank_ratio(labels, logits) - 2.8852) < 1e-5
 
 
-def test_fast_detect_gpt() -> None:
+def test_fast_detect_gpt():
     # shape mismatch
     logits: Tensor = torch.randn(1, 5, 10)
     labels: Tensor = torch.randint(0, 9, (1, 6))

From fef0912b79b0bbaee7dc532642b6fd2fdf7b542e Mon Sep 17 00:00:00 2001
From: Dylan-Harden3 <dylanharden33@gmail.com>
Date: Mon, 9 Dec 2024 19:22:18 -0600
Subject: [PATCH 3/3] fix type error with threshold

---
 pydetectgpt/detect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pydetectgpt/detect.py b/pydetectgpt/detect.py
index b88ff38..02c00ab 100644
--- a/pydetectgpt/detect.py
+++ b/pydetectgpt/detect.py
@@ -25,7 +25,7 @@
 def detect_ai_text(
     text: str,
     method: DetectionMethod = "fastdetectgpt",
-    threshold: float | None = None,
+    threshold: float = None,
     detection_model: str = "Qwen/Qwen2.5-1.5B",
 ) -> int:
     """Detect if `text` is written by human or ai.