-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #12 from Dylan-Harden3/cli
create CLI for using PyDetectGPT
- Loading branch information
Showing
4 changed files
with
194 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
"""CLI for using PyDetectGPT.""" | ||
|
||
import argparse | ||
from .detect import detect_ai_text, DETECTION_FUNCS, THRESHOLDS | ||
|
||
|
||
def _build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the pydetectgpt CLI."""
    parser = argparse.ArgumentParser(
        description="Detect if text is AI-generated",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Example:
    pydetectgpt "Your text here"
    """,
    )

    # Positional: the text to classify.
    parser.add_argument("text", help="text to analyze")

    # Detection algorithm; choices come from the package's registry so the
    # CLI stays in sync with the available detectors.
    parser.add_argument(
        "-m",
        "--method",
        type=str,
        choices=list(DETECTION_FUNCS.keys()),
        default="fastdetectgpt",
        help="detection method to use (default: %(default)s)",
    )

    # Scoring model (any HuggingFace causal LM identifier).
    parser.add_argument(
        "--model",
        type=str,
        default="Qwen/Qwen2.5-1.5B",
        help="HuggingFace model to use (default: %(default)s)",
    )

    # Optional override; when omitted the per-method default from
    # THRESHOLDS is used downstream.
    parser.add_argument(
        "-t",
        "--threshold",
        type=float,
        help=f"decision threshold (defaults: {THRESHOLDS})",
    )

    # Machine-friendly output mode.
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="output only the result (0 for human, 1 for AI)",
    )

    return parser


def main() -> None:
    """Run detection via CLI with argparse."""
    ns = _build_parser().parse_args()

    verdict = detect_ai_text(
        text=ns.text,
        method=ns.method,
        detection_model=ns.model,
        threshold=ns.threshold,
    )

    if ns.quiet:
        # Bare numeric label only: 1 = AI, 0 = human.
        print(verdict)
    else:
        print(f"Detection Result: {'AI Generated' if verdict else 'Human Written'}")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
import pytest | ||
from unittest.mock import patch | ||
from pydetectgpt.cli import main | ||
|
||
# Fixture expected to be classified as AI-generated by the detectors at
# their default thresholds (the tests below assert "AI Generated" for it).
AI_TEXT: str = (
    "Texas A&M University is located in College Station, Texas, in the southeastern part of the state. It's about 90 miles northwest of Houston and around 150 miles south of Dallas. The university's full name is Texas Agricultural and Mechanical University, and it is one of the largest public universities in the United States."
)
# Fixture expected to be classified as human-written at default thresholds
# (the tests below assert "Human Written" for it).
HUMAN_TEXT: str = (
    "The main problem the authors are trying to address is that Large Language Models require large computational resources to use. This means that as a common setup we see companies deploying GPU clusters which act as a cloud server to generate responses when a user presents a query. Aside from the vast resources needed to set up a GPU cluster this approach has 2 main downsides: sending queries over the internet via an API exposes users' private data and results in additional latency when generating responses"
)
|
||
|
||
def test_cli_loglikelihood(capsys) -> None:
    """End-to-end CLI check for the loglikelihood method."""
    method: str = "loglikelihood"

    # (argv after the program name, expected verdict substring).
    # The extreme -t values force the opposite classification for each text.
    cases = [
        ([AI_TEXT, "-m", method], "AI Generated"),
        ([HUMAN_TEXT, "-m", method], "Human Written"),
        ([AI_TEXT, "-m", method, "-t", "99999.9"], "Human Written"),
        ([HUMAN_TEXT, "-m", method, "-t", "-99999.9"], "AI Generated"),
    ]

    for argv_tail, expected in cases:
        with patch("sys.argv", ["pydetectgpt", *argv_tail]):
            main()
        assert expected in capsys.readouterr().out
|
||
|
||
def test_cli_logrank(capsys) -> None:
    """End-to-end CLI check for the logrank method."""
    method: str = "logrank"

    # (argv after the program name, expected verdict substring).
    # The extreme -t values force the opposite classification for each text.
    cases = [
        ([AI_TEXT, "-m", method], "AI Generated"),
        ([HUMAN_TEXT, "-m", method], "Human Written"),
        ([AI_TEXT, "-m", method, "-t", "99999.9"], "Human Written"),
        ([HUMAN_TEXT, "-m", method, "-t", "-99999.9"], "AI Generated"),
    ]

    for argv_tail, expected in cases:
        with patch("sys.argv", ["pydetectgpt", *argv_tail]):
            main()
        assert expected in capsys.readouterr().out
|
||
|
||
def test_cli_detectllm(capsys) -> None:
    """End-to-end CLI check for the detectllm method."""
    method: str = "detectllm"

    # (argv after the program name, expected verdict substring).
    # The extreme -t values force the opposite classification for each text.
    cases = [
        ([AI_TEXT, "-m", method], "AI Generated"),
        ([HUMAN_TEXT, "-m", method], "Human Written"),
        ([AI_TEXT, "-m", method, "-t", "99999.9"], "Human Written"),
        ([HUMAN_TEXT, "-m", method, "-t", "-99999.9"], "AI Generated"),
    ]

    for argv_tail, expected in cases:
        with patch("sys.argv", ["pydetectgpt", *argv_tail]):
            main()
        assert expected in capsys.readouterr().out
|
||
|
||
def test_cli_fastdetectgpt(capsys) -> None:
    """End-to-end CLI check for the fastdetectgpt method (the default)."""
    method: str = "fastdetectgpt"

    # (argv after the program name, expected verdict substring).
    # The extreme -t values force the opposite classification for each text.
    cases = [
        ([AI_TEXT, "-m", method], "AI Generated"),
        ([HUMAN_TEXT, "-m", method], "Human Written"),
        ([AI_TEXT, "-m", method, "-t", "99999.9"], "Human Written"),
        ([HUMAN_TEXT, "-m", method, "-t", "-99999.9"], "AI Generated"),
    ]

    for argv_tail, expected in cases:
        with patch("sys.argv", ["pydetectgpt", *argv_tail]):
            main()
        assert expected in capsys.readouterr().out
|
||
|
||
def test_cli_invalid_method() -> None:
    """An unrecognized -m choice makes argparse exit with an error."""
    argv = ["pydetectgpt", AI_TEXT, "-m", "invalid_method"]
    with patch("sys.argv", argv), pytest.raises(SystemExit):
        main()
|
||
|
||
def test_cli_quiet_mode(capsys) -> None:
    """With -q the CLI prints only the numeric label: 1 for AI, 0 for human."""
    for text, label in ((AI_TEXT, "1"), (HUMAN_TEXT, "0")):
        with patch("sys.argv", ["pydetectgpt", text, "-q"]):
            main()
        assert capsys.readouterr().out.strip() == label
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters