{
"Main": [
{
"Date": "2024.08.02",
"Notes": "A finding",
"Title": "The Phantom Menace: Unmasking Privacy Leakages in Vision-Language Models",
"Link": "https://arxiv.org/abs/2408.01228",
"Author": "Simone Caldarella, Massimiliano Mancini, Elisa Ricci, Rahaf Aljundi",
"Affiliation": "University of Trento | Toyota Motor Europe | Fondazione Bruno Kessler",
"Code": ""
},
{
"Date": "2024.07.21",
"Notes": "Attack",
"Title": "Arondight: Red Teaming Large Vision Language Models with Auto-generated Multi-modal Jailbreak Prompts",
"Link": "https://arxiv.org/abs/2407.15050",
"Author": "Yi Liu, Chengjun Cai, Xiaoli Zhang, Xingliang Yuan, Cong Wang",
"Affiliation": "City University of Hong Kong | City University of Hong Kong (Dongguan) | University of Science and Technology Beijing | The University of Melbourne",
"Code": ""
},
{
"Date": "2024.07.21",
"Notes": "Attack",
"Title": "When Do Universal Image Jailbreaks Transfer Between Vision-Language Models?",
"Link": "https://arxiv.org/abs/2407.15211",
"Author": "Rylan Schaeffer, Dan Valentine, Luke Bailey, James Chua, Cristóbal Eyzaguirre, Zane Durante, Joe Benton, Brando Miranda, Henry Sleight, John Hughes, Rajashree Agrawal, Mrinank Sharma, Scott Emmons, Sanmi Koyejo, Ethan Perez",
"Affiliation": "Stanford CS | Harvard SEAS | Anthropic | Constellation | MIT EECS | UC Berkeley EECS",
"Code": ""
},
{
"Date": "2024.07.20",
"Notes": "Attack",
"Title": "Sim-CLIP: Unsupervised Siamese Adversarial Fine-Tuning for Robust and Semantically-Rich Vision-Language Models",
"Link": "https://arxiv.org/abs/2407.14971",
"Author": "Md Zarif Hossain, Ahmed Imteaj",
"Affiliation": "Southern Illinois University | Security, Privacy and Intelligence for Edge Devices Laboratory (SPEED Lab)",
"Code": ""
},
{
"Date": "2024.07.12",
"Notes": "Attack",
"Title": "Refusing Safe Prompts for Multi-modal Large Language Models",
"Link": "https://arxiv.org/abs/2407.09050",
"Author": "Zedian Shao, Hongbin Liu, Yuepeng Hu, Neil Zhenqiang Gong",
"Affiliation": "Duke University",
"Code": "https://github.com/Sadcardation/MLLM-Refusal"
},
{
"Date": "2024.07.10",
"Notes": "Survey",
"Title": "A Survey of Attacks on Large Vision-Language Models: Resources, Advances, and Future Trends",
"Link": "https://arxiv.org/abs/2407.07403",
"Author": "Daizong Liu, Mingyu Yang, Xiaoye Qu, Pan Zhou, Wei Hu, Yu Cheng",
"Affiliation": "Peking University | Huazhong University of Science and Technology | The Chinese University of Hong Kong",
"Code": "https://github.com/liudaizong/Awesome-LVLM-Attack"
},
{
"Date": "2024.07.06",
"Notes": "Benchmark",
"Title": "Granular Privacy Control for Geolocation with Vision Language Models",
"Link": "https://arxiv.org/abs/2407.04952",
"Author": "Ethan Mendes, Yang Chen, James Hays, Sauvik Das, Wei Xu, Alan Ritter",
"Affiliation": "Georgia Tech | CMU",
"Code": "https://github.com/ethanm88/GPTGeoChat"
},
{
"Date": "2024.07.03",
"Notes": "Benchmark",
"Title": "VIVA: A Benchmark for Vision-Grounded Decision-Making with Human Values",
"Link": "https://arxiv.org/abs/2407.03000",
"Author": "Zhe Hu, Yixiao Ren, Jing Li, Yu Yin",
"Affiliation": "The Hong Kong Polytechnic University | Case Western Reserve University",
"Code": ""
},
{
"Date": "2024.07.01",
"Notes": "Attack",
"Title": "Image-to-Text Logic Jailbreak: Your Imagination can Help You Do Anything",
"Link": "https://arxiv.org/abs/2407.02534",
"Author": "Xiaotian Zou, Yongkang Chen",
"Affiliation": " University of Exeter | Nanjing University of Aeronautics and Astronautics",
"Code": ""
},
{
"Date": "2024.06.26",
"Notes": "Survey",
"Title": "JailbreakZoo: Survey, Landscapes, and Horizons in Jailbreaking Large Language and Vision-Language Models",
"Link": "https://arxiv.org/abs/2407.01599",
"Author": "Haibo Jin, Leyang Hu, Xinuo Li, Peiyan Zhang, Chonghan Chen, Jun Zhuang, Haohan Wang",
"Affiliation": "University of Illinois Urbana-Champaign | Brown University | University of Michigan Ann Arbor | Hong Kong University of Science and Technology | Carnegie Mellon University | Boise State University",
"Code": "https://github.com/Allen-piexl/JailbreakZoo"
},
{
"Date": "2024.06.22",
"Notes": "Benchmark",
"Title": "MOSSBench: Is Your Multimodal Language Model Oversensitive to Safe Queries?",
"Link": "https://arxiv.org/abs/2406.17806",
"Author": "Xirui Li, Hengguang Zhou, Ruochen Wang, Tianyi Zhou, Minhao Cheng, Cho-Jui Hsieh",
"Affiliation": "University of California | University of Maryland | Pennsylvania State University",
"Code": "https://github.com/xirui-li/MOSSBench"
},
{
"Date": "2024.06.21",
"Notes": "Survey",
"Title": "From LLMs to MLLMs: Exploring the Landscape of Multimodal Jailbreaking",
"Link": "https://arxiv.org/abs/2406.14859",
"Author": "Siyuan Wang, Zhuohan Long, Zhihao Fan, Zhongyu Wei",
"Affiliation": "University of Southern California | Fudan University | Alibaba Inc.",
"Code": ""
},
{
"Date": "2024.06.21",
"Notes": "Benchmark",
"Title": "Cross-Modality Safety Alignment",
"Link": "https://arxiv.org/abs/2406.15279",
"Author": "Siyin Wang, Xingsong Ye, Qinyuan Cheng, Junwen Duan, Shimin Li, Jinlan Fu, Xipeng Qiu, Xuanjing Huang",
"Affiliation": "Fudan University | National University of Singapore | Shanghai AI Laboratory",
"Code": "https://github.com/sinwang20/SIUO"
},
{
"Date": "2024.06.18",
"Notes": "Attack",
"Title": "Adversarial Attacks on Multimodal Agents",
"Link": "https://arxiv.org/abs/2406.12814",
"Author": "Chen Henry Wu, Jing Yu Koh, Ruslan Salakhutdinov, Daniel Fried, Aditi Raghunathan",
"Affiliation": "Carnegie Mellon University",
"Code": "https://github.com/ChenWu98/agent-attack"
},
{
"Date": "2024.06.17",
"Notes": "Defense",
"Title": "SPA-VL: A Comprehensive Safety Preference Alignment Dataset for Vision Language Model",
"Link": "https://arxiv.org/abs/2406.12030",
"Author": "Yongting Zhang, Lu Chen, Guodong Zheng, Yifeng Gao, Rui Zheng, Jinlan Fu, Zhenfei Yin, Senjie Jin, Yu Qiao, Xuanjing Huang, Feng Zhao, Tao Gui, Jing Shao",
"Affiliation": "University of Science and Technology of China | Fudan University | Shanghai Artificial Intelligence Laboratory",
"Code": "https://github.com/EchoseChen/SPA-VL-RLHF"
},
{
"Date": "2024.06.11",
"Notes": "Benchmark",
"Title": "MLLMGuard: A Multi-dimensional Safety Evaluation Suite for Multimodal Large Language Models",
"Link": "https://arxiv.org/abs/2406.07594",
"Author": "Tianle Gu, Zeyang Zhou, Kexin Huang, Dandan Liang, Yixu Wang, Haiquan Zhao, Yuanqi Yao, Xingge Qiao, Keqing Wang, Yujiu Yang, Yan Teng, Yu Qiao, Yingchun Wang",
"Affiliation": "Tsinghua University | Shanghai Artificial Intelligence Laboratory",
"Code": "https://github.com/Carol-gutianle/MLLMGuard"
},
{
"Date": "2024.06.11",
"Notes": "Benchmark",
"Title": "Benchmarking Trustworthiness of Multimodal Large Language Models: A Comprehensive Study",
"Link": "https://arxiv.org/abs/2406.07057",
"Author": "Yichi Zhang, Yao Huang, Yitong Sun, Chang Liu, Zhe Zhao, Zhengwei Fang, Yifan Wang, Huanran Chen, Xiao Yang, Xingxing Wei, Hang Su, Yinpeng Dong, Jun Zhu",
"Affiliation": "Tsinghua University | Beihang University | Shanghai Jiao Tong University | RealAI",
"Code": "https://github.com/thu-ml/MMTrustEval"
},
{
"Date": "2024.06.10",
"Notes": "Attack",
"Title": "Unveiling the Safety of GPT-4o: An Empirical Study using Jailbreak Attacks",
"Link": "https://arxiv.org/abs/2406.06302",
"Author": "Zonghao Ying, Aishan Liu, Xianglong Liu, Dacheng Tao",
"Affiliation": "Beihang University | Nanyang Technological University",
"Code": "https://github.com/NY1024/Jailbreak_GPT4o"
},
{
"Date": "2024.06.10",
"Notes": "Benchmark",
"Title": "CARES: A Comprehensive Benchmark of Trustworthiness in Medical Vision Language Models",
"Link": "https://arxiv.org/abs/2406.06007",
"Author": "Peng Xia, Ze Chen, Juanxi Tian, Yangrui Gong, Ruibo Hou, Yue Xu, Zhenbang Wu, Zhiyuan Fan, Yiyang Zhou, Kangyu Zhu, Wenhao Zheng, Zhaoyang Wang, Xiao Wang, Xuchao Zhang, Chetan Bansal, Marc Niethammer, Junzhou Huang, Hongtu Zhu, Yun Li, Jimeng Sun, Zongyuan Ge, Gang Li, James Zou, Huaxiu Yao",
"Affiliation": "UNC-Chapel Hill | UIUC | Brown University | University of Washington | Microsoft Research | UT Arlington | Monash University | Stanford University",
"Code": "https://github.com/richard-peng-xia/CARES"
},
{
"Date": "2024.06.07",
"Notes": "Defense",
"Title": "LLavaGuard: VLM-based Safeguards for Vision Dataset Curation and Safety Assessment",
"Link": "https://arxiv.org/abs/2406.05113",
"Author": "Lukas Helff, Felix Friedrich, Manuel Brack, Kristian Kersting, Patrick Schramowski",
"Affiliation": "TU Darmstadt | DFKI | hessian.AI | Ontocord | Centre for Cognitive Science, Darmstadt",
"Code": "https://github.com/ml-research/LlavaGuard"
},
{
"Date": "2024.06.06",
"Notes": "Attack",
"Title": "Jailbreak Vision Language Models via Bi-Modal Adversarial Prompt",
"Link": "https://arxiv.org/abs/2406.04031",
"Author": "Zonghao Ying, Aishan Liu, Tianyuan Zhang, Zhengmin Yu, Siyuan Liang, Xianglong Liu, Dacheng Tao",
"Affiliation": "Beihang University | Fudan University | National University of Singapore | Nanyang Technological University",
"Code": "https://github.com/NY1024/BAP-Jailbreak-Vision-Language-Models-via-Bi-Modal-Adversarial-Prompt"
},
{
"Date": "2024.05.31",
"Notes": "Benchmark",
"Title": "ToxVidLLM: A Multimodal LLM-based Framework for Toxicity Detection in Code-Mixed Videos",
"Link": "https://arxiv.org/abs/2405.20628",
"Author": "Krishanu Maity, A.S. Poornash, Sriparna Saha, Pushpak Bhattacharyya",
"Affiliation": "Indian Institute of Technology Patna | Indian Institute of Technology Bombay",
"Code": "https://github.com/justaguyalways/ToxVidLM_ACL_2024"
},
{
"Date": "2024.05.30",
"Notes": "Attack(Robot)",
"Title": "Exploring the Robustness of Decision-Level Through Adversarial Attacks on LLM-Based Embodied Models",
"Link": "https://arxiv.org/abs/2405.19802",
"Author": "Shuyuan Liu, Jiawei Chen, Shouwei Ruan, Hang Su, Zhaoxia Yin",
"Affiliation": "East China Normal University | Beihang University | Tsinghua University",
"Code": ""
},
{
"Date": "2024.05.30",
"Notes": "Attack",
"Title": "Typography Leads Semantic Diversifying: Amplifying Adversarial Transferability across Multimodal Large Language Models",
"Link": "https://arxiv.org/abs/2405.20090",
"Author": "Hao Cheng, Erjia Xiao, Jiahang Cao, Le Yang, Kaidi Xu, Jindong Gu, Renjing Xu",
"Affiliation": "HKUST (GZ) | Xi'an Jiaotong University | Drexel University | Oxford University",
"Code": ""
},
{
"Date": "2024.05.30",
"Notes": "Benchmark",
"Title": "AutoBreach: Universal and Adaptive Jailbreaking with Efficient Wordplay-Guided Optimization",
"Link": "https://arxiv.org/abs/2405.19668",
"Author": "Jiawei Chen, Xiao Yang, Zhengwei Fang, Yu Tian, Yinpeng Dong, Zhaoxia Yin, Hang Su",
"Affiliation": "East China Normal University | Tsinghua University | RealAI",
"Code": ""
},
{
"Date": "2024.05.30",
"Notes": "Attack",
"Title": "Efficient LLM-Jailbreaking by Introducing Visual Modality",
"Link": "https://arxiv.org/abs/2405.20015",
"Author": "Zhenxing Niu, Yuyao Sun, Haodong Ren, Haoxuan Ji, Quan Wang, Xiaoke Ma, Gang Hua, Rong Jin",
"Affiliation": "Xidian University | Xi'an Jiaotong University | Wormpex AI Research | Meta",
"Code": "https://github.com/abc321123444/LLM-jb"
},
{
"Date": "2024.05.28",
"Notes": "Attack",
"Title": "White-box Multimodal Jailbreaks Against Large Vision-Language Models",
"Link": "https://arxiv.org/abs/2405.17894",
"Author": "Ruofan Wang, Xingjun Ma, Hanxu Zhou, Chuanjun Ji, Guangnan Ye, Yu-Gang Jiang",
"Affiliation": "Fudan University | Shanghai Jiao Tong University | DataGrand Tech",
"Code": ""
},
{
"Date": "2024.05.27",
"Notes": "Defense",
"Title": "Cross-Modal Safety Alignment: Is textual unlearning all you need?",
"Link": "https://arxiv.org/abs/2406.02575",
"Author": "Trishna Chakraborty, Erfan Shayegani, Zikui Cai, Nael Abu-Ghazaleh, M. Salman Asif, Yue Dong, Amit K. Roy-Chowdhury, Chengyu Song",
"Affiliation": "University of California",
"Code": ""
},
{
"Date": "2024.05.27",
"Notes": "Defense, Benchmark",
"Title": "Privacy-Aware Visual Language Models",
"Link": "https://arxiv.org/abs/2405.17423",
"Author": "Laurens Samson, Nimrod Barazani, Sennay Ghebreab, Yuki M. Asano",
"Affiliation": "University of Amsterdam",
"Code": ""
},
{
"Date": "2024.05.26",
"Notes": "Attack, Benchmark",
"Title": "Cross-Modality Jailbreak and Mismatched Attacks on Medical Multimodal Large Language Models",
"Link": "https://arxiv.org/abs/2405.20775",
"Author": "Xijie Huang, Xinyuan Wang, Hantao Zhang, Jiawen Xi, Jingkun An, Hao Wang, Chengwei Pan",
"Affiliation": "Beihang University | University of Science and Technology of China",
"Code": "https://github.com/dirtycomputer/O2M_attack"
},
{
"Date": "2024.05.25",
"Notes": "Attack",
"Title": "Visual-RolePlay: Universal Jailbreak Attack on MultiModal Large Language Models via Role-playing Image Characte",
"Link": "https://arxiv.org/abs/2405.20773",
"Author": "Siyuan Ma, Weidi Luo, Yu Wang, Xiaogeng Liu, Muhao Chen, Bo Li, Chaowei Xiao",
"Affiliation": "Peking University | The Ohio State University | University of Wisconsin-Madison | University of California | The University of Chicago",
"Code": ""
},
{
"Date": "2024.05.22",
"Notes": "A finding",
"Title": "More Distinctively Black and Feminine Faces Lead to Increased Stereotyping in Vision-Language Models",
"Link": "https://arxiv.org/abs/2407.06194",
"Author": "Messi H.J. Lee, Jacob M. Montgomery, Calvin K. Lai",
"Affiliation": "Washington University in St. Louis",
"Code": ""
},
{
"Date": "2024.05.22",
"Notes": "Defense",
"Title": "Safety Alignment for Vision Language Models",
"Link": "https://arxiv.org/abs/2405.13581",
"Author": "Zhendong Liu, Yuanbi Nie, Yingshui Tan, Xiangyu Yue, Qiushi Cui, Chongjun Wang, Xiaoyong Zhu, Bo Zheng",
"Affiliation": "Nanjing University | Chongqing University | Alibaba Group | Chinese University of Hong Kong",
"Code": ""
},
{
"Date": "2024.05.07",
"Notes": "A finding",
"Title": "Learning To See But Forgetting To Follow: Visual Instruction Tuning Makes LLMs More Prone To Jailbreak Attacks",
"Link": "https://arxiv.org/abs/2405.04403",
"Author": "Georgios Pantazopoulos, Amit Parekh, Malvina Nikandrou, Alessandro Suglia",
"Affiliation": "Heriot-Watt University",
"Code": "https://github.com/gpantaz/vl_jailbreak"
},
{
"Date": "2024.04.16",
"Notes": "Benchmark",
"Title": "Private Attribute Inference from Images with Vision-Language Models",
"Link": "https://arxiv.org/abs/2404.10618",
"Author": "Batuhan Tömekçe, Mark Vero, Robin Staab, Martin Vechev",
"Affiliation": "ETH Zurich",
"Code": ""
},
{
"Date": "2024.04.08",
"Notes": "Survey",
"Title": "Unbridled Icarus: A Survey of the Potential Perils of Image Inputs in Multimodal Large Language Model Security",
"Link": "https://arxiv.org/abs/2404.05264",
"Author": "Yihe Fan, Yuxin Cao, Ziyu Zhao, Ziyao Liu, Shaofeng Li",
"Affiliation": "TongJi University | Tsinghua University | Beijing University of Technology | Nanyang Technological University | Peng Cheng Laboratory",
"Code": ""
},
{
"Date": "2024.04.04",
"Notes": "Benchmark",
"Title": "Red Teaming GPT-4V: Are GPT-4V Safe Against Uni/Multi-Modal Jailbreak Attacks?",
"Link": "https://arxiv.org/abs/2404.03411",
"Author": "Shuo Chen, Zhen Han, Bailan He, Zifeng Ding, Wenqian Yu, Philip Torr, Volker Tresp, Jindong Gu",
"Affiliation": "LMU Munich | University of Oxford | Siemens AG | MCML | Wuhan University",
"Code": ""
},
{
"Date": "2024.04.03",
"Notes": "Benchmark",
"Title": "JailBreakV-28K: A Benchmark for Assessing the Robustness of MultiModal Large Language Models against Jailbreak Attacks",
"Link": "https://arxiv.org/abs/2404.03027",
"Author": "Weidi Luo, Siyuan Ma, Xiaogeng Liu, Xiaoyu Guo, Chaowei Xiao",
"Affiliation": "The Ohio State University | University of Wisconsin-Madison",
"Code": "https://github.com/EddyLuo1232/JailBreakV_28K"
},
{
"Date": "2024.03.30",
"Notes": "Attack",
"Title": "Composite Backdoor Attacks Against Large Language Models",
"Link": "https://arxiv.org/abs/2310.07676",
"Author": "Hai Huang, Zhengyu Zhao, Michael Backes, Yun Shen, Yang Zhang",
"Affiliation": "CISPA Helmholtz Center for Information Security | Xi'an Jiaotong University | NetApp",
"Code": "https://github.com/MiracleHH/CBA"
},
{
"Date": "2024.03.29",
"Notes": "Analysis",
"Title": "Uncovering Bias in Large Vision-Language Models with Counterfactuals",
"Link": "https://arxiv.org/abs/2404.00166",
"Author": "Phillip Howard, Anahita Bhiwandiwalla, Kathleen C. Fraser, Svetlana Kiritchenko",
"Affiliation": "Intel Labs | National Research Council Canada",
"Code": ""
},
{
"Date": "2024.03.26",
"Notes": "Benchmark",
"Title": "Assessment of Multimodal Large Language Models in Alignment with Human Values",
"Link": "https://arxiv.org/abs/2403.17830",
"Author": "Zhelun Shi, Zhipin Wang, Hongxing Fan, Zaibin Zhang, Lijun Li, Yongting Zhang, Zhenfei Yin, Lu Sheng, Yu Qiao, Jing Shao",
"Affiliation": "Shanghai Artificial Intelligence Laboratory | School of Software, Beihang University | Dalian University of Technology | University of Science and Technology of China | The University of Sydney",
"Code": "https://github.com/OpenGVLab/LAMM"
},
{
"Date": "2024.03.26",
"Notes": "Analysis",
"Title": "Evaluating the Efficacy of Prompt-Engineered Large Multimodal Models Versus Fine-Tuned Vision Transformers in Image-Based Security Applications",
"Link": "https://arxiv.org/abs/2403.17787",
"Author": "Fouad Trad, Ali Chehab",
"Affiliation": "American University of Beirut",
"Code": ""
},
{
"Date": "2024.03.14",
"Notes": "Defense",
"Title": "AdaShield: Safeguarding Multimodal Large Language Models from Structure-based Attack via Adaptive Shield Prompting",
"Link": "https://arxiv.org/abs/2403.09513",
"Author": "Yu Wang, Xiaogeng Liu, Yu Li, Muhao Chen, Chaowei Xiao",
"Affiliation": "Peking University | University of Wisconsin-Madison | International Digital Economy Academy | University of California, Davis",
"Code": "https://github.com/rain305f/AdaShield"
},
{
"Date": "2024.03.14",
"Notes": "Defense",
"Title": "Eyes Closed, Safety On: Protecting Multimodal LLMs via Image-to-Text Transformation",
"Link": "https://arxiv.org/abs/2403.09572",
"Author": "Yunhao Gou, Kai Chen, Zhili Liu, Lanqing Hong, Hang Xu, Zhenguo Li, Dit-Yan Yeung, James T. Kwok, Yu Zhang",
"Affiliation": "Southern University of Science and Technology | Hong Kong University of Science and Technology | Huawei Noah's Ark Lab | Peng Cheng Laboratory",
"Code": "https://github.com/gyhdog99/ECSO"
},
{
"Date": "2024.03.14",
"Notes": "Defense",
"Title": "The First to Know: How Token Distributions Reveal Hidden Knowledge in Large Vision-Language Models?",
"Link": "https://arxiv.org/abs/2403.09037",
"Author": "Qinyu Zhao, Ming Xu, Kartik Gupta, Akshay Asthana, Liang Zheng, Stephen Gould",
"Affiliation": "The Australian National University | Seeing Machines Ltd",
"Code": "https://github.com/Qinyu-Allen-Zhao/LVLM-LP"
},
{
"Date": "2024.03.14",
"Notes": "Benchmark",
"Title": "AVIBench: Towards Evaluating the Robustness of Large Vision-Language Model on Adversarial Visual-Instructions",
"Link": "https://arxiv.org/abs/2403.09346",
"Author": "Hao Zhang, Wenqi Shao, Hong Liu, Yongqiang Ma, Ping Luo, Yu Qiao, Kaipeng Zhang",
"Affiliation": "Xi'an Jiaotong University | Shanghai Artificial Intelligence Laboratory | Osaka University",
"Code": ""
},
{
"Date": "2024.03.14",
"Notes": "Attack",
"Title": "Images are Achilles' Heel of Alignment: Exploiting Visual Vulnerabilities for Jailbreaking Multimodal Large Language Models",
"Link": "https://arxiv.org/abs/2403.09792",
"Author": "Yifan Li, Hangyu Guo, Kun Zhou, Wayne Xin Zhao, Ji-Rong Wen",
"Affiliation": "Renmin University | Beijing Key Laboratory of Big Data Management and Analysis Methods",
"Code": ""
},
{
"Date": "2024.03.05",
"Notes": "Attack",
"Title": "ImgTrojan: Jailbreaking Vision-Language Models with ONE Image",
"Link": "https://arxiv.org/abs/2403.02910",
"Author": "Xijia Tao, Shuai Zhong, Lei Li, Qi Liu, Lingpeng Kong",
"Affiliation": "The University of Hong Kong",
"Code": "https://github.com/xijia-tao/ImgTrojan"
},
{
"Date": "2024.02.20",
"Notes": "Attack(Agent)",
"Title": "The Wolf Within: Covert Injection of Malice into MLLM Societies via an MLLM Operative",
"Link": "https://arxiv.org/abs/2402.14859",
"Author": "Zhen Tan, Chengshuai Zhao, Raha Moraffah, Yifan Li, Yu Kong, Tianlong Chen, Huan Liu",
"Affiliation": "Arizona State University | Michigan State University | MIT",
"Code": "https://github.com/ChengshuaiZhao0/The-Wolf-Within"
},
{
"Date": "2024.02.15",
"Notes": "Attack",
"Title": "Exploiting Alpha Transparency In Language And Vision-Based AI Systems",
"Link": "https://arxiv.org/abs/2402.09671",
"Author": "David Noever, Forrest McKee",
"Affiliation": "PeopleTec",
"Code": ""
},
{
"Date": "2024.02.13",
"Notes": "Attack",
"Title": "Test-Time Backdoor Attacks on Multimodal Large Language Models",
"Link": "https://arxiv.org/abs/2402.08577",
"Author": "Dong Lu, Tianyu Pang, Chao Du, Qian Liu, Xianjun Yang, Min Lin",
"Affiliation": "Southern University of Science and Technology | Sea AI Lab | University of California",
"Code": "https://github.com/sail-sg/AnyDoor"
},
{
"Date": "2024.02.13",
"Notes": "Attack(Agent)",
"Title": "Agent Smith: A Single Image Can Jailbreak One Million Multimodal LLM Agents Exponentially Fast",
"Link": "https://arxiv.org/abs/2402.08567",
"Author": "Xiangming Gu, Xiaosen Zheng, Tianyu Pang, Chao Du, Qian Liu, Ye Wang, Jing Jiang, Min Lin",
"Affiliation": "Sea AI Lab | National University of Singapore | Singapore Management University",
"Code": "https://github.com/sail-sg/Agent-Smith"
},
{
"Date": "2024.02.12",
"Notes": "Insights",
"Title": "Prismatic VLMs: Investigating the Design Space of Visually-Conditioned Language Models",
"Link": "https://arxiv.org/abs/2402.07865",
"Author": "Siddharth Karamcheti, Suraj Nair, Ashwin Balakrishna, Percy Liang, Thomas Kollar, Dorsa Sadigh",
"Affiliation": "Stanford | Toyota Research Institute",
"Code": "https://github.com/TRI-ML/prismatic-vlms"
},
{
"Date": "2024.02.06",
"Notes": "Benchmark",
"Title": "HarmBench: A Standardized Evaluation Framework for Automated Red Teaming and Robust Refusal",
"Link": "https://arxiv.org/abs/2402.04249",
"Author": "Mantas Mazeika, Long Phan, Xuwang Yin, Andy Zou, Zifan Wang, Norman Mu, Elham Sakhaee, Nathaniel Li, Steven Basart, Bo Li, David Forsyth, Dan Hendrycks",
"Affiliation": "University of Illinois Urbana-Champaign | Center for AI Safety | Carnegie Mellon University | UC Berkeley | Microsoft",
"Code": "https://github.com/centerforaisafety/HarmBench"
},
{
"Date": "2024.02.05",
"Notes": "Attack",
"Title": "Shadowcast: Stealthy Data Poisoning Attacks Against Vision-Language Models",
"Link": "https://arxiv.org/abs/2402.06659",
"Author": "Yuancheng Xu, Jiarui Yao, Manli Shu, Yanchao Sun, Zichu Wu, Ning Yu, Tom Goldstein, Furong Huang",
"Affiliation": "University of Maryland, College Park | JPMorgan AI Research | University of Waterloo | Salesforce Research",
"Code": "https://github.com/umd-huang-lab/VLM-Poisoning"
},
{
"Date": "2024.02.05",
"Notes": "Attack",
"Title": "GUARD: Role-playing to Generate Natural-language Jailbreakings to Test Guideline Adherence of Large Language Models",
"Link": "https://arxiv.org/abs/2402.03299",
"Author": "Haibo Jin, Ruoxi Chen, Andy Zhou, Jinyin Chen, Yang Zhang, Haohan Wang",
"Affiliation": "University of Illinois at Urbana-Champaign | Zhejiang University of Technology | Lapis Labs",
"Code": ""
},
{
"Date": "2024.02.04",
"Notes": "Attack",
"Title": "Jailbreaking Attack against Multimodal Large Language Model",
"Link": "https://arxiv.org/abs/2402.02309",
"Author": "Zhenxing Niu, Haodong Ren, Xinbo Gao, Gang Hua, Rong Jin",
"Affiliation": "Xidian University | Wormpex AI Research | Meta",
"Code": ""
},
{
"Date": "2024.02.03",
"Notes": "Defense, Benchmark",
"Title": "Safety Fine-Tuning at (Almost) No Cost: A Baseline for Vision Large Language Models",
"Link": "https://arxiv.org/abs/2402.02207",
"Author": "Yongshuo Zong, Ondrej Bohdal, Tingyang Yu, Yongxin Yang, Timothy Hospedales",
"Affiliation": "University of Edinburgh | EPFL",
"Code": "https://github.com/ys-zong/VLGuard"
},
{
"Date": "2024.02.01",
"Notes": "Survey",
"Title": "Safety of Multimodal Large Language Models on Images and Text",
"Link": "https://arxiv.org/abs/2402.00357",
"Author": "Xin Liu, Yichen Zhu, Yunshi Lan, Chao Yang, Yu Qiao",
"Affiliation": "East China Normal University | Midea Group | Shanghai AI Laboratory",
"Code": "https://github.com/isXinLiu/Awesome-MLLM-Safety"
},
{
"Date": "2024.01.23",
"Notes": "Benchmark",
"Title": "Red Teaming Visual Language Models",
"Link": "https://arxiv.org/abs/2401.12915",
"Author": "Mukai Li, Lei Li, Yuwei Yin, Masood Ahmed, Zhenguang Liu, Qi Liu",
"Affiliation": "The University of Hong Kong | Zhejiang University",
"Code": ""
},
{
"Date": "2024.01.20",
"Notes": "Defense, Benchmark",
"Title": "InferAligner: Inference-Time Alignment for Harmlessness through Cross-Model Guidance",
"Link": "https://arxiv.org/abs/2401.11206",
"Author": "Pengyu Wang, Dong Zhang, Linyang Li, Chenkun Tan, Xinghao Wang, Ke Ren, Botian Jiang, Xipeng Qiu",
"Affiliation": "Fudan University",
"Code": "https://github.com/Jihuai-wpy/InferAligner"
},
{
"Date": "2024.01.16",
"Notes": "Attack",
"Title": "An Image Is Worth 1000 Lies: Transferability of Adversarial Images across Prompts on Vision-Language Models",
"Link": "https://arxiv.org/abs/2403.09766",
"Author": "Haochen Luo, Jindong Gu, Fengyuan Liu, Philip Torr",
"Affiliation": "University of Oxford",
"Code": "https://github.com/Haochen-Luo/CroPA"
},
{
"Date": "2024.01.05",
"Notes": "Defense",
"Title": "MLLM-Protector: Ensuring MLLM's Safety without Hurting Performance",
"Link": "https://arxiv.org/abs/2401.02906",
"Author": "Renjie Pi, Tianyang Han, Yueqi Xie, Rui Pan, Qing Lian, Hanze Dong, Jipeng Zhang, Tong Zhang",
"Affiliation": "The Hong Kong University of Science and Technology | University of Illinois at Urbana-Champaign | The Hong Kong Polytechnic University",
"Code": "https://github.com/pipilurj/MLLM-protector"
},
{
"Date": "2024.01.03",
"Notes": "Benchmark",
"Title": "GOAT-Bench: Safety Insights to Large Multimodal Models through Meme-Based Social Abuse",
"Link": "https://arxiv.org/abs/2401.01523",
"Author": "Hongzhan Lin, Ziyang Luo, Bo Wang, Ruichao Yang, Jing Ma",
"Affiliation": "Hong Kong Baptist University",
"Code": ""
},
{
"Date": "2023.12.13",
"Notes": "Benchmark",
"Title": "ToViLaG: Your Visual-Language Generative Model is Also An Evildoer",
"Link": "https://arxiv.org/abs/2312.11523v1",
"Author": "Xinpeng Wang, Xiaoyuan Yi, Han Jiang, Shanlin Zhou, Zhihua Wei, Xing Xie",
"Affiliation": "Tongji University | Microsoft Research Asia",
"Code": ""
},
{
"Date": "2023.11.29",
"Notes": "Attack, Benchmark",
"Title": "MM-SafetyBench: A Benchmark for Safety Evaluation of Multimodal Large Language Models",
"Link": "https://arxiv.org/abs/2311.17600",
"Author": "Xin Liu, Yichen Zhu, Jindong Gu, Yunshi Lan, Chao Yang, Yu Qiao",
"Affiliation": "East China Normal University | Midea Group | Shanghai AI Laboratory",
"Code": "https://github.com/isXinLiu/MM-SafetyBench"
},
{
"Date": "2023.11.27",
"Notes": "Attack",
"Title": "How Many Unicorns Are in This Image? A Safety Evaluation Benchmark for Vision LLMs",
"Link": "https://arxiv.org/abs/2311.16101",
"Author": "Haoqin Tu, Chenhang Cui, Zijun Wang, Yiyang Zhou, Bingchen Zhao, Junlin Han, Wangchunshu Zhou, Huaxiu Yao, Cihang Xie",
"Affiliation": "UC Santa Cruz | UNC-Chapel Hill | University of Edinburgh | University of Oxford | AIWaves Inc",
"Code": "https://github.com/UCSC-VLAA/vllm-safety-benchmark"
},
{
"Date": "2023.11.24",
"Notes": "Benchmark",
"Title": "Large Language Models as Automated Aligners for benchmarking Vision-Language Models",
"Link": "https://arxiv.org/abs/2311.14580",
"Author": "Yuanfeng Ji, Chongjian Ge, Weikai Kong, Enze Xie, Zhengying Liu, Zhengguo Li, Ping Luo",
"Affiliation": "The University of Hong Kong | Huawei Noah's Ark Lab",
"Code": ""
},
{
"Date": "2023.11.16",
"Notes": "Defense, Benchmark",
"Title": "DRESS: Instructing Large Vision-Language Models to Align and Interact with Humans via Natural Language Feedback",
"Link": "https://arxiv.org/abs/2311.10081",
"Author": "Yangyi Chen, Karan Sikka, Michael Cogswell, Heng Ji, Ajay Divakaran",
"Affiliation": "SRI International | University of Illinois Urbana-Champaign",
"Code": ""
},
{
"Date": "2023.11.15",
"Notes": "Attack, Defense",
"Title": "Jailbreaking GPT-4V via Self-Adversarial Attacks with System Prompts",
"Link": "https://arxiv.org/abs/2311.09127",
"Author": "Yuanwei Wu, Xiang Li, Yixin Liu, Pan Zhou, Lichao Sun",
"Affiliation": "Huazhong University of Science and Technology | Lehigh University",
"Code": ""
},
{
"Date": "2023.11.09",
"Notes": "Attack, Benchmark",
"Title": "FigStep: Jailbreaking Large Vision-language Models via Typographic Visual Prompts",
"Link": "https://arxiv.org/abs/2311.05608",
"Author": "Yichen Gong, Delong Ran, Jinyuan Liu, Conglei Wang, Tianshuo Cong, Anyu Wang, Sisi Duan, Xiaoyun Wang",
"Affiliation": "Tsinghua University | Shandong University | Carnegie Mellon University",
"Code": "https://github.com/ThuCCSLab/FigStep"
},
{
"Date": "2023.10.05",
"Notes": "A finding",
"Title": "Fine-tuning Aligned Language Models Compromises Safety, Even When Users Do Not Intend To!",
"Link": "https://arxiv.org/abs/2310.03693",
"Author": "Xiangyu Qi, Yi Zeng, Tinghao Xie, Pin-Yu Chen, Ruoxi Jia, Prateek Mittal, Peter Henderson",
"Affiliation": "Princeton University | Virginia Tech | IBM Research | Stanford University",
"Code": "https://github.com/LLM-Tuning-Safety/LLMs-Finetuning-Safety"
},
{
"Date": "2023.10.04",
"Notes": "Attack",
"Title": "Misusing Tools in Large Language Models With Visual Adversarial Examples",
"Link": "https://arxiv.org/abs/2310.03185",
"Author": "Xiaohan Fu, Zihan Wang, Shuheng Li, Rajesh K. Gupta, Niloofar Mireshghallah, Taylor Berg-Kirkpatrick, Earlence Fernandes",
"Affiliation": "University of California San Diego | University of Washington",
"Code": ""
},
{
"Date": "2023.10.03",
"Notes": "Attack, Defense, Benchmark",
"Title": "Can Language Models be Instructed to Protect Personal Information?",
"Link": "https://arxiv.org/abs/2310.02224",
"Author": "Yang Chen, Ethan Mendes, Sauvik Das, Wei Xu, Alan Ritter",
"Affiliation": "Georgia Institute of Technology | Carnegie Mellon University",
"Code": "https://github.com/ethanm88/llm-access-control"
},
{
"Date": "2023.09.21",
"Notes": "Attack",
"Title": "How Robust is Google's Bard to Adversarial Image Attacks?",
"Link": "https://arxiv.org/abs/2309.11751",
"Author": "Yinpeng Dong, Huanran Chen, Jiawei Chen, Zhengwei Fang, Xiao Yang, Yichi Zhang, Yu Tian, Hang Su, Jun Zhu",
"Affiliation": "Tsinghua University | RealAI",
"Code": "https://github.com/thu-ml/Attack-Bard"
},
{
"Date": "2023.09.01",
"Notes": "Attack",
"Title": "Image Hijacks: Adversarial Images can Control Generative Models at Runtime",
"Link": "https://arxiv.org/abs/2309.00236",
"Author": "Luke Bailey, Euan Ong, Stuart Russell, Scott Emmons",
"Affiliation": "UC Berkeley | Harvard University | University of Cambridge",
"Code": "https://github.com/euanong/image-hijacks"
},
{
"Date": "2023.08.21",
"Notes": "Attack",
"Title": "On the Adversarial Robustness of Multi-Modal Foundation Models",
"Link": "https://arxiv.org/abs/2308.10741",
"Author": "Christian Schlarmann, Matthias Hein",
"Affiliation": "University of Tubingen",
"Code": ""
},
{
"Date": "2023.07.26",
"Notes": "Attack",
"Title": "Jailbreak in pieces: Compositional Adversarial Attacks on Multi-Modal Language Models",
"Link": "https://arxiv.org/abs/2307.14539",
"Author": "Erfan Shayegani, Yue Dong, Nael Abu-Ghazaleh",
"Affiliation": "University of California",
"Code": ""
},
{
"Date": "2023.07.19",
"Notes": "Attack",
"Title": "Abusing Images and Sounds for Indirect Instruction Injection in Multi-Modal LLMs",
"Link": "https://arxiv.org/abs/2307.10490",
"Author": "Eugene Bagdasaryan, Tsung-Yin Hsieh, Ben Nassi, Vitaly Shmatikov",
"Affiliation": "Cornell Tech",
"Code": ""
},
{
"Date": "2023.06.26",
"Notes": "Attack",
"Title": "Are aligned neural networks adversarially aligned?",
"Link": "https://arxiv.org/abs/2306.15447",
"Author": "Nicholas Carlini, Milad Nasr, Christopher A. Choquette-Choo, Matthew Jagielski, Irena Gao, Anas Awadalla, Pang Wei Koh, Daphne Ippolito, Katherine Lee, Florian Tramer, Ludwig Schmidt",
"Affiliation": "Google DeepMind | Stanford | University of Washington | ETH Zurich",
"Code": ""
},
{
"Date": "2023.06.22",
"Notes": "Attack",
"Title": "Visual Adversarial Examples Jailbreak Aligned Large Language Models",
"Link": "https://arxiv.org/abs/2306.13213",
"Author": "Xiangyu Qi, Kaixuan Huang, Ashwinee Panda, Peter Henderson, Mengdi Wang, Prateek Mittal",
"Affiliation": "Princeton University | Stanford University",
"Code": "https://github.com/Unispac/Visual-Adversarial-Examples-Jailbreak-Large-Language-Models"
}
]
}