Skip to content

Commit

Permalink
Expand BBS from 32-2048 to 1-4096 range
Browse files Browse the repository at this point in the history
  • Loading branch information
Nexesenex committed Jun 23, 2024
1 parent 8b42b87 commit 6d533ca
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions koboldcpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1793,8 +1793,8 @@ def hide_tooltip(event):
(lib_failsafe, "Failsafe Mode (Old CPU)")]
openblas_option, clblast_option, cublas_option, hipblas_option, vulkan_option, default_option, clblast_noavx2_option, vulkan_noavx2_option, noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
# slider data
blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
blasbatchsize_values = ["-1", "1", "2", "4", "8", "16", "32", "64", "128", "256", "512", "1024", "2048", "4096"]
blasbatchsize_text = ["Don't Batch BLAS","1","2","4","8","16","32","64","128","256","512","1024","2048","4096"]
# Labels for the context-size slider, in ascending order. The step between
# neighbouring entries grows with the size: 128 up to 48128, 256 up to 59904,
# 512 up to 65536, 1024 up to 98304, 2048 up to 131072, then 4096 up to 262144.
# Fixed: "225180" was a typo for "225280" (221184 + 4096).
# NOTE(review): 159744 is absent from the 4096-step run (155648 -> 163840);
# left untouched because adding it would change the list length - confirm
# whether the omission is intentional.
contextsize_text = [
    # step 128
    "128", "256", "384", "512", "640", "768", "896", "1024", "1152", "1280", "1408", "1536", "1664", "1792", "1920", "2048",
    "2176", "2304", "2432", "2560", "2688", "2816", "2944", "3072", "3200", "3328", "3456", "3584", "3712", "3840", "3968", "4096",
    "4224", "4352", "4480", "4608", "4736", "4864", "4992", "5120", "5248", "5376", "5504", "5632", "5760", "5888", "6016", "6144",
    "6272", "6400", "6528", "6656", "6784", "6912", "7040", "7168", "7296", "7424", "7552", "7680", "7808", "7936", "8064", "8192",
    "8320", "8448", "8576", "8704", "8832", "8960", "9088", "9216", "9344", "9472", "9600", "9728", "9856", "9984", "10112", "10240",
    "10368", "10496", "10624", "10752", "10880", "11008", "11136", "11264", "11392", "11520", "11648", "11776", "11904", "12032", "12160", "12288",
    "12416", "12544", "12672", "12800", "12928", "13056", "13184", "13312", "13440", "13568", "13696", "13824", "13952", "14080", "14208", "14336",
    "14464", "14592", "14720", "14848", "14976", "15104", "15232", "15360", "15488", "15616", "15744", "15872", "16000", "16128", "16256", "16384",
    "16512", "16640", "16768", "16896", "17024", "17152", "17280", "17408", "17536", "17664", "17792", "17920", "18048", "18176", "18304", "18432",
    "18560", "18688", "18816", "18944", "19072", "19200", "19328", "19456", "19584", "19712", "19840", "19968", "20096", "20224", "20352", "20480",
    "20608", "20736", "20864", "20992", "21120", "21248", "21376", "21504", "21632", "21760", "21888", "22016", "22144", "22272", "22400", "22528",
    "22656", "22784", "22912", "23040", "23168", "23296", "23424", "23552", "23680", "23808", "23936", "24064", "24192", "24320", "24448", "24576",
    "24704", "24832", "24960", "25088", "25216", "25344", "25472", "25600", "25728", "25856", "25984", "26112", "26240", "26368", "26496", "26624",
    "26752", "26880", "27008", "27136", "27264", "27392", "27520", "27648", "27776", "27904", "28032", "28160", "28288", "28416", "28544", "28672",
    "28800", "28928", "29056", "29184", "29312", "29440", "29568", "29696", "29824", "29952", "30080", "30208", "30336", "30464", "30592", "30720",
    "30848", "30976", "31104", "31232", "31360", "31488", "31616", "31744", "31872", "32000", "32128", "32256", "32384", "32512", "32640", "32768",
    "32896", "33024", "33152", "33280", "33408", "33536", "33664", "33792", "33920", "34048", "34176", "34304", "34432", "34560", "34688", "34816",
    "34944", "35072", "35200", "35328", "35456", "35584", "35712", "35840", "35968", "36096", "36224", "36352", "36480", "36608", "36736", "36864",
    "36992", "37120", "37248", "37376", "37504", "37632", "37760", "37888", "38016", "38144", "38272", "38400", "38528", "38656", "38784", "38912",
    "39040", "39168", "39296", "39424", "39552", "39680", "39808", "39936", "40064", "40192", "40320", "40448", "40576", "40704", "40832", "40960",
    "41088", "41216", "41344", "41472", "41600", "41728", "41856", "41984", "42112", "42240", "42368", "42496", "42624", "42752", "42880", "43008",
    "43136", "43264", "43392", "43520", "43648", "43776", "43904", "44032", "44160", "44288", "44416", "44544", "44672", "44800", "44928", "45056",
    "45184", "45312", "45440", "45568", "45696", "45824", "45952", "46080", "46208", "46336", "46464", "46592", "46720", "46848", "46976", "47104",
    "47232", "47360", "47488", "47616", "47744", "47872", "48000", "48128",
    # step 256
    "48384", "48640", "48896", "49152", "49408", "49664", "49920", "50176", "50432", "50688", "50944", "51200", "51456", "51712", "51968", "52224",
    "52480", "52736", "52992", "53248", "53504", "53760", "54016", "54272", "54528", "54784", "55040", "55296", "55552", "55808", "56064", "56320",
    "56576", "56832", "57088", "57344", "57600", "57856", "58112", "58368", "58624", "58880", "59136", "59392", "59648", "59904",
    # step 512
    "60416", "60928", "61440", "61952", "62464", "62976", "63488", "64000", "64512", "65024", "65536",
    # step 1024
    "66560", "67584", "68608", "69632", "70656", "71680", "72704", "73728", "74752", "75776", "76800", "77824", "78848", "79872", "80896", "81920",
    "82944", "83968", "84992", "86016", "87040", "88064", "89088", "90112", "91136", "92160", "93184", "94208", "95232", "96256", "97280", "98304",
    # step 2048
    "100352", "102400", "104448", "106496", "108544", "110592", "112640", "114688", "116736", "118784", "120832", "122880", "124928", "126976", "129024", "131072",
    # step 4096
    "135168", "139264", "143360", "147456", "151552", "155648", "163840", "167936", "172032", "176128", "180224", "184320", "188416", "192512", "196608", "200704",
    "204800", "208896", "212992", "217088", "221184", "225280", "229376", "233472", "237568", "241664", "245760", "249856", "253952", "258048", "262144",
]
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if not (opt in runopts)]
Expand Down Expand Up @@ -2267,7 +2267,7 @@ def changerunmode(a,b,c):
makelabelentry(quick_tab, "BLAS threads :" , blas_threads_var, 14, 50,"How many threads to use during BLAS processing.\nIf left blank, uses same value as regular thread count.")

# blas batch size
makeslider(quick_tab, "BLAS Batch Size - optimum of 512 if not filled :", blasbatchsize_text, blas_size_var, 0, 7, 16, set=5,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
makeslider(quick_tab, "BLAS Batch Size - optimum of 256 if not filled :", blasbatchsize_text, blas_size_var, 0, 13, 16, set=9,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")

# load model
makefileentry(quick_tab, "Model:", "Select GGML Model File", model_var, 40, 170, onchoosefile=on_picked_model_file,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.")
Expand Down Expand Up @@ -2319,7 +2319,7 @@ def changerunmode(a,b,c):
# blas thread specifier
makelabelentry(hardware_tab, "BLAS threads:" , blas_threads_var, 14, 50,"How many threads to use during BLAS processing.\nIf left blank, uses same value as regular thread count.")
# blas batch size
makeslider(hardware_tab, "BLAS Batch Size:", blasbatchsize_text, blas_size_var, 0, 7, 16, set=5,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
makeslider(hardware_tab, "BLAS Batch Size:", blasbatchsize_text, blas_size_var, 0, 13, 16, set=9,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
# force version
makelabelentry(hardware_tab, "Force Version:" , version_var, 100, 50,"If the autodetected version is wrong, you can change it here.\nLeave as 0 for default.")
ctk.CTkButton(hardware_tab , text = "Run Benchmark", command = guibench ).grid(row=110,column=0, stick="se", padx= 0, pady=2)
Expand Down Expand Up @@ -3756,7 +3756,7 @@ def range_checker(arg: str):
#more advanced params
advparser = parser.add_argument_group('Advanced Commands')
advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For NTK Rope, a rule of thumb is to double the base frequency to go 50% beyond the base context, and to triple the base frequency to double the context. Beyond, the NTK calculations are more complex and you might need to use frequency scale as well. To use only linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512, the best speed/size compromize, 256 and 128 being good if you're short of VRAM). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
# --blasbatchsize: the default quoted in the help text must match default= below (256).
advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 256. 512 is a good speed/size compromise, 256 and 128 being good if you're short of VRAM). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,1,2,4,8,16,32,64,128,256,512,1024,2048,4096], default=256)
advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
advparser.add_argument("--lora", help="LLAMA models only, applies a lora file on top of model. Experimental.", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
advparser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context without reprocessing everything once the max context is reached. If you disable it (or need to use KV cache quantized), you can eventually use --smartcontext instead.", action='store_true')
Expand Down

0 comments on commit 6d533ca

Please sign in to comment.