quantize.py 1.1 KB

12345678910111213141516171819202122232425262728293031
  1. # Quantizes rwkv.cpp model file from FP32 or FP16.
  2. # Available format names are in rwkv_cpp_shared_library.QUANTIZED_FORMAT_NAMES
  3. # Usage: python quantize.py C:\rwkv.cpp-169M-FP32.bin C:\rwkv.cpp-169M-Q5_1.bin Q5_1
  4. import argparse
  5. import rwkv_cpp_shared_library
  6. def parse_args():
  7. format_names = rwkv_cpp_shared_library.QUANTIZED_FORMAT_NAMES
  8. parser = argparse.ArgumentParser(description='Quantize rwkv.cpp model file from FP32 or FP16')
  9. parser.add_argument('src_path', help='Path to FP32/FP16 checkpoint file')
  10. parser.add_argument('dest_path', help='Path to resulting checkpoint file, will be overwritten')
  11. parser.add_argument('format_name', help='Format name, one of ' + ', '.join(format_names), type=str, choices=format_names, default='Q5_1')
  12. return parser.parse_args()
  13. def main() -> None:
  14. args = parse_args()
  15. library = rwkv_cpp_shared_library.load_rwkv_shared_library()
  16. library.rwkv_quantize_model_file(
  17. args.src_path,
  18. args.dest_path,
  19. args.format_name
  20. )
  21. print('Done')
  22. if __name__ == "__main__":
  23. main()
粤ICP备19079148号