diff --git a/README.md b/README.md index c4872eb12..07b537165 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ KanTV("Kan", aka Chinese PinYin "Kan" or Chinese HanZi "看" or English "watch/l - Record online TV to automatically generate videos (useful for short video creators to generate short video materials but pls respect IPR of original content creator/provider); record online TV's video / audio content for gather video / audio data which might be required of/useful for AI R&D activity -- ASR(Automatic Speech Recognition, a sub-filed of AI) research by the great whisper.cpp +- ASR(Automatic Speech Recognition, a subfield of AI) research by the great whisper.cpp -- LLM(Large Language Model, a sub-filed of AI) research by the great llama.cpp +- LLM(Large Language Model, a subfield of AI) research by the great llama.cpp - Real-time English subtitle for English online-TV(aka OTT TV) by the great & excellent & amazing whisper.cpp (PoC finished on Xiaomi 14. Xiaomi 14 or other powerful Android mobile phone is HIGHLY required/recommended for real-time subtitle feature otherwise unexpected behavior would happen) @@ -27,7 +27,7 @@ Some goals of this project are: - Well-maintained "workbench" for LLM(Large Language Model) researchers who was interested in practise state-of-the-art AI tech(like [llama.cpp](https://github.com/ggerganov/llama.cpp)) in real scenario on mobile device(Android) -- Android turn-key project for AI experts(whom mightbe not familiar with regular Android software development) focus on AI research activity, part of AI R&D activity(algorithm improvement, model training, model generation, algorithm validation, model validation, performance benchmark......) 
could be done by Android Studio IDE + a powerful Android phone very easily +- Android turn-key project for AI experts/researchers(who might not be familiar with regular Android software development) focus on device-side AI R&D activity, part of AI R&D activity(algorithm improvement, model training, model generation, algorithm validation, model validation, performance benchmark......) could be done by Android Studio IDE + a powerful Android phone very easily ### How to build project @@ -150,24 +150,38 @@ autocmd InsertEnter * match ForbiddenWhitespace /\t\|\s\+\%#\@build/envsetup.sh accordingly before launch build + - download android-ndk-r26c to prebuilts/toolchain, skip this step if android-ndk-r26c already exists -pay attention here and modify it accordingly if build-target is kantv-android and running Android device is NOT Xiaomi 14 +``` -(TIP: a VERY powerful Linux PC / Linux workstation is HIGHLY recommended for this step) +./build/prebuild-download.sh ``` + + - modify build/envsetup.sh accordingly before launch build + + - modify whispercpp/CMakeLists.txt accordingly if build-target is kantv-android and running Android device is NOT Xiaomi 14 + + +#### Build native codes + +``` + . 
build/envsetup.sh -(download android-ndk-r26c to prebuilts/toolchain, skip this step if android-ndk-r26c is already exist) -./build/prebuild-download.sh ``` ![Screenshot from 2024-03-21 21-41-41](https://github.com/zhouwg/kantv/assets/6889919/3e13946f-596b-44be-9716-5793ce0c7263) @@ -184,7 +198,7 @@ pay attention i TextView _txtLLMInfo; TextView _txtGGMLInfo; TextView _txtGGMLStatus; + EditText _txtUserInput; + Button _btnInference; - Button _btnBenchmark; - - - private int nThreadCounts = 1; + private int nThreadCounts = 8; private int benchmarkIndex = 0; - private String strModeName = "tiny"; private long beginTime = 0; private long endTime = 0; private long duration = 0; private String strBenchmarkInfo; + private String strUserInput = "how many days in this month?"; private AtomicBoolean isBenchmarking = new AtomicBoolean(false); private ProgressDialog mProgressDialog; + // https://huggingface.co/TheBloke/Llama-2-7B-GGUF + // https://huggingface.co/TheBloke/Llama-2-13B-GGUF + // https://huggingface.co/TheBloke/Llama-2-70B-GGUF + // https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF - // https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf + // https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF + // https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF + + //private String ggmlModelFileName = "llama-2-7b.Q4_K_M.gguf"; //4.08 GB private String ggmlModelFileName = "llama-2-7b-chat.Q4_K_M.gguf"; //4.08 GB private Context mContext; private Activity mActivity; private Settings mSettings; + private KANTVMgr mKANTVMgr = null; + private LLMResearchFragment.MyEventListener mEventListener = new LLMResearchFragment.MyEventListener(); public static LLMResearchFragment newInstance() { return new LLMResearchFragment(); @@ -138,24 +153,137 @@ public void initView() { _txtGGMLInfo = (TextView) mActivity.findViewById(R.id.ggmlInfoLLM); _txtGGMLStatus = (TextView) mActivity.findViewById(R.id.ggmlStatusLLM); + //TODO: change to voice 
input, and then use whisper.cpp to convert it into text + _txtUserInput = (EditText) mActivity.findViewById(R.id.txtUserInput); + + _btnInference = (Button) mActivity.findViewById(R.id.btnInference); + _txtLLMInfo.setCompoundDrawablesWithIntrinsicBounds(null, null, null, null); + _txtLLMInfo.setMovementMethod(ScrollingMovementMethod.getInstance()); displayFileStatus(CDEUtils.getDataPath() + ggmlModelFileName); - CDELog.j(TAG, "load LLM model"); + try { + CDELibraryLoader.load("whispercpp"); + CDELog.j(TAG, "cpu core counts:" + whispercpp.get_cpu_core_counts()); + } catch (Exception e) { + CDELog.j(TAG, "failed to initialize GGML jni"); + return; + } + + try { + initKANTVMgr(); + } catch (Exception e) { + CDELog.j(TAG, "failed to initialize asr subsystem"); + return; + } + + CDELog.j(TAG, "load ggml's LLM model"); + String systemInfo = whispercpp.llm_get_systeminfo(); String phoneInfo = "Device info:" + "\n" + "Brand:" + Build.BRAND + "\n" + "Hardware:" + Build.HARDWARE + "\n" + "OS:" + "Android " + android.os.Build.VERSION.RELEASE + "\n" - + "Arch:" + Build.CPU_ABI ; + + "Arch:" + Build.CPU_ABI + "(" + systemInfo + ")"; _txtGGMLInfo.setText(""); _txtGGMLInfo.append(phoneInfo + "\n"); _txtGGMLInfo.append("Powered by llama.cpp(https://github.com/ggerganov/llama.cpp)\n"); + _btnInference.setOnClickListener(v -> { + String strPrompt = _txtUserInput.getText().toString(); + if (strPrompt.isEmpty()) { + //CDEUtils.showMsgBox(mActivity, "pls check your input"); + //return; + //just for test + strPrompt = strUserInput; + } + strPrompt = strPrompt.trim(); + strUserInput = strPrompt; + CDELog.j(TAG, "User input: \n " + strUserInput); + + CDELog.j(TAG, "strModeName:" + ggmlModelFileName); + + String selectModeFileName = ggmlModelFileName; + String selectModelFilePath = CDEUtils.getDataPath() + selectModeFileName; + CDELog.j(TAG, "selectModelFilePath:" + selectModelFilePath); + File selectModeFile = new File(selectModelFilePath); + displayFileStatus(selectModelFilePath); + if 
(!selectModeFile.exists()) { + CDELog.j(TAG, "model file not exist:" + selectModeFile.getAbsolutePath()); + } + + if (!selectModeFile.exists()) { + CDEUtils.showMsgBox(mActivity, "pls check whether GGML's model file exist in /sdcard/kantv/"); + return; + } + ggmlModelFileName = selectModeFileName; + CDELog.j(TAG, "model file:" + CDEUtils.getDataPath() + selectModeFileName); + + isBenchmarking.set(true); + + Toast.makeText(mContext, mContext.getString(R.string.ggml_benchmark_start), Toast.LENGTH_LONG).show(); + + _txtLLMInfo.setText(""); + _btnInference.setEnabled(false); + + WindowManager.LayoutParams attributes = mActivity.getWindow().getAttributes(); + attributes.screenBrightness = 1.0f; + mActivity.getWindow().setAttributes(attributes); + mActivity.getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON); + + launchGGMLBenchmarkThread(); + + }); endTime = System.currentTimeMillis(); CDELog.j(TAG, "initView cost: " + (endTime - beginTime) + " milliseconds"); } + private final void launchGGMLBenchmarkThread() { + Thread workThread = new Thread(new Runnable() { + @RequiresApi(api = Build.VERSION_CODES.O) + @Override + public void run() { + strBenchmarkInfo = ""; + + while (isBenchmarking.get()) { + beginTime = System.currentTimeMillis(); + _txtGGMLStatus.setText("LLAMA inference is progressing..."); + strBenchmarkInfo = whispercpp.llm_bench( + CDEUtils.getDataPath() + ggmlModelFileName, + strUserInput, + benchmarkIndex, + nThreadCounts); + endTime = System.currentTimeMillis(); + duration = (endTime - beginTime); + isBenchmarking.set(false); + + mActivity.runOnUiThread(new Runnable() { + @Override + public void run() { + String benchmarkTip = "LLAMA inference " + "(model: " + ggmlModelFileName + + " ,threads: " + nThreadCounts + + " ) cost " + duration + " milliseconds"; + benchmarkTip += "\n"; + + if (!strBenchmarkInfo.startsWith("unknown")) { + benchmarkTip += strBenchmarkInfo; + } + + CDELog.j(TAG, benchmarkTip); + _txtGGMLStatus.append(benchmarkTip); 
+ + _btnInference.setEnabled(true); + } + }); + } + + + } + }); + workThread.start(); + + } + @Override public void initListener() { @@ -178,7 +306,6 @@ public void onStop() { } - private void displayFileStatus(String modelFilePath) { _txtGGMLStatus.setText(""); File modelFile = new File(modelFilePath); @@ -189,4 +316,85 @@ private void displayFileStatus(String modelFilePath) { _txtGGMLStatus.append("model file not exist: " + modelFile.getAbsolutePath()); } } + + protected class MyEventListener implements KANTVEventListener { + + MyEventListener() { + } + + + @Override + public void onEvent(KANTVEventType eventType, int what, int arg1, int arg2, Object obj) { + String eventString = "got event from native layer: " + eventType.toString() + " (" + what + ":" + arg1 + " ) :" + (String) obj; + String content = (String) obj; + + if (eventType.getValue() == KANTVEvent.KANTV_ERROR) { + CDELog.j(TAG, "ERROR:" + eventString); + _txtLLMInfo.setText("ERROR:" + content); + } + + if (eventType.getValue() == KANTVEvent.KANTV_INFO) { + if ((arg1 == KANTV_INFO_ASR_STOP) + || (arg1 == KANTV_INFO_ASR_FINALIZE) + ) { + return; + } + + //CDELog.j(TAG, "content:" + content); + if (content.startsWith("unknown")) { + + } else { + if (content.startsWith("llama-timings")) { + _txtGGMLStatus.setText(""); + _txtGGMLStatus.append(content); + } else { + _txtLLMInfo.append(content); + } + } + } + } + } + + + private void initKANTVMgr() { + if (mKANTVMgr != null) { + return; + } + + try { + mKANTVMgr = new KANTVMgr(mEventListener); + if (mKANTVMgr != null) { + mKANTVMgr.initASR(); + mKANTVMgr.startASR(); + } + CDELog.j(TAG, "KANTVMgr version:" + mKANTVMgr.getMgrVersion()); + } catch (KANTVException ex) { + String errorMsg = "An exception was thrown because:\n" + " " + ex.getMessage(); + CDELog.j(TAG, "error occurred: " + errorMsg); + CDEUtils.showMsgBox(mActivity, errorMsg); + ex.printStackTrace(); + } + } + + + public void release() { + if (mKANTVMgr == null) { + return; + } + + try { + 
CDELog.j(TAG, "release"); + { + mKANTVMgr.finalizeASR(); + mKANTVMgr.stopASR(); + mKANTVMgr.release(); + mKANTVMgr = null; + } + } catch (Exception ex) { + String errorMsg = "An exception was thrown because:\n" + " " + ex.getMessage(); + CDELog.j(TAG, "error occurred: " + errorMsg); + ex.printStackTrace(); + } + } + } diff --git a/cdeosplayer/kantv/src/main/res/drawable/llamacpp_logo.png b/cdeosplayer/kantv/src/main/res/drawable/llamacpp_logo.png index 1b6946be9..62eb4bead 100644 Binary files a/cdeosplayer/kantv/src/main/res/drawable/llamacpp_logo.png and b/cdeosplayer/kantv/src/main/res/drawable/llamacpp_logo.png differ diff --git a/cdeosplayer/kantv/src/main/res/drawable/textview_border.xml b/cdeosplayer/kantv/src/main/res/drawable/textview_border.xml new file mode 100644 index 000000000..9ba457ede --- /dev/null +++ b/cdeosplayer/kantv/src/main/res/drawable/textview_border.xml @@ -0,0 +1,17 @@ + + + + + + + + + \ No newline at end of file diff --git a/cdeosplayer/kantv/src/main/res/layout/fragment_asr.xml b/cdeosplayer/kantv/src/main/res/layout/fragment_asr.xml index d27501a50..64dfd75ac 100755 --- a/cdeosplayer/kantv/src/main/res/layout/fragment_asr.xml +++ b/cdeosplayer/kantv/src/main/res/layout/fragment_asr.xml @@ -31,7 +31,7 @@ + android:layout_height="210dp" /> + android:layout_height="210dp" /> - - + + +