diff --git a/apps/protein_folding/helixfold3/README.md b/apps/protein_folding/helixfold3/README.md
index 12ed7041..0cb2b7c4 100644
--- a/apps/protein_folding/helixfold3/README.md
+++ b/apps/protein_folding/helixfold3/README.md
@@ -63,7 +63,15 @@ Note: If you have a different version of python3 and cuda, please refer to [here
 #### Install Maxit
 The conversion between `.cif` and `.pdb` relies on [Maxit](https://sw-tools.rcsb.org/apps/MAXIT/index.html).
 Download Maxit source code from https://sw-tools.rcsb.org/apps/MAXIT/maxit-v11.100-prod-src.tar.gz. Untar and follow
-its `README` to complete installation.
+its `README` to complete installation. If compilation fails because your GCC version is not recognized (9.4.0, for example), edit `etc/platform.sh` and rerun the compilation. See below:
+
+```bash
+# Check if it is a Linux platform
+  Linux)
+# Check if it is GCC version 4.x
+    gcc_ver=`gcc --version | grep -e " 4\."` # edit `4\.` to `9\.`
+    if [[ -z $gcc_ver ]]
+```
 
 ### Usage
 
@@ -96,10 +104,11 @@ The script `scripts/download_all_data.sh` can be used to download and set up all
 
 There are some demo input under `./data/` for your test and reference. Data input is in the form of JSON containing several entities such as `protein`, `ligand`, `nucleic acids`, and `iron`. Proteins and nucleic acids inputs are their sequence.
-HelixFold3 supports input ligand as SMILES or CCD id, please refer to `/data/demo_6zcy_smiles.json` and `demo_output/demo_6zcy_smiles/`
-for more details about SMILES input. More flexible input will come in soon.
+HelixFold3 supports ligand input as SMILES, a CCD id, or a small-molecule file; please refer to `/data/demo_6zcy_smiles.json` and `data/demo_p450_heme_sdf.json`
+for more details about SMILES and file input. Flexible input from small-molecule files is now supported; run `obabel -L formats | grep -v 'Write-only'` to list the readable formats.
 
 A example of input data is as follows:
+
 ```json
 {
   "entities": [
@@ -116,11 +125,59 @@ A example of input data is as follows:
   ]
 }
 ```
+An example of **covalently modified** input:
+
+```json
+{
+  "entities": [
+    {
+      "type": "protein",
+      "sequence": "MDALYKSTVAKFNEVIQLDCSTEFFSIALSSIAGILLLLLLFRSKRHSSLKLPPGKLGIPFIGESFIFLRALRSNSLEQFFDERVKKFGLVFKTSLIGHPTVVLCGPAGNRLILSNEEKLVQMSWPAQFMKLMGENSVATRRGEDHIVMRSALAGFFGPGALQSYIGKMNTEIQSHINEKWKGKDEVNVLPLVRELVFNISAILFFNIYDKQEQDRLHKLLETILVGSFALPIDLPGFGFHRALQGRAKLNKIMLSLIKKRKEDLQSGSATATQDLLSVLLTFRDDKGTPLTNDEILDNFSSLLHASYDTTTSPMALIFKLLSSNPECYQKVVQEQLEILSNKEEGEEITWKDLKAMKYTWQVAQETLRMFPPVFGTFRKAITDIQYDGYTIPKGWKLLWTTYSTHPKDLYFNEPEKFMPSRFDQEGKHVAPYTFLPFGGGQRSCVGWEFSKMEILLFVHHFVKTFSSYTPVDPDEKISGDPLPPLPSKGFSIKLFPRP",
+      "count": 1
+    },
+    {
+      "type": "ligand",
+      "ccd": "HEM",
+      "count": 1
+    },
+    {
+      "type": "ligand",
+      "smiles": "CC1=C2CC[C@@]3(CCCC(=C)[C@H]3C[C@@H](C2(C)C)CC1)C",
+      "count": 1
+    },
+    {
+      "type": "bond",
+      "bond": "A,CYS,445,SG,B,HEM,1,FE,covale,2.3",
+      "_comment": "chain,residue name,residue index,atom name,chain,residue name,residue index,atom name,bond type,distance",
+      "_another_comment": "use a semicolon to separate multiple bonds",
+      "_also_comment": "For CCD input, use the CCD key as the residue name; for SMILES and file input, use `UNK-` plus an index giving the chain order in your input, e.g. `UNK-1` for the first ligand chain (or count #1) and `UNK-2` for the second (or count #2)."
+ } + ] +} +``` +Another example of **disulfide modified** input: + +```json +{ + "entities": [ + { + "type": "protein", + "sequence": "MASVKSSSSSSSSSFISLLLLILLVIVLQSQVIECQPQQSCTASLTGLNVCAPFLVPGSPTASTECCNAVQSINHDCMCNTMRIAAQIPAQCNLPPLSCSAN", + "count": 1 + }, + { + "type": "bond", + "bond": "A,CYS,41,SG,A,CYS,77,SG,disulf,2.2;A,CYS,51,SG,A,CYS,66,SG,disulf,2.2;A,CYS,67,SG,A,CYS,92,SG,disulf,2.2;A,CYS,79,SG,A,CYS,99,SG,disulf,2.2", + "_case_from": "https://www.uniprot.org/uniprotkb/Q43495/entry#ptm_processing" + } + ] +} +``` #### Running HelixFold for Inference To run inference on a sequence or multiple sequences using HelixFold3's pretrained parameters, run e.g.: * Inference on single GPU (change the settings in script BEFORE you run it) -``` +```shell sh run_infer.sh ``` @@ -184,7 +241,7 @@ The outputs will be in a subfolder of `output_dir`, including the computed MSAs, ranked structures, and evaluation metrics. For a task of inferring twice with diffusion batch size 3, assume your input JSON is named `demo_data.json`, the `output_dir` directory will have the following structure: -``` +```text / └── demo_data/ ├── demo_data-pred-1-1/ @@ -208,9 +265,10 @@ assume your input JSON is named `demo_data.json`, the `output_dir` directory wil └── ... ``` + The contents of each output file are as follows: * `final_features.pkl` – A `pickle` file containing the input feature NumPy arrays - used by the models to predict the structures. + used by the models to predict the structures. If you need to re-run a inference without re-building the MSAs, delete this file. * `msas/` - A directory containing the files describing the various genetic tool hits that were used to construct the input MSA. * `demo_data-pred-X-Y` - Prediction results of `demo_data.json` in X-th inference and Y-thdiffusion batch, diff --git a/apps/protein_folding/helixfold3/data/7s69_glycan.sdf b/apps/protein_folding/helixfold3/data/7s69_glycan.sdf new file mode 100644 index 00000000..8ac6a9e0 --- /dev/null +++ b/apps/protein_folding/helixfold3/data/7s69_glycan.sdf @@ -0,0 +1,155 @@ + + OpenBabel03042416223D + + 72 77 0 0 1 0 0 0 0 0999 V2000 + 29.7340 3.2540 76.7430 C 0 0 0 0 0 2 0 0 0 0 0 0 + 29.8160 4.4760 77.6460 C 0 0 1 0 0 3 0 0 0 0 0 0 + 28.5260 5.2840 77.5530 C 0 0 2 0 0 3 0 0 0 0 0 0 + 28.1780 5.5830 76.1020 C 0 0 1 0 0 3 0 0 0 0 0 0 + 28.2350 4.3240 75.2420 C 0 0 1 0 0 3 0 0 0 0 0 0 + 28.1040 4.6170 73.7650 C 0 0 0 0 0 2 0 0 0 0 0 0 + 31.3020 3.8250 79.4830 C 0 0 0 0 0 0 0 0 0 0 0 0 + 31.3910 3.4410 80.9280 C 0 0 0 0 0 1 0 0 0 0 0 0 + 30.0760 4.0880 79.0210 N 0 0 0 0 0 2 0 0 0 0 0 0 + 28.6870 6.5050 78.2670 O 0 0 0 0 0 1 0 0 0 0 0 0 + 26.8490 6.0910 76.0350 O 0 0 0 0 0 0 0 0 0 0 0 0 + 29.4950 3.6650 75.4130 O 0 0 0 0 0 0 0 0 0 0 0 0 + 29.3670 4.5550 73.1150 O 0 0 0 0 0 1 0 0 0 0 0 0 + 32.2950 3.8940 78.7640 O 0 0 0 0 0 0 0 0 0 0 0 0 + 26.7420 7.4140 75.6950 C 0 0 1 0 0 3 0 0 0 0 0 0 + 25.2700 7.7830 75.6110 C 0 0 1 0 0 3 0 0 0 0 0 0 + 25.1290 9.2300 75.1610 C 0 0 2 0 0 3 0 0 0 0 0 0 + 25.9180 10.1440 76.0880 C 0 0 1 0 0 3 0 0 0 0 0 0 + 27.3630 9.6720 76.2210 C 0 0 1 0 0 3 0 0 0 0 0 0 + 28.1310 10.4360 77.2730 C 0 0 0 0 0 2 0 0 0 0 0 0 + 23.8820 5.8170 75.1400 C 0 0 0 0 0 0 0 0 0 0 0 0 + 23.1980 5.0100 74.0810 C 0 0 0 0 0 1 0 0 0 0 0 0 + 24.5530 6.8930 74.7160 N 0 0 0 0 0 2 0 0 0 0 0 0 + 23.7530 9.5950 75.1670 O 0 0 0 0 0 1 0 0 0 0 0 0 + 25.9170 11.4700 75.5730 O 0 0 0 0 0 0 0 0 0 0 0 0 + 27.4050 8.2900 76.6040 O 0 0 0 0 0 0 0 0 0 0 0 0 + 29.5300 10.4030 77.0280 O 0 0 0 0 0 1 0 0 0 0 0 0 + 23.8300 5.5110 76.3290 O 0 0 0 0 0 0 
0 0 0 0 0 0 + 25.3940 12.4250 76.4090 C 0 0 1 0 0 3 0 0 0 0 0 0 + 25.9490 13.7680 75.9090 C 0 0 2 0 0 3 0 0 0 0 0 0 + 25.1320 14.9560 76.4900 C 0 0 2 0 0 3 0 0 0 0 0 0 + 23.6130 14.6900 76.6390 C 0 0 1 0 0 3 0 0 0 0 0 0 + 23.3700 13.3000 77.2280 C 0 0 1 0 0 3 0 0 0 0 0 0 + 21.9020 12.9360 77.3500 C 0 0 0 0 0 2 0 0 0 0 0 0 + 25.9010 13.8490 74.4810 O 0 0 0 0 0 1 0 0 0 0 0 0 + 25.3420 16.1410 75.7110 O 0 0 0 0 0 0 0 0 0 0 0 0 + 23.0420 15.6520 77.5170 O 0 0 0 0 0 1 0 0 0 0 0 0 + 23.9910 12.3690 76.3570 O 0 0 0 0 0 0 0 0 0 0 0 0 + 21.3660 12.8480 76.0500 O 0 0 0 0 0 0 0 0 0 0 0 0 + 20.8090 11.6500 75.6780 C 0 0 2 0 0 3 0 0 0 0 0 0 + 20.6800 11.6410 74.1740 C 0 0 2 0 0 3 0 0 0 0 0 0 + 19.5510 12.5850 73.8180 C 0 0 2 0 0 3 0 0 0 0 0 0 + 18.2370 12.0940 74.4540 C 0 0 1 0 0 3 0 0 0 0 0 0 + 18.4030 11.9240 75.9810 C 0 0 1 0 0 3 0 0 0 0 0 0 + 17.2710 11.1260 76.6120 C 0 0 0 0 0 2 0 0 0 0 0 0 + 20.2900 10.3510 73.7080 O 0 0 0 0 0 1 0 0 0 0 0 0 + 19.4280 12.7380 72.4110 O 0 0 0 0 0 0 0 0 0 0 0 0 + 17.2120 13.0460 74.2030 O 0 0 0 0 0 1 0 0 0 0 0 0 + 19.6260 11.2000 76.3010 O 0 0 0 0 0 0 0 0 0 0 0 0 + 16.0670 11.4490 75.9360 O 0 0 0 0 0 1 0 0 0 0 0 0 + 20.2190 13.6280 71.7260 C 0 0 2 0 0 3 0 0 0 0 0 0 + 19.6090 14.0000 70.3810 C 0 0 2 0 0 3 0 0 0 0 0 0 + 19.6360 12.7820 69.4880 C 0 0 2 0 0 3 0 0 0 0 0 0 + 21.0860 12.3100 69.3240 C 0 0 1 0 0 3 0 0 0 0 0 0 + 21.7030 12.0240 70.7120 C 0 0 1 0 0 3 0 0 0 0 0 0 + 23.1940 11.7460 70.6620 C 0 0 0 0 0 2 0 0 0 0 0 0 + 20.4080 14.9810 69.7000 O 0 0 0 0 0 1 0 0 0 0 0 0 + 19.0310 13.0500 68.2340 O 0 0 0 0 0 1 0 0 0 0 0 0 + 21.1060 11.1280 68.5380 O 0 0 0 0 0 1 0 0 0 0 0 0 + 21.5380 13.1700 71.5840 O 0 0 0 0 0 0 0 0 0 0 0 0 + 23.8240 12.5210 71.6820 O 0 0 0 0 0 1 0 0 0 0 0 0 + 26.0070 17.3020 76.0200 C 0 0 2 0 0 3 0 0 0 0 0 0 + 27.0750 17.5250 74.9350 C 0 0 2 0 0 3 0 0 0 0 0 0 + 28.3660 16.8320 75.3290 C 0 0 2 0 0 3 0 0 0 0 0 0 + 28.7820 17.2470 76.7510 C 0 0 1 0 0 3 0 0 0 0 0 0 + 27.6930 16.8120 77.7320 C 0 0 1 0 0 3 0 0 0 0 0 0 + 27.9770 17.2020 79.1710 C 0 0 0 0 0 2 0 0 0 0 0 0 + 27.3990 18.9140 74.8010 O 0 0 0 0 0 1 0 0 0 0 0 0 + 29.4060 17.0990 74.3950 O 0 0 0 0 0 1 0 0 0 0 0 0 + 30.0160 16.6410 77.0930 O 0 0 0 0 0 1 0 0 0 0 0 0 + 26.4610 17.4820 77.3520 O 0 0 0 0 0 0 0 0 0 0 0 0 + 27.3660 18.4620 79.4040 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 3 1 0 0 0 0 + 2 9 1 1 0 0 0 + 3 10 1 1 0 0 0 + 3 4 1 0 0 0 0 + 4 5 1 0 0 0 0 + 4 11 1 1 0 0 0 + 5 6 1 6 0 0 0 + 5 12 1 0 0 0 0 + 6 13 1 0 0 0 0 + 7 14 2 0 0 0 0 + 7 8 1 0 0 0 0 + 7 9 1 0 0 0 0 + 15 16 1 0 0 0 0 + 15 11 1 1 0 0 0 + 15 26 1 0 0 0 0 + 16 23 1 6 0 0 0 + 16 17 1 0 0 0 0 + 17 18 1 0 0 0 0 + 17 24 1 1 0 0 0 + 18 25 1 6 0 0 0 + 18 19 1 0 0 0 0 + 19 20 1 1 0 0 0 + 19 26 1 0 0 0 0 + 20 27 1 0 0 0 0 + 21 22 1 0 0 0 0 + 21 23 1 0 0 0 0 + 21 28 2 0 0 0 0 + 29 38 1 0 0 0 0 + 29 25 1 6 0 0 0 + 29 30 1 0 0 0 0 + 30 35 1 6 0 0 0 + 30 31 1 0 0 0 0 + 31 32 1 0 0 0 0 + 31 36 1 6 0 0 0 + 32 33 1 0 0 0 0 + 32 37 1 1 0 0 0 + 33 38 1 0 0 0 0 + 33 34 1 6 0 0 0 + 34 39 1 0 0 0 0 + 40 49 1 0 0 0 0 + 40 41 1 0 0 0 0 + 40 39 1 1 0 0 0 + 41 46 1 1 0 0 0 + 41 42 1 0 0 0 0 + 42 43 1 0 0 0 0 + 42 47 1 6 0 0 0 + 43 48 1 1 0 0 0 + 43 44 1 0 0 0 0 + 44 49 1 0 0 0 0 + 44 45 1 6 0 0 0 + 45 50 1 0 0 0 0 + 51 47 1 6 0 0 0 + 51 60 1 0 0 0 0 + 51 52 1 0 0 0 0 + 52 53 1 0 0 0 0 + 52 57 1 6 0 0 0 + 53 54 1 0 0 0 0 + 53 58 1 6 0 0 0 + 54 59 1 6 0 0 0 + 54 55 1 0 0 0 0 + 55 56 1 6 0 0 0 + 55 60 1 0 0 0 0 + 56 61 1 0 0 0 0 + 62 71 1 0 0 0 0 + 62 36 1 1 0 0 0 + 62 63 1 0 0 0 0 + 63 68 1 1 0 0 0 + 63 64 1 0 0 0 0 + 64 69 1 6 0 0 
0 + 64 65 1 0 0 0 0 + 65 70 1 1 0 0 0 + 65 66 1 0 0 0 0 + 66 67 1 1 0 0 0 + 66 71 1 0 0 0 0 + 67 72 1 0 0 0 0 +M END +$$$$ diff --git a/apps/protein_folding/helixfold3/data/demo_3fap_protein_sm.json b/apps/protein_folding/helixfold3/data/demo_3fap_protein_sm.json new file mode 100644 index 00000000..caf27f02 --- /dev/null +++ b/apps/protein_folding/helixfold3/data/demo_3fap_protein_sm.json @@ -0,0 +1,20 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": "GVQVETISPGDGRTFPKRGQTCVVHYTGMLEDGKKFDSSRDRNKPFKFMLGKQEVIRGWEEGVAQMSVGQRAKLTISPDYAYGATGHPGIIPPHATLVFDVELLKLE", + "count": 1 + }, + { + "type": "protein", + "sequence": "VAILWHEMWHEGLEEASRLYFGERNVKGMFEVLEPLHAMMERGPQTLKETSFNQAYGRDLMEAQEWCRKYMKSGNVKDLTQAWDLYYHVFRRIS", + "count": 1 + }, + { + "type": "ligand", + "sdf": "/mnt/data/yinying/tests/helixfold/ligands/ARD_ideal.sdf", + "use_3d": false, + "count": 1 + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/data/demo_4Fe-4S.json b/apps/protein_folding/helixfold3/data/demo_4Fe-4S.json new file mode 100644 index 00000000..4599c4af --- /dev/null +++ b/apps/protein_folding/helixfold3/data/demo_4Fe-4S.json @@ -0,0 +1,45 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": "MKLNVDGLLVYFPYDYIYPEQFSYMRELKRTLDAKGHGVLEMPSGTGKTVSLLALIMAYQRAYPLEVTKLIYCSRTVPEIEKVIEELRKLLNFYEKQEGEKLPFLGLALSSRKNLCIHPEVTPLRFGKDVDGKCHSLTASYVRAQYQHDTSLPHCRFYEEFDAHGREVPLPAGIYNLDDLKALGRRQGWCPYFLARYSILHANVVVYSYHYLLDPKIADLVSKELARKAVVVFDEAHNIDNVCIDSMSVNLTRRTLDRCQGNLETLQKTVLRIKETDEQRLRDEYRRLVEGLREASAARETDAHLANPVLPDEVLQEAVPGSIRTAEHFLGFLRRLLEYVKWRLRVQHVVQESPPAFLSGLAQRVCIQRKPLRFCAERLRSLLHTLEITDLADFSPLTLLANFATLVSTYAKGFTIIIEPFDDRTPTIANPILHFSCMDASLAIKPVFERFQSVIITSGTLSPLDIYPKILDFHPVTMATFTMTLARVCLCPMIIGRGNDQVAISSKFETREDIAVIRNYGNLLLEMSAVVPDGIVAFFTSYQYMESTVASWYEQGILENIQRNKLLFIETQDGAETSVALEKYQEACENGRGAILLSVARGKVSEGIDFVHHYGRAVIMFGVPYVYTQSRILKARLEYLRDQFQIRENDFLTFDAMRHAAQCVGRAIRGKTDYGLMVFADKRFARGDKRGKLPRWIQEHLTDANLNLTVDEGVQVAKYFLRQMAQPFHREDQLGLSLLSLEQLESEETLKRIEQIAQQL", + "count": 1 + }, + { + "type": "ligand", + "ccd": "SF4", + "count": 1, + "_note": "5T5I" + }, + { + "type": "bond", + "bond": "A,CYS,116,SG,B,SF4,1,FE1,metalc,2.2;A,CYS,134,SG,B,SF4,1,FE2,metalc,2.2;A,CYS,155,SG,B,SF4,1,FE3,metalc,2.2;A,CYS,190,SG,B,SF4,1,FE4,metalc,2.2", + "_case_from": "https://www.uniprot.org/uniprotkb/P18074/entry", + "_note":"ALL_CYS-ALL_FE" + }, + { + "type": "bond", + "bond": "B,SF4,1,FE1,B,SF4,1,S2,metalc,2.2;B,SF4,1,FE1,B,SF4,1,S3,metalc,2.2;B,SF4,1,FE1,B,SF4,1,S4,metalc,2.2", + "_case_from": "https://www.uniprot.org/uniprotkb/P18074/entry", + "_note":"FE1-S234" + }, + { + "type": "bond", + "bond": "B,SF4,1,FE2,B,SF4,1,S1,metalc,2.2;B,SF4,1,FE2,B,SF4,1,S3,metalc,2.2;B,SF4,1,FE2,B,SF4,1,S4,metalc,2.2", + "_case_from": "https://www.uniprot.org/uniprotkb/P18074/entry", + "_note":"FE2-S134" + }, + { + "type": "bond", + "bond": "B,SF4,1,FE3,B,SF4,1,S1,metalc,2.2;B,SF4,1,FE3,B,SF4,1,S2,metalc,2.2;B,SF4,1,FE3,B,SF4,1,S4,metalc,2.2", + "_case_from": "https://www.uniprot.org/uniprotkb/P18074/entry", + "_note":"FE3-S124" + }, + { + "type": "bond", + "bond": "B,SF4,1,FE4,B,SF4,1,S1,metalc,2.2;B,SF4,1,FE14,B,SF4,1,S2,metalc,2.2;B,SF4,1,FE4,B,SF4,1,S3,metalc,2.2", + "_case_from": "https://www.uniprot.org/uniprotkb/P18074/entry", + "_note":"FE4-S123" + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/data/demo_7s69_coval.json b/apps/protein_folding/helixfold3/data/demo_7s69_coval.json new file mode 100644 index 00000000..7b2550ff --- /dev/null +++ 
b/apps/protein_folding/helixfold3/data/demo_7s69_coval.json @@ -0,0 +1,20 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": "DRHHHHHHKLGKMKIVEEPNSFGLNNPFLSQTNKLQPRVQPSPVSGPSHLFRLAGKCFNLVESTYKYELCPFHNVTQHEQTFRWNAYSGILGIWQEWDIENNTFSGMWMREGDSCGNKNRQTKVLLVCGKANKLSSVSEPSTCLYSLTFETPLVCHPHSLLVYPTLSEGLQEKWNEAEQALYDELITEQGHGKILKEIFREAGYLKTTKPDGEGKETQDKPKEFDSLEKCNKGYTELTSEIQRLKKMLNEHGISYVTNGTSRSEGQPAEVNTTFARGEDKVHLRGDTGIRDGQ", + "count": 1 + }, + { + "type": "ligand", + "sdf": "/repo/PaddleHelix/apps/protein_folding/helixfold3/data/7s69_glycan.sdf", + "count": 1 + }, + { + "type": "bond", + "bond": "A,ASN,74,ND2,B,UNK-1,1,C16,covale,2.3", + "_comment": "'A,74,ND2:B,1:CW,null' from RF2AA.", + "_also_comment": "For ccd input, use CCD key as residue name; for smiles and file input, use `UNK-` where index is the chain order you input" + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/data/demo_E2-Ub.json b/apps/protein_folding/helixfold3/data/demo_E2-Ub.json new file mode 100644 index 00000000..d9905cbe --- /dev/null +++ b/apps/protein_folding/helixfold3/data/demo_E2-Ub.json @@ -0,0 +1,23 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": "MSRAKRIMKEIQAVKDDPAAHITLEFVSESDIHHLKGTFLGPPGTPYEGGKFVVDIEVPMEYPFKPPKMQFDTKVYHPNISSVTGAICLDILKNAWSPVITLKSALISLQALLQSPEPNDPQDAEVAQHYLRDRESFNKTAALWTRLYASETSNGQKGNVEESDLYGIDHDLIDEFESQGFEKDKIVEVLRRLGVKSLDPNDNNTANRIIEELLK", + "count": 1, + "_case_from": "https://www.uniprot.org/uniprotkb/P21734/entry#sequences", + "_note": "E2" + }, + { + "type": "protein", + "sequence": "MQIFVKTLTGKTITLEVESSDTIDNVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLADYNIQKESTLHLVLRLRGG", + "count": 1, + "_case_from": "https://www.uniprot.org/uniprotkb/P21734/entry#sequences", + "_note": "Ub" + }, + { + "type": "bond", + "bond": "A,LYS,93,NZ,B,GLY,76,C,covale,1.66", + "_note": "Glycyl lysine isopeptide (Lys-Gly) (interchain with G-Cter in ubiquitin)" + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/data/demo_disulf.json b/apps/protein_folding/helixfold3/data/demo_disulf.json new file mode 100644 index 00000000..780b9d7d --- /dev/null +++ b/apps/protein_folding/helixfold3/data/demo_disulf.json @@ -0,0 +1,14 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": "MASVKSSSSSSSSSFISLLLLILLVIVLQSQVIECQPQQSCTASLTGLNVCAPFLVPGSPTASTECCNAVQSINHDCMCNTMRIAAQIPAQCNLPPLSCSAN", + "count": 1 + }, + { + "type": "bond", + "bond": "A,CYS,41,SG,A,CYS,77,SG,disulf,2.2;A,CYS,51,SG,A,CYS,66,SG,disulf,2.2;A,CYS,67,SG,A,CYS,92,SG,disulf,2.2;A,CYS,79,SG,A,CYS,99,SG,disulf,2.2", + "_case_from": "https://www.uniprot.org/uniprotkb/Q43495/entry#ptm_processing" + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/data/demo_disulf_homodimer.json b/apps/protein_folding/helixfold3/data/demo_disulf_homodimer.json new file mode 100644 index 00000000..fd56fcd4 --- /dev/null +++ b/apps/protein_folding/helixfold3/data/demo_disulf_homodimer.json @@ -0,0 +1,33 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": "NSVHPCCDPVICEPREGEHCISGPCCENCYFLNSGTICKRARGDGNQDYCTGITPDCPRNRYNV", + "count": 2 + }, + { + "type": "bond", + "bond": "A,CYS,6,SG,A,CYS,29,SG,disulf,2.3;A,CYS,20,SG,A,CYS,26,SG,disulf,2.3;A,CYS,25,SG,A,CYS,50,SG,disulf,2.3;A,CYS,38,SG,A,CYS,57,SG,disulf,2.3", + "_case_from": "https://www.uniprot.org/uniprotkb/P83658/entry#ptm_processing", + "_note": "Intrachain, A" + }, + { + "type": "bond", + "bond": 
"B,CYS,6,SG,B,CYS,29,SG,disulf,2.3;B,CYS,20,SG,B,CYS,26,SG,disulf,2.3;B,CYS,25,SG,B,CYS,50,SG,disulf,2.3;B,CYS,38,SG,B,CYS,57,SG,disulf,2.3", + "_case_from": "https://www.uniprot.org/uniprotkb/P83658/entry#ptm_processing", + "_note": "Intrachain, B" + }, + { + "type": "bond", + "bond": "A,CYS,7,SG,B,CYS,12,SG,disulf,2.3", + "_case_from": "https://www.uniprot.org/uniprotkb/P83658/entry#ptm_processing", + "_note": "Interchain, AB" + }, + { + "type": "bond", + "bond": "B,CYS,7,SG,A,CYS,12,SG,disulf,2.3", + "_case_from": "https://www.uniprot.org/uniprotkb/P83658/entry#ptm_processing", + "_note": "Interchain, BA" + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/data/demo_p450_heme.json b/apps/protein_folding/helixfold3/data/demo_p450_heme.json new file mode 100644 index 00000000..a414ef58 --- /dev/null +++ b/apps/protein_folding/helixfold3/data/demo_p450_heme.json @@ -0,0 +1,19 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": "MDALYKSTVAKFNEVIQLDCSTEFFSIALSSIAGILLLLLLFRSKRHSSLKLPPGKLGIPFIGESFIFLRALRSNSLEQFFDERVKKFGLVFKTSLIGHPTVVLCGPAGNRLILSNEEKLVQMSWPAQFMKLMGENSVATRRGEDHIVMRSALAGFFGPGALQSYIGKMNTEIQSHINEKWKGKDEVNVLPLVRELVFNISAILFFNIYDKQEQDRLHKLLETILVGSFALPIDLPGFGFHRALQGRAKLNKIMLSLIKKRKEDLQSGSATATQDLLSVLLTFRDDKGTPLTNDEILDNFSSLLHASYDTTTSPMALIFKLLSSNPECYQKVVQEQLEILSNKEEGEEITWKDLKAMKYTWQVAQETLRMFPPVFGTFRKAITDIQYDGYTIPKGWKLLWTTYSTHPKDLYFNEPEKFMPSRFDQEGKHVAPYTFLPFGGGQRSCVGWEFSKMEILLFVHHFVKTFSSYTPVDPDEKISGDPLPPLPSKGFSIKLFPRP", + "count": 1 + }, + { + "type": "ligand", + "ccd": "HEM", + "count": 1 + }, + { + "type": "ligand", + "smiles": "CC1=C2CC[C@@]3(CCCC(=C)[C@H]3C[C@@H](C2(C)C)CC1)C", + "count": 1 + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/data/demo_p450_heme_coval.json b/apps/protein_folding/helixfold3/data/demo_p450_heme_coval.json new file mode 100644 index 00000000..6761a4bc --- /dev/null +++ b/apps/protein_folding/helixfold3/data/demo_p450_heme_coval.json @@ -0,0 +1,25 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": "MDALYKSTVAKFNEVIQLDCSTEFFSIALSSIAGILLLLLLFRSKRHSSLKLPPGKLGIPFIGESFIFLRALRSNSLEQFFDERVKKFGLVFKTSLIGHPTVVLCGPAGNRLILSNEEKLVQMSWPAQFMKLMGENSVATRRGEDHIVMRSALAGFFGPGALQSYIGKMNTEIQSHINEKWKGKDEVNVLPLVRELVFNISAILFFNIYDKQEQDRLHKLLETILVGSFALPIDLPGFGFHRALQGRAKLNKIMLSLIKKRKEDLQSGSATATQDLLSVLLTFRDDKGTPLTNDEILDNFSSLLHASYDTTTSPMALIFKLLSSNPECYQKVVQEQLEILSNKEEGEEITWKDLKAMKYTWQVAQETLRMFPPVFGTFRKAITDIQYDGYTIPKGWKLLWTTYSTHPKDLYFNEPEKFMPSRFDQEGKHVAPYTFLPFGGGQRSCVGWEFSKMEILLFVHHFVKTFSSYTPVDPDEKISGDPLPPLPSKGFSIKLFPRP", + "count": 1 + }, + { + "type": "ligand", + "ccd": "HEM", + "count": 1 + }, + { + "type": "ligand", + "smiles": "CC1=C2CC[C@@]3(CCCC(=C)[C@H]3C[C@@H](C2(C)C)CC1)C", + "count": 1 + }, + { + "type": "bond", + "bond": "A,CYS,445,SG,B,HEM,1,FE,covale,2.3", + "_comment": ",,,,,,,,,", + "_also_comment": "For ccd input, use CCD key as residue name; for smiles and file input, use `UNK-` where index is the chain order you input" + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/data/demo_p450_heme_sdf.json b/apps/protein_folding/helixfold3/data/demo_p450_heme_sdf.json new file mode 100644 index 00000000..72500c6e --- /dev/null +++ b/apps/protein_folding/helixfold3/data/demo_p450_heme_sdf.json @@ -0,0 +1,19 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": 
"MDALYKSTVAKFNEVIQLDCSTEFFSIALSSIAGILLLLLLFRSKRHSSLKLPPGKLGIPFIGESFIFLRALRSNSLEQFFDERVKKFGLVFKTSLIGHPTVVLCGPAGNRLILSNEEKLVQMSWPAQFMKLMGENSVATRRGEDHIVMRSALAGFFGPGALQSYIGKMNTEIQSHINEKWKGKDEVNVLPLVRELVFNISAILFFNIYDKQEQDRLHKLLETILVGSFALPIDLPGFGFHRALQGRAKLNKIMLSLIKKRKEDLQSGSATATQDLLSVLLTFRDDKGTPLTNDEILDNFSSLLHASYDTTTSPMALIFKLLSSNPECYQKVVQEQLEILSNKEEGEEITWKDLKAMKYTWQVAQETLRMFPPVFGTFRKAITDIQYDGYTIPKGWKLLWTTYSTHPKDLYFNEPEKFMPSRFDQEGKHVAPYTFLPFGGGQRSCVGWEFSKMEILLFVHHFVKTFSSYTPVDPDEKISGDPLPPLPSKGFSIKLFPRP", + "count": 1 + }, + { + "type": "ligand", + "ccd": "HEM", + "count": 1 + }, + { + "type": "ligand", + "sdf": "/mnt/data/yinying/tests/helixfold/ligands/60119277-3d.sdf", + "count": 1 + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/data/demo_p450_heme_smiles.json b/apps/protein_folding/helixfold3/data/demo_p450_heme_smiles.json new file mode 100644 index 00000000..cb6a22f5 --- /dev/null +++ b/apps/protein_folding/helixfold3/data/demo_p450_heme_smiles.json @@ -0,0 +1,19 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": "MDALYKSTVAKFNEVIQLDCSTEFFSIALSSIAGILLLLLLFRSKRHSSLKLPPGKLGIPFIGESFIFLRALRSNSLEQFFDERVKKFGLVFKTSLIGHPTVVLCGPAGNRLILSNEEKLVQMSWPAQFMKLMGENSVATRRGEDHIVMRSALAGFFGPGALQSYIGKMNTEIQSHINEKWKGKDEVNVLPLVRELVFNISAILFFNIYDKQEQDRLHKLLETILVGSFALPIDLPGFGFHRALQGRAKLNKIMLSLIKKRKEDLQSGSATATQDLLSVLLTFRDDKGTPLTNDEILDNFSSLLHASYDTTTSPMALIFKLLSSNPECYQKVVQEQLEILSNKEEGEEITWKDLKAMKYTWQVAQETLRMFPPVFGTFRKAITDIQYDGYTIPKGWKLLWTTYSTHPKDLYFNEPEKFMPSRFDQEGKHVAPYTFLPFGGGQRSCVGWEFSKMEILLFVHHFVKTFSSYTPVDPDEKISGDPLPPLPSKGFSIKLFPRP", + "count": 1 + }, + { + "type": "ligand", + "smiles": "CC1=C(C2=CC3=C(C(=C([N-]3)C=C4C(=C(C(=N4)C=C5C(=C(C(=N5)C=C1[N-]2)C)C=C)C)C=C)C)CCC(=O)O)CCC(=O)O.[Fe+2]", + "count": 1 + }, + { + "type": "ligand", + "smiles": "CC1=C2CC[C@@]3(CCCC(=C)[C@H]3C[C@@H](C2(C)C)CC1)C", + "count": 1 + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/data/demo_pUb.json b/apps/protein_folding/helixfold3/data/demo_pUb.json new file mode 100644 index 00000000..00ee1749 --- /dev/null +++ b/apps/protein_folding/helixfold3/data/demo_pUb.json @@ -0,0 +1,21 @@ +{ + "entities": [ + { + "type": "protein", + "sequence": "MQIFVKTLTGKTITLEVESSDTIDNVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLADYNIQKESTLHLVLRLRGG", + "count": 1, + "_case_from": "https://www.rcsb.org/structure/5K9P", + "_note": "Ub" + }, + { + "type": "ligand", + "ccd": "PO2", + "count": 1 + }, + { + "type": "bond", + "bond": "A,SER,20,OG,B,PO2,1,P,covale,1.66", + "_note": "Ser20 phosphorylated ubiquitin" + } + ] +} \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/helixfold/data/mmcif_parsing_paddle.py b/apps/protein_folding/helixfold3/helixfold/data/mmcif_parsing_paddle.py index d5e8f875..d6f907eb 100644 --- a/apps/protein_folding/helixfold3/helixfold/data/mmcif_parsing_paddle.py +++ b/apps/protein_folding/helixfold3/helixfold/data/mmcif_parsing_paddle.py @@ -22,7 +22,7 @@ from Bio import PDB import numpy as np import time -import logging +from absl import logging from helixfold.common.residue_constants import crystallization_aids, ligand_exclusion_list # Type aliases: @@ -34,7 +34,6 @@ SeqRes = str MmCIFDict = Mapping[str, Sequence[str]] -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @dataclasses.dataclass(frozen=True) class Monomer: diff --git a/apps/protein_folding/helixfold3/helixfold/data/pipeline_conf_bonds.py b/apps/protein_folding/helixfold3/helixfold/data/pipeline_conf_bonds.py index dee10bb0..6b46bb18 100644 --- 
a/apps/protein_folding/helixfold3/helixfold/data/pipeline_conf_bonds.py +++ b/apps/protein_folding/helixfold3/helixfold/data/pipeline_conf_bonds.py @@ -1,9 +1,22 @@ """Functions for building the input features (reference ccd features) for the HelixFold model.""" import collections -from typing import Optional -from helixfold.common import residue_constants +from dataclasses import dataclass +import gzip +import os +import pickle +import re +from typing import Any, List, Literal, Optional, Tuple + +from absl import logging +from immutabledict import immutabledict import numpy as np +from openbabel import openbabel + +from helixfold.common import residue_constants +from helixfold.data.tools import utils + + ALLOWED_LIGAND_BONDS_TYPE = { "SING": 1, @@ -13,17 +26,131 @@ "AROM": 12, } +# Define the possible bond types as a Literal +# https://mmcif.wwpdb.org/dictionaries/mmcif_std.dic/Items/_struct_conn_type.id.html +BondType = Literal["covale", "covale_base", "covale_phosphate", "covale_sugar", "disulf", "hydrog",'metalc','mismat','modres','saltbr'] + + +@dataclass +class AtomPartner: + """ + Represents one partner atom in a covalent bond. + + Attributes: + label_asym_id (str): The asymmetry identifier for the partner atom (i.e., chain ID). + label_comp_id (str): The component identifier for the partner atom (i.e., residue name). + seq_id (str): The sequence identifier for the partner atom (merged label_seq_id and auth_seq_id). + label_atom_id (str): The atom identifier for the partner atom (i.e., atom name). + """ + + label_asym_id: str # Chain ID + label_comp_id: str # Residue name + seq_id: str # Merged sequence ID + label_atom_id: str # Atom name + + +@dataclass +class CovalentBond: + """ + Represents a covalent bond between two atoms in a molecular structure. + + Attributes: + atom_1 (AtomPartner): The first partner atom in the bond. + atom_2 (AtomPartner): The second partner atom in the bond. + bond_type (BondType): The type of the bond. + pdbx_dist_value (float): The distance value as defined in the PDBx/mmCIF format. + """ + + atom_1: AtomPartner + atom_2: AtomPartner + bond_type: BondType + pdbx_dist_value: float + +def parse_covalent_bond_input(input_string: str) -> List[CovalentBond]: + """ + Parses a human-readable string into a list of CovalentBond objects. + + Args: + input_string (str): A string representing covalent bonds, where each bond is + described by two atom partners separated by a comma, + and multiple bonds are separated by semicolons. + Example: "A,GLY,25,CA,A,GLY,25,N,covale,1.32; B,HIS,58,ND1,B,HIS,58,CE1,covale,1.39" + + Returns: + List[CovalentBond]: A list of CovalentBond objects. + """ + covalent_bonds = [] + + # Split the input string by semicolons to separate individual covalent bonds + bond_strings = input_string.split(';') + + for bond_str in bond_strings: + # Split the individual bond string by commas to separate attributes + bond_parts = bond_str.split(',') + + if len(bond_parts) != 10: + raise ValueError(f"Invalid bond format: {bond_str}. 
Expected 10 fields per bond.") + + # Create AtomPartner instances for the two atoms in the bond + atom_1 = AtomPartner( + label_asym_id=bond_parts[0].strip(), + label_comp_id=bond_parts[1].strip(), + seq_id=bond_parts[2].strip(), + label_atom_id=bond_parts[3].strip() + ) + + atom_2 = AtomPartner( + label_asym_id=bond_parts[4].strip(), + label_comp_id=bond_parts[5].strip(), + seq_id=bond_parts[6].strip(), + label_atom_id=bond_parts[7].strip() + ) + + # Create a CovalentBond instance + covalent_bond = CovalentBond( + atom_1=atom_1, + atom_2=atom_2, + bond_type=bond_parts[8].strip(), + pdbx_dist_value=float(bond_parts[9].strip()) + ) + + # Append the CovalentBond instance to the list + covalent_bonds.append(covalent_bond) + logging.info(f"Added {len(covalent_bonds)} bonds: {covalent_bonds}") + + return covalent_bonds + +def load_ccd_dict(ccd_preprocessed_path: str) -> immutabledict[str, Any]: + if not os.path.exists(ccd_preprocessed_path): + raise FileNotFoundError(f'[CCD] ccd_preprocessed_path: {ccd_preprocessed_path} not exist.') + + if not ccd_preprocessed_path.endswith('.pkl.gz') and not ccd_preprocessed_path.endswith('.pkl'): + raise ValueError(f'[CCD] ccd_preprocessed_path: {ccd_preprocessed_path} not endswith .pkl.gz and .pkl') + + with utils.timing(f'Loading CCD dataset from {ccd_preprocessed_path}'): + if ccd_preprocessed_path.endswith('.pkl.gz'): + with gzip.open(ccd_preprocessed_path, "rb") as fp: + ccd_preprocessed_dict = immutabledict(pickle.load(fp)) + else: + with open(ccd_preprocessed_path, "rb") as fp: + ccd_preprocessed_dict = immutabledict(pickle.load(fp)) + + logging.info(f'CCD dataset contains {len(ccd_preprocessed_dict)} entries.') + + return ccd_preprocessed_dict + def element_map_with_x(atom_symbol): # ## one-hot max shape == 128 return residue_constants.ATOM_ELEMENT.get(atom_symbol, 127) -def convert_atom_id_name(atom_id: str) -> int: +def convert_atom_id_name(atom_id: str) -> list[int]: """ Converts unique atom_id names to integer of atom_name. need to be padded to length 4. Each character is encoded as ord(c) − 32 """ + if (len_atom_id:=len(atom_id))>4: + raise ValueError(f'atom_id: `{atom_id}` is too long, max length is 4.') atom_id_pad = atom_id.ljust(4, ' ') - assert len(atom_id_pad) == 4 return [ord(c) - 32 for c in atom_id_pad] @@ -79,8 +206,16 @@ def make_ccd_conf_features(all_chain_info, ccd_preprocessed_dict, features['ref_element'].append(np.array([element_map_with_x(t[0].upper() + t[1:].lower()) for t in _ccd_feats['atom_symbol']], dtype=np.int32)) features['ref_charge'].append(np.array(_ccd_feats['charge'], dtype=np.int32)) + + # atom checks + for atom_id in _ccd_feats['atom_ids']: + converted_atom_id_name=convert_atom_id_name(atom_id.upper()) + if max(converted_atom_id_name)>= 64: + raise ValueError(f'>>> Problematic atom in ligand ({residue_id=}, {ccd_id=}, {chain_id=}) {atom_id=}, {converted_atom_id_name=}') + # logging.debug(f'({residue_id=}, {ccd_id=}, {chain_id=}) {atom_id=}, {converted_atom_id_name=}') + features['ref_atom_name_chars'].append( - np.array([convert_atom_id_name(atom_id) for atom_id in _ccd_feats['atom_ids']] + np.array([convert_atom_id_name(atom_id.upper()) for atom_id in _ccd_feats['atom_ids']] , dtype=np.int32)) # here we get ref_space_uid [ Each (chain id, residue index) tuple is assigned an integer on first appearance.] 
@@ -107,13 +242,14 @@ def make_ccd_conf_features(all_chain_info, ccd_preprocessed_dict, features[k] = np.concatenate(v, axis=0) features['ref_atom_count'] = np.bincount(features['ref_token2atom_idx']) - assert np.max(features['ref_element']) < 128 - assert np.max(features['ref_atom_name_chars']) < 64 - assert len(set([len(v) for k, v in features.items() if k != 'ref_atom_count'])) == 1 ## To check same Atom-level features. + if (len_ref_element:=np.max(features['ref_element'])) >= 128: + raise ValueError(f'{len_ref_element=}, which is larger then 128.\n{features["ref_element"]}\n{"-"*79}') + + assert len(set([len(v) for k, v in features.items() if k != 'ref_atom_count'])) == 1 ## To check same Atom-level features. # WTF? return features -def make_bond_features(covalent_bond, all_chain_info, ccd_preprocessed_dict, +def make_bond_features(covalent_bond: List[CovalentBond], all_chain_info, ccd_preprocessed_dict, extra_feats: Optional[dict]=None): """ all_chain_info: dict, (chain_type_chain_id): ccd_seq (list of ccd), such as: protein_A: ['ALA', 'MET', 'GLY'] @@ -134,24 +270,29 @@ def make_bond_features(covalent_bond, all_chain_info, ccd_preprocessed_dict, _set_chain_id_list = set(chain_id_list) parsed_covalent_bond = [] for _bond in covalent_bond: - left_bond_atomid, right_bond_atomid = _bond['ptnr1_label_atom_id'], _bond['ptnr2_label_atom_id'] - left_bond_name, right_bond_name = _bond['ptnr1_label_comp_id'], _bond['ptnr2_label_comp_id'] - left_bond, right_bond = _bond['ptnr1_label_asym_id'], _bond['ptnr2_label_asym_id'] + # Accessing the AtomPartner attributes for both atoms in the covalent bond + left_bond_atomid, right_bond_atomid = _bond.atom_1.label_atom_id, _bond.atom_2.label_atom_id + left_bond_name, right_bond_name = _bond.atom_1.label_comp_id, _bond.atom_2.label_comp_id + left_bond, right_bond = _bond.atom_1.label_asym_id, _bond.atom_2.label_asym_id - left_bond_idx, right_bond_idx = _bond['ptnr1_label_seq_id'], _bond['ptnr2_label_seq_id'] - auth_left_idx, auth_right_idx = _bond['ptnr1_auth_seq_id'], _bond['ptnr2_auth_seq_id'] + left_bond_idx, right_bond_idx = _bond.atom_1.seq_id, _bond.atom_2.seq_id + auth_left_idx, auth_right_idx = _bond.atom_1.seq_id, _bond.atom_2.seq_id + left_bond_idx = 1 if left_bond_idx == '.' else left_bond_idx right_bond_idx = 1 if right_bond_idx == '.' else right_bond_idx - if _bond['bond_type'] != "covale": - continue + if _bond.bond_type != "covale": + logging.warning(f'Detected non-covale bond type: {_bond.bond_type}') + # continue - if _bond['pdbx_dist_value'] > 2.4: + if _bond.pdbx_dist_value > 2.4: # the covalent_bond is cut off by distance=2.4 + logging.warning(f'Ignore bonding with distance > 2.4: {_bond.pdbx_dist_value}') continue ## When some chainID is filtered, bond need to be filtered too. 
if (left_bond not in _set_chain_id_list) or (right_bond not in _set_chain_id_list): + logging.warning(f'Ignore bonding with left and right out of chain list: ') continue parsed_covalent_bond.append([left_bond, left_bond_name, left_bond_idx, left_bond_atomid, auth_left_idx, @@ -167,6 +308,8 @@ def make_bond_features(covalent_bond, all_chain_info, ccd_preprocessed_dict, chainId_to_type = {} ligand_bond_type = [] # (i, j, bond_type), represent the bond between token i and token j bond_index = [] # (i,j) represent the bond between token i and token j + + ccd_id2atom_ids: dict[str, list]={} ccd_standard_set = residue_constants.STANDARD_LIST for chain_type_id, ccd_seq in all_chain_info.items(): chain_type, chain_id = chain_type_id.rsplit('_', 1) @@ -187,6 +330,8 @@ def make_bond_features(covalent_bond, all_chain_info, ccd_preprocessed_dict, else: _ccd_feats = ccd_preprocessed_dict[ccd_id] atom_ids = _ccd_feats['atom_ids'] + + ccd_id2atom_ids[ccd_id] = atom_ids assert len(atom_ids) > 0, f'TODO filter - Got CCD <{ccd_id}>: 0 atom nums.' all_token_nums += len(atom_ids) @@ -233,16 +378,43 @@ def make_bond_features(covalent_bond, all_chain_info, ccd_preprocessed_dict, ptnr2_label_seq_id = ptnr2_auth_seq_id try: - assert ptnr1_label_asym_id in chainId_to_ccd_list and ptnr2_label_asym_id in chainId_to_ccd_list + if not (ptnr1_label_asym_id in chainId_to_ccd_list and ptnr2_label_asym_id in chainId_to_ccd_list): + raise ValueError(f"Invalid chain id:\n{ptnr1_label_asym_id}/{ptnr2_label_asym_id}\n{chainId_to_ccd_list}") ptnr1_ccd_id = chainId_to_ccd_list[ptnr1_label_asym_id][int(ptnr1_label_seq_id) - 1] ptnr2_ccd_id = chainId_to_ccd_list[ptnr2_label_asym_id][int(ptnr2_label_seq_id) - 1] - assert ptnr1_ccd_id == ptnr1_label_comp_id and ptnr2_ccd_id == ptnr2_label_comp_id - except: + + + # renamed ligand residues + + + if ptnr1_ccd_id != ptnr1_label_comp_id: + logging.warning(f"Find ligand residue: {ptnr1_label_comp_id} -> {ptnr1_ccd_id}") + #ptnr1_label_comp_id = ptnr1_ccd_id + + if ptnr2_ccd_id != ptnr2_label_comp_id: + logging.warning(f"Find ligand residue: {ptnr2_label_comp_id} -> {ptnr2_ccd_id}") + #ptnr2_label_comp_id = ptnr2_ccd_id + + except ValueError as e: ## some convalent-bond from mmcif is misslead, pass it. 
+ logging.warning(f'Error occurred during covalent bond processing: {e}') continue + + + + if ptnr1_ccd_id in ccd_preprocessed_dict: + ptnr1_ccd_atoms_list = ccd_preprocessed_dict[ptnr1_ccd_id]['atom_ids'] + else: + ptnr1_ccd_atoms_list = ccd_id2atom_ids[ptnr1_ccd_id] + + logging.debug(f'{ptnr1_ccd_id=}: {ptnr1_ccd_atoms_list=}') - ptnr1_ccd_atoms_list = ccd_preprocessed_dict[ptnr1_ccd_id]['atom_ids'] - ptnr2_ccd_atoms_list = ccd_preprocessed_dict[ptnr2_ccd_id]['atom_ids'] + if ptnr2_ccd_id in ccd_preprocessed_dict: + ptnr2_ccd_atoms_list = ccd_preprocessed_dict[ptnr2_ccd_id]['atom_ids'] + else: + ptnr2_ccd_atoms_list = ccd_id2atom_ids[ptnr2_ccd_id] + + logging.debug(f'{ptnr2_ccd_id=}: {ptnr2_ccd_atoms_list=}') if ptnr1_ccd_id in ccd_standard_set: ## if ccd_id is in the standard residue in HF3 (table 13), we didn't have to map to atom-leval index diff --git a/apps/protein_folding/helixfold3/helixfold/data/pipeline_token_feature.py b/apps/protein_folding/helixfold3/helixfold/data/pipeline_token_feature.py index 7bbb1805..92ea0e1e 100644 --- a/apps/protein_folding/helixfold3/helixfold/data/pipeline_token_feature.py +++ b/apps/protein_folding/helixfold3/helixfold/data/pipeline_token_feature.py @@ -3,16 +3,16 @@ import collections import os import time -from typing import MutableMapping, Optional, List +from typing import Any, MutableMapping, Optional, List from absl import logging from helixfold.common import residue_constants from helixfold.data import parsers +from helixfold.data.pipeline_conf_bonds import load_ccd_dict import numpy as np import json -import gzip -import pickle + from rdkit import Chem - + FeatureDict = MutableMapping[str, np.ndarray] ELEMENT_MAPPING = Chem.GetPeriodicTable() @@ -56,6 +56,15 @@ def flatten_is_protein_features(is_protein_feats: np.ndarray) -> FeatureDict: return res + +def dump_all_ccd_keys(ccd_data: dict[str, Any]): + ccd_keys_file='all_ccd_keys.txt' + if not os.path.isfile(ccd_keys_file): + open(ccd_keys_file, 'w').write('\n'.join(ccd_data.keys())) + logging.warning(f'All ccd keys are dumped to {ccd_keys_file}') + return ccd_keys_file + + def make_sequence_features( all_chain_info, ccd_preprocessed_dict, extra_feats: Optional[dict]=None) -> FeatureDict: @@ -106,13 +115,18 @@ def make_sequence_features( sym_id = chainid_to_sym_id[_alphabet_chain_id] for residue_id, ccd_id in enumerate(ccd_seq): if ccd_id not in ccd_preprocessed_dict: - assert not extra_feats is None and ccd_id in extra_feats,\ - f'<{ccd_id}> not in ccd_preprocessed_dict, But got extra_feats is None' + if extra_feats is None: + ccd_kf=dump_all_ccd_keys(ccd_preprocessed_dict) + raise ValueError(f'<{ccd_id}> not in ccd_preprocessed_dict, But got extra_feats is None. See all keys in {ccd_kf}') + if ccd_id not in extra_feats: + ccd_kf=dump_all_ccd_keys(ccd_preprocessed_dict) + raise ValueError(f'<{ccd_id}> not in ccd_preprocessed_dict or extra_feats. See all keys in {ccd_kf}') _ccd_feats = extra_feats[ccd_id] else: _ccd_feats = ccd_preprocessed_dict[ccd_id] num_atoms = len(_ccd_feats['position']) - assert num_atoms > 0, f'TODO filter - Got CCD <{ccd_id}>: 0 atom nums.' 
+ if num_atoms == 0: + raise NotImplementedError(f'TODO filter - Got CCD <{ccd_id}>: 0 atom nums.') if ccd_id not in residue_constants.STANDARD_LIST: features['asym_id'].append(np.array([chain_num_id] * num_atoms, dtype=np.int32)) @@ -197,13 +211,8 @@ def process(self, assembly_dict = unit_dict if ccd_preprocessed_dict is None: - ccd_preprocessed_dict = {} - st_1 = time.time() - if 'pkl.gz' in self.ccd_preprocessed_path: - with gzip.open(self.ccd_preprocessed_path, "rb") as fp: - ccd_preprocessed_dict = pickle.load(fp) - logging.info(f'load ccd dataset done. use {time.time()-st_1}s') - + ccd_preprocessed_dict=load_ccd_dict(self.ccd_preprocessed_path) + if select_mmcif_chainID is not None: select_mmcif_chainID = set(select_mmcif_chainID) diff --git a/apps/protein_folding/helixfold3/helixfold/data/templates.py b/apps/protein_folding/helixfold3/helixfold/data/templates.py index f2e3289a..f9efdae0 100644 --- a/apps/protein_folding/helixfold3/helixfold/data/templates.py +++ b/apps/protein_folding/helixfold3/helixfold/data/templates.py @@ -817,19 +817,15 @@ def _process_single_hit( TemplateAtomMaskAllZerosError) as e: # These 3 errors indicate missing mmCIF experimental data rather than a # problem with the template search, so turn them into warnings. - warning = ('%s_%s (sum_probs: %.2f, rank: %d): feature extracting errors: ' - '%s, mmCIF parsing errors: %s' - % (hit_pdb_code, hit_chain_id, hit.sum_probs, hit.index, - str(e), parsing_result.errors)) + warning = (f'{hit_pdb_code}_{hit_chain_id} (sum_probs: {hit.sum_probs}, rank: {hit.index}): feature extracting errors: ' + f'{str(e)}, mmCIF parsing errors: {parsing_result.errors}') if strict_error_check: return SingleHitResult(features=None, error=warning, warning=None) else: return SingleHitResult(features=None, error=None, warning=warning) except Error as e: - error = ('%s_%s (sum_probs: %.2f, rank: %d): feature extracting errors: ' - '%s, mmCIF parsing errors: %s' - % (hit_pdb_code, hit_chain_id, hit.sum_probs, hit.index, - str(e), parsing_result.errors)) + error = (f'{hit_pdb_code}_{hit_chain_id} (sum_probs: {hit.sum_probs}, rank: {hit.index}): feature extracting errors: ' + f'{str(e)}, mmCIF parsing errors: {parsing_result.errors}') return SingleHitResult(features=None, error=error, warning=None) diff --git a/apps/protein_folding/helixfold3/helixfold/data/tools/utils.py b/apps/protein_folding/helixfold3/helixfold/data/tools/utils.py index c415dac8..c588187d 100644 --- a/apps/protein_folding/helixfold3/helixfold/data/tools/utils.py +++ b/apps/protein_folding/helixfold3/helixfold/data/tools/utils.py @@ -16,7 +16,7 @@ import contextlib import shutil -import logging +from absl import logging import tempfile import time from typing import Optional diff --git a/apps/protein_folding/helixfold3/infer_scripts/feature_processing_aa.py b/apps/protein_folding/helixfold3/infer_scripts/feature_processing_aa.py index b89f43f4..19797a64 100644 --- a/apps/protein_folding/helixfold3/infer_scripts/feature_processing_aa.py +++ b/apps/protein_folding/helixfold3/infer_scripts/feature_processing_aa.py @@ -2,7 +2,10 @@ import collections import copy import os -import time, gzip, pickle +from pathlib import Path +import pickle +from typing import List, Mapping, Optional, Tuple + import numpy as np import logging from helixfold.common import residue_constants @@ -11,8 +14,10 @@ from helixfold.data import pipeline_rna_multimer from helixfold.data import pipeline_conf_bonds, pipeline_token_feature, pipeline_hybrid from helixfold.data import label_utils -from 
concurrent.futures import ProcessPoolExecutor, as_completed -from .preprocess import digit2alphabet + +from helixfold.data.tools import utils + +from .preprocess import Entity, digit2alphabet logger = logging.getLogger(__file__) @@ -20,20 +25,6 @@ STRING_FEATURES = ['all_chain_ids', 'all_ccd_ids','all_atom_ids', 'release_date','label_ccd_ids','label_atom_ids'] -def load_ccd_dict(ccd_preprocessed_path): - assert os.path.exists(ccd_preprocessed_path),\ - (f'[CCD] ccd_preprocessed_path: {ccd_preprocessed_path} not exist.') - st_1 = time.time() - if 'pkl.gz' in ccd_preprocessed_path: - with gzip.open(ccd_preprocessed_path, "rb") as fp: - ccd_preprocessed_dict = pickle.load(fp) - elif '.pkl' in ccd_preprocessed_path: - with open(ccd_preprocessed_path, "rb") as fp: - ccd_preprocessed_dict = pickle.load(fp) - print(f'[CCD] load ccd dataset done. use {time.time()-st_1}s;'\ - f'Has length of {len(ccd_preprocessed_dict)}') - - return ccd_preprocessed_dict def crop_msa(feat, max_msa_depth=16384): @@ -233,7 +224,7 @@ def get_inference_restype_mask(all_chain_features, ccd_preprocessed_dict, extra_ } -def add_assembly_features(all_chain_features, ccd_preprocessed_dict, no_msa_templ_feats=True): +def add_assembly_features(all_chain_features: Mapping, ccd_preprocessed_dict: Mapping, no_msa_templ_feats:bool=True, covalent_bonds:Optional[List[pipeline_conf_bonds.CovalentBond]]=None): ''' ## NOTE: keep the type and chainID orders. all_chain_features: { @@ -307,7 +298,7 @@ def add_assembly_features(all_chain_features, ccd_preprocessed_dict, no_msa_temp ref_features = pipeline_conf_bonds.make_ccd_conf_features(all_chain_info=new_order_chain_infos, ccd_preprocessed_dict=ccd_preprocessed_dict, extra_feats=extra_feats_infos) - bond_features = pipeline_conf_bonds.make_bond_features(covalent_bond=[], + bond_features = pipeline_conf_bonds.make_bond_features(covalent_bond=covalent_bonds, all_chain_info=new_order_chain_infos, ccd_preprocessed_dict=ccd_preprocessed_dict, extra_feats=extra_feats_infos) @@ -376,40 +367,36 @@ def process_chain_msa(args): return chain_id, raw_features, desc, seq -def process_input_json(all_entitys, ccd_preprocessed_path, +def process_input_json(all_entitys: List[Entity], ccd_preprocessed_path, msa_templ_data_pipeline_dict, msa_output_dir, no_msa_templ_feats=False): ## load ccd dict. - ccd_preprocessed_dict = load_ccd_dict(ccd_preprocessed_path) + ccd_preprocessed_dict = pipeline_conf_bonds.load_ccd_dict(ccd_preprocessed_path) all_chain_features = {} sequence_features = {} num_chains = 0 - for entity_items in all_entitys: + for entity in all_entitys: + if (dtype:=entity.dtype) not in residue_constants.CHAIN_type_order: + continue # dtype(protein, dna, rna, ligand): no_chains, msa_seqs, seqs - dtype = list(entity_items.keys())[0] - items = list(entity_items.values())[0] - entity_count = items['count'] - ccd_seqs = items['seqs'] - msa_seqs = items['msa_seqs'] - extra_mol_infos = items.get('extra_mol_infos', {}) ## dict, 「extra-add, ccd_id」: ccd_features. 
- - for i in range(entity_count): + + for i in range(entity.count): chain_num_ids = num_chains + i chain_id = digit2alphabet(chain_num_ids) # increase ++ type_chain_id = dtype + '_' + chain_id - if ccd_seqs in sequence_features: - all_chain_features[type_chain_id] = copy.deepcopy(sequence_features[ccd_seqs]) + if entity.seqs in sequence_features: + all_chain_features[type_chain_id] = copy.deepcopy(sequence_features[entity.seqs]) continue - ccd_list = parsers.parse_ccd_fasta(ccd_seqs) + ccd_list = parsers.parse_ccd_fasta(entity.seqs) chain_features = {'msa_templ_feats': {}, 'ccd_seqs': ccd_list, - 'msa_seqs': msa_seqs, - 'extra_feats': extra_mol_infos} + 'msa_seqs': entity.msa_seqs, + 'extra_feats': entity.extra_mol_infos} all_chain_features[type_chain_id] = chain_features - sequence_features[ccd_seqs] = chain_features - num_chains += entity_count + sequence_features[entity.seqs] = chain_features + num_chains += entity.count if not no_msa_templ_feats: ## 1. get all msa_seqs for protein/rna MSA/Template search. Only for protein/rna. @@ -472,8 +459,12 @@ def process_input_json(all_entitys, ccd_preprocessed_path, for _type_chain_id in fasta_seq_to_type_chain_id[fasta_seq]: chain_features['msa_templ_feats'] = copy.deepcopy(seqs_to_msa_features[fasta_seq]) + + # gather all defined covalent bonds + all_covalent_bonds=[bond for entity in all_entitys for bond in entity.msa_seqs if entity.dtype == 'bond'] + assert num_chains == len(all_chain_features.keys()) - all_feats = add_assembly_features(all_chain_features, ccd_preprocessed_dict, no_msa_templ_feats) + all_feats = add_assembly_features(all_chain_features, ccd_preprocessed_dict, no_msa_templ_feats, all_covalent_bonds) np_example, label = all_feats['feats'], all_feats['label'] assert num_chains == len(np.unique(np_example['all_chain_ids'])) diff --git a/apps/protein_folding/helixfold3/infer_scripts/preprocess.py b/apps/protein_folding/helixfold3/infer_scripts/preprocess.py index 41cd44ac..467b9d8b 100644 --- a/apps/protein_folding/helixfold3/infer_scripts/preprocess.py +++ b/apps/protein_folding/helixfold3/infer_scripts/preprocess.py @@ -15,11 +15,17 @@ import subprocess import tempfile import itertools -sys.path.append('../') +from absl import logging +from typing import List, Optional, Tuple, Union, Mapping, Literal, Callable, Any +from dataclasses import dataclass, field import rdkit from rdkit import Chem from rdkit.Chem import AllChem from helixfold.common import residue_constants +from helixfold.data.pipeline_conf_bonds import CovalentBond, parse_covalent_bond_input +from helixfold.data.tools import utils + +from openbabel import openbabel ## NOTE: this mapping is only useful for standard dna/rna/protein sequence input. 
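The `Mol2MolObabel` helper introduced in the next hunk shells out to Open Babel to turn a SMILES string or a small-molecule file into a mol2 file under `ligands/`, then loads it with RDKit and strips hydrogens. A rough equivalent of that round trip, assuming the `obabel` binary is on `PATH` and RDKit is installed (the class itself resolves the binary from the `OBABEL_BIN` environment variable):

```python
# Sketch of the obabel -> mol2 -> RDKit round trip performed by Mol2MolObabel.
import subprocess
from rdkit import Chem

smiles = "CC1=C2CC[C@@]3(CCCC(=C)[C@H]3C[C@@H](C2(C)C)CC1)C"  # ligand from demo_p450_heme.json
out_mol2 = "UNK.mol2"

# Same command shape as _perform_conversion: SMILES in, mol2 out, 3D coordinates generated.
cmd = f"obabel -:'{smiles}' -omol2 -O{out_mol2} --gen3d"
ret = subprocess.run(cmd, shell=True, capture_output=True, text=True)

mol = Chem.MolFromMol2File(out_mol2, sanitize=False)
if mol is not None:
    mol = Chem.RemoveAllHs(mol, sanitize=False)
    print(mol.GetNumAtoms(), "heavy atoms")
```

Setting `"use_3d": false` on a ligand entity (as in `demo_3fap_protein_sm.json`) drops the `--gen3d` flag, which is useful when the input file already carries coordinates.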
@@ -140,22 +146,76 @@ def smiles_to_ETKDGMol(smiles): return optimal_mol_wo_H -def smiles_toMol_obabel(smiles): - """ - generate mol from smiles using obabel; - """ - with tempfile.NamedTemporaryFile(suffix=".mol2") as temp_file: - print(f"[OBABEL] Temporary file created: {temp_file.name}") - obabel_cmd = f"{OBABEL_BIN} -:'{smiles}' -omol2 -O{temp_file.name} --gen3d" +class Mol2MolObabel: + def __init__(self): + self.obabel_bin = os.getenv('OBABEL_BIN') + if not (self.obabel_bin and os.path.isfile(self.obabel_bin)): + raise FileNotFoundError(f'Cannot find obabel binary at {self.obabel_bin}.') + + # Get the supported formats + self.supported_formats: Tuple[str] = self._get_supported_formats() + + def _get_supported_formats(self) -> Tuple[str]: + """ + Retrieves the list of supported formats from obabel and filters out write-only formats. + + Returns: + tuple: A tuple of supported input formats. + """ + obabel_cmd = f"{self.obabel_bin} -L formats" ret = subprocess.run(obabel_cmd, shell=True, capture_output=True, text=True) - mol = Chem.MolFromMol2File(temp_file.name, sanitize=False) - if '3D coordinate generation failed' in ret.stderr: + formats = [line.split()[0] for line in ret.stdout.splitlines() if '[Write-only]' not in line] + formats.append('smiles') + + return tuple(formats) + + def _load_mol(self, mol2_file:str, ret:Optional[subprocess.CompletedProcess]=None) -> Chem.Mol: + mol = Chem.MolFromMol2File(mol2_file, sanitize=False) + if isinstance(ret, subprocess.CompletedProcess) and '3D coordinate generation failed' in ret.stderr: mol = generate_ETKDGv3_conformer(mol) optimal_mol_wo_H = Chem.RemoveAllHs(mol, sanitize=False) - return optimal_mol_wo_H + return optimal_mol_wo_H -def polymer_convert(items): + def _perform_conversion(self, input_type: str, input_value: str, generate_3d: bool=True) -> Chem.Mol: + if input_type == 'mol2' and input_value.endswith('.mol2'): + return self._load_mol(mol2_file=input_value) + + save_path=os.path.join('ligands',f'{os.path.basename(input_value)[:-(len(input_type)+1)] if input_type != "smiles" else "UNK"}.mol2') + + os.makedirs(os.path.dirname(save_path), exist_ok=True) + + with utils.timing(f'converting {input_type} to mol2: {input_value}'): + if input_type == 'smiles': + obabel_cmd = f"{self.obabel_bin} -:'{input_value}' -omol2 -O{save_path} {'--gen3d' if generate_3d else ''}" + if len(input_value)>60: + logging.warning(f'This takes a while ...') + else: + obabel_cmd = f"{self.obabel_bin} -i {input_type} {input_value} -omol2 -O{save_path} {'--gen3d' if generate_3d else ''}" + logging.debug(f'Launching command: `{obabel_cmd}`') + ret = subprocess.run(obabel_cmd, shell=True, capture_output=True, text=True) + return self._load_mol(mol2_file=save_path, ret=ret) + + def _convert_to_mol(self, input_type: str, input_value: str, generate_3d: bool=True) -> Chem.Mol: + if input_type not in self.supported_formats: + raise ValueError(f'Unsupported small molecule input: {input_type}. 
\nSupported formats: \n{self.supported_formats}\n') + + if input_type != 'smiles' and not os.path.isfile(input_value): + raise FileNotFoundError(f'Cannot find the {input_type.upper()} file at {input_value}.') + + return self._perform_conversion(input_type, input_value, generate_3d) + + __call__: Callable[[str, str, bool], Chem.Mol] = _convert_to_mol + +@dataclass +class Entity: + dtype: Literal['protein', 'dna', 'rna', 'ligand', 'bond','non_polymer', 'ion'] + seqs: str + msa_seqs: Union[str, List[CovalentBond]] = '' + count: int = 1 + extra_mol_infos: dict[str, Any] = field(default_factory=dict) + +def polymer_convert(items)-> Entity: """ "type": "protein", "sequence": "GPDSMEEVVVPEEPPKLVSALATYVQQERLCTMFLSIANKLLPLKP", @@ -178,18 +238,21 @@ def polymer_convert(items): raise ValueError(f'not support for the {dtype} in polymer_convert') ccd_seqs = ''.join(ccd_seqs) ## (GLY)(ALA)..... - # repeat_ccds, repeat_fasta = [ccd_seqs], [msa_seqs] - return { - dtype: { - 'seqs': ccd_seqs, - 'msa_seqs': msa_seqs, - 'count': count, - 'extra_mol_infos': {} - } - } + return Entity(dtype=dtype, seqs=ccd_seqs, msa_seqs=msa_seqs,count=count) + + +def covalent_bond_convert(items: Mapping[str, Union[int, str]]) -> Entity: + """ + "type": "bond", + "bond": "A,ASN,74,ND2,B,UNK-," + """ + dtype = items['type'] + bond = parse_covalent_bond_input(items['bond']) + + return Entity(dtype=dtype, seqs='', msa_seqs=bond) -def ligand_convert(items): +def ligand_convert(items: Mapping[str, Union[int, str]]) -> Entity: """ "type": "ligand", "ccd": "ATP", or "smiles": "CCccc(O)ccc", @@ -197,33 +260,36 @@ def ligand_convert(items): """ dtype = items['type'] count = items['count'] + converter=Mol2MolObabel() msa_seqs = "" _ccd_seqs = [] ccd_to_extra_mol_infos = {} if 'ccd' in items: _ccd_seqs.append(f"({items['ccd']})") - elif 'smiles' in items: - _ccd_seqs.append(f"(UNK-)") + + + elif any(f in items for f in converter.supported_formats): + for k in converter.supported_formats: + if k in items: + break + + ligand_name="UNK-" + _ccd_seqs.append(f"({ligand_name})") # mol_wo_h = smiles_to_ETKDGMol(items['smiles']) - mol_wo_h = smiles_toMol_obabel(items['smiles']) + + mol_wo_h = converter(k, items[k], items.get('use_3d', True)) _extra_mol_infos = make_basic_info_fromMol(mol_wo_h) ccd_to_extra_mol_infos = { - "UNK-": _extra_mol_infos + ligand_name: _extra_mol_infos } else: - raise ValueError(f'not support for the {dtype} in ligand_convert') + raise ValueError(f'not support for the {dtype} in ligand_convert, please check the input. \nSupported input: {converter.supported_formats}') ccd_seqs = ''.join(_ccd_seqs) ## (GLY)(ALA)..... 
# repeat_ccds, repeat_fasta = [ccd_seqs], [msa_seqs] - return { - 'ligand': { - 'seqs': ccd_seqs, - 'msa_seqs': msa_seqs, - 'count': count, - 'extra_mol_infos': ccd_to_extra_mol_infos, - } - } + return Entity(dtype='ligand', seqs=ccd_seqs,msa_seqs=msa_seqs,count=count,extra_mol_infos=ccd_to_extra_mol_infos) + def entities_rename_and_filter(items): @@ -231,39 +297,34 @@ def entities_rename_and_filter(items): 'ion': 'ligand' } items['type'] = ligand_mapping.get(items['type'], items['type']) - if items['type'] not in ALLOWED_ENTITY_TYPE: + if items['type'] not in ALLOWED_ENTITY_TYPE and items['type'] != 'bond': raise ValueError(f'{items["type"]} is not allowed, will be ignored.') return items -def modify_name_convert(entities: list): +def modify_name_convert(entities: list[Entity]): cur_idx = 0 - for entity_items in entities: + for entity in entities: # dtype(protein, dna, rna, ligand): no_chains, msa_seqs, seqs - dtype = list(entity_items.keys())[0] - items = list(entity_items.values())[0] - entity_count = items['count'] - msa_seqs = items['msa_seqs'] - extra_mol_infos = items.get('extra_mol_infos', {}) ## dict, 「extra-add, ccd_id」: ccd_features. - extra_ccd_ids = list(extra_mol_infos.keys()) ## rename UNK- to UNK-1, 2, 3, 4... - for k in extra_ccd_ids: + extra_mol_infos=copy.deepcopy(entity.extra_mol_infos) + for k in extra_mol_infos: user_name_3 = USER_LIG_IDS_3[cur_idx] - items['seqs'] = items['seqs'].replace('UNK-', user_name_3) - extra_mol_infos[user_name_3] = extra_mol_infos.pop('UNK-') + entity.seqs = entity.seqs.replace('UNK-', user_name_3) + entity.extra_mol_infos[user_name_3] = entity.extra_mol_infos.pop('UNK-') cur_idx += 1 return entities -def online_json_to_entity(json_path, out_dir): +def online_json_to_entity(json_path: str, out_dir: str)-> list[Entity]: obj = read_json(json_path) entities = copy.deepcopy(obj['entities']) os.makedirs(out_dir, exist_ok=True) error_ids = [] - success_entity = [] + success_entity: list[Entity] = [] for idx, items in enumerate(entities): try: items = entities_rename_and_filter(items) @@ -275,6 +336,8 @@ def online_json_to_entity(json_path, out_dir): try: if items['type'] == 'ligand': json_obj = ligand_convert(items) + elif items['type'] == 'bond': + json_obj = covalent_bond_convert(items) else: json_obj = polymer_convert(items) success_entity.append(json_obj) @@ -289,5 +352,4 @@ def online_json_to_entity(json_path, out_dir): if len(error_ids) > 0: raise RuntimeError(f'[Error] Failed to convert {len(error_ids)}/{len(entities)} entities') - success_entity = modify_name_convert(success_entity) - return success_entity \ No newline at end of file + return modify_name_convert(success_entity) \ No newline at end of file diff --git a/apps/protein_folding/helixfold3/inference.py b/apps/protein_folding/helixfold3/inference.py index 51cf6ec6..429809bb 100644 --- a/apps/protein_folding/helixfold3/inference.py +++ b/apps/protein_folding/helixfold3/inference.py @@ -23,9 +23,9 @@ import pickle import pathlib import shutil -import logging import numpy as np from helixfold.common import all_atom_pdb_save +from helixfold.data.pipeline_conf_bonds import load_ccd_dict from helixfold.model import config, utils from helixfold.data import pipeline_parallel as pipeline from helixfold.data import pipeline_multimer_parallel as pipeline_multimer @@ -95,19 +95,30 @@ def preprocess_json_entity(json_path, out_dir): def convert_to_json_compatible(obj): if isinstance(obj, np.ndarray): return obj.tolist() - elif isinstance(obj, np.integer): + if isinstance(obj, np.integer): 
return int(obj) - elif isinstance(obj, np.floating): + if isinstance(obj, np.floating): return float(obj) - elif isinstance(obj, dict): + if isinstance(obj, dict): return {k: convert_to_json_compatible(v) for k, v in obj.items()} - elif isinstance(obj, list): + if isinstance(obj, list): return [convert_to_json_compatible(i) for i in obj] - else: - return obj - -def get_msa_templates_pipeline(args) -> Dict: - use_precomputed_msas = True # FLAGS.use_precomputed_msas + return obj + +def resolve_bin_path(cfg_path: str, default_binary_name: str)-> str: + """Helper function to resolve the binary path.""" + if cfg_path and os.path.isfile(cfg_path): + return cfg_path + + if cfg_val:=shutil.which(default_binary_name): + logging.warning(f'Using resolved {default_binary_name}: {cfg_val}') + return cfg_val + + raise FileNotFoundError(f"Could not find a proper binary path for {default_binary_name}: {cfg_path}.") + +def get_msa_templates_pipeline(cfg: DictConfig) -> Dict: + use_precomputed_msas = True # Assuming this is a constant or should be set globally + template_searcher = hmmsearch.Hmmsearch( binary_path=args.hmmsearch_binary_path, hmmbuild_binary_path=args.hmmbuild_binary_path, diff --git a/apps/protein_folding/helixfold3/utils/model.py b/apps/protein_folding/helixfold3/utils/model.py index 67b36128..4a5b2d65 100644 --- a/apps/protein_folding/helixfold3/utils/model.py +++ b/apps/protein_folding/helixfold3/utils/model.py @@ -17,7 +17,6 @@ import numpy as np import paddle import paddle.nn as nn -import logging import io from helixfold.model import modules_all_atom
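Taken together, the new input path turns each JSON entity into an `Entity` dataclass, with `bond` entities carrying parsed `CovalentBond` objects that `process_input_json` later forwards to `make_bond_features`. A hypothetical smoke test of that conversion, assuming the `infer_scripts` and `helixfold` packages (plus the Open Babel Python bindings they import) are installed and the script is run from the `helixfold3` app directory; the file name and import style are assumptions, not part of the patch:

```python
# Hypothetical check of the new entity/bond conversion path.
import json
import os
import tempfile

from infer_scripts.preprocess import online_json_to_entity

demo = {
    "entities": [
        {
            "type": "protein",
            "sequence": "MASVKSSSSSSSSSFISLLLLILLVIVLQSQVIECQPQQSCTASLTGLNVCAPFLVPGSPTASTECCNAVQSINHDCMCNTMRIAAQIPAQCNLPPLSCSAN",
            "count": 1
        },
        {
            "type": "bond",
            "bond": "A,CYS,41,SG,A,CYS,77,SG,disulf,2.2"
        }
    ]
}

with tempfile.TemporaryDirectory() as tmp:
    json_path = os.path.join(tmp, "demo_disulf_single.json")
    with open(json_path, "w") as fh:
        json.dump(demo, fh)

    for entity in online_json_to_entity(json_path, tmp):
        # Polymer entities carry their CCD sequence in `seqs`; bond entities
        # keep the parsed CovalentBond list in `msa_seqs`.
        print(entity.dtype, entity.count, entity.seqs or entity.msa_seqs)
```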