@@ -556,4 +556,132 @@ func Test_write_file_encoding()
556556 % bw !
557557endfunc
558558
559+ " Test for writing and reading a file starting with a BOM.
560+ " The Byte Order Mark (BOM) byte sequences for the various encodings are:
561+ " UTF-8 : EF BB BF
562+ " UTF-16 (BE): FE FF
563+ " UTF-16 (LE): FF FE
564+ " UTF-32 (BE): 00 00 FE FF
565+ " UTF-32 (LE): FF FE 00 00
func Test_readwrite_file_with_bom()
  " Checks how a leading Byte Order Mark is handled when a file is read and
  " written again, for latin1, utf-8, ucs-2 (BE/LE) and ucs-4 (BE/LE).
  "
  " Every section follows the same pattern:
  "   1. create Xtest1 from raw bytes with writefile()
  "   2. edit it and check 'fileencoding', 'bomb' and the decoded text
  "   3. write it as latin1 to Xfile2 and check the raw bytes (no BOM)
  "   4. write it in the Unicode encoding to Xtest3 and check the raw bytes
  "
  " Note: writefile() stores a "\n" inside a list item as a NUL byte and
  " readfile() in binary mode returns a NUL byte as "\n".  That is how the
  " multi-byte code units of UCS-2/UCS-4 are spelled out below.
  let utf8_bom = "\xEF\xBB\xBF"
  let utf16be_bom = "\xFE\xFF"
  let utf16le_bom = "\xFF\xFE"
  let utf32be_bom = "\n\n\xFE\xFF"
  let utf32le_bom = "\xFF\xFE\n\n"
  let save_fileencoding = &fileencoding
  " The 'S' flag is only enabled for the duration of this test; it is removed
  " again in the cleanup at the end.
  set cpoptions+=S

  " Check that editing a latin1 file doesn't see a BOM
  call writefile(["\xFE\xFElatin-1"], 'Xtest1')
  edit Xtest1
  call assert_equal('latin1', &fileencoding)
  call assert_equal(0, &bomb)
  set fenc=latin1
  write Xfile2
  call assert_equal(["\xFE\xFElatin-1", ''], readfile('Xfile2', 'b'))
  " Even with 'bomb' set the latin1 output is expected to carry no BOM.
  set bomb fenc=latin1
  write Xtest3
  call assert_equal(["\xFE\xFElatin-1", ''], readfile('Xtest3', 'b'))
  set bomb&

  " Check utf-8 BOM
  %bw!
  call writefile([utf8_bom .. "utf-8"], 'Xtest1')
  edit! Xtest1
  call assert_equal('utf-8', &fileencoding)
  call assert_equal(1, &bomb)
  call assert_equal('utf-8', getline(1))
  set fenc=latin1
  write! Xfile2
  call assert_equal(['utf-8', ''], readfile('Xfile2', 'b'))
  set fenc=utf-8
  w! Xtest3
  call assert_equal([utf8_bom .. "utf-8", ''], readfile('Xtest3', 'b'))

  " Check utf-8 with an error (will fall back to latin-1)
  %bw!
  call writefile([utf8_bom .. "utf-8\x80err"], 'Xtest1')
  edit! Xtest1
  call assert_equal('latin1', &fileencoding)
  call assert_equal(0, &bomb)
  " The BOM bytes and the 0x80 byte are taken as latin1 characters, so in the
  " buffer each of them shows up as a two-byte UTF-8 sequence.
  call assert_equal("\xC3\xAF\xC2\xBB\xC2\xBFutf-8\xC2\x80err", getline(1))
  set fenc=latin1
  write! Xfile2
  call assert_equal([utf8_bom .. "utf-8\x80err", ''], readfile('Xfile2', 'b'))
  set fenc=utf-8
  w! Xtest3
  call assert_equal(["\xC3\xAF\xC2\xBB\xC2\xBFutf-8\xC2\x80err", ''],
        \ readfile('Xtest3', 'b'))

  " Check ucs-2 BOM
  %bw!
  call writefile([utf16be_bom .. "\nu\nc\ns\n-\n2\n"], 'Xtest1')
  edit! Xtest1
  call assert_equal('utf-16', &fileencoding)
  call assert_equal(1, &bomb)
  call assert_equal('ucs-2', getline(1))
  set fenc=latin1
  write! Xfile2
  call assert_equal(["ucs-2", ''], readfile('Xfile2', 'b'))
  set fenc=ucs-2
  w! Xtest3
  call assert_equal([utf16be_bom .. "\nu\nc\ns\n-\n2\n", ''],
        \ readfile('Xtest3', 'b'))

  " Check ucs-2le BOM
  %bw!
  call writefile([utf16le_bom .. "u\nc\ns\n-\n2\nl\ne\n"], 'Xtest1')
  " Need to add a NUL byte after the NL byte
  call writefile(0z00, 'Xtest1', 'a')
  edit! Xtest1
  call assert_equal('utf-16le', &fileencoding)
  call assert_equal(1, &bomb)
  call assert_equal('ucs-2le', getline(1))
  set fenc=latin1
  write! Xfile2
  call assert_equal(["ucs-2le", ''], readfile('Xfile2', 'b'))
  set fenc=ucs-2le
  w! Xtest3
  call assert_equal([utf16le_bom .. "u\nc\ns\n-\n2\nl\ne\n", "\n"],
        \ readfile('Xtest3', 'b'))

  " Check ucs-4 BOM
  %bw!
  call writefile([utf32be_bom .. "\n\n\nu\n\n\nc\n\n\ns\n\n\n-\n\n\n4\n\n\n"],
        \ 'Xtest1')
  edit! Xtest1
  call assert_equal('ucs-4', &fileencoding)
  call assert_equal(1, &bomb)
  call assert_equal('ucs-4', getline(1))
  set fenc=latin1
  write! Xfile2
  call assert_equal(["ucs-4", ''], readfile('Xfile2', 'b'))
  set fenc=ucs-4
  w! Xtest3
  call assert_equal(
        \ [utf32be_bom .. "\n\n\nu\n\n\nc\n\n\ns\n\n\n-\n\n\n4\n\n\n", ''],
        \ readfile('Xtest3', 'b'))

  " Check ucs-4le BOM
  %bw!
  call writefile(
        \ [utf32le_bom .. "u\n\n\nc\n\n\ns\n\n\n-\n\n\n4\n\n\nl\n\n\ne\n\n\n"],
        \ 'Xtest1')
  " Need to add three NUL bytes after the NL byte
  call writefile(0z000000, 'Xtest1', 'a')
  edit! Xtest1
  call assert_equal('ucs-4le', &fileencoding)
  call assert_equal(1, &bomb)
  call assert_equal('ucs-4le', getline(1))
  set fenc=latin1
  write! Xfile2
  call assert_equal(["ucs-4le", ''], readfile('Xfile2', 'b'))
  set fenc=ucs-4le
  w! Xtest3
  call assert_equal(
        \ [utf32le_bom .. "u\n\n\nc\n\n\ns\n\n\n-\n\n\n4\n\n\nl\n\n\ne\n\n\n",
        \  "\n\n\n"],
        \ readfile('Xtest3', 'b'))

  " Cleanup: remove every file this test created.  The latin1 output goes to
  " Xfile2 (not Xtest2), so that is the name that has to be deleted.
  set cpoptions-=S
  let &fileencoding = save_fileencoding
  call delete('Xtest1')
  call delete('Xfile2')
  call delete('Xtest3')
  %bw!
endfunc
686+
559687" vim: shiftwidth = 2 sts = 2 expandtab
0 commit comments