Skip to content

Commit ee2c77a

Browse files
KirCute, j2rong4cn, PIKACHUIM
authored
fix(archive/zip): user specific encoding for non-EFS zips (#1599)
* fix(archive/zip): user specific encoding for non-EFS zips
* fix(stream): simplify head cache initialization and improve reader retrieval logic
* fix: support multipart zips (.z01)
* chore(deps): update github.com/KirCute/zip to v1.0.1
---------
Co-authored-by: j2rong4cn <j2rong@qq.com>
Co-authored-by: Pikachu Ren <40362270+PIKACHUIM@users.noreply.github.com>
1 parent fc90ec1 commit ee2c77a

File tree

11 files changed

+189
-209
lines changed

11 files changed

+189
-209
lines changed

go.mod

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ go 1.23.4
55
require (
66
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.1
77
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.2
8+
github.com/KirCute/zip v1.0.1
89
github.com/OpenListTeam/go-cache v0.1.0
910
github.com/OpenListTeam/sftpd-openlist v1.0.1
1011
github.com/OpenListTeam/tache v0.2.1
@@ -57,7 +58,6 @@ require (
5758
github.com/pquerna/otp v1.5.0
5859
github.com/quic-go/quic-go v0.54.1
5960
github.com/rclone/rclone v1.70.3
60-
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d
6161
github.com/shirou/gopsutil/v4 v4.25.5
6262
github.com/sirupsen/logrus v1.9.3
6363
github.com/spf13/afero v1.14.0
@@ -68,7 +68,6 @@ require (
6868
github.com/u2takey/ffmpeg-go v0.5.0
6969
github.com/upyun/go-sdk/v3 v3.0.4
7070
github.com/winfsp/cgofuse v1.6.0
71-
github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9
7271
github.com/zzzhr1990/go-common-entity v0.0.0-20250202070650-1a200048f0d3
7372
golang.org/x/crypto v0.40.0
7473
golang.org/x/image v0.29.0

go.sum

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
3939
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
4040
github.com/Da3zKi7/saferith v0.33.0-fixed h1:fnIWTk7EP9mZAICf7aQjeoAwpfrlCrkOvqmi6CbWdTk=
4141
github.com/Da3zKi7/saferith v0.33.0-fixed/go.mod h1:QKJhjoqUtBsXCAVEjw38mFqoi7DebT7kthcD7UzbnoA=
42+
github.com/KirCute/zip v1.0.1 h1:L/tVZglOiDVKDi9Ud+fN49htgKdQ3Z0H80iX8OZk13c=
43+
github.com/KirCute/zip v1.0.1/go.mod h1:xhF7dCB+Bjvy+5a56lenYCKBsH+gxDNPZSy5Cp+nlXk=
4244
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
4345
github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
4446
github.com/Max-Sum/base32768 v0.0.0-20230304063302-18e6ce5945fd h1:nzE1YQBdx1bq9IlZinHa+HVffy+NmVRoKr+wHN8fpLE=
@@ -639,8 +641,6 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD
639641
github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk=
640642
github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 h1:GHRpF1pTW19a8tTFrMLUcfWwyC0pnifVo2ClaLq+hP8=
641643
github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5PCi+MFsC7HjREoAz1BU+Mq60+05gifQSsHSDG/8=
642-
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
643-
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
644644
github.com/secsy/goftp v0.0.0-20200609142545-aa2de14babf4 h1:PT+ElG/UUFMfqy5HrxJxNzj3QBOf7dZwupeVC+mG1Lo=
645645
github.com/secsy/goftp v0.0.0-20200609142545-aa2de14babf4/go.mod h1:MnkX001NG75g3p8bhFycnyIjeQoOjGL6CEIsdE/nKSY=
646646
github.com/shabbyrobe/gocovmerge v0.0.0-20230507112040-c3350d9342df h1:S77Pf5fIGMa7oSwp8SQPp7Hb4ZiI38K3RNBKD2LLeEM=
@@ -713,8 +713,6 @@ github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavM
713713
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
714714
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
715715
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
716-
github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9 h1:K8gF0eekWPEX+57l30ixxzGhHH/qscI3JCnuhbN6V4M=
717-
github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9/go.mod h1:9BnoKCcgJ/+SLhfAXj15352hTOuVmG5Gzo8xNRINfqI=
718716
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
719717
github.com/yuin/goldmark v1.7.13 h1:GPddIs617DnBLFFVJFgpo1aBfe/4xcvMc3SB5t/D0pA=
720718
github.com/yuin/goldmark v1.7.13/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg=

internal/archive/rardecode/rardecode.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"io"
55
"os"
66
"path/filepath"
7+
"regexp"
78
"strings"
89

910
"github.com/OpenListTeam/OpenList/v4/internal/archive/tool"
@@ -21,7 +22,7 @@ func (RarDecoder) AcceptedExtensions() []string {
2122

2223
func (RarDecoder) AcceptedMultipartExtensions() map[string]tool.MultipartExtension {
2324
return map[string]tool.MultipartExtension{
24-
".part1.rar": {".part%d.rar", 2},
25+
".part1.rar": {regexp.MustCompile("^.*\\.part(\\d+)\\.rar$"), 2},
2526
}
2627
}
2728

internal/archive/sevenzip/sevenzip.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package sevenzip
22

33
import (
44
"io"
5+
"regexp"
56
"strings"
67

78
"github.com/OpenListTeam/OpenList/v4/internal/archive/tool"
@@ -18,7 +19,7 @@ func (SevenZip) AcceptedExtensions() []string {
1819

1920
func (SevenZip) AcceptedMultipartExtensions() map[string]tool.MultipartExtension {
2021
return map[string]tool.MultipartExtension{
21-
".7z.001": {".7z.%.3d", 2},
22+
".7z.001": {regexp.MustCompile("^.*\\.7z\\.(\\d+)$"), 2},
2223
}
2324
}
2425

internal/archive/tool/base.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@ package tool
22

33
import (
44
"io"
5+
"regexp"
56

67
"github.com/OpenListTeam/OpenList/v4/internal/model"
78
"github.com/OpenListTeam/OpenList/v4/internal/stream"
89
)
910

1011
type MultipartExtension struct {
11-
PartFileFormat string
12+
PartFileFormat *regexp.Regexp
1213
SecondPartIndex int
1314
}
1415

internal/archive/zip/utils.go

Lines changed: 48 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,15 @@ import (
44
"bytes"
55
"io"
66
"io/fs"
7-
stdpath "path"
87
"strings"
98

9+
"github.com/KirCute/zip"
1010
"github.com/OpenListTeam/OpenList/v4/internal/archive/tool"
11+
"github.com/OpenListTeam/OpenList/v4/internal/conf"
1112
"github.com/OpenListTeam/OpenList/v4/internal/errs"
13+
"github.com/OpenListTeam/OpenList/v4/internal/setting"
1214
"github.com/OpenListTeam/OpenList/v4/internal/stream"
13-
"github.com/saintfish/chardet"
14-
"github.com/yeka/zip"
15-
"golang.org/x/text/encoding"
16-
"golang.org/x/text/encoding/charmap"
17-
"golang.org/x/text/encoding/japanese"
18-
"golang.org/x/text/encoding/korean"
19-
"golang.org/x/text/encoding/simplifiedchinese"
20-
"golang.org/x/text/encoding/traditionalchinese"
21-
"golang.org/x/text/encoding/unicode"
22-
"golang.org/x/text/encoding/unicode/utf32"
15+
"golang.org/x/text/encoding/ianaindex"
2316
"golang.org/x/text/transform"
2417
)
2518

@@ -37,22 +30,23 @@ func (r *WrapReader) Files() []tool.SubFile {
3730

3831
type WrapFileInfo struct {
3932
fs.FileInfo
33+
efs bool
4034
}
4135

4236
func (f *WrapFileInfo) Name() string {
43-
return decodeName(f.FileInfo.Name())
37+
return decodeName(f.FileInfo.Name(), f.efs)
4438
}
4539

4640
type WrapFile struct {
4741
f *zip.File
4842
}
4943

5044
func (f *WrapFile) Name() string {
51-
return decodeName(f.f.Name)
45+
return decodeName(f.f.Name, isEFS(f.f.Flags))
5246
}
5347

5448
func (f *WrapFile) FileInfo() fs.FileInfo {
55-
return &WrapFileInfo{FileInfo: f.f.FileInfo()}
49+
return &WrapFileInfo{FileInfo: f.f.FileInfo(), efs: isEFS(f.f.Flags)}
5650
}
5751

5852
func (f *WrapFile) Open() (io.ReadCloser, error) {
@@ -67,16 +61,33 @@ func (f *WrapFile) SetPassword(password string) {
6761
f.f.SetPassword(password)
6862
}
6963

70-
func getReader(ss []*stream.SeekableStream) (*zip.Reader, error) {
71-
if len(ss) > 1 && stdpath.Ext(ss[1].GetName()) == ".z01" {
72-
// FIXME: Incorrect parsing method for standard multipart zip format
73-
ss = append(ss[1:], ss[0])
74-
}
75-
reader, err := stream.NewMultiReaderAt(ss)
64+
func makePart(ss *stream.SeekableStream) (zip.SizeReaderAt, error) {
65+
ra, err := stream.NewReadAtSeeker(ss, 0)
7666
if err != nil {
7767
return nil, err
7868
}
79-
return zip.NewReader(reader, reader.Size())
69+
return &inlineSizeReaderAt{ReaderAt: ra, size: ss.GetSize()}, nil
70+
}
71+
72+
func (z *Zip) getReader(ss []*stream.SeekableStream) (*zip.Reader, error) {
73+
if len(ss) > 1 && z.traditionalSecondPartRegExp.MatchString(ss[1].GetName()) {
74+
ss = append(ss[1:], ss[0])
75+
ras := make([]zip.SizeReaderAt, 0, len(ss))
76+
for _, s := range ss {
77+
ra, err := makePart(s)
78+
if err != nil {
79+
return nil, err
80+
}
81+
ras = append(ras, ra)
82+
}
83+
return zip.NewMultipartReader(ras)
84+
} else {
85+
reader, err := stream.NewMultiReaderAt(ss)
86+
if err != nil {
87+
return nil, err
88+
}
89+
return zip.NewReader(reader, reader.Size())
90+
}
8091
}
8192

8293
func filterPassword(err error) error {
@@ -86,110 +97,29 @@ func filterPassword(err error) error {
8697
return err
8798
}
8899

89-
func decodeName(name string) string {
90-
b := []byte(name)
91-
detector := chardet.NewTextDetector()
92-
results, err := detector.DetectAll(b)
93-
if err != nil {
100+
func decodeName(name string, efs bool) string {
101+
if efs {
94102
return name
95103
}
96-
var ce, re, enc encoding.Encoding
97-
for _, r := range results {
98-
if r.Confidence > 30 {
99-
ce = getCommonEncoding(r.Charset)
100-
if ce != nil {
101-
break
102-
}
103-
}
104-
if re == nil {
105-
re = getEncoding(r.Charset)
106-
}
107-
}
108-
if ce != nil {
109-
enc = ce
110-
} else if re != nil {
111-
enc = re
112-
} else {
104+
enc, err := ianaindex.IANA.Encoding(setting.GetStr(conf.NonEFSZipEncoding))
105+
if err != nil {
113106
return name
114107
}
115-
i := bytes.NewReader(b)
108+
i := bytes.NewReader([]byte(name))
116109
decoder := transform.NewReader(i, enc.NewDecoder())
117110
content, _ := io.ReadAll(decoder)
118111
return string(content)
119112
}
120113

121-
func getCommonEncoding(name string) (enc encoding.Encoding) {
122-
switch name {
123-
case "UTF-8":
124-
enc = unicode.UTF8
125-
case "UTF-16LE":
126-
enc = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
127-
case "Shift_JIS":
128-
enc = japanese.ShiftJIS
129-
case "GB-18030":
130-
enc = simplifiedchinese.GB18030
131-
case "EUC-KR":
132-
enc = korean.EUCKR
133-
case "Big5":
134-
enc = traditionalchinese.Big5
135-
default:
136-
enc = nil
137-
}
138-
return
139-
}
140-
141-
func getEncoding(name string) (enc encoding.Encoding) {
142-
switch name {
143-
case "UTF-8":
144-
enc = unicode.UTF8
145-
case "UTF-16BE":
146-
enc = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
147-
case "UTF-16LE":
148-
enc = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
149-
case "UTF-32BE":
150-
enc = utf32.UTF32(utf32.BigEndian, utf32.IgnoreBOM)
151-
case "UTF-32LE":
152-
enc = utf32.UTF32(utf32.LittleEndian, utf32.IgnoreBOM)
153-
case "ISO-8859-1":
154-
enc = charmap.ISO8859_1
155-
case "ISO-8859-2":
156-
enc = charmap.ISO8859_2
157-
case "ISO-8859-3":
158-
enc = charmap.ISO8859_3
159-
case "ISO-8859-4":
160-
enc = charmap.ISO8859_4
161-
case "ISO-8859-5":
162-
enc = charmap.ISO8859_5
163-
case "ISO-8859-6":
164-
enc = charmap.ISO8859_6
165-
case "ISO-8859-7":
166-
enc = charmap.ISO8859_7
167-
case "ISO-8859-8":
168-
enc = charmap.ISO8859_8
169-
case "ISO-8859-8-I":
170-
enc = charmap.ISO8859_8I
171-
case "ISO-8859-9":
172-
enc = charmap.ISO8859_9
173-
case "windows-1251":
174-
enc = charmap.Windows1251
175-
case "windows-1256":
176-
enc = charmap.Windows1256
177-
case "KOI8-R":
178-
enc = charmap.KOI8R
179-
case "Shift_JIS":
180-
enc = japanese.ShiftJIS
181-
case "GB-18030":
182-
enc = simplifiedchinese.GB18030
183-
case "EUC-JP":
184-
enc = japanese.EUCJP
185-
case "EUC-KR":
186-
enc = korean.EUCKR
187-
case "Big5":
188-
enc = traditionalchinese.Big5
189-
case "ISO-2022-JP":
190-
enc = japanese.ISO2022JP
191-
default:
192-
enc = nil
193-
}
194-
return
114+
func isEFS(flags uint16) bool {
115+
return (flags & 0x800) > 0
116+
}
117+
118+
type inlineSizeReaderAt struct {
119+
io.ReaderAt
120+
size int64
121+
}
122+
123+
func (i *inlineSizeReaderAt) Size() int64 {
124+
return i.size
195125
}

0 commit comments

Comments
 (0)