-
Notifications
You must be signed in to change notification settings - Fork 499
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
stability: retry truncating sst files upon failure #484
Changes from 9 commits
00a8e59
b34437a
30dca2c
f9c1cb9
e12ac8e
4bd7fba
7b2b98a
40336a1
88ca8b6
9a06a75
fe1172a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,13 +14,21 @@ | |
package ops | ||
|
||
import ( | ||
"fmt" | ||
"strconv" | ||
"strings" | ||
"time" | ||
|
||
"github.com/golang/glog" | ||
"github.com/pingcap/errors" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
) | ||
|
||
const ( | ||
retryLimit = 15 | ||
maxSSTFilesToTruncate = 20 | ||
) | ||
|
||
type TruncateOptions struct { | ||
Namespace string | ||
Cluster string | ||
|
@@ -32,7 +40,7 @@ type TiKVOps struct { | |
} | ||
|
||
func (ops *TiKVOps) TruncateSSTFile(opts TruncateOptions) error { | ||
glog.Infof("truncate sst option: %+v", opts) | ||
logHdr := fmt.Sprintf("store: %s cluster: [%s/%s] ", opts.Store, opts.Namespace, opts.Cluster) | ||
|
||
tc, err := ops.PingcapV1alpha1().TidbClusters(opts.Namespace).Get(opts.Cluster, metav1.GetOptions{}) | ||
if err != nil { | ||
|
@@ -54,41 +62,61 @@ func (ops *TiKVOps) TruncateSSTFile(opts TruncateOptions) error { | |
}) | ||
} | ||
|
||
stdout, stderr, err := exec("find", "/var/lib/tikv/db", "-name", "*.sst", "-o", "-name", "*.save") | ||
if err != nil { | ||
glog.Errorf("list sst files: stderr=%s err=%s", stderr, err.Error()) | ||
return errors.Annotate(err, "list sst files") | ||
} | ||
retryCount := 0 | ||
for ; retryCount < retryLimit; retryCount++ { | ||
if retryCount > 0 { | ||
time.Sleep(10 * time.Second) | ||
} | ||
stdout, stderr, err := exec("find", "/var/lib/tikv/db", "-name", "*.sst", "-o", "-name", "*.save") | ||
if err != nil { | ||
glog.Warningf(logHdr+"list sst files: stderr=%s err=%s", stderr, err.Error()) | ||
continue | ||
} | ||
|
||
sstCandidates := make(map[string]bool) | ||
sstCandidates := make(map[string]bool) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Delete the blanks. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is one more indent because it's in the retry loop now. |
||
|
||
for _, f := range strings.Split(stdout, "\n") { | ||
f = strings.TrimSpace(f) | ||
if len(f) > 0 { | ||
sstCandidates[f] = true | ||
for _, f := range strings.Split(stdout, "\n") { | ||
f = strings.TrimSpace(f) | ||
if len(f) > 0 { | ||
sstCandidates[f] = true | ||
} | ||
} | ||
} | ||
|
||
sst := "" | ||
for k := range sstCandidates { | ||
if strings.HasSuffix(k, ".sst") && !sstCandidates[k+".save"] { | ||
sst = k | ||
ssts := make([]string, 0, maxSSTFilesToTruncate) | ||
for k := range sstCandidates { | ||
if len(ssts) >= maxSSTFilesToTruncate { | ||
break | ||
} | ||
if strings.HasSuffix(k, ".sst") && !sstCandidates[k+".save"] { | ||
ssts = append(ssts, k) | ||
} | ||
} | ||
if len(ssts) == 0 { | ||
glog.Warning(logHdr + "cannot find a sst file") | ||
continue | ||
} | ||
} | ||
if len(sst) == 0 { | ||
return errors.New("cannot find a sst file") | ||
} | ||
|
||
_, stderr, err = exec("cp", sst, sst+".save") | ||
if err != nil { | ||
glog.Errorf("backup sst file: stderr=%s err=%s", stderr, err.Error()) | ||
return errors.Annotate(err, "backup sst file") | ||
truncated := 0 | ||
for _, sst := range ssts { | ||
_, stderr, err = exec("sh", "-c", | ||
fmt.Sprintf("cp %s %s.save && truncate -s 0 %s", sst, sst, sst)) | ||
if err != nil { | ||
glog.Warningf(logHdr+"truncate sst file: sst=%s stderr=%s err=%s", sst, stderr, err.Error()) | ||
continue | ||
} | ||
truncated++ | ||
} | ||
if truncated == 0 { | ||
glog.Warningf(logHdr + "no sst file has been truncated") | ||
continue | ||
} | ||
|
||
glog.Infof(logHdr+"%d sst files got truncated", truncated) | ||
break | ||
} | ||
|
||
_, stderr, err = exec("truncate", "-s", "0", sst) | ||
if err != nil { | ||
glog.Errorf("truncate sst file: stderr=%s err=%s", stderr, err.Error()) | ||
return errors.Annotate(err, "truncate sst file") | ||
if retryCount == retryLimit { | ||
return errors.New("failed to truncate sst file after " + strconv.Itoa(retryLimit) + " trials") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. the error log needs There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I added annotations to the log methods. However, it's the caller who passes the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But the caller has not added these fields to the error log either. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I added some error logs to the case func. If you think every error should be annotated with additional fields, it would be better to create another PR to do it, because most of the methods of |
||
} | ||
|
||
return nil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.