Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve error position detention at WRITE PERM #357

Merged
merged 4 commits into from
Aug 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-centos7.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ jobs:
uses: actions/checkout@v1
- name: Build LTFS
id: build
uses: LinearTapeFileSystem/CentOS7-Build@v1.3
uses: LinearTapeFileSystem/CentOS7-Build@v1.4
with:
destination: '/tmp/ltfs'
2 changes: 1 addition & 1 deletion .github/workflows/build-centos8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ jobs:
uses: actions/checkout@v1
- name: Build LTFS
id: build
uses: LinearTapeFileSystem/CentOS8-Build@v1.5
uses: LinearTapeFileSystem/CentOS8-Build@v1.6
with:
destination: '/tmp/ltfs'
3 changes: 2 additions & 1 deletion messages/iosched_unified/root.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ root:table {
13022W:string { "Freeing a dentry priv with outstanding write requests. This is a bug." }
//unused 13023W:string { "Failed to copy a request: will stop writing file to the index partition." }
13024I:string { "Clean up extents and append index at index partition (%d)." }
13025I:string { "Get error position (%d, %d)." }
13025I:string { "Truncate extents larger than position (%d, %lld), block size = %ld." }
13026E:string { "Write perm handling error : %s (%d)." }
13027I:string { "Error position is larger than last index position: (%d, %lld), last index = %lld." }
}
}
3 changes: 2 additions & 1 deletion messages/libltfs/root.txt
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ root:table {
11332I:string { "Load successful." }
11333I:string { "A cartridge with write-perm error is detected on %s. Seek the newest index (IP: Gen = %llu, VCR = %llu) (DP: Gen = %llu, VCR = %llu) (VCR = %llu)." }
11334I:string { "Remove extent : %s (%llu, %llu)." }
11335D:string { "Get physical block position (%d - %d)." }
//unused 11335D:string { "Get physical block position (%d - %d)." }
11336I:string { "The attribute does not exist. Ignore the expected error." }
11337I:string { "Update index-dirty flag (%d) - %s (0x%p)." }
11338I:string { "Syncing index of %s %s." }
Expand Down Expand Up @@ -833,6 +833,7 @@ v
17289I:string { "Skip parsing the final index on IP." }
17290I:string { "Partitioning the medium with the destructive method." }
17291I:string { "Unpartitioning the medium with the destructive method." }
17292I:string { "Current position is (%llu, %llu), Error position is (%llu, %llu)." }

// For Debug 19999I:string { "%s %s %d." }

Expand Down
18 changes: 16 additions & 2 deletions src/iosched/unified.c
Original file line number Diff line number Diff line change
Expand Up @@ -2250,6 +2250,7 @@ int _unified_write_index_after_perm(int write_ret, struct unified_data *priv)
{
int ret = 0;
struct tc_position err_pos;
uint64_t last_index_pos = UINT64_MAX;
unsigned long blocksize;

if (!IS_WRITE_PERM(-write_ret)) {
Expand All @@ -2263,14 +2264,27 @@ int _unified_write_index_after_perm(int write_ret, struct unified_data *priv)
ltfsmsg(LTFS_ERR, 13026E, "update MAM", ret);

blocksize = ltfs_get_blocksize(priv->vol);
ret = tape_get_physical_block_position(priv->vol->device, &err_pos);

ret = tape_get_first_untransfered_position(priv->vol->device, &err_pos);
if (ret < 0) {
ltfsmsg(LTFS_ERR, 13026E, "get error pos", ret);
return ret;
}

ltfsmsg(LTFS_INFO, 13025I, (int)err_pos.block, (int)blocksize);
/* Check the err_pos is larger than the last index position of the partition */
if (err_pos.partition == ltfs_part_id2num(priv->vol->label->partid_ip, priv->vol)) {
last_index_pos = priv->vol->ip_coh.set_id;
} else {
last_index_pos = priv->vol->dp_coh.set_id;
}

if (last_index_pos > err_pos.block) {
MayraPD marked this conversation as resolved.
Show resolved Hide resolved
ltfsmsg(LTFS_INFO, 13027I, (int)err_pos.partition,
(unsigned long long)err_pos.block, (unsigned long long)last_index_pos);
err_pos.block = last_index_pos + 1;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@piste-jp-ibm So if last_index_pos is "the end of the partition" you are moving error_pos to the end of the partition?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is first block of the index. But it is enough because every extents must not have that block at all.

So LTFS sets the last position on tape to last_index_pos + 1 and cleanup extents. As a result, all extents after the last index will be removed.

}

ltfsmsg(LTFS_INFO, 13025I, (int)err_pos.partition, (unsigned long long)err_pos.block, blocksize);
ret = ltfs_fsraw_cleanup_extent(priv->vol->index->root, err_pos, blocksize, priv->vol);
if (ret < 0) {
ltfsmsg(LTFS_ERR, 13026E, "extent cleanup", ret);
Expand Down
22 changes: 20 additions & 2 deletions src/libltfs/ltfs_fsops_raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -443,8 +443,9 @@ int ltfs_fsraw_add_extent(struct dentry *d, struct extent_info *ext, bool update
int ltfs_fsraw_cleanup_extent(struct dentry *d, struct tc_position err_pos, unsigned long blocksize, struct ltfs_volume *vol)
{
int ret = 0;
struct name_list *entry, *tmp;
struct name_list *entry, *tmp;
struct extent_info *ext, *preventry;
struct tc_position extent_last = {0, 0, UINT32_MAX, false, false};

if (HASH_COUNT(d->child_list) != 0) {
HASH_ITER(hh, d->child_list, entry, tmp) {
Expand All @@ -453,7 +454,24 @@ int ltfs_fsraw_cleanup_extent(struct dentry *d, struct tc_position err_pos, unsi
}
else {
TAILQ_FOREACH_REVERSE_SAFE(ext, &entry->d->extentlist, extent_struct, list, preventry) {
if (err_pos.block <= (ext->start.block + ext->bytecount/blocksize)) {
if (ext->start.block && ext->bytecount) {
extent_last.partition = ltfs_part_id2num(ext->start.partition, vol);
/* Calculate the last block of this extent */
extent_last.block = ext->start.block + (ext->bytecount / blocksize);
Copy link

@MayraPD MayraPD Aug 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@piste-jp-ibm so LTFS always uses fixed length never variable length right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, a extent is constructed from one or more 512KB fixed blocks and no or one variable block.

if ( (ext->bytecount % blocksize) == 0 )
extent_last.block--;
} else {
extent_last.partition = UINT32_MAX;
extent_last.block = 0;
}

/*
* err_pos has the first block number that tape drive has it on buffer
* but not transferred to the medium.
* It means position (err_pos-1) is the last block on the medium.
*/
if ( extent_last.partition == err_pos.partition && err_pos.block <= extent_last.block ) {

ltfsmsg(LTFS_INFO, 11334I, entry->name, (unsigned long long)ext->start.block, (unsigned long long)ext->bytecount);

ret = ltfs_get_volume_lock(false, vol);
Expand Down
16 changes: 9 additions & 7 deletions src/libltfs/tape.c
Original file line number Diff line number Diff line change
Expand Up @@ -1122,30 +1122,32 @@ int tape_update_position(struct device_data *dev, struct tc_position *pos)
return 0;
}

int tape_get_physical_block_position(struct device_data *dev, struct tc_position *pos)
int tape_get_first_untransfered_position(struct device_data *dev, struct tc_position *pos)
{
int ret;
unsigned int block;

CHECK_ARG_NULL(dev, -LTFS_NULL_ARG);
CHECK_ARG_NULL(pos, -LTFS_NULL_ARG);

/* Update current position, just in case. Because no penalty here */
ret = dev->backend->readpos(dev->backend_data, &dev->position);
if (ret < 0) {
ltfsmsg(LTFS_ERR, 17132E);
return ret;
}

ret = dev->backend->get_block_in_buffer(dev->backend_data, &block);
/* Capture first untransferred position */
ret = dev->backend->get_next_block_to_xfer(dev->backend_data, pos);
if (ret < 0) {
ltfsmsg(LTFS_ERR, 17132E);
return ret;
}

memcpy(pos, &dev->position, sizeof(struct tc_position));

ltfsmsg(LTFS_DEBUG, 11335D, (int)pos->block, block);
pos->block -= block;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@piste-jp-ibm at first glance I would have thought on this line as the guilty of the overwriting of the previous block

Copy link
Member Author

@piste-jp piste-jp Aug 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. this line is one of the root cause. But logic itself is correct. The idea how to fetch the error position is bad.

We thought that the only way to fetch the error position is subtract number of records in buffer from the current position. But this idea is not good when a WRITE PERM happens just after a locate (for append). Because the drive mould have records which is read by the locate in buffer.

Actually, we realized error position it self is provided by READ_POSITION command.

ltfsmsg(LTFS_INFO, 17292I,
(unsigned long long)dev->position.partition,
(unsigned long long)dev->position.block,
(unsigned long long)pos->partition,
(unsigned long long)pos->block);

return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion src/libltfs/tape.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ int tape_update_position(struct device_data *dev, struct tc_position *pos);
int tape_seek(struct device_data *dev, struct tc_position *pos);
int tape_seek_eod(struct device_data *dev, tape_partition_t partition);
int tape_seek_append_position(struct device_data *dev, tape_partition_t prt, bool unlock_write);
int tape_get_physical_block_position(struct device_data *dev, struct tc_position *pos);
int tape_get_first_untransfered_position(struct device_data *dev, struct tc_position *pos);

int tape_spacefm(struct device_data *dev, int count);
ssize_t tape_read(struct device_data *dev, char *buf, size_t count, const bool unusual_size,
Expand Down
6 changes: 3 additions & 3 deletions src/libltfs/tape_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -915,12 +915,12 @@ struct tape_ops {
int (*set_profiler)(void *device, char *work_dir, bool enable);

/**
* Get block number stored in the drive buffer
* Get first block number which is not transferred to the medium yet in the buffer
* @param device A pointer to the tape device
* @param block Number of blocks stored in the drive buffer
* @param pos Position of the record that is not transferred yet in the buffer
* @return 0 on success or a negative value on error
*/
int (*get_block_in_buffer)(void *device, unsigned int *block);
int (*get_next_block_to_xfer)(void *device, struct tc_position *pos);

/**
* Check if the generation of tape drive and the current loaded cartridge is read-only combination
Expand Down
13 changes: 7 additions & 6 deletions src/tape_drivers/freebsd/cam/cam_tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1193,10 +1193,10 @@ int camtape_unload(void *device, struct tc_position *pos)
}

/*
* Get the number of blocks in the buffer on the tape drive after a write. Eventually it would
* Get the first block position that is not transferred to the medium. Eventually it would
* be nice to include this in status information returned from the sa(4) driver.
*/
static int camtape_get_block_in_buffer(void *device, uint32_t *block)
static int camtape_get_next_block_to_xfer(void *device, struct tc_position *pos)
{
int rc;
union ccb *ccb = NULL;
Expand Down Expand Up @@ -1240,9 +1240,10 @@ static int camtape_get_block_in_buffer(void *device, uint32_t *block)
if (rc != DEVICE_GOOD)
camtape_process_errors(softc, rc, msg, "READPOS", true);
else {
*block = scsi_3btoul(ext_data.num_objects);
ltfsmsg(LTFS_DEBUG, 30398D, "blocks-in-buffer",
(unsigned long long) *block, 0, 0, softc->drive_serial);
pos->partition = ext_data.partition;
pos->block = scsi_8btou64(ext_data.last_object)
ltfsmsg(LTFS_DEBUG, 30398D, "next-block-to-xfer",
(unsigned long long) pos->block, 0, 0, softc->drive_serial);
}

bailout:
Expand Down Expand Up @@ -4170,7 +4171,7 @@ struct tape_ops camtape_drive_handler = {
.get_serialnumber = camtape_get_serialnumber,
.get_info = camtape_get_info,
.set_profiler = camtape_set_profiler,
.get_block_in_buffer = camtape_get_block_in_buffer,
.get_next_block_to_xfer = camtape_get_next_block_to_xfer,
.is_readonly = camtape_is_readonly
};

Expand Down
16 changes: 10 additions & 6 deletions src/tape_drivers/generic/file/filedebug_tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,8 @@ int filedebug_write(void *device, const char *buf, size_t count, struct tc_posit
return -EDEV_WRITE_PERM;
} else if ( state->write_counter > (state->force_writeperm - THRESHOLD_FORCE_WRITE_NO_WRITE) ) {
ltfsmsg(LTFS_INFO, 30019I);
pos->block++;
++state->current_position.block;
pos->block = state->current_position.block;
return DEVICE_GOOD;
}
}
Expand Down Expand Up @@ -1872,8 +1873,7 @@ int filedebug_set_xattr(void *device, const char *name, const char *buf, size_t
state->force_writeperm = perm_count;
state->clear_by_pc = false;
}
if (state->force_writeperm && state->force_writeperm < THRESHOLD_FORCE_WRITE_NO_WRITE)
state->force_writeperm = THRESHOLD_FORCE_WRITE_NO_WRITE;

state->write_counter = 0;
ret = DEVICE_GOOD;
} else if (! strcmp(name, "ltfs.vendor.IBM.forceErrorType")) {
Expand Down Expand Up @@ -2776,9 +2776,13 @@ int filedebug_set_profiler(void *device, char *work_dir, bool enable)
return 0;
}

int filedebug_get_block_in_buffer(void *device, unsigned int *block)
int filedebug_get_next_block_to_xfer(void *device, struct tc_position *pos)
{
*block = 0;
struct filedebug_data *state = (struct filedebug_data *)device;

pos->partition = state->current_position.partition;
pos->block = state->current_position.block - THRESHOLD_FORCE_WRITE_NO_WRITE;

return 0;
}

Expand Down Expand Up @@ -2837,7 +2841,7 @@ struct tape_ops filedebug_handler = {
.get_serialnumber = filedebug_get_serialnumber,
.get_info = filedebug_get_info,
.set_profiler = filedebug_set_profiler,
.get_block_in_buffer = filedebug_get_block_in_buffer,
.get_next_block_to_xfer = filedebug_get_next_block_to_xfer,
.is_readonly = filedebug_is_readonly,
};

Expand Down
8 changes: 4 additions & 4 deletions src/tape_drivers/generic/itdtimg/itdtimg_tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1457,10 +1457,10 @@ int itdtimage_set_profiler(void *device, char *work_dir, bool enable)
return 0;
}

int itdtimage_get_block_in_buffer(void *device, unsigned int *block)
int itdtimage_get_next_block_to_xfer(void *device, struct tc_position *pos)
{
*block = 0;
return 0;
/* This backend never accept write command */
return -EDEV_WRITE_PROTECTED;;
}

/* Local functions */
Expand Down Expand Up @@ -1664,7 +1664,7 @@ struct tape_ops itdtimage_handler = {
.get_serialnumber = itdtimage_get_serialnumber,
.get_info = itdtimage_get_info,
.set_profiler = itdtimage_set_profiler,
.get_block_in_buffer = itdtimage_get_block_in_buffer,
.get_next_block_to_xfer = itdtimage_get_next_block_to_xfer,
.is_readonly = itdtimage_is_readonly,
};

Expand Down
11 changes: 6 additions & 5 deletions src/tape_drivers/linux/lin_tape/lin_tape_ibmtape.c
Original file line number Diff line number Diff line change
Expand Up @@ -2067,7 +2067,7 @@ int lin_tape_ibmtape_unload(void *device, struct tc_position *pos)
}
}

int lin_tape_ibmtape_get_block_in_buffer(void *device, uint32_t *block)
int lin_tape_ibmtape_get_next_block_to_xfer(void *device, struct tc_position *pos)
{
int rc;
char *msg;
Expand All @@ -2077,12 +2077,13 @@ int lin_tape_ibmtape_get_block_in_buffer(void *device, uint32_t *block)
ltfs_profiler_add_entry(priv->profiler, NULL, TAPEBEND_REQ_ENTER(REQ_TC_READPOS));
memset(&rp, 0, sizeof(struct read_tape_position));

rp.data_format = RP_SHORT_FORM;
rp.data_format = RP_EXTENDED_FORM;

rc = _sioc_stioc_command(device, STIOC_READ_POSITION_EX, "READPOS SHORT", &rp, &msg);
rc = _sioc_stioc_command(device, STIOC_READ_POSITION_EX, "READPOS EXT", &rp, &msg);

if (rc == DEVICE_GOOD) {
*block = (uint32_t)ltfs_betou32(rp.rp_data.rp_short.num_buffer_logical_obj);
pos->partition = rp.rp_data.rp_extended.active_partition;
pos->block = ltfs_betou64(rp.rp_data.rp_extended.last_logical_obj_position);
}
else {
lin_tape_ibmtape_process_errors(device, rc, msg, "get block in buf", true);
Expand Down Expand Up @@ -4419,7 +4420,7 @@ struct tape_ops lin_tape_ibmtape_drive_handler = {
.get_serialnumber = lin_tape_ibmtape_get_serialnumber,
.get_info = lin_tape_ibmtape_get_info,
.set_profiler = lin_tape_ibmtape_set_profiler,
.get_block_in_buffer = lin_tape_ibmtape_get_block_in_buffer,
.get_next_block_to_xfer = lin_tape_ibmtape_get_next_block_to_xfer,
.is_readonly = lin_tape_ibmtape_is_readonly,
};

Expand Down
18 changes: 11 additions & 7 deletions src/tape_drivers/linux/sg/sg_tape.c
Original file line number Diff line number Diff line change
Expand Up @@ -2749,7 +2749,7 @@ int sg_readpos(void *device, struct tc_position *pos)
struct sg_data *priv = (struct sg_data*)device;

sg_io_hdr_t req;
unsigned char cdb[CDB6_LEN];
unsigned char cdb[CDB10_LEN];
unsigned char sense[MAXSENSE];
int timeout;
char cmd_desc[COMMAND_DESCRIPTION_LENGTH] = "READPOS";
Expand Down Expand Up @@ -4956,14 +4956,14 @@ int sg_set_profiler(void *device, char *work_dir, bool enable)
return rc;
}

int sg_get_block_in_buffer(void *device, uint32_t *block)
int sg_get_next_block_to_xfer(void *device, struct tc_position *pos)
{
int ret = -EDEV_UNKNOWN;
int ret_ep = DEVICE_GOOD;
struct sg_data *priv = (struct sg_data*)device;

sg_io_hdr_t req;
unsigned char cdb[CDB6_LEN];
unsigned char cdb[CDB10_LEN];
unsigned char sense[MAXSENSE];
int timeout;
char cmd_desc[COMMAND_DESCRIPTION_LENGTH] = "READPOS";
Expand All @@ -4972,6 +4972,8 @@ int sg_get_block_in_buffer(void *device, uint32_t *block)

ltfs_profiler_add_entry(priv->profiler, NULL, TAPEBEND_REQ_ENTER(REQ_TC_READPOS));

memset(pos, 0, sizeof(struct tc_position));

/* Zero out the CDB and the result buffer */
ret = init_sg_io_header(&req);
if (ret < 0)
Expand All @@ -4983,6 +4985,7 @@ int sg_get_block_in_buffer(void *device, uint32_t *block)
/* Build CDB */
cdb[0] = READ_POSITION;
cdb[1] = 0x08; /* Extended Format */
ltfs_u16tobe(cdb + 7, sizeof(buf)); /* allocation length */

timeout = get_timeout(priv->timeouts, cdb[0]);
if (timeout < 0)
Expand All @@ -5001,10 +5004,11 @@ int sg_get_block_in_buffer(void *device, uint32_t *block)

ret = sg_issue_cdb_command(&priv->dev, &req, &msg);
if (ret == DEVICE_GOOD) {
*block = (buf[5] << 16) + (buf[6] << 8) + (int)buf[7];
pos->partition = (tape_partition_t)buf[1];
pos->block = ltfs_betou64(buf + 16);

ltfsmsg(LTFS_DEBUG, 30398D, "blocks-in-buffer",
(unsigned long long)*block, (unsigned long long)0, (unsigned long long)0, priv->drive_serial);
ltfsmsg(LTFS_DEBUG, 30398D, "next-block-to-xfer",
(unsigned long long)pos->partition, (unsigned long long)pos->block, (unsigned long long)0, priv->drive_serial);
} else {
ret_ep = _process_errors(device, ret, msg, cmd_desc, true, true);
if (ret_ep < 0)
Expand Down Expand Up @@ -5201,7 +5205,7 @@ struct tape_ops sg_handler = {
.get_serialnumber = sg_get_serialnumber,
.get_info = sg_get_info,
.set_profiler = sg_set_profiler,
.get_block_in_buffer = sg_get_block_in_buffer,
.get_next_block_to_xfer = sg_get_next_block_to_xfer,
.is_readonly = sg_is_readonly,
};

Expand Down
Loading