// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 Western Digital Corporation or its affiliates.
 */

#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include "nvme.h"

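/*
 * Propagate the zone size and the controller-wide zone append limit to the
 * request queue limits, then have the block layer revalidate the disk zones.
 */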
int nvme_revalidate_zones(struct nvme_ns *ns)
{
	struct request_queue *q = ns->queue;

	blk_queue_chunk_sectors(q, ns->zsze);
	blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);

	return blk_revalidate_disk_zones(ns->disk, NULL);
}

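/*
 * Read the ZNS command set specific controller identify data to get the
 * Zone Append Size Limit (ZASL) and cache it as a sector count.
 */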
static int nvme_set_max_append(struct nvme_ctrl *ctrl)
{
	struct nvme_command c = { };
	struct nvme_id_ctrl_zns *id;
	int status;

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return -ENOMEM;

	c.identify.opcode = nvme_admin_identify;
	c.identify.cns = NVME_ID_CNS_CS_CTRL;
	c.identify.csi = NVME_CSI_ZNS;

	status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
	if (status) {
		kfree(id);
		return status;
	}

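	/*
	 * ZASL is a power of two in units of the minimum memory page size
	 * (4K is assumed here), so shifting by 3 more converts it to
	 * 512-byte sectors. A ZASL of zero means the zone append limit is
	 * simply the data transfer limit (max_hw_sectors, from MDTS).
	 */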
	if (id->zasl)
		ctrl->max_zone_append = 1 << (id->zasl + 3);
	else
		ctrl->max_zone_append = ctrl->max_hw_sectors;
	kfree(id);
	return 0;
}

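/*
 * Query the ZNS command set specific namespace identify data and apply the
 * zoned block device limits to the namespace's request queue.
 */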
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
{
	struct nvme_effects_log *log = ns->head->effects;
	struct request_queue *q = ns->queue;
	struct nvme_command c = { };
	struct nvme_id_ns_zns *id;
	int status;

	/* Driver requires zone append support */
	if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
			NVME_CMD_EFFECTS_CSUPP)) {
		if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
			dev_warn(ns->ctrl->device,
				 "Zone Append supported for zoned namespace:%d. Remove read-only mode\n",
				 ns->head->ns_id);
	} else {
		set_bit(NVME_NS_FORCE_RO, &ns->flags);
		dev_warn(ns->ctrl->device,
			 "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n",
			 ns->head->ns_id);
	}

	/* Lazily query controller append limit for the first zoned namespace */
	if (!ns->ctrl->max_zone_append) {
		status = nvme_set_max_append(ns->ctrl);
		if (status)
			return status;
	}

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return -ENOMEM;

	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(ns->head->ns_id);
	c.identify.cns = NVME_ID_CNS_CS_NS;
	c.identify.csi = NVME_CSI_ZNS;

	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
	if (status)
		goto free_data;

	/*
	 * We currently do not handle devices requiring any of the zoned
	 * operation characteristics.
	 */
	if (id->zoc) {
		dev_warn(ns->ctrl->device,
			"zone operations:%x not supported for namespace:%u\n",
			le16_to_cpu(id->zoc), ns->head->ns_id);
		status = -ENODEV;
		goto free_data;
	}

	ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
	if (!is_power_of_2(ns->zsze)) {
		dev_warn(ns->ctrl->device,
			"invalid zone size:%llu for namespace:%u\n",
			ns->zsze, ns->head->ns_id);
		status = -ENODEV;
		goto free_data;
	}

	disk_set_zoned(ns->disk, BLK_ZONED_HM);
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
	disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1);
	disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1);
free_data:
	kfree(id);
	return status;
}

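/*
 * Allocate a buffer for a zone report, sized for the requested number of
 * zones but clamped to the queue's transfer limits. Under memory pressure,
 * retry with progressively smaller sizes down to a single-descriptor report.
 */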
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
					  unsigned int nr_zones, size_t *buflen)
{
	struct request_queue *q = ns->disk->queue;
	size_t bufsize;
	void *buf;

	const size_t min_bufsize = sizeof(struct nvme_zone_report) +
				   sizeof(struct nvme_zone_descriptor);

	nr_zones = min_t(unsigned int, nr_zones,
			 get_capacity(ns->disk) >> ilog2(ns->zsze));

	bufsize = sizeof(struct nvme_zone_report) +
		nr_zones * sizeof(struct nvme_zone_descriptor);
	bufsize = min_t(size_t, bufsize,
			queue_max_hw_sectors(q) << SECTOR_SHIFT);
	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);

	while (bufsize >= min_bufsize) {
		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
		if (buf) {
			*buflen = bufsize;
			return buf;
		}
		bufsize >>= 1;
	}
	return NULL;
}

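/*
 * Convert one zone descriptor from a report into a struct blk_zone and
 * pass it to the block layer report_zones callback.
 */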
static int nvme_zone_parse_entry(struct nvme_ns *ns,
				 struct nvme_zone_descriptor *entry,
				 unsigned int idx, report_zones_cb cb,
				 void *data)
{
	struct blk_zone zone = { };

	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
		dev_err(ns->ctrl->device, "invalid zone type %#x\n",
			entry->zt);
		return -EINVAL;
	}

	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
	zone.cond = entry->zs >> 4;
	zone.len = ns->zsze;
	zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
	zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
	if (zone.cond == BLK_ZONE_COND_FULL)
		zone.wp = zone.start + zone.len;
	else
		zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));

	return cb(&zone, idx, data);
}

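/*
 * Issue Zone Management Receive commands, starting at the zone containing
 * sector, until nr_zones zones have been reported or the namespace capacity
 * is reached. Returns the number of zones reported or a negative errno.
 */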
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
			 unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct nvme_zone_report *report;
	struct nvme_command c = { };
	int ret, zone_idx = 0;
	unsigned int nz, i;
	size_t buflen;

	if (ns->head->ids.csi != NVME_CSI_ZNS)
		return -EINVAL;

	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
	if (!report)
		return -ENOMEM;

	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;

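	/*
	 * Round the start sector down to a zone boundary; the zone size was
	 * verified to be a power of two sectors in nvme_update_zone_info().
	 */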
	sector &= ~(ns->zsze - 1);
	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
		memset(report, 0, buflen);

		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
		ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out_free;
		}

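		/*
		 * With NVME_REPORT_ZONE_PARTIAL set, nr_zones holds the
		 * number of descriptors actually transferred in this report.
		 */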
		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
		if (!nz)
			break;

		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
			ret = nvme_zone_parse_entry(ns, &report->entries[i],
						    zone_idx, cb, data);
			if (ret)
				goto out_free;
			zone_idx++;
		}

		sector += ns->zsze * nz;
	}

	if (zone_idx > 0)
		ret = zone_idx;
	else
		ret = -EINVAL;
out_free:
	kvfree(report);
	return ret;
}

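/*
 * Fill in a Zone Management Send command for a zone management request
 * (reset, open, close, finish). REQ_OP_ZONE_RESET_ALL sets the Select All
 * bit instead of targeting a single zone.
 */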
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
		struct nvme_command *c, enum nvme_zone_mgmt_action action)
{
	memset(c, 0, sizeof(*c));

	c->zms.opcode = nvme_cmd_zone_mgmt_send;
	c->zms.nsid = cpu_to_le32(ns->head->ns_id);
	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
	c->zms.zsa = action;

	if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
		c->zms.select_all = 1;

	return BLK_STS_OK;
}