1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
5  *
6  * Authors:
7  *  Sunil Muthuswamy <sunilmut@microsoft.com>
8  *  Wei Liu <wei.liu@kernel.org>
9  */
10 
11 #include <linux/pci.h>
12 #include <linux/irq.h>
13 #include <asm/mshyperv.h>
14 
/*
 * Map a device interrupt with the HVCALL_MAP_DEVICE_INTERRUPT hypercall.
 *
 * @device_id: hypervisor device identifier of the interrupt source
 * @level:     true selects level trigger mode, false selects edge
 * @cpu:       Linux CPU the interrupt is targeted at
 * @vector:    vector programmed into the interrupt descriptor
 * @entry:     receives the interrupt entry found in the hypercall output
 *             buffer; it is copied out whether or not the hypercall
 *             succeeded, so it is only meaningful on success
 *
 * The per-CPU hypercall input/output buffers are only touched while local
 * interrupts are disabled.
 *
 * Return: -EINVAL when no VP set can be generated for @cpu, otherwise the
 * hv_result() of the hypercall status (callers in this file compare the
 * value against HV_STATUS_SUCCESS).
 */
static int hv_map_interrupt(union hv_device_id device_id, bool level,
		int cpu, int vector, struct hv_interrupt_entry *entry)
{
	struct hv_input_map_device_interrupt *input;
	struct hv_output_map_device_interrupt *output;
	struct hv_device_interrupt_descriptor *intr_desc;
	unsigned long flags;
	u64 status;
	int nr_bank, var_size;

	local_irq_save(flags);

	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
	output = *this_cpu_ptr(hyperv_pcpu_output_arg);

	intr_desc = &input->interrupt_descriptor;
	memset(input, 0, sizeof(*input));
	input->partition_id = hv_current_partition_id;
	input->device_id = device_id.as_uint64;
	intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
	intr_desc->vector_count = 1;
	intr_desc->target.vector = vector;

	if (level)
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
	else
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;

	intr_desc->target.vp_set.valid_bank_mask = 0;
	intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));
	if (nr_bank < 0) {
		local_irq_restore(flags);
		pr_err("%s: unable to generate VP set\n", __func__);
		/*
		 * Negative errno, following kernel convention, so this local
		 * failure is not confused with the hv_result() value that is
		 * returned at the end of the function.
		 */
		return -EINVAL;
	}
	intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;

	/*
	 * var-sized hypercall, var-size starts after vp_mask (thus
	 * vp_set.format does not count, but vp_set.valid_bank_mask
	 * does).
	 */
	var_size = nr_bank + 1;

	status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
			input, output);
	/* Copy the result out before the per-CPU buffer is released. */
	*entry = output->interrupt_entry;

	local_irq_restore(flags);

	if (!hv_result_success(status))
		pr_err("%s: hypercall failed, status %lld\n", __func__, status);

	return hv_result(status);
}
71 
/*
 * Undo a previous interrupt mapping with HVCALL_UNMAP_DEVICE_INTERRUPT.
 *
 * @id:        raw 64-bit hypervisor device id
 * @old_entry: interrupt entry to unmap; it is copied into the hypercall
 *             input, the caller's copy is not modified
 *
 * The per-CPU hypercall input buffer is filled and consumed with local
 * interrupts disabled.
 *
 * Return: hv_result() of the hypercall status.
 */
static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
{
	struct hv_input_unmap_device_interrupt *args;
	unsigned long irq_flags;
	u64 hv_status;

	local_irq_save(irq_flags);

	args = *this_cpu_ptr(hyperv_pcpu_input_arg);
	memset(args, 0, sizeof(*args));
	args->partition_id = hv_current_partition_id;
	args->device_id = id;
	args->interrupt_entry = *old_entry;

	hv_status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, args, NULL);

	local_irq_restore(irq_flags);

	return hv_result(hv_status);
}
93 
94 #ifdef CONFIG_PCI_MSI
/*
 * Scratch state for the pci_for_each_dma_alias() walk done in
 * hv_build_pci_dev_id(); updated by get_rid_cb().
 */
struct rid_data {
	/* Device recorded by get_rid_cb() together with @rid; NULL if none. */
	struct pci_dev *bridge;
	/* Requester ID: starts as the device's own bus/devfn, may become an alias. */
	u32 rid;
};
99 
/*
 * pci_for_each_dma_alias() callback used by hv_build_pci_dev_id().
 *
 * @data points to a struct rid_data. Whenever @pdev or @alias lives on a
 * bus other than the bus of the currently recorded RID, the record is
 * replaced by (@pdev, @alias). Always returns 0.
 */
static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	struct rid_data *record = data;
	u8 cur_bus = PCI_BUS_NUM(record->rid);

	/* Both the device and the alias are on the recorded bus: keep it. */
	if (pdev->bus->number == cur_bus && PCI_BUS_NUM(alias) == cur_bus)
		return 0;

	record->bridge = pdev;
	record->rid = alias;

	return 0;
}
112 
hv_build_pci_dev_id(struct pci_dev * dev)113 static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
114 {
115 	union hv_device_id dev_id;
116 	struct rid_data data = {
117 		.bridge = NULL,
118 		.rid = PCI_DEVID(dev->bus->number, dev->devfn)
119 	};
120 
121 	pci_for_each_dma_alias(dev, get_rid_cb, &data);
122 
123 	dev_id.as_uint64 = 0;
124 	dev_id.device_type = HV_DEVICE_TYPE_PCI;
125 	dev_id.pci.segment = pci_domain_nr(dev->bus);
126 
127 	dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
128 	dev_id.pci.bdf.device = PCI_SLOT(data.rid);
129 	dev_id.pci.bdf.function = PCI_FUNC(data.rid);
130 	dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;
131 
132 	if (data.bridge) {
133 		int pos;
134 
135 		/*
136 		 * Microsoft Hypervisor requires a bus range when the bridge is
137 		 * running in PCI-X mode.
138 		 *
139 		 * To distinguish conventional vs PCI-X bridge, we can check
140 		 * the bridge's PCI-X Secondary Status Register, Secondary Bus
141 		 * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
142 		 * Specification Revision 1.0 5.2.2.1.3.
143 		 *
144 		 * Value zero means it is in conventional mode, otherwise it is
145 		 * in PCI-X mode.
146 		 */
147 
148 		pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
149 		if (pos) {
150 			u16 status;
151 
152 			pci_read_config_word(data.bridge, pos +
153 					PCI_X_BRIDGE_SSTATUS, &status);
154 
155 			if (status & PCI_X_SSTATUS_FREQ) {
156 				/* Non-zero, PCI-X mode */
157 				u8 sec_bus, sub_bus;
158 
159 				dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;
160 
161 				pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
162 				dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
163 				pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
164 				dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
165 			}
166 		}
167 	}
168 
169 	return dev_id;
170 }
171 
hv_map_msi_interrupt(struct pci_dev * dev,int cpu,int vector,struct hv_interrupt_entry * entry)172 static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector,
173 				struct hv_interrupt_entry *entry)
174 {
175 	union hv_device_id device_id = hv_build_pci_dev_id(dev);
176 
177 	return hv_map_interrupt(device_id, false, cpu, vector, entry);
178 }
179 
entry_to_msi_msg(struct hv_interrupt_entry * entry,struct msi_msg * msg)180 static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
181 {
182 	/* High address is always 0 */
183 	msg->address_hi = 0;
184 	msg->address_lo = entry->msi_entry.address.as_uint32;
185 	msg->data = entry->msi_entry.data.as_uint32;
186 }
187 
188 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
hv_irq_compose_msi_msg(struct irq_data * data,struct msi_msg * msg)189 static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
190 {
191 	struct msi_desc *msidesc;
192 	struct pci_dev *dev;
193 	struct hv_interrupt_entry out_entry, *stored_entry;
194 	struct irq_cfg *cfg = irqd_cfg(data);
195 	const cpumask_t *affinity;
196 	int cpu;
197 	u64 status;
198 
199 	msidesc = irq_data_get_msi_desc(data);
200 	dev = msi_desc_to_pci_dev(msidesc);
201 
202 	if (!cfg) {
203 		pr_debug("%s: cfg is NULL", __func__);
204 		return;
205 	}
206 
207 	affinity = irq_data_get_effective_affinity_mask(data);
208 	cpu = cpumask_first_and(affinity, cpu_online_mask);
209 
210 	if (data->chip_data) {
211 		/*
212 		 * This interrupt is already mapped. Let's unmap first.
213 		 *
214 		 * We don't use retarget interrupt hypercalls here because
215 		 * Microsoft Hypervisor doens't allow root to change the vector
216 		 * or specify VPs outside of the set that is initially used
217 		 * during mapping.
218 		 */
219 		stored_entry = data->chip_data;
220 		data->chip_data = NULL;
221 
222 		status = hv_unmap_msi_interrupt(dev, stored_entry);
223 
224 		kfree(stored_entry);
225 
226 		if (status != HV_STATUS_SUCCESS) {
227 			pr_debug("%s: failed to unmap, status %lld", __func__, status);
228 			return;
229 		}
230 	}
231 
232 	stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
233 	if (!stored_entry) {
234 		pr_debug("%s: failed to allocate chip data\n", __func__);
235 		return;
236 	}
237 
238 	status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry);
239 	if (status != HV_STATUS_SUCCESS) {
240 		kfree(stored_entry);
241 		return;
242 	}
243 
244 	*stored_entry = out_entry;
245 	data->chip_data = stored_entry;
246 	entry_to_msi_msg(&out_entry, msg);
247 
248 	return;
249 }
250 
hv_unmap_msi_interrupt(struct pci_dev * dev,struct hv_interrupt_entry * old_entry)251 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
252 {
253 	return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
254 }
255 
hv_teardown_msi_irq(struct pci_dev * dev,struct irq_data * irqd)256 static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
257 {
258 	struct hv_interrupt_entry old_entry;
259 	struct msi_msg msg;
260 	u64 status;
261 
262 	if (!irqd->chip_data) {
263 		pr_debug("%s: no chip data\n!", __func__);
264 		return;
265 	}
266 
267 	old_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
268 	entry_to_msi_msg(&old_entry, &msg);
269 
270 	kfree(irqd->chip_data);
271 	irqd->chip_data = NULL;
272 
273 	status = hv_unmap_msi_interrupt(dev, &old_entry);
274 
275 	if (status != HV_STATUS_SUCCESS)
276 		pr_err("%s: hypercall failed, status %lld\n", __func__, status);
277 }
278 
hv_msi_free_irq(struct irq_domain * domain,struct msi_domain_info * info,unsigned int virq)279 static void hv_msi_free_irq(struct irq_domain *domain,
280 			    struct msi_domain_info *info, unsigned int virq)
281 {
282 	struct irq_data *irqd = irq_get_irq_data(virq);
283 	struct msi_desc *desc;
284 
285 	if (!irqd)
286 		return;
287 
288 	desc = irq_data_get_msi_desc(irqd);
289 	if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
290 		return;
291 
292 	hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
293 }
294 
295 /*
296  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
297  * which implement the MSI or MSI-X Capability Structure.
298  */
299 static struct irq_chip hv_pci_msi_controller = {
300 	.name			= "HV-PCI-MSI",
301 	.irq_unmask		= pci_msi_unmask_irq,
302 	.irq_mask		= pci_msi_mask_irq,
303 	.irq_ack		= irq_chip_ack_parent,
304 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
305 	.irq_compose_msi_msg	= hv_irq_compose_msi_msg,
306 	.irq_set_affinity	= msi_domain_set_affinity,
307 	.flags			= IRQCHIP_SKIP_SET_WAKE,
308 };
309 
310 static struct msi_domain_ops pci_msi_domain_ops = {
311 	.msi_free		= hv_msi_free_irq,
312 	.msi_prepare		= pci_msi_prepare,
313 };
314 
315 static struct msi_domain_info hv_pci_msi_domain_info = {
316 	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
317 			  MSI_FLAG_PCI_MSIX,
318 	.ops		= &pci_msi_domain_ops,
319 	.chip		= &hv_pci_msi_controller,
320 	.handler	= handle_edge_irq,
321 	.handler_name	= "edge",
322 };
323 
hv_create_pci_msi_domain(void)324 struct irq_domain * __init hv_create_pci_msi_domain(void)
325 {
326 	struct irq_domain *d = NULL;
327 	struct fwnode_handle *fn;
328 
329 	fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
330 	if (fn)
331 		d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain);
332 
333 	/* No point in going further if we can't get an irq domain */
334 	BUG_ON(!d);
335 
336 	return d;
337 }
338 
339 #endif /* CONFIG_PCI_MSI */
340 
hv_unmap_ioapic_interrupt(int ioapic_id,struct hv_interrupt_entry * entry)341 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
342 {
343 	union hv_device_id device_id;
344 
345 	device_id.as_uint64 = 0;
346 	device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
347 	device_id.ioapic.ioapic_id = (u8)ioapic_id;
348 
349 	return hv_unmap_interrupt(device_id.as_uint64, entry);
350 }
351 EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
352 
/*
 * Map an IO-APIC interrupt to @vector on @cpu.
 *
 * @ioapic_id: IO-APIC id; truncated to 8 bits when placed in the device id
 * @level:     true for level-triggered, false for edge-triggered
 * @cpu:       Linux CPU to target
 * @vector:    vector to deliver
 * @entry:     receives the interrupt entry produced by the mapping
 *
 * Return: whatever hv_map_interrupt() returns.
 */
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
		struct hv_interrupt_entry *entry)
{
	union hv_device_id ioapic_dev;

	ioapic_dev.as_uint64 = 0;
	ioapic_dev.device_type = HV_DEVICE_TYPE_IOAPIC;
	ioapic_dev.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_map_interrupt(ioapic_dev, level, cpu, vector, entry);
}
EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);
365