-
-
Save gnif/a4ac1d4fb6d7ba04347dcc91a579ee36 to your computer and use it in GitHub Desktop.
static int reset_amdgpu_vega(struct pci_dev *dev, int probe) { | |
#define AMDGPU_MAX_USEC_TIMEOUT 100000 | |
#define MP0_BASE 0x16000 | |
#define mmMP0_SMN_C2PMSG_33 ((MP0_BASE + 0x0061) * 4) | |
#define mmMP0_SMN_C2PMSG_64 ((MP0_BASE + 0x0080) * 4) | |
#define mmMP0_SMN_C2PMSG_81 ((MP0_BASE + 0x0091) * 4) | |
resource_size_t rmmio_base, rmmio_size; | |
void __iomem *rmmio; | |
int ret; | |
int i; | |
uint32_t val; | |
if (probe) | |
return 0; | |
pci_clear_master(dev); | |
pci_save_state(dev); | |
rmmio_base = pci_resource_start(dev, 5); | |
rmmio_size = pci_resource_len(dev, 5); | |
rmmio = ioremap(rmmio_base, rmmio_size); | |
if (rmmio == NULL) { | |
printk(KERN_ERR "[reset_amdgpu_vega] failed to ioremap the device\n"); | |
ret = -ENOMEM; | |
goto out; | |
} | |
#if 0 | |
/* check the sign of life register to see if we even need to reset */ | |
if (!readl(rmmio + mmMP0_SMN_C2PMSG_81)) { | |
printk(KERN_INFO "[reset_amdgpu_vega] psp is not running\n"); | |
ret = 0; | |
goto out_unmap; | |
} | |
/* ensure the PSP is working */ | |
ret = -EINVAL; | |
for(i = 0; i < AMDGPU_MAX_USEC_TIMEOUT; i++) { | |
val = readl(rmmio + mmMP0_SMN_C2PMSG_64); | |
if ((val & 0x8000FFFF) == 0x80000000) { | |
ret = 0; | |
break; | |
} | |
udelay(1); | |
} | |
if (ret) { | |
printk(KERN_ERR "[reset_amdgpu_vega] psp is not working correctly\n"); | |
goto out_unmap; | |
} | |
#endif | |
/* send the mode 1 reset command */ | |
writel(0x70000, rmmio + mmMP0_SMN_C2PMSG_64); | |
mdelay(1000); | |
/* wait for the reset to complete */ | |
ret = -EINVAL; | |
for(i = 0; i < AMDGPU_MAX_USEC_TIMEOUT; i++) { | |
val = readl(rmmio + mmMP0_SMN_C2PMSG_33); | |
if ((val & 0x80000000) == 0x80000000) { | |
ret = 0; | |
break; | |
} | |
udelay(1); | |
} | |
if (ret) { | |
printk(KERN_ERR "[reset_amdgpu_vega] reset failed\n"); | |
goto out_unmap; | |
} | |
pcie_flr(dev); | |
ret = 0; | |
printk(KERN_INFO "[reset_amdgpu_vega] reset success\n"); | |
out_unmap: | |
iounmap(rmmio); | |
out: | |
pci_restore_state(dev); | |
return ret; | |
} |
@gnif Any updates?
@gnif looking forward to this - any news from AMD? Have been testing WX 8200 that also cannot be reset properly after vm shutdown.
@gnif Thanks for all your hard work! Has there been any updates to this?
No updates I am sorry, it doesn't seem to be a priority for AMD to fix this.
So sad that we are forced to use Nvidia by AMD themselves. Hope they fix this someday.
No updates I am sorry, it doesn't seem to be a priority for AMD to fix this.
Where's the best place to keep being up to date regarding this issue?
What's broken?
The card is not recoverable after issuing the official mode1 PSP reset.
On my ASUS VEGA 64 Strix, that reset application sadly doesn't work. Can I help you debug it? PS. Probably you're aware of this, but I found this in the kernel code which may be of interest: torvalds/linux@98512bb
Trying to add the PCI reset quirk with this device id now, 0x1043, 0x4c4, not sure if that'll make a difference
(lspci has this for my VEGA strix:
Subsystem: ASUSTeK Computer Inc. Vega 10 XL/XT [Radeon RX Vega 56/64] [1043:04c4])
Thanks, but the PSP reset is broken in amdgpu as well. I am working on an updated Vega 10 reset; I have been overseas, and progress was on hold until I returned (literally just now).
Is there a list somewhere of AMD cards that do not have this problem?
Everything from Vega onwards has this issue, no exceptions. Some Polaris cards also have issues with resetting.
Everything from Vega onwards has this issue, no exceptions. Some Polaris cards also have issues with resetting.
So that means some Polaris cards do not, correct? I'm trying to find out which cards do not...
This is the basis of a PCI FLR reset quirk for the kernel; it is incomplete pending AMD providing the remaining reset details.