ACK/cmnt : [PATCH 1/2][Zesty] powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD
Thadeu Lima de Souza Cascardo
cascardo at canonical.com
Tue Jul 11 19:19:11 UTC 2017
On Mon, Jul 10, 2017 at 02:42:43PM +0200, Stefan Bader wrote:
> On 29.06.2017 18:40, Breno Leitao wrote:
> > From: Alistair Popple <alistair at popple.id.au>
> >
> > BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1701272
> >
> > NPU2 requires an extra explicit flush to an active GPU PID when
> > sending address translation shoot downs (ATSDs) to reliably flush the
> > GPU TLB. This patch adds just such a flush at the end of each sequence
> > of ATSDs.
> >
> > We can safely use PID 0 which is always reserved and active on the
> > GPU. PID 0 is only used for init_mm which will never be a user mm on
> > the GPU. To enforce this we add a check in pnv_npu2_init_context()
> > just in case someone tries to use PID 0 on the GPU.
> >
> > Signed-off-by: Alistair Popple <alistair at popple.id.au>
> > [mpe: Use true/false for bool literals]
> > Signed-off-by: Michael Ellerman <mpe at ellerman.id.au>
> > (cherry picked from commit bbd5ff50afffcf4a01d05367524736c57607a478)
> > Signed-off-by: Breno Leitao <leitao at debian.org>
> Acked-by: Stefan Bader <stefan.bader at canonical.com>
>
> > ---
>
> Two rather minor nitpicks: Cannot remember exactly what it was but vaguely think
> that when referring to bugs the form https://bugs.launchpad.net/bugs/<bugnr> is
> preferred as more general. Other forms refer to specific projects/tasks. And it
> is not like the linux task would become invalid anytime soon, so just a note.
> The other thing, when submitting more than one patch it would be good to add a
> cover email to that. For one to give some quick info about the set, and second
> it gives the lazy reviewer an anchor to ack the set. ;)
Also, some of the tools some of us use care about the ACKs sent to cover
letters, so it would have been very helpful if you had sent one.
Cascardo.
>
> That said this ACK should be for the 2nd patch as well.
>
> -Stefan
> > arch/powerpc/platforms/powernv/npu-dma.c | 94 ++++++++++++++++++++++----------
> > 1 file changed, 65 insertions(+), 29 deletions(-)
> >
> > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> > index 3184c9e265fb..9e042cd4aa03 100644
> > --- a/arch/powerpc/platforms/powernv/npu-dma.c
> > +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> > @@ -448,7 +448,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
> > return mmio_atsd_reg;
> > }
> >
> > -static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
> > +static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
> > {
> > unsigned long launch;
> >
> > @@ -464,12 +464,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
> > /* PID */
> > launch |= pid << PPC_BITLSHIFT(38);
> >
> > + /* No flush */
> > + launch |= !flush << PPC_BITLSHIFT(39);
> > +
> > /* Invalidating the entire process doesn't use a va */
> > return mmio_launch_invalidate(npu, launch, 0);
> > }
> >
> > static int mmio_invalidate_va(struct npu *npu, unsigned long va,
> > - unsigned long pid)
> > + unsigned long pid, bool flush)
> > {
> > unsigned long launch;
> >
> > @@ -485,26 +488,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
> > /* PID */
> > launch |= pid << PPC_BITLSHIFT(38);
> >
> > + /* No flush */
> > + launch |= !flush << PPC_BITLSHIFT(39);
> > +
> > return mmio_launch_invalidate(npu, launch, va);
> > }
> >
> > #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
> >
> > +struct mmio_atsd_reg {
> > + struct npu *npu;
> > + int reg;
> > +};
> > +
> > +static void mmio_invalidate_wait(
> > + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
> > +{
> > + struct npu *npu;
> > + int i, reg;
> > +
> > + /* Wait for all invalidations to complete */
> > + for (i = 0; i <= max_npu2_index; i++) {
> > + if (mmio_atsd_reg[i].reg < 0)
> > + continue;
> > +
> > + /* Wait for completion */
> > + npu = mmio_atsd_reg[i].npu;
> > + reg = mmio_atsd_reg[i].reg;
> > + while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
> > + cpu_relax();
> > +
> > + put_mmio_atsd_reg(npu, reg);
> > +
> > + /*
> > + * The GPU requires two flush ATSDs to ensure all entries have
> > + * been flushed. We use PID 0 as it will never be used for a
> > + * process on the GPU.
> > + */
> > + if (flush)
> > + mmio_invalidate_pid(npu, 0, true);
> > + }
> > +}
> > +
> > /*
> > * Invalidate either a single address or an entire PID depending on
> > * the value of va.
> > */
> > static void mmio_invalidate(struct npu_context *npu_context, int va,
> > - unsigned long address)
> > + unsigned long address, bool flush)
> > {
> > - int i, j, reg;
> > + int i, j;
> > struct npu *npu;
> > struct pnv_phb *nphb;
> > struct pci_dev *npdev;
> > - struct {
> > - struct npu *npu;
> > - int reg;
> > - } mmio_atsd_reg[NV_MAX_NPUS];
> > + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
> > unsigned long pid = npu_context->mm->context.id;
> >
> > /*
> > @@ -524,10 +561,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
> >
> > if (va)
> > mmio_atsd_reg[i].reg =
> > - mmio_invalidate_va(npu, address, pid);
> > + mmio_invalidate_va(npu, address, pid,
> > + flush);
> > else
> > mmio_atsd_reg[i].reg =
> > - mmio_invalidate_pid(npu, pid);
> > + mmio_invalidate_pid(npu, pid, flush);
> >
> > /*
> > * The NPU hardware forwards the shootdown to all GPUs
> > @@ -543,18 +581,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
> > */
> > flush_tlb_mm(npu_context->mm);
> >
> > - /* Wait for all invalidations to complete */
> > - for (i = 0; i <= max_npu2_index; i++) {
> > - if (mmio_atsd_reg[i].reg < 0)
> > - continue;
> > -
> > - /* Wait for completion */
> > - npu = mmio_atsd_reg[i].npu;
> > - reg = mmio_atsd_reg[i].reg;
> > - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
> > - cpu_relax();
> > - put_mmio_atsd_reg(npu, reg);
> > - }
> > + mmio_invalidate_wait(mmio_atsd_reg, flush);
> > + if (flush)
> > + /* Wait for the flush to complete */
> > + mmio_invalidate_wait(mmio_atsd_reg, false);
> > }
> >
> > static void pnv_npu2_mn_release(struct mmu_notifier *mn,
> > @@ -570,7 +600,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
> > * There should be no more translation requests for this PID, but we
> > * need to ensure any entries for it are removed from the TLB.
> > */
> > - mmio_invalidate(npu_context, 0, 0);
> > + mmio_invalidate(npu_context, 0, 0, true);
> > }
> >
> > static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
> > @@ -580,7 +610,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
> > {
> > struct npu_context *npu_context = mn_to_npu_context(mn);
> >
> > - mmio_invalidate(npu_context, 1, address);
> > + mmio_invalidate(npu_context, 1, address, true);
> > }
> >
> > static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
> > @@ -589,7 +619,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
> > {
> > struct npu_context *npu_context = mn_to_npu_context(mn);
> >
> > - mmio_invalidate(npu_context, 1, address);
> > + mmio_invalidate(npu_context, 1, address, true);
> > }
> >
> > static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
> > @@ -599,8 +629,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
> > struct npu_context *npu_context = mn_to_npu_context(mn);
> > unsigned long address;
> >
> > - for (address = start; address <= end; address += PAGE_SIZE)
> > - mmio_invalidate(npu_context, 1, address);
> > + for (address = start; address < end; address += PAGE_SIZE)
> > + mmio_invalidate(npu_context, 1, address, false);
> > +
> > + /* Do the flush only on the final addess == end */
> > + mmio_invalidate(npu_context, 1, address, true);
> > }
> >
> > static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
> > @@ -650,8 +683,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
> > /* No nvlink associated with this GPU device */
> > return ERR_PTR(-ENODEV);
> >
> > - if (!mm) {
> > - /* kernel thread contexts are not supported */
> > + if (!mm || mm->context.id == 0) {
> > + /*
> > + * Kernel thread contexts are not supported and context id 0 is
> > + * reserved on the GPU.
> > + */
> > return ERR_PTR(-EINVAL);
> > }
> >
> >
>
>
> --
> kernel-team mailing list
> kernel-team at lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
More information about the kernel-team
mailing list