diff mbox

[v2,libdrm,4/7] tegra: Add channel, job, pushbuf and fence APIs

Message ID 1397043630-13972-5-git-send-email-thierry.reding@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Thierry Reding April 9, 2014, 11:40 a.m. UTC
From: Thierry Reding <treding@nvidia.com>

These functions can be used to open channels to engines, manage job
submissions, create push buffers to store command streams in and wait
until jobs have been completed.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
Changes in v2:
- automatically allocate buffer objects as required by pushbuffers
- pushbuffers can now have more than one associated buffer object
- add drm_tegra_pushbuf_prepare() function

 tegra/Makefile.am |   4 ++
 tegra/channel.c   | 127 +++++++++++++++++++++++++++++++++
 tegra/fence.c     |  71 +++++++++++++++++++
 tegra/job.c       | 180 +++++++++++++++++++++++++++++++++++++++++++++++
 tegra/private.h   |  72 +++++++++++++++++++
 tegra/pushbuf.c   | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tegra/tegra.c     |   2 +
 tegra/tegra.h     |  52 ++++++++++++++
 8 files changed, 713 insertions(+)
 create mode 100644 tegra/channel.c
 create mode 100644 tegra/fence.c
 create mode 100644 tegra/job.c
 create mode 100644 tegra/pushbuf.c

Comments

Erik Faye-Lund April 10, 2014, 5:13 p.m. UTC | #1
On Wed, Apr 9, 2014 at 1:40 PM, Thierry Reding <thierry.reding@gmail.com> wrote:
> diff --git a/tegra/fence.c b/tegra/fence.c
> new file mode 100644
> index 000000000000..f58725ca8472
> --- /dev/null
> +++ b/tegra/fence.c
> +drm_public
> +int drm_tegra_fence_wait(struct drm_tegra_fence *fence)
> +{
> +       return drm_tegra_fence_wait_timeout(fence, -1);
> +}

Perhaps a convenience-wrapper like this should be inline in the header
to reduce function-call overhead?

> +drm_public
> +int drm_tegra_job_submit(struct drm_tegra_job *job,
> +                        struct drm_tegra_fence **fencep)
> +{
> +       struct drm_tegra *drm = job->channel->drm;
> +       struct drm_tegra_pushbuf_private *pushbuf;
> +       struct drm_tegra_fence *fence = NULL;
> +       struct drm_tegra_cmdbuf *cmdbufs;
> +       struct drm_tegra_syncpt *syncpts;
> +       struct drm_tegra_submit args;
> +       unsigned int i;
> +       int err;
> +
> +       /*
> +        * Make sure the current command stream buffer is queued for
> +        * submission.
> +        */
> +       err = drm_tegra_pushbuf_queue(job->pushbuf);
> +       if (err < 0)
> +               return err;
> +
> +       job->pushbuf = NULL;
> +
> +       if (fencep) {
> +               fence = calloc(1, sizeof(*fence));
> +               if (!fence)
> +                       return -ENOMEM;
> +       }
> +
> +       syncpts = calloc(1, sizeof(*syncpts));
> +       if (!syncpts) {
> +               free(cmdbufs);

cmdbufs never gets assigned to, yet it gets free'd here (and a few
more places further down). I guess this is left-overs from the
previous round that should just die?

But this raises the question, how is job->cmdbufs (and job->relocs)
supposed to get free'd?

I'm a bit curious as to what the expected life-time of these objects
are. Do I need to create a new job-object for each submit, or can I
reuse a job? I think the latter would be preferable... So perhaps we
should have a drm_tegra_job_reset that allows us to throw away the
accumulated cmdbufs and relocs, and start building a new job?

> diff --git a/tegra/private.h b/tegra/private.h
> index 9b6bc9395d23..fc74fb56b58d 100644
> --- a/tegra/private.h
> +++ b/tegra/private.h
> @@ -26,13 +26,31 @@
>  #define __DRM_TEGRA_PRIVATE_H__ 1
>
>  #include <stdbool.h>
> +#include <stddef.h>
>  #include <stdint.h>
>
>  #include <libdrm.h>
> +#include <libdrm_lists.h>
>  #include <xf86atomic.h>
>
> +#include "tegra_drm.h"
>  #include "tegra.h"
>
> +#define container_of(ptr, type, member) ({                             \
> +               const typeof(((type *)0)->member) *__mptr = (ptr);      \
> +               (type *)((char *)__mptr - offsetof(type, member));      \
> +       })
> +
<snip>
> +
> +struct drm_tegra_pushbuf_private {
> +       struct drm_tegra_pushbuf base;
> +       struct drm_tegra_job *job;
> +       drmMMListHead list;
> +       drmMMListHead bos;
> +
> +       struct drm_tegra_bo *bo;
> +       uint32_t *start;
> +       uint32_t *end;
> +};
> +
> +static inline struct drm_tegra_pushbuf_private *
> +pushbuf_priv(struct drm_tegra_pushbuf *pb)
> +{
> +       return container_of(pb, struct drm_tegra_pushbuf_private, base);
> +}
> +

This seems to be the only use-case for container_of, and it just
happens to return the exact same pointer as was passed in... so do we
really need that helper?

> diff --git a/tegra/pushbuf.c b/tegra/pushbuf.c
> new file mode 100644
> index 000000000000..178d5cd57541
> --- /dev/null
> +++ b/tegra/pushbuf.c
> @@ -0,0 +1,205 @@
<snip>
> +drm_public
> +int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
> +                         struct drm_tegra_job *job)
> +{
> +       struct drm_tegra_pushbuf_private *pushbuf;
> +       void *ptr;
> +       int err;
> +
> +       pushbuf = calloc(1, sizeof(*pushbuf));
> +       if (!pushbuf)
> +               return -ENOMEM;
> +
> +       DRMINITLISTHEAD(&pushbuf->list);
> +       DRMINITLISTHEAD(&pushbuf->bos);
> +       pushbuf->job = job;
> +
> +       *pushbufp = &pushbuf->base;
> +
> +       DRMLISTADD(&pushbuf->list, &job->pushbufs);

Hmm. So the job keeps a list of pushbufs. What purpose does this
serve? Shouldn't the job only need the cmdbuf-list (which it already
has) and the BOs (so they can be dereferenced after being submitted)?

AFAICT, we could make drm_tegra_pushbuf_queue append the BO to a list
in the job instead. That way we don't need to keep all the
pushbuf-objects around, and we might even be able to reuse the same
object rather than keep reallocating them. No?

> +drm_public
> +int drm_tegra_pushbuf_sync(struct drm_tegra_pushbuf *pushbuf,
> +                          enum drm_tegra_syncpt_cond cond)
> +{
> +       struct drm_tegra_pushbuf_private *priv = pushbuf_priv(pushbuf);
> +
> +       if (cond >= DRM_TEGRA_SYNCPT_COND_MAX)
> +               return -EINVAL;
> +
> +       *pushbuf->ptr++ = HOST1X_OPCODE_NONINCR(0x0, 0x1);
> +       *pushbuf->ptr++ = cond << 8 | priv->job->syncpt;

Perhaps "HOST1X_OPCODE_IMM(0x0, cond << 8 | priv->job->syncpt)", which
saves a word?

Either way, I think this should either call drm_tegra_pushbuf_prepare
to make sure two words are available, or be renamed to reflect that it
causes a push (or two, as in the current form).

> +struct drm_tegra_channel;
> +struct drm_tegra_job;
> +
> +struct drm_tegra_pushbuf {
> +       uint32_t *ptr;
> +};

Hmm, single-member structs always makes me cringe a bit. But in this
case it's much better than a double-pointer IMO.

But perhaps some of the members in the private-struct might be useful
out here? For instance, if "uint32_t *end;" was also available, we
could do:

inline void drm_tegra_pushbuf_push_word(struct drm_tegra_pushbuf
*pushbuf, uint32_t value)
{
       assert(pushbuf->ptr < pushbuf->end);
       *pushbuf->ptr++ = value;
}

...and easily pick up bugs with too few words? The helper would of
course be in the header-file, so the write gets inlined nicely.

But all in all, a really nice read. Thanks!
Thierry Reding May 2, 2014, 2:06 p.m. UTC | #2
On Thu, Apr 10, 2014 at 07:13:22PM +0200, Erik Faye-Lund wrote:
> On Wed, Apr 9, 2014 at 1:40 PM, Thierry Reding <thierry.reding@gmail.com> wrote:
> > diff --git a/tegra/fence.c b/tegra/fence.c
> > new file mode 100644
> > index 000000000000..f58725ca8472
> > --- /dev/null
> > +++ b/tegra/fence.c
> > +drm_public
> > +int drm_tegra_fence_wait(struct drm_tegra_fence *fence)
> > +{
> > +       return drm_tegra_fence_wait_timeout(fence, -1);
> > +}
> 
> Perhaps a convenience-wrapper like this should be inline in the header
> to reduce function-call overhead?

Okay, I've moved this into the tegra.h header file.

> 
> > +drm_public
> > +int drm_tegra_job_submit(struct drm_tegra_job *job,
> > +                        struct drm_tegra_fence **fencep)
> > +{
> > +       struct drm_tegra *drm = job->channel->drm;
> > +       struct drm_tegra_pushbuf_private *pushbuf;
> > +       struct drm_tegra_fence *fence = NULL;
> > +       struct drm_tegra_cmdbuf *cmdbufs;
> > +       struct drm_tegra_syncpt *syncpts;
> > +       struct drm_tegra_submit args;
> > +       unsigned int i;
> > +       int err;
> > +
> > +       /*
> > +        * Make sure the current command stream buffer is queued for
> > +        * submission.
> > +        */
> > +       err = drm_tegra_pushbuf_queue(job->pushbuf);
> > +       if (err < 0)
> > +               return err;
> > +
> > +       job->pushbuf = NULL;
> > +
> > +       if (fencep) {
> > +               fence = calloc(1, sizeof(*fence));
> > +               if (!fence)
> > +                       return -ENOMEM;
> > +       }
> > +
> > +       syncpts = calloc(1, sizeof(*syncpts));
> > +       if (!syncpts) {
> > +               free(cmdbufs);
> 
> cmdbufs never gets assigned to, yet it gets free'd here (and a few
> more places further down). I guess this is left-overs from the
> previous round that should just die?

Indeed. pushbuf was also unused to I removed it as well.

> But this raises the question, how is job->cmdbufs (and job->relocs)
> supposed to get free'd?
> 
> I'm a bit curious as to what the expected life-time of these objects
> are. Do I need to create a new job-object for each submit, or can I
> reuse a job? I think the latter would be preferable... So perhaps we
> should have a drm_tegra_job_reset that allows us to throw away the
> accumulated cmdbufs and relocs, and start building a new job?

They are currently freed by drm_tegra_job_free(), so yes, the current
intended usage is to create a new job for each submit. Do you mind if we
keep your proposal for a drm_tegra_job_reset() in mind for a follow-up
patch. It's an optimization that we can easily add later on and I'd like
to avoid too much premature optimization at this point.

> > diff --git a/tegra/private.h b/tegra/private.h
> > index 9b6bc9395d23..fc74fb56b58d 100644
> > --- a/tegra/private.h
> > +++ b/tegra/private.h
> > @@ -26,13 +26,31 @@
> >  #define __DRM_TEGRA_PRIVATE_H__ 1
> >
> >  #include <stdbool.h>
> > +#include <stddef.h>
> >  #include <stdint.h>
> >
> >  #include <libdrm.h>
> > +#include <libdrm_lists.h>
> >  #include <xf86atomic.h>
> >
> > +#include "tegra_drm.h"
> >  #include "tegra.h"
> >
> > +#define container_of(ptr, type, member) ({                             \
> > +               const typeof(((type *)0)->member) *__mptr = (ptr);      \
> > +               (type *)((char *)__mptr - offsetof(type, member));      \
> > +       })
> > +
> <snip>
> > +
> > +struct drm_tegra_pushbuf_private {
> > +       struct drm_tegra_pushbuf base;
> > +       struct drm_tegra_job *job;
> > +       drmMMListHead list;
> > +       drmMMListHead bos;
> > +
> > +       struct drm_tegra_bo *bo;
> > +       uint32_t *start;
> > +       uint32_t *end;
> > +};
> > +
> > +static inline struct drm_tegra_pushbuf_private *
> > +pushbuf_priv(struct drm_tegra_pushbuf *pb)
> > +{
> > +       return container_of(pb, struct drm_tegra_pushbuf_private, base);
> > +}
> > +
> 
> This seems to be the only use-case for container_of, and it just
> happens to return the exact same pointer as was passed in... so do we
> really need that helper?

It has the benefit of keeping this code working even when somebody
suddenly adds to the beginning of drm_tegra_pushbuf_private. I'd rather
not resolve to force casting just to be on the safe side.

> > diff --git a/tegra/pushbuf.c b/tegra/pushbuf.c
> > new file mode 100644
> > index 000000000000..178d5cd57541
> > --- /dev/null
> > +++ b/tegra/pushbuf.c
> > @@ -0,0 +1,205 @@
> <snip>
> > +drm_public
> > +int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
> > +                         struct drm_tegra_job *job)
> > +{
> > +       struct drm_tegra_pushbuf_private *pushbuf;
> > +       void *ptr;
> > +       int err;
> > +
> > +       pushbuf = calloc(1, sizeof(*pushbuf));
> > +       if (!pushbuf)
> > +               return -ENOMEM;
> > +
> > +       DRMINITLISTHEAD(&pushbuf->list);
> > +       DRMINITLISTHEAD(&pushbuf->bos);
> > +       pushbuf->job = job;
> > +
> > +       *pushbufp = &pushbuf->base;
> > +
> > +       DRMLISTADD(&pushbuf->list, &job->pushbufs);
> 
> Hmm. So the job keeps a list of pushbufs. What purpose does this
> serve? Shouldn't the job only need the cmdbuf-list (which it already
> has) and the BOs (so they can be dereferenced after being submitted)?

This is currently used to keep track of existing pushbuffers and make
sure that they are freed (when the job is freed).

> AFAICT, we could make drm_tegra_pushbuf_queue append the BO to a list
> in the job instead. That way we don't need to keep all the
> pushbuf-objects around, and we might even be able to reuse the same
> object rather than keep reallocating them. No?

I guess we could make drm_tegra_pushbuf_new() reinitialize the current
pushbuf object and always return the same. But perhaps there are
occasions where it's useful to keep a few of them around?

If reusing the pushbuf objects makes sense, then perhaps a different API
would be more useful. Rather than allocate in the context of a job, they
could be allocated in a channel-wide context and added to/associated
with a job only as needed.

> > +drm_public
> > +int drm_tegra_pushbuf_sync(struct drm_tegra_pushbuf *pushbuf,
> > +                          enum drm_tegra_syncpt_cond cond)
> > +{
> > +       struct drm_tegra_pushbuf_private *priv = pushbuf_priv(pushbuf);
> > +
> > +       if (cond >= DRM_TEGRA_SYNCPT_COND_MAX)
> > +               return -EINVAL;
> > +
> > +       *pushbuf->ptr++ = HOST1X_OPCODE_NONINCR(0x0, 0x1);
> > +       *pushbuf->ptr++ = cond << 8 | priv->job->syncpt;
> 
> Perhaps "HOST1X_OPCODE_IMM(0x0, cond << 8 | priv->job->syncpt)", which
> saves a word?

Interesting. Have you ever tested whether that works properly that way?
I've never seen that in a command stream from the binary driver.

> Either way, I think this should either call drm_tegra_pushbuf_prepare
> to make sure two words are available, or be renamed to reflect that it
> causes a push (or two, as in the current form).

I've added a call to drm_tegra_pushbuf_prepare() here and in
drm_tegra_pushbuf_relocate() which also pushes one word to the pushbuf.

> > +struct drm_tegra_channel;
> > +struct drm_tegra_job;
> > +
> > +struct drm_tegra_pushbuf {
> > +       uint32_t *ptr;
> > +};
> 
> Hmm, single-member structs always makes me cringe a bit. But in this
> case it's much better than a double-pointer IMO.
> 
> But perhaps some of the members in the private-struct might be useful
> out here? For instance, if "uint32_t *end;" was also available, we
> could do:
> 
> inline void drm_tegra_pushbuf_push_word(struct drm_tegra_pushbuf
> *pushbuf, uint32_t value)
> {
>        assert(pushbuf->ptr < pushbuf->end);
>        *pushbuf->ptr++ = value;
> }
> 
> ...and easily pick up bugs with too few words? The helper would of
> course be in the header-file, so the write gets inlined nicely.

Sounds good. If you have no strong objections, I'd like to keep that for
a follow on patch when there's more code that actively uses this API. It
should be fairly safe to make more members public via drm_tegra_pushbuf
rather than the other way around, so I'd like to err on the side of
carefulness for a bit longer.

Thanks for the review, and apologies for being sluggish.

Thierry
Erik Faye-Lund May 2, 2014, 2:53 p.m. UTC | #3
On Fri, May 2, 2014 at 4:06 PM, Thierry Reding <thierry.reding@gmail.com> wrote:
> On Thu, Apr 10, 2014 at 07:13:22PM +0200, Erik Faye-Lund wrote:
>>
>> But this raises the question, how is job->cmdbufs (and job->relocs)
>> supposed to get free'd?
>>
>> I'm a bit curious as to what the expected life-time of these objects
>> are. Do I need to create a new job-object for each submit, or can I
>> reuse a job? I think the latter would be preferable... So perhaps we
>> should have a drm_tegra_job_reset that allows us to throw away the
>> accumulated cmdbufs and relocs, and start building a new job?
>
> They are currently freed by drm_tegra_job_free(), so yes, the current
> intended usage is to create a new job for each submit. Do you mind if we
> keep your proposal for a drm_tegra_job_reset() in mind for a follow-up
> patch. It's an optimization that we can easily add later on and I'd like
> to avoid too much premature optimization at this point.

Sure, I don't mind.

>> > diff --git a/tegra/private.h b/tegra/private.h
>> > index 9b6bc9395d23..fc74fb56b58d 100644
>> > --- a/tegra/private.h
>> > +++ b/tegra/private.h
>> > @@ -26,13 +26,31 @@
>> >  #define __DRM_TEGRA_PRIVATE_H__ 1
>> >
>> >  #include <stdbool.h>
>> > +#include <stddef.h>
>> >  #include <stdint.h>
>> >
>> >  #include <libdrm.h>
>> > +#include <libdrm_lists.h>
>> >  #include <xf86atomic.h>
>> >
>> > +#include "tegra_drm.h"
>> >  #include "tegra.h"
>> >
>> > +#define container_of(ptr, type, member) ({                             \
>> > +               const typeof(((type *)0)->member) *__mptr = (ptr);      \
>> > +               (type *)((char *)__mptr - offsetof(type, member));      \
>> > +       })
>> > +
>> <snip>
>> > +
>> > +struct drm_tegra_pushbuf_private {
>> > +       struct drm_tegra_pushbuf base;
>> > +       struct drm_tegra_job *job;
>> > +       drmMMListHead list;
>> > +       drmMMListHead bos;
>> > +
>> > +       struct drm_tegra_bo *bo;
>> > +       uint32_t *start;
>> > +       uint32_t *end;
>> > +};
>> > +
>> > +static inline struct drm_tegra_pushbuf_private *
>> > +pushbuf_priv(struct drm_tegra_pushbuf *pb)
>> > +{
>> > +       return container_of(pb, struct drm_tegra_pushbuf_private, base);
>> > +}
>> > +
>>
>> This seems to be the only use-case for container_of, and it just
>> happens to return the exact same pointer as was passed in... so do we
>> really need that helper?
>
> It has the benefit of keeping this code working even when somebody
> suddenly adds to the beginning of drm_tegra_pushbuf_private. I'd rather
> not resolve to force casting just to be on the safe side.

Is that a really a legitimate worry? There's a very clear relationship
between the two structures. Both nouveau and freedreno does the same
thing (for nouveau_bo_priv, nouveau_device_priv, msm_device, msm_pipe
and msm_bo structures), and it seems to work fine for them. Perhaps a
comment along the lines of "/* this needs to be the first member */"
is enough to discourage people from messing with it?

>> > diff --git a/tegra/pushbuf.c b/tegra/pushbuf.c
>> > new file mode 100644
>> > index 000000000000..178d5cd57541
>> > --- /dev/null
>> > +++ b/tegra/pushbuf.c
>> > @@ -0,0 +1,205 @@
>> <snip>
>> > +drm_public
>> > +int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
>> > +                         struct drm_tegra_job *job)
>> > +{
>> > +       struct drm_tegra_pushbuf_private *pushbuf;
>> > +       void *ptr;
>> > +       int err;
>> > +
>> > +       pushbuf = calloc(1, sizeof(*pushbuf));
>> > +       if (!pushbuf)
>> > +               return -ENOMEM;
>> > +
>> > +       DRMINITLISTHEAD(&pushbuf->list);
>> > +       DRMINITLISTHEAD(&pushbuf->bos);
>> > +       pushbuf->job = job;
>> > +
>> > +       *pushbufp = &pushbuf->base;
>> > +
>> > +       DRMLISTADD(&pushbuf->list, &job->pushbufs);
>>
>> Hmm. So the job keeps a list of pushbufs. What purpose does this
>> serve? Shouldn't the job only need the cmdbuf-list (which it already
>> has) and the BOs (so they can be dereferenced after being submitted)?
>
> This is currently used to keep track of existing pushbuffers and make
> sure that they are freed (when the job is freed).
>

OK, it seems to me that the need for keeping the pushbuffers around is
simply not there. Only the BOs needs to be kept around, no?

>> AFAICT, we could make drm_tegra_pushbuf_queue append the BO to a list
>> in the job instead. That way we don't need to keep all the
>> pushbuf-objects around, and we might even be able to reuse the same
>> object rather than keep reallocating them. No?
>
> I guess we could make drm_tegra_pushbuf_new() reinitialize the current
> pushbuf object and always return the same. But perhaps there are
> occasions where it's useful to keep a few of them around?

This sounds a bit hacky (and potentially hazardous, depending on the
client). I'd rather have clear semantics, and flexibility for the
clients to do what suits it best, as long as the cost is negligible.

> If reusing the pushbuf objects makes sense, then perhaps a different API
> would be more useful. Rather than allocate in the context of a job, they
> could be allocated in a channel-wide context and added to/associated
> with a job only as needed.

Yes, I think this might make sense. I'll have to ponder a bit more
about this, though.

>> > +drm_public
>> > +int drm_tegra_pushbuf_sync(struct drm_tegra_pushbuf *pushbuf,
>> > +                          enum drm_tegra_syncpt_cond cond)
>> > +{
>> > +       struct drm_tegra_pushbuf_private *priv = pushbuf_priv(pushbuf);
>> > +
>> > +       if (cond >= DRM_TEGRA_SYNCPT_COND_MAX)
>> > +               return -EINVAL;
>> > +
>> > +       *pushbuf->ptr++ = HOST1X_OPCODE_NONINCR(0x0, 0x1);
>> > +       *pushbuf->ptr++ = cond << 8 | priv->job->syncpt;
>>
>> Perhaps "HOST1X_OPCODE_IMM(0x0, cond << 8 | priv->job->syncpt)", which
>> saves a word?
>
> Interesting. Have you ever tested whether that works properly that way?
> I've never seen that in a command stream from the binary driver.

No, I haven't. But from the documentation, there's nothing to suggest
it wouldn't work.

I'll give it a spin later to verify!

>> Either way, I think this should either call drm_tegra_pushbuf_prepare
>> to make sure two words are available, or be renamed to reflect that it
>> causes a push (or two, as in the current form).
>
> I've added a call to drm_tegra_pushbuf_prepare() here and in
> drm_tegra_pushbuf_relocate() which also pushes one word to the pushbuf.
>

Actually, I think drm_tegra_pushbuf_relocate should just be renamed to
drm_tegra_pushbuf_push_reloc(), as that conveys quite clearly that it
is just a push-helper and need to be counted when preparing. That way
it doesn't need to prepare itself.

The reason why I think this is better for that function, is that
relocs will naturally appear in the middle of other pushes, so it
makes sense to perpare them just like the other pushes. I *think*
drm_tegra_pushbuf_sync is only really useful when switching between
GR2D and GR3D, so it's not such a big concern. But I could be wrong,
dunno. Perhaps it's also useful when texturing with a newly rendered
FBO also.

>> > +struct drm_tegra_channel;
>> > +struct drm_tegra_job;
>> > +
>> > +struct drm_tegra_pushbuf {
>> > +       uint32_t *ptr;
>> > +};
>>
>> Hmm, single-member structs always makes me cringe a bit. But in this
>> case it's much better than a double-pointer IMO.
>>
>> But perhaps some of the members in the private-struct might be useful
>> out here? For instance, if "uint32_t *end;" was also available, we
>> could do:
>>
>> inline void drm_tegra_pushbuf_push_word(struct drm_tegra_pushbuf
>> *pushbuf, uint32_t value)
>> {
>>        assert(pushbuf->ptr < pushbuf->end);
>>        *pushbuf->ptr++ = value;
>> }
>>
>> ...and easily pick up bugs with too few words? The helper would of
>> course be in the header-file, so the write gets inlined nicely.
>
> Sounds good. If you have no strong objections, I'd like to keep that for
> a follow on patch when there's more code that actively uses this API. It
> should be fairly safe to make more members public via drm_tegra_pushbuf
> rather than the other way around, so I'd like to err on the side of
> carefulness for a bit longer.

Hmm, isn't it the *current* code that is "careless" in this case? The
client doesn't have enough context to validate this itself (without
having access to the end-pointer)?

> Thanks for the review, and apologies for being sluggish.

No worries, thanks for the work so far! :)
Thierry Reding May 2, 2014, 3:16 p.m. UTC | #4
On Fri, May 02, 2014 at 04:53:55PM +0200, Erik Faye-Lund wrote:
> On Fri, May 2, 2014 at 4:06 PM, Thierry Reding <thierry.reding@gmail.com> wrote:
> > On Thu, Apr 10, 2014 at 07:13:22PM +0200, Erik Faye-Lund wrote:
[...]
> >> > +static inline struct drm_tegra_pushbuf_private *
> >> > +pushbuf_priv(struct drm_tegra_pushbuf *pb)
> >> > +{
> >> > +       return container_of(pb, struct drm_tegra_pushbuf_private, base);
> >> > +}
> >> > +
> >>
> >> This seems to be the only use-case for container_of, and it just
> >> happens to return the exact same pointer as was passed in... so do we
> >> really need that helper?
> >
> > It has the benefit of keeping this code working even when somebody
> > suddenly adds to the beginning of drm_tegra_pushbuf_private. I'd rather
> > not resolve to force casting just to be on the safe side.
> 
> Is that a really a legitimate worry? There's a very clear relationship
> between the two structures. Both nouveau and freedreno does the same
> thing (for nouveau_bo_priv, nouveau_device_priv, msm_device, msm_pipe
> and msm_bo structures), and it seems to work fine for them. Perhaps a
> comment along the lines of "/* this needs to be the first member */"
> is enough to discourage people from messing with it?

Oh well, if everybody else is doing it and you keep insisting I'll just
drop it. I do like explicit upcasting, but I guess doing it implicitly
will work as well. Worst case this will crash on people if they don't
know what they're doing.

> >> > diff --git a/tegra/pushbuf.c b/tegra/pushbuf.c
> >> > new file mode 100644
> >> > index 000000000000..178d5cd57541
> >> > --- /dev/null
> >> > +++ b/tegra/pushbuf.c
> >> > @@ -0,0 +1,205 @@
> >> <snip>
> >> > +drm_public
> >> > +int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
> >> > +                         struct drm_tegra_job *job)
> >> > +{
> >> > +       struct drm_tegra_pushbuf_private *pushbuf;
> >> > +       void *ptr;
> >> > +       int err;
> >> > +
> >> > +       pushbuf = calloc(1, sizeof(*pushbuf));
> >> > +       if (!pushbuf)
> >> > +               return -ENOMEM;
> >> > +
> >> > +       DRMINITLISTHEAD(&pushbuf->list);
> >> > +       DRMINITLISTHEAD(&pushbuf->bos);
> >> > +       pushbuf->job = job;
> >> > +
> >> > +       *pushbufp = &pushbuf->base;
> >> > +
> >> > +       DRMLISTADD(&pushbuf->list, &job->pushbufs);
> >>
> >> Hmm. So the job keeps a list of pushbufs. What purpose does this
> >> serve? Shouldn't the job only need the cmdbuf-list (which it already
> >> has) and the BOs (so they can be dereferenced after being submitted)?
> >
> > This is currently used to keep track of existing pushbuffers and make
> > sure that they are freed (when the job is freed).
> >
> 
> OK, it seems to me that the need for keeping the pushbuffers around is
> simply not there. Only the BOs needs to be kept around, no?

Right. But unless we come up with an alternative API I can't just drop
this. Otherwise we'll be leaking pushbuffers all over the place if
callers don't clean them up themselves.

> > If reusing the pushbuf objects makes sense, then perhaps a different API
> > would be more useful. Rather than allocate in the context of a job, they
> > could be allocated in a channel-wide context and added to/associated
> > with a job only as needed.
> 
> Yes, I think this might make sense. I'll have to ponder a bit more
> about this, though.

What I've been thinking is something rougly like this:

int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbuf, struct
			  drm_tegra_channel *channel);
int drm_tegra_pushbuf_free(struct drm_tegra_pushbuf *pushbuf);
int drm_tegra_pushbuf_reset(struct drm_tegra_pushbuf *pushbuf);

int drm_tegra_job_queue(struct drm_tegra_job *job,
			struct drm_tegra_pushbuf *pushbuf);

Then we could either reset the pushbuf in drm_tegra_job_queue() (in
which case drm_tegra_pushbuf_reset() could be private) or leave it up to
the caller to manually reset when they need to reuse the pushbuffer.

> >> Either way, I think this should either call drm_tegra_pushbuf_prepare
> >> to make sure two words are available, or be renamed to reflect that it
> >> causes a push (or two, as in the current form).
> >
> > I've added a call to drm_tegra_pushbuf_prepare() here and in
> > drm_tegra_pushbuf_relocate() which also pushes one word to the pushbuf.
> >
> 
> Actually, I think drm_tegra_pushbuf_relocate should just be renamed to
> drm_tegra_pushbuf_push_reloc(), as that conveys quite clearly that it
> is just a push-helper and need to be counted when preparing. That way
> it doesn't need to prepare itself.

I don't think we necessarily need to rename the function to make it
obvious. This is completely new API anyway, so we can add comments to
explain that drm_tegra_pushbuf_relocate() will push a placeholder word
with a dummy address that will be used to relocate a BO and needs to be
included when preparing a pushbuffer.

> >> > +struct drm_tegra_channel;
> >> > +struct drm_tegra_job;
> >> > +
> >> > +struct drm_tegra_pushbuf {
> >> > +       uint32_t *ptr;
> >> > +};
> >>
> >> Hmm, single-member structs always makes me cringe a bit. But in this
> >> case it's much better than a double-pointer IMO.
> >>
> >> But perhaps some of the members in the private-struct might be useful
> >> out here? For instance, if "uint32_t *end;" was also available, we
> >> could do:
> >>
> >> inline void drm_tegra_pushbuf_push_word(struct drm_tegra_pushbuf
> >> *pushbuf, uint32_t value)
> >> {
> >>        assert(pushbuf->ptr < pushbuf->end);
> >>        *pushbuf->ptr++ = value;
> >> }
> >>
> >> ...and easily pick up bugs with too few words? The helper would of
> >> course be in the header-file, so the write gets inlined nicely.
> >
> > Sounds good. If you have no strong objections, I'd like to keep that for
> > a follow on patch when there's more code that actively uses this API. It
> > should be fairly safe to make more members public via drm_tegra_pushbuf
> > rather than the other way around, so I'd like to err on the side of
> > carefulness for a bit longer.
> 
> Hmm, isn't it the *current* code that is "careless" in this case? The
> client doesn't have enough context to validate this itself (without
> having access to the end-pointer)?

What I meant was being careful about how much we expose. Whatever we
expose now we can potentially never hide again because code may depend
on it being public. I don't think it's particularly careless to require
the caller to prepare a buffer. If they then write past its end, that's
their issue. That's like malloc()ing a buffer and then filling it with
more bytes than you've allocated. That's not the level of safety that
this API needs in my opinion.

Thierry
Erik Faye-Lund May 2, 2014, 3:59 p.m. UTC | #5
On Fri, May 2, 2014 at 5:16 PM, Thierry Reding <thierry.reding@gmail.com> wrote:
> On Fri, May 02, 2014 at 04:53:55PM +0200, Erik Faye-Lund wrote:
>> On Fri, May 2, 2014 at 4:06 PM, Thierry Reding <thierry.reding@gmail.com> wrote:
>> > On Thu, Apr 10, 2014 at 07:13:22PM +0200, Erik Faye-Lund wrote:
>> >> > diff --git a/tegra/pushbuf.c b/tegra/pushbuf.c
>> >> > new file mode 100644
>> >> > index 000000000000..178d5cd57541
>> >> > --- /dev/null
>> >> > +++ b/tegra/pushbuf.c
>> >> > @@ -0,0 +1,205 @@
>> >> <snip>
>> >> > +drm_public
>> >> > +int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
>> >> > +                         struct drm_tegra_job *job)
>> >> > +{
>> >> > +       struct drm_tegra_pushbuf_private *pushbuf;
>> >> > +       void *ptr;
>> >> > +       int err;
>> >> > +
>> >> > +       pushbuf = calloc(1, sizeof(*pushbuf));
>> >> > +       if (!pushbuf)
>> >> > +               return -ENOMEM;
>> >> > +
>> >> > +       DRMINITLISTHEAD(&pushbuf->list);
>> >> > +       DRMINITLISTHEAD(&pushbuf->bos);
>> >> > +       pushbuf->job = job;
>> >> > +
>> >> > +       *pushbufp = &pushbuf->base;
>> >> > +
>> >> > +       DRMLISTADD(&pushbuf->list, &job->pushbufs);
>> >>
>> >> Hmm. So the job keeps a list of pushbufs. What purpose does this
>> >> serve? Shouldn't the job only need the cmdbuf-list (which it already
>> >> has) and the BOs (so they can be dereferenced after being submitted)?
>> >
>> > This is currently used to keep track of existing pushbuffers and make
>> > sure that they are freed (when the job is freed).
>> >
>>
>> OK, it seems to me that the need for keeping the pushbuffers around is
>> simply not there. Only the BOs needs to be kept around, no?
>
> Right. But unless we come up with an alternative API I can't just drop
> this. Otherwise we'll be leaking pushbuffers all over the place if
> callers don't clean them up themselves.
>

OK. But just to get some clarity, if the same level of abstraction
calls drm_tegra_pushbuf_new() and drm_tegra_pushbuf_free() when it's
done, we'll currently get use-after-free and double-frees, right?

As-is, the client would have to call drm_tegra_pushbuf_new(), transfer
the ownership of that object to the job object by calling
drm_tegra_pushbuf_prepare(), and clean it up by calling
drm_tegra_job_submit()? Or am I missing something?

>> > If reusing the pushbuf objects makes sense, then perhaps a different API
>> > would be more useful. Rather than allocate in the context of a job, they
>> > could be allocated in a channel-wide context and added to/associated
>> > with a job only as needed.
>>
>> Yes, I think this might make sense. I'll have to ponder a bit more
>> about this, though.
>
> What I've been thinking is something rougly like this:
>
> int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbuf, struct
>                           drm_tegra_channel *channel);
> int drm_tegra_pushbuf_free(struct drm_tegra_pushbuf *pushbuf);
> int drm_tegra_pushbuf_reset(struct drm_tegra_pushbuf *pushbuf);
>
> int drm_tegra_job_queue(struct drm_tegra_job *job,
>                         struct drm_tegra_pushbuf *pushbuf);
>
> Then we could either reset the pushbuf in drm_tegra_job_queue() (in
> which case drm_tegra_pushbuf_reset() could be private) or leave it up to
> the caller to manually reset when they need to reuse the pushbuffer.
>

OK, this looks better to me. I'll still have to wrap my head around
these things a bit better to tell for sure, though ;)

>> >> Either way, I think this should either call drm_tegra_pushbuf_prepare
>> >> to make sure two words are available, or be renamed to reflect that it
>> >> causes a push (or two, as in the current form).
>> >
>> > I've added a call to drm_tegra_pushbuf_prepare() here and in
>> > drm_tegra_pushbuf_relocate() which also pushes one word to the pushbuf.
>> >
>>
>> Actually, I think drm_tegra_pushbuf_relocate should just be renamed to
>> drm_tegra_pushbuf_push_reloc(), as that conveys quite clearly that it
>> is just a push-helper and need to be counted when preparing. That way
>> it doesn't need to prepare itself.
>
> I don't think we necessarily need to rename the function to make it
> obvious. This is completely new API anyway, so we can add comments to
> explain that drm_tegra_pushbuf_relocate() will push a placeholder word
> with a dummy address that will be used to relocate a BO and needs to be
> included when preparing a pushbuffer.
>

True.

I have a "secret plan" here, which I guess I should just be open about:

I wonder if it would make sense in the long run to add multiple
push-variants, to save client code from repeating the same logic,
similar to the drm_tegra_pushbuf_push_word I mentioned above:

inline void drm_tegra_pushbuf_push_word(struct drm_tegra_pushbuf
*pushbuf, uint32_t value)
{
    assert(pushbuf->ptr < pushbuf->end);
    *pushbuf->ptr++ = value;
}

inline void drm_tegra_pushbuf_push_float(struct drm_tegra_pushbuf
*pushbuf, float value)
{
    union {
        float f;
        uint32_t i;
    } u;
    u.f = value;
    drm_tegra_pushbuf_push_word(pushbuf, u.i);
}

And probably a similar version for FP20. My thinking was that a reloc
belongs in that same category, and probably should have a similar
interface.

...But thinking about this a bit more, I think libdrm is the wrong
place to add these things. Most registers are packed bits of sorts, so
I guess the client code *needs* to wrap up loads of similar things
anyway. And besides, DDX doesn't care about many of these, so perhaps
Mesa is a better suited place for these "higher-level pushes".

>> >> But perhaps some of the members in the private-struct might be useful
>> >> out here? For instance, if "uint32_t *end;" was also available, we
>> >> could do:
>> >>
>> >> inline void drm_tegra_pushbuf_push_word(struct drm_tegra_pushbuf
>> >> *pushbuf, uint32_t value)
>> >> {
>> >>        assert(pushbuf->ptr < pushbuf->end);
>> >>        *pushbuf->ptr++ = value;
>> >> }
>> >>
>> >> ...and easily pick up bugs with too few words? The helper would of
>> >> course be in the header-file, so the write gets inlined nicely.
>> >
>> > Sounds good. If you have no strong objections, I'd like to keep that for
>> > a follow on patch when there's more code that actively uses this API. It
>> > should be fairly safe to make more members public via drm_tegra_pushbuf
>> > rather than the other way around, so I'd like to err on the side of
>> > carefulness for a bit longer.
>>
>> Hmm, isn't it the *current* code that is "careless" in this case? The
>> client doesn't have enough context to validate this itself (without
>> having access to the end-pointer)?
>
> What I meant was being careful about how much we expose. Whatever we
> expose now we can potentially never hide again because code may depend
> on it being public.

Fair enough. We're still hidden behind an experimental api-switch ATM.
But yeah, we should be careful about what we expose.

> I don't think it's particularly careless to require
> the caller to prepare a buffer. If they then write past its end, that's
> their issue. That's like malloc()ing a buffer and then filling it with
> more bytes than you've allocated. That's not the level of safety that
> this API needs in my opinion.

True, but we should also try to be helpful in finding out what went
wrong. And to follow up on your malloc-example, most sane CRTs have
some heap-corruption detection mechanisms to aid debugging. I don't
think this suggestion in particular is bending over backwards, though.
diff mbox

Patch

diff --git a/tegra/Makefile.am b/tegra/Makefile.am
index 1b83145b120d..c73587e8661e 100644
--- a/tegra/Makefile.am
+++ b/tegra/Makefile.am
@@ -11,6 +11,10 @@  libdrm_tegra_la_LDFLAGS = -version-number 0:0:0 -no-undefined
 libdrm_tegra_la_LIBADD = ../libdrm.la @PTHREADSTUBS_LIBS@
 
 libdrm_tegra_la_SOURCES = \
+	channel.c \
+	fence.c \
+	job.c \
+	pushbuf.c \
 	tegra.c
 
 libdrm_tegraincludedir = ${includedir}/libdrm
diff --git a/tegra/channel.c b/tegra/channel.c
new file mode 100644
index 000000000000..3ab1d578f8e5
--- /dev/null
+++ b/tegra/channel.c
@@ -0,0 +1,127 @@ 
+/*
+ * Copyright © 2012, 2013 Thierry Reding
+ * Copyright © 2013 Erik Faye-Lund
+ * Copyright © 2014 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <errno.h>
+#include <string.h>
+
+#include "private.h"
+
+static int drm_tegra_channel_setup(struct drm_tegra_channel *channel)
+{
+	struct drm_tegra *drm = channel->drm;
+	struct drm_tegra_get_syncpt args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.context = channel->context;
+	args.index = 0;
+
+	err = ioctl(drm->fd, DRM_IOCTL_TEGRA_GET_SYNCPT, &args);
+	if (err < 0)
+		return -errno;
+
+	channel->syncpt = args.id;
+
+	return 0;
+}
+
+drm_public
+int drm_tegra_channel_open(struct drm_tegra_channel **channelp,
+			   struct drm_tegra *drm,
+			   enum drm_tegra_class client)
+{
+	struct drm_tegra_open_channel args;
+	struct drm_tegra_channel *channel;
+	enum host1x_class class;
+	int err;
+
+	switch (client) {
+	case DRM_TEGRA_GR2D:
+		class = HOST1X_CLASS_GR2D;
+		break;
+
+	case DRM_TEGRA_GR3D:
+		class = HOST1X_CLASS_GR3D;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	channel = calloc(1, sizeof(*channel));
+	if (!channel)
+		return -ENOMEM;
+
+	channel->drm = drm;
+
+	memset(&args, 0, sizeof(args));
+	args.client = class;
+
+	err = ioctl(drm->fd, DRM_IOCTL_TEGRA_OPEN_CHANNEL, &args);
+	if (err < 0) {
+		free(channel);
+		return -errno;
+	}
+
+	channel->context = args.context;
+	channel->class = class;
+
+	err = drm_tegra_channel_setup(channel);
+	if (err < 0) {
+		free(channel);
+		return err;
+	}
+
+	*channelp = channel;
+
+	return 0;
+}
+
+drm_public
+int drm_tegra_channel_close(struct drm_tegra_channel *channel)
+{
+	struct drm_tegra_close_channel args;
+	struct drm_tegra *drm;
+	int err;
+
+	if (!channel)
+		return -EINVAL;
+
+	drm = channel->drm;
+
+	memset(&args, 0, sizeof(args));
+	args.context = channel->context;
+
+	err = ioctl(drm->fd, DRM_IOCTL_TEGRA_CLOSE_CHANNEL, &args);
+	if (err < 0)
+		return -errno;
+
+	free(channel);
+
+	return 0;
+}
diff --git a/tegra/fence.c b/tegra/fence.c
new file mode 100644
index 000000000000..f58725ca8472
--- /dev/null
+++ b/tegra/fence.c
@@ -0,0 +1,71 @@ 
+/*
+ * Copyright © 2012, 2013 Thierry Reding
+ * Copyright © 2013 Erik Faye-Lund
+ * Copyright © 2014 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <errno.h>
+#include <string.h>
+
+#include "private.h"
+
+drm_public
+int drm_tegra_fence_wait_timeout(struct drm_tegra_fence *fence,
+				 unsigned long timeout)
+{
+	struct drm_tegra_syncpt_wait args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.id = fence->syncpt;
+	args.thresh = fence->value;
+	args.timeout = timeout;
+
+	while (true) {
+		err = ioctl(fence->drm->fd, DRM_IOCTL_TEGRA_SYNCPT_WAIT, &args);
+		if (err < 0) {
+			if (errno == EINTR)
+				continue;
+
+			return -errno;
+		}
+
+		break;
+	}
+
+	return 0;
+}
+
+drm_public
+int drm_tegra_fence_wait(struct drm_tegra_fence *fence)
+{
+	return drm_tegra_fence_wait_timeout(fence, -1);
+}
+
+drm_public
+void drm_tegra_fence_free(struct drm_tegra_fence *fence)
+{
+	free(fence);
+}
diff --git a/tegra/job.c b/tegra/job.c
new file mode 100644
index 000000000000..8df643c9a9bb
--- /dev/null
+++ b/tegra/job.c
@@ -0,0 +1,180 @@ 
+/*
+ * Copyright © 2012, 2013 Thierry Reding
+ * Copyright © 2013 Erik Faye-Lund
+ * Copyright © 2014 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "private.h"
+
+int drm_tegra_job_add_reloc(struct drm_tegra_job *job,
+			    const struct drm_tegra_reloc *reloc)
+{
+	struct drm_tegra_reloc *relocs;
+	size_t size;
+
+	size = (job->num_relocs + 1) * sizeof(*reloc);
+
+	relocs = realloc(job->relocs, size);
+	if (!reloc)
+		return -ENOMEM;
+
+	job->relocs = relocs;
+
+	job->relocs[job->num_relocs++] = *reloc;
+
+	return 0;
+}
+
+int drm_tegra_job_add_cmdbuf(struct drm_tegra_job *job,
+			     const struct drm_tegra_cmdbuf *cmdbuf)
+{
+	struct drm_tegra_cmdbuf *cmdbufs;
+	size_t size;
+
+	size = (job->num_cmdbufs + 1) * sizeof(*cmdbuf);
+
+	cmdbufs = realloc(job->cmdbufs, size);
+	if (!cmdbufs)
+		return -ENOMEM;
+
+	cmdbufs[job->num_cmdbufs++] = *cmdbuf;
+	job->cmdbufs = cmdbufs;
+
+	return 0;
+}
+
+drm_public
+int drm_tegra_job_new(struct drm_tegra_job **jobp,
+		      struct drm_tegra_channel *channel)
+{
+	struct drm_tegra_job *job;
+
+	job = calloc(1, sizeof(*job));
+	if (!job)
+		return -ENOMEM;
+
+	DRMINITLISTHEAD(&job->pushbufs);
+	job->channel = channel;
+
+	*jobp = job;
+
+	return 0;
+}
+
+drm_public
+int drm_tegra_job_free(struct drm_tegra_job *job)
+{
+	struct drm_tegra_pushbuf_private *pushbuf;
+
+	if (!job)
+		return -EINVAL;
+
+	DRMLISTFOREACHENTRY(pushbuf, &job->pushbufs, list)
+		drm_tegra_pushbuf_free(&pushbuf->base);
+
+	free(job->cmdbufs);
+	free(job->relocs);
+	free(job);
+
+	return 0;
+}
+
+drm_public
+int drm_tegra_job_submit(struct drm_tegra_job *job,
+			 struct drm_tegra_fence **fencep)
+{
+	struct drm_tegra *drm = job->channel->drm;
+	struct drm_tegra_pushbuf_private *pushbuf;
+	struct drm_tegra_fence *fence = NULL;
+	struct drm_tegra_cmdbuf *cmdbufs;
+	struct drm_tegra_syncpt *syncpts;
+	struct drm_tegra_submit args;
+	unsigned int i;
+	int err;
+
+	/*
+	 * Make sure the current command stream buffer is queued for
+	 * submission.
+	 */
+	err = drm_tegra_pushbuf_queue(job->pushbuf);
+	if (err < 0)
+		return err;
+
+	job->pushbuf = NULL;
+
+	if (fencep) {
+		fence = calloc(1, sizeof(*fence));
+		if (!fence)
+			return -ENOMEM;
+	}
+
+	syncpts = calloc(1, sizeof(*syncpts));
+	if (!syncpts) {
+		free(cmdbufs);
+		free(fence);
+		return -ENOMEM;
+	}
+
+	syncpts[0].id = job->syncpt;
+	syncpts[0].incrs = job->increments;
+
+	memset(&args, 0, sizeof(args));
+	args.context = job->channel->context;
+	args.num_syncpts = 1;
+	args.num_cmdbufs = job->num_cmdbufs;
+	args.num_relocs = job->num_relocs;
+	args.num_waitchks = 0;
+	args.waitchk_mask = 0;
+	args.timeout = 1000;
+
+	args.syncpts = (uintptr_t)syncpts;
+	args.cmdbufs = (uintptr_t)job->cmdbufs;
+	args.relocs = (uintptr_t)job->relocs;
+	args.waitchks = 0;
+
+	err = ioctl(drm->fd, DRM_IOCTL_TEGRA_SUBMIT, &args);
+	if (err < 0) {
+		free(syncpts);
+		free(cmdbufs);
+		free(fence);
+		return -errno;
+	}
+
+	if (fence) {
+		fence->syncpt = job->syncpt;
+		fence->value = args.fence;
+		fence->drm = drm;
+		*fencep = fence;
+	}
+
+	free(syncpts);
+	free(cmdbufs);
+
+	return 0;
+}
diff --git a/tegra/private.h b/tegra/private.h
index 9b6bc9395d23..fc74fb56b58d 100644
--- a/tegra/private.h
+++ b/tegra/private.h
@@ -26,13 +26,31 @@ 
 #define __DRM_TEGRA_PRIVATE_H__ 1
 
 #include <stdbool.h>
+#include <stddef.h>
 #include <stdint.h>
 
 #include <libdrm.h>
+#include <libdrm_lists.h>
 #include <xf86atomic.h>
 
+#include "tegra_drm.h"
 #include "tegra.h"
 
+#define container_of(ptr, type, member) ({				\
+		const typeof(((type *)0)->member) *__mptr = (ptr);	\
+		(type *)((char *)__mptr - offsetof(type, member));	\
+	})
+
+#define align(offset, align) \
+	(((offset) + (align) - 1) & ~((align) - 1))
+
+enum host1x_class {
+	HOST1X_CLASS_HOST1X = 0x01,
+	HOST1X_CLASS_GR2D = 0x51,
+	HOST1X_CLASS_GR2D_SB = 0x52,
+	HOST1X_CLASS_GR3D = 0x60,
+};
+
 struct drm_tegra {
 	bool close;
 	int fd;
@@ -40,6 +58,7 @@  struct drm_tegra {
 
 struct drm_tegra_bo {
 	struct drm_tegra *drm;
+	drmMMListHead list;
 	uint32_t handle;
 	uint32_t offset;
 	uint32_t flags;
@@ -48,4 +67,57 @@  struct drm_tegra_bo {
 	void *map;
 };
 
+struct drm_tegra_channel {
+	struct drm_tegra *drm;
+	enum host1x_class class;
+	uint64_t context;
+	uint32_t syncpt;
+};
+
+struct drm_tegra_fence {
+	struct drm_tegra *drm;
+	uint32_t syncpt;
+	uint32_t value;
+};
+
+struct drm_tegra_pushbuf_private {
+	struct drm_tegra_pushbuf base;
+	struct drm_tegra_job *job;
+	drmMMListHead list;
+	drmMMListHead bos;
+
+	struct drm_tegra_bo *bo;
+	uint32_t *start;
+	uint32_t *end;
+};
+
+static inline struct drm_tegra_pushbuf_private *
+pushbuf_priv(struct drm_tegra_pushbuf *pb)
+{
+	return container_of(pb, struct drm_tegra_pushbuf_private, base);
+}
+
+int drm_tegra_pushbuf_queue(struct drm_tegra_pushbuf_private *pushbuf);
+
+struct drm_tegra_job {
+	struct drm_tegra_channel *channel;
+
+	unsigned int increments;
+	uint32_t syncpt;
+
+	struct drm_tegra_reloc *relocs;
+	unsigned int num_relocs;
+
+	struct drm_tegra_cmdbuf *cmdbufs;
+	unsigned int num_cmdbufs;
+
+	struct drm_tegra_pushbuf_private *pushbuf;
+	drmMMListHead pushbufs;
+};
+
+int drm_tegra_job_add_reloc(struct drm_tegra_job *job,
+			    const struct drm_tegra_reloc *reloc);
+int drm_tegra_job_add_cmdbuf(struct drm_tegra_job *job,
+			     const struct drm_tegra_cmdbuf *cmdbuf);
+
 #endif /* __DRM_TEGRA_PRIVATE_H__ */
diff --git a/tegra/pushbuf.c b/tegra/pushbuf.c
new file mode 100644
index 000000000000..178d5cd57541
--- /dev/null
+++ b/tegra/pushbuf.c
@@ -0,0 +1,205 @@ 
+/*
+ * Copyright © 2012, 2013 Thierry Reding
+ * Copyright © 2013 Erik Faye-Lund
+ * Copyright © 2014 NVIDIA Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "private.h"
+
+#define HOST1X_OPCODE_NONINCR(offset, count) \
+	((0x2 << 28) | (((offset) & 0xfff) << 16) | ((count) & 0xffff))
+
+int drm_tegra_pushbuf_queue(struct drm_tegra_pushbuf_private *pushbuf)
+{
+	struct drm_tegra_cmdbuf cmdbuf;
+	int err;
+
+	if (!pushbuf || !pushbuf->bo)
+		return 0;
+
+	/* unmap buffer object since it won't be accessed anymore */
+	drm_tegra_bo_unmap(pushbuf->bo);
+
+	/* add buffer object as command buffers for this job */
+	memset(&cmdbuf, 0, sizeof(cmdbuf));
+	cmdbuf.words = pushbuf->base.ptr - pushbuf->start;
+	cmdbuf.handle = pushbuf->bo->handle;
+	cmdbuf.offset = 0;
+
+	err = drm_tegra_job_add_cmdbuf(pushbuf->job, &cmdbuf);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static inline unsigned long
+drm_tegra_pushbuf_get_offset(struct drm_tegra_pushbuf *pushbuf)
+{
+	struct drm_tegra_pushbuf_private *priv = pushbuf_priv(pushbuf);
+
+	return (unsigned long)pushbuf->ptr - (unsigned long)priv->start;
+}
+
+drm_public
+int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
+			  struct drm_tegra_job *job)
+{
+	struct drm_tegra_pushbuf_private *pushbuf;
+	void *ptr;
+	int err;
+
+	pushbuf = calloc(1, sizeof(*pushbuf));
+	if (!pushbuf)
+		return -ENOMEM;
+
+	DRMINITLISTHEAD(&pushbuf->list);
+	DRMINITLISTHEAD(&pushbuf->bos);
+	pushbuf->job = job;
+
+	*pushbufp = &pushbuf->base;
+
+	DRMLISTADD(&pushbuf->list, &job->pushbufs);
+	job->pushbuf = pushbuf;
+
+	return 0;
+}
+
+drm_public
+int drm_tegra_pushbuf_free(struct drm_tegra_pushbuf *pushbuf)
+{
+	struct drm_tegra_pushbuf_private *priv = pushbuf_priv(pushbuf);
+	struct drm_tegra_bo *bo, *tmp;
+
+	if (!pushbuf)
+		return -EINVAL;
+
+	drm_tegra_bo_unmap(priv->bo);
+
+	DRMLISTFOREACHENTRYSAFE(bo, tmp, &priv->bos, list)
+		drm_tegra_bo_put(priv->bo);
+
+	DRMLISTDEL(&priv->list);
+	free(priv);
+
+	return 0;
+}
+
+/**
+ * drm_tegra_pushbuf_prepare() - prepare push buffer for a series of pushes
+ * @pushbuf: push buffer
+ * @words: maximum number of words in series of pushes to follow
+ */
+drm_public
+int drm_tegra_pushbuf_prepare(struct drm_tegra_pushbuf *pushbuf,
+			      unsigned int words)
+{
+	struct drm_tegra_pushbuf_private *priv = pushbuf_priv(pushbuf);
+	struct drm_tegra_channel *channel = priv->job->channel;
+	struct drm_tegra_bo *bo;
+	void *ptr;
+	int err;
+
+	if (priv->bo && (pushbuf->ptr + words < priv->end))
+		return 0;
+
+	/*
+	 * Align to full pages, since buffer object allocations are page
+	 * granular anyway.
+	 */
+	words = align(words, 1024);
+
+	err = drm_tegra_bo_new(&bo, channel->drm, 0, words * sizeof(uint32_t));
+	if (err < 0)
+		return err;
+
+	err = drm_tegra_bo_map(bo, &ptr);
+	if (err < 0) {
+		drm_tegra_bo_put(bo);
+		return err;
+	}
+
+	/* queue current command stream buffer for submission */
+	err = drm_tegra_pushbuf_queue(priv);
+	if (err < 0) {
+		drm_tegra_bo_unmap(bo);
+		drm_tegra_bo_put(bo);
+		return err;
+	}
+
+	DRMLISTADD(&bo->list, &priv->bos);
+
+	priv->start = priv->base.ptr = ptr;
+	priv->end = priv->start + bo->size;
+	priv->bo = bo;
+
+	return 0;
+}
+
+drm_public
+int drm_tegra_pushbuf_relocate(struct drm_tegra_pushbuf *pushbuf,
+			       struct drm_tegra_bo *target,
+			       unsigned long offset,
+			       unsigned long shift)
+{
+	struct drm_tegra_pushbuf_private *priv = pushbuf_priv(pushbuf);
+	struct drm_tegra_reloc reloc;
+	int err;
+
+	memset(&reloc, 0, sizeof(reloc));
+	reloc.cmdbuf.handle = priv->bo->handle;
+	reloc.cmdbuf.offset = drm_tegra_pushbuf_get_offset(pushbuf);
+	reloc.target.handle = target->handle;
+	reloc.target.offset = offset;
+	reloc.shift = shift;
+
+	err = drm_tegra_job_add_reloc(priv->job, &reloc);
+	if (err < 0)
+		return err;
+
+	*pushbuf->ptr++ = 0xdeadbeef;
+
+	return 0;
+}
+
+drm_public
+int drm_tegra_pushbuf_sync(struct drm_tegra_pushbuf *pushbuf,
+			   enum drm_tegra_syncpt_cond cond)
+{
+	struct drm_tegra_pushbuf_private *priv = pushbuf_priv(pushbuf);
+
+	if (cond >= DRM_TEGRA_SYNCPT_COND_MAX)
+		return -EINVAL;
+
+	*pushbuf->ptr++ = HOST1X_OPCODE_NONINCR(0x0, 0x1);
+	*pushbuf->ptr++ = cond << 8 | priv->job->syncpt;
+	priv->job->increments++;
+
+	return 0;
+}
diff --git a/tegra/tegra.c b/tegra/tegra.c
index bf6d035453d5..1f7b9e7345b6 100644
--- a/tegra/tegra.c
+++ b/tegra/tegra.c
@@ -56,6 +56,7 @@  static void drm_tegra_bo_free(struct drm_tegra_bo *bo)
 
 	drmIoctl(drm->fd, DRM_IOCTL_GEM_CLOSE, &args);
 
+	DRMLISTDEL(&bo->list);
 	free(bo);
 }
 
@@ -127,6 +128,7 @@  int drm_tegra_bo_new(struct drm_tegra_bo **bop, struct drm_tegra *drm,
 	if (!bo)
 		return -ENOMEM;
 
+	DRMINITLISTHEAD(&bo->list);
 	atomic_set(&bo->ref, 1);
 	bo->flags = flags;
 	bo->size = size;
diff --git a/tegra/tegra.h b/tegra/tegra.h
index 0731cb3bd4dc..4cf3b49a3e3e 100644
--- a/tegra/tegra.h
+++ b/tegra/tegra.h
@@ -28,6 +28,13 @@ 
 #include <stdint.h>
 #include <stdlib.h>
 
+#include <tegra_drm.h>
+
+enum drm_tegra_class {
+	DRM_TEGRA_GR2D,
+	DRM_TEGRA_GR3D,
+};
+
 struct drm_tegra_bo;
 struct drm_tegra;
 
@@ -44,4 +51,49 @@  int drm_tegra_bo_get_handle(struct drm_tegra_bo *bo, uint32_t *handle);
 int drm_tegra_bo_map(struct drm_tegra_bo *bo, void **ptr);
 int drm_tegra_bo_unmap(struct drm_tegra_bo *bo);
 
+struct drm_tegra_channel;
+struct drm_tegra_job;
+
+struct drm_tegra_pushbuf {
+	uint32_t *ptr;
+};
+
+struct drm_tegra_fence;
+
+enum drm_tegra_syncpt_cond {
+	DRM_TEGRA_SYNCPT_COND_IMMEDIATE,
+	DRM_TEGRA_SYNCPT_COND_OP_DONE,
+	DRM_TEGRA_SYNCPT_COND_RD_DONE,
+	DRM_TEGRA_SYNCPT_COND_WR_SAFE,
+	DRM_TEGRA_SYNCPT_COND_MAX,
+};
+
+int drm_tegra_channel_open(struct drm_tegra_channel **channelp,
+			   struct drm_tegra *drm,
+			   enum drm_tegra_class client);
+int drm_tegra_channel_close(struct drm_tegra_channel *channel);
+
+int drm_tegra_job_new(struct drm_tegra_job **jobp,
+		      struct drm_tegra_channel *channel);
+int drm_tegra_job_free(struct drm_tegra_job *job);
+int drm_tegra_job_submit(struct drm_tegra_job *job,
+			 struct drm_tegra_fence **fencep);
+
+int drm_tegra_pushbuf_new(struct drm_tegra_pushbuf **pushbufp,
+			  struct drm_tegra_job *job);
+int drm_tegra_pushbuf_free(struct drm_tegra_pushbuf *pushbuf);
+int drm_tegra_pushbuf_prepare(struct drm_tegra_pushbuf *pushbuf,
+			      unsigned int words);
+int drm_tegra_pushbuf_relocate(struct drm_tegra_pushbuf *pushbuf,
+			       struct drm_tegra_bo *target,
+			       unsigned long offset,
+			       unsigned long shift);
+int drm_tegra_pushbuf_sync(struct drm_tegra_pushbuf *pushbuf,
+			   enum drm_tegra_syncpt_cond cond);
+
+int drm_tegra_fence_wait_timeout(struct drm_tegra_fence *fence,
+				 unsigned long timeout);
+int drm_tegra_fence_wait(struct drm_tegra_fence *fence);
+void drm_tegra_fence_free(struct drm_tegra_fence *fence);
+
 #endif /* __DRM_TEGRA_H__ */