diff mbox

[v6,14/19] arm64:ilp32: add sys_ilp32.c and a separate table (in entry.S) to use it

Message ID 1447795019-30176-15-git-send-email-ynorov@caviumnetworks.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yury Norov Nov. 17, 2015, 9:16 p.m. UTC
From: Andrew Pinski <apinski@cavium.com>

Add a separate syscall-table for ILP32, which dispatches either to native
LP64 system call implementation or to compat-syscalls, as appropriate.

Reviewed-by: David Daney <ddaney@caviumnetworks.com>

Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
Signed-off-by: Andrew Pinski <Andrew.Pinski@caviumnetworks.com>
---
 arch/arm64/include/asm/unistd.h |   7 ++-
 arch/arm64/kernel/Makefile      |   1 +
 arch/arm64/kernel/entry.S       |  12 +++-
 arch/arm64/kernel/sys_ilp32.c   | 118 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 136 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/kernel/sys_ilp32.c

Comments

Arnd Bergmann Nov. 17, 2015, 9:57 p.m. UTC | #1
On Wednesday 18 November 2015 00:16:54 Yury Norov wrote:
> From: Andrew Pinski <apinski@cavium.com>
> 
> Add a separate syscall-table for ILP32, which dispatches either to native
> LP64 system call implementation or to compat-syscalls, as appropriate.

I like it much better than the previous version, thanks for the rework!

However, it seems that you accidentally have a lot more redirects
than you should have:

> +/* Using non-compat syscalls where necessary */
> +#define compat_sys_fadvise64_64        sys_fadvise64_64
> +#define compat_sys_fallocate           sys_fallocate
> +#define compat_sys_ftruncate64         sys_ftruncate
> +#define compat_sys_pread64             sys_pread64
> +#define compat_sys_pwrite64            sys_pwrite64
> +#define compat_sys_readahead           sys_readahead

Makes sense. These of course all require the respective changes
in glibc as discussed in the thread regarding loff_t handling.

> +#define compat_sys_rt_sigaction        sys_rt_sigaction
> +#define compat_sys_shmat               sys_shmat

What's special about compat_sys_shmat?

> +#define compat_sys_sync_file_range     sys_sync_file_range
> +#define compat_sys_truncate64          sys_truncate
> +#define compat_sys_sigaltstack         sys_sigaltstack
> +
> +#define compat_sys_io_getevents        sys_io_getevents

io_getevents seems wrong, you are passing the wrong timespec and
aio_context_t here.

> +#define compat_sys_lookup_dcookie      sys_lookup_dcookie
> +#define compat_sys_epoll_pwait         sys_epoll_pwait

epoll_pwait takes sigset_t, which I'd assume is different between
ilp32 and lp64, so this is probably wrong too, at least on big-endian.

> +#define compat_sys_fcntl64             compat_sys_fcntl

This uses compat_off_t, not compat_loff_t, and needs to be changed.

> +#define compat_sys_signalfd4           sys_signalfd4
> +#define compat_sys_rt_sigsuspend       sys_rt_sigsuspend
> +#define compat_sys_rt_sigprocmask      sys_rt_sigprocmask
> +#define compat_sys_rt_sigpending       sys_rt_sigpending

sigset_t again, all four of these.

> +#define compat_sys_rt_sigqueueinfo     sys_rt_sigqueueinfo

this looks ok though, as you have the 64-bit siginfo

> +#define compat_sys_semtimedop          sys_semtimedop

timespec again

> +#define compat_sys_rt_tgsigqueueinfo   sys_rt_tgsigqueueinfo
> +
> +#define compat_sys_timer_create        sys_timer_create
> +#define compat_sys_timer_gettime       sys_timer_gettime
> +#define compat_sys_timer_settime       sys_timer_settime

timespec again for gettime/settime. create seems fine if you require
the use of the 64-bit sigevent (why?)

> +#define compat_sys_rt_sigtimedwait     sys_rt_sigtimedwait

This one probably needs a custom wrapper as you have the 64-bit
siginfo, but the 32-bit sigset and timespec.

> +#define compat_sys_mq_open             sys_mq_open
> +#define compat_sys_mq_timedsend        sys_mq_timedsend
> +#define compat_sys_mq_timedreceive     sys_mq_timedreceive
> +#define compat_sys_mq_getsetattr       sys_mq_getsetattr
> +#define compat_sys_mq_open             sys_mq_open

You have compat_sys_mq_open twice, and they all look wrong because
you get the wrong struct mq_attr.

> +#define compat_sys_open_by_handle_at   sys_open_by_handle_at

The only difference here is the forced O_LARGEFILE, but that
is set by glibc anyway, right?

> +#define compat_sys_clock_adjtime       sys_clock_adjtime

wrong timex structure

> +#define compat_sys_openat              sys_openat

same as open_by_handle_at

> +#define compat_sys_getdents64          sys_getdents64

glibc uses linux_dirent64 for 32-bit architectures, so this looks wrong

> +#define compat_sys_waitid              sys_waitid

This will probably need a separate wrapper to convert rusage but not siginfo

> +#define compat_sys_timer_settime       sys_timer_settime
> +#define compat_sys_sched_rr_get_interval sys_sched_rr_get_interval

timespec again

> +#define compat_sys_execveat            sys_execveat

This probably gives you the wrong struct user_arg_ptr

> +#define compat_sys_mq_notify           sys_mq_notify

ok.

> +#define compat_sys_clock_nanosleep     sys_clock_nanosleep
> +#define compat_sys_clock_getres        sys_clock_getres

timespec

> +#define sys_lseek                      sys_llseek

This seems pointless, as there is no sys_lseek

> +asmlinkage long compat_sys_mmap2_wrapper(void);
> +#define sys_mmap2                      compat_sys_mmap2_wrapper
> +
> +asmlinkage long compat_sys_fstatfs64_wrapper(void);
> +#define compat_sys_fstatfs64    compat_sys_fstatfs64_wrapper
> +asmlinkage long compat_sys_statfs64_wrapper(void);
> +#define compat_sys_statfs64             compat_sys_statfs64_wrapper

What are the wrappers for again? Maybe add a comment here.

> +#define compat_sys_preadv              compat_sys_preadv64
> +#define compat_sys_pwritev	       compat_sys_pwritev64

wrong iovec.

	Arnd
Arnd Bergmann Nov. 18, 2015, 8:14 a.m. UTC | #2
On Tuesday 17 November 2015 22:57:52 Arnd Bergmann wrote:
> > +#define compat_sys_open_by_handle_at   sys_open_by_handle_at
> 
> The only difference here is the forced O_LARGEFILE, but that
> is set by glibc anyway, right?
> 
> > +#define compat_sys_openat              sys_openat
> 
> same as open_by_handle_at
> 

I gave it some more thought, and I think we should actually stay
with sys_open_by_handle_at and sys_openat, but do it a little differently:

Forcing O_LARGEFILE is probably what we want for all architectures
going forward, so we should change asm-generic/unistd.h to use sys_openat
for compat mode, and override it for tile32 compat, which is the only
existing user of this table at the moment and which needs to
retain the existing behavior.

	Arnd
Andreas Schwab Nov. 19, 2015, 1:21 p.m. UTC | #3
Arnd Bergmann <arnd@arndb.de> writes:

> On Wednesday 18 November 2015 00:16:54 Yury Norov wrote:
>> +#define sys_lseek                      sys_llseek
>
> This seems pointless, as there is no sys_lseek

I think it should be the other way round.  Why would you want to use
llseek if you can pass loff_t in a single register?

Andreas.
Arnd Bergmann Nov. 19, 2015, 1:32 p.m. UTC | #4
On Thursday 19 November 2015 14:21:16 Andreas Schwab wrote:
> Arnd Bergmann <arnd@arndb.de> writes:
> 
> > On Wednesday 18 November 2015 00:16:54 Yury Norov wrote:
> >> +#define sys_lseek                      sys_llseek
> >
> > This seems pointless, as there is no sys_lseek
> 
> I think it should be the other way round.  Why would you want to use
> llseek if you can pass loff_t in a single register?

Right, it makes more sense the other way round.

	Arnd
Arnd Bergmann Nov. 30, 2015, 3:34 p.m. UTC | #5
On Tuesday 17 November 2015 22:57:52 Arnd Bergmann wrote:
> On Wednesday 18 November 2015 00:16:54 Yury Norov wrote:
> > From: Andrew Pinski <apinski@cavium.com>
> > 
> > Add a separate syscall-table for ILP32, which dispatches either to native
> > LP64 system call implementation or to compat-syscalls, as appropriate.
> 
> I like it much better than the previous version, thanks for the rework!

Hi Yuri,

you must have missed my reply below. Are you still working on ilp32
or did you drop this thread because you got distracted with something
else?

	Arnd

> However, it seems that you accidentally have a lot more redirects
> than you should have:
> 
> > +/* Using non-compat syscalls where necessary */
> > +#define compat_sys_fadvise64_64        sys_fadvise64_64
> > +#define compat_sys_fallocate           sys_fallocate
> > +#define compat_sys_ftruncate64         sys_ftruncate
> > +#define compat_sys_pread64             sys_pread64
> > +#define compat_sys_pwrite64            sys_pwrite64
> > +#define compat_sys_readahead           sys_readahead
> 
> Makes sense. These of course all require the respective changes
> in glibc as discussed in the thread regarding loff_t handling.
> 
> > +#define compat_sys_rt_sigaction        sys_rt_sigaction
> > +#define compat_sys_shmat               sys_shmat
> 
> What's special about compat_sys_shmat?
> 
> > +#define compat_sys_sync_file_range     sys_sync_file_range
> > +#define compat_sys_truncate64          sys_truncate
> > +#define compat_sys_sigaltstack         sys_sigaltstack
> > +
> > +#define compat_sys_io_getevents        sys_io_getevents
> 
> io_getevents seems wrong, you are passing the wrong timespec and
> aio_context_t here.
> 
> > +#define compat_sys_lookup_dcookie      sys_lookup_dcookie
> > +#define compat_sys_epoll_pwait         sys_epoll_pwait
> 
> epoll_pwait takes sigset_t, which I'd assume is different between
> ilp32 and lp64, so this is probably wrong too, at least on big-endian.
> 
> > +#define compat_sys_fcntl64             compat_sys_fcntl
> 
> This uses compat_off_t, not compat_loff_t, and needs to be changed.
> 
> > +#define compat_sys_signalfd4           sys_signalfd4
> > +#define compat_sys_rt_sigsuspend       sys_rt_sigsuspend
> > +#define compat_sys_rt_sigprocmask      sys_rt_sigprocmask
> > +#define compat_sys_rt_sigpending       sys_rt_sigpending
> 
> sigset_t again, all four of these.
> 
> > +#define compat_sys_rt_sigqueueinfo     sys_rt_sigqueueinfo
> 
> this looks ok though, as you have the 64-bit siginfo
> 
> > +#define compat_sys_semtimedop          sys_semtimedop
> 
> timespec again
> 
> > +#define compat_sys_rt_tgsigqueueinfo   sys_rt_tgsigqueueinfo
> > +
> > +#define compat_sys_timer_create        sys_timer_create
> > +#define compat_sys_timer_gettime       sys_timer_gettime
> > +#define compat_sys_timer_settime       sys_timer_settime
> 
> timespec again for gettime/settime. create seems fine if you require
> the use of the 64-bit sigevent (why?)
> 
> > +#define compat_sys_rt_sigtimedwait     sys_rt_sigtimedwait
> 
> This one probably needs a custom wrapper as you have the 64-bit
> siginfo, but the 32-bit sigset and timespec.
> 
> > +#define compat_sys_mq_open             sys_mq_open
> > +#define compat_sys_mq_timedsend        sys_mq_timedsend
> > +#define compat_sys_mq_timedreceive     sys_mq_timedreceive
> > +#define compat_sys_mq_getsetattr       sys_mq_getsetattr
> > +#define compat_sys_mq_open             sys_mq_open
> 
> You have compat_sys_mq_open twice, and they all look wrong because
> you get the wrong struct mq_attr.
> 
> > +#define compat_sys_open_by_handle_at   sys_open_by_handle_at
> 
> The only difference here is the forced O_LARGEFILE, but that
> is set by glibc anyway, right?
> 
> > +#define compat_sys_clock_adjtime       sys_clock_adjtime
> 
> wrong timex structure
> 
> > +#define compat_sys_openat              sys_openat
> 
> same as open_by_handle_at
> 
> > +#define compat_sys_getdents64          sys_getdents64
> 
> glibc uses linux_dirent64 for 32-bit architectures, so this looks wrong
> 
> > +#define compat_sys_waitid              sys_waitid
> 
> This will probably need a separate wrapper to convert rusage but not siginfo
> 
> > +#define compat_sys_timer_settime       sys_timer_settime
> > +#define compat_sys_sched_rr_get_interval sys_sched_rr_get_interval
> 
> timespec again
> 
> > +#define compat_sys_execveat            sys_execveat
> 
> This probably gives you the wrong struct user_arg_ptr
> 
> > +#define compat_sys_mq_notify           sys_mq_notify
> 
> ok.
> 
> > +#define compat_sys_clock_nanosleep     sys_clock_nanosleep
> > +#define compat_sys_clock_getres        sys_clock_getres
> 
> timespec
> 
> > +#define sys_lseek                      sys_llseek
> 
> This seems pointless, as there is no sys_lseek
> 
> > +asmlinkage long compat_sys_mmap2_wrapper(void);
> > +#define sys_mmap2                      compat_sys_mmap2_wrapper
> > +
> > +asmlinkage long compat_sys_fstatfs64_wrapper(void);
> > +#define compat_sys_fstatfs64    compat_sys_fstatfs64_wrapper
> > +asmlinkage long compat_sys_statfs64_wrapper(void);
> > +#define compat_sys_statfs64             compat_sys_statfs64_wrapper
> 
> What are the wrappers for again? Maybe add a comment here.
> 
> > +#define compat_sys_preadv              compat_sys_preadv64
> > +#define compat_sys_pwritev	       compat_sys_pwritev64
> 
> wrong iovec.
> 
> 	Arnd
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Yury Norov Nov. 30, 2015, 8:21 p.m. UTC | #6
On Mon, Nov 30, 2015 at 04:34:22PM +0100, Arnd Bergmann wrote:
> On Tuesday 17 November 2015 22:57:52 Arnd Bergmann wrote:
> > On Wednesday 18 November 2015 00:16:54 Yury Norov wrote:
> > > From: Andrew Pinski <apinski@cavium.com>
> > > 
> > > Add a separate syscall-table for ILP32, which dispatches either to native
> > > LP64 system call implementation or to compat-syscalls, as appropriate.
> > 
> > I like it much better than the previous version, thanks for the rework!
> 
> Hi Yuri,
> 
> you must have missed my reply below. Are you still working on ilp32
> or did you drop this thread because you got distracted with something
> else?
> 
> 	Arnd
> 

Hi Arnd,

I didn't miss it, and I continue with ILP32. I really appreciate your
attention and time you spend on ILP32.

There's a tricky bug with signal stack, that Andreas also discovered.
It makes almost all tests that use posix threads crash. I want to fix
it and other bugs before next submission. 

I also update glibc to follow all recommendations, and I want to
upload it together with kernel patches.

BR,
Yury.
Arnd Bergmann Nov. 30, 2015, 9:49 p.m. UTC | #7
On Monday 30 November 2015 23:21:41 Yury Norov wrote:
> On Mon, Nov 30, 2015 at 04:34:22PM +0100, Arnd Bergmann wrote:
> > On Tuesday 17 November 2015 22:57:52 Arnd Bergmann wrote:
> > > On Wednesday 18 November 2015 00:16:54 Yury Norov wrote:
> > > > From: Andrew Pinski <apinski@cavium.com>
> > > > 
> > > > Add a separate syscall-table for ILP32, which dispatches either to native
> > > > LP64 system call implementation or to compat-syscalls, as appropriate.
> > > 
> > > I like it much better than the previous version, thanks for the rework!
> > 
> > Hi Yuri,
> > 
> > you must have missed my reply below. Are you still working on ilp32
> > or did you drop this thread because you got distracted with something
> > else?
> > 
> 
> I didn't miss it, and I continue with ILP32. I really appreciate your
> attention and time you spend on ILP32.
> 
> There's a tricky bug with signal stack, that Andreas also discovered.
> It makes almost all tests that use posix threads crash. I want to fix
> it and other bugs before next submission. 
> 
> I also update glibc to follow all recommendations, and I want to
> upload it together with kernel patches.

Ok. As a reviewer, I find long waits between submissions a bit annoying
because that means I have already forgotten everything I commented on
the previous time.

Could we try to get consensus on how the syscall ABI should look
before you start adapting glibc to another intermediate version?
I think that would also save you duplicate work, as it's always
possible that we misunderstand each other in the review. Also,
when someone asks you questions during a review, please reply to
those questions so we can get a common understanding of the facts
and document that in the mail archives.

	Arnd
Andrew Pinski Dec. 1, 2015, 12:20 a.m. UTC | #8
On Mon, Nov 30, 2015 at 1:49 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Monday 30 November 2015 23:21:41 Yury Norov wrote:
>> On Mon, Nov 30, 2015 at 04:34:22PM +0100, Arnd Bergmann wrote:
>> > On Tuesday 17 November 2015 22:57:52 Arnd Bergmann wrote:
>> > > On Wednesday 18 November 2015 00:16:54 Yury Norov wrote:
>> > > > From: Andrew Pinski <apinski@cavium.com>
>> > > >
>> > > > Add a separate syscall-table for ILP32, which dispatches either to native
>> > > > LP64 system call implementation or to compat-syscalls, as appropriate.
>> > >
>> > > I like it much better than the previous version, thanks for the rework!
>> >
>> > Hi Yuri,
>> >
>> > you must have missed my reply below. Are you still working on ilp32
>> > or did you drop this thread because you got distracted with something
>> > else?
>> >
>>
>> I didn't miss it, and I continue with ILP32. I really appreciate your
>> attention and time you spend on ILP32.
>>
>> There's a tricky bug with signal stack, that Andreas also discovered.
>> It makes almost all tests that use posix threads crash. I want to fix
>> it and other bugs before next submission.
>>
>> I also update glibc to follow all recommendations, and I want to
>> upload it together with kernel patches.
>
> Ok. As a reviewer, I find long waits between submissions a bit annoying
> because that means I have already forgotten everything I commented on
> the previous time.
>
> Could we try to get consensus on how the syscall ABI should look
> before you start adapting glibc to another intermediate version?

Sounds good. I have asked Yury to do that just that and change the
patches according to the current reviews without testing them with a
newer version of glibc.   Note getting consensus would be nice soon as
possible so I can start working again on glibc patches and make sure
the changes that are made to support a slightly different ABI on the
userland side is ok with them.

Thanks,
Andrew

> I think that would also save you duplicate work, as it's always
> possible that we misunderstand each other in the review. Also,
> when someone asks you questions during a review, please reply to
> those questions so we can get a common understanding of the facts
> and document that in the mail archives.
>
>         Arnd
Yury Norov Dec. 1, 2015, 12:40 a.m. UTC | #9
On Mon, Nov 30, 2015 at 10:49:43PM +0100, Arnd Bergmann wrote:
> 
> Could we try to get consensus on how the syscall ABI should look
> before you start adapting glibc to another intermediate version?
> I think that would also save you duplicate work, as it's always
> possible that we misunderstand each other in the review. Also,
> when someone asks you questions during a review, please reply to
> those questions so we can get a common understanding of the facts
> and document that in the mail archives.
> 
> 	Arnd

This is full syscall table for ILP32, how it looks right now.
I collected all comments here and do rework according it.
Any comments?

BR,
Yury.

[0] = compat_sys_io_setup,			
[1] = sys_io_destroy,
[2] = compat_sys_io_submit,
[3] = sys_io_cancel,
[4] = sys_io_getevents,			wrong timespec_t and aio_context_t here
[5] = sys_setxattr,
[6] = sys_lsetxattr,
[7] = sys_fsetxattr,
[8] = sys_getxattr,
[9] = sys_lgetxattr,
[10] = sys_fgetxattr,
[11] = sys_listxattr,
[12] = sys_llistxattr,
[13] = sys_flistxattr,
[14] = sys_removexattr,
[15] = sys_lremovexattr,
[16] = sys_fremovexattr,
[17] = sys_getcwd,
[18] = sys_lookup_dcookie,
[19] = sys_eventfd2,
[20] = sys_epoll_create1,
[21] = sys_epoll_ctl,
[22] = sys_epoll_pwait,			way round: takes sigset_t check BE
[23] = sys_dup,
[24] = sys_dup3,
[25] = compat_sys_fcntl,		uses compat_off_t, not compat_loff_t
[26] = sys_inotify_init1,
[27] = sys_inotify_add_watch,
[28] = sys_inotify_rm_watch,
[29] = compat_sys_ioctl,
[30] = sys_ioprio_set,
[31] = sys_ioprio_get,
[32] = sys_flock,
[33] = sys_mknodat,
[34] = sys_mkdirat,
[35] = sys_unlinkat,
[36] = sys_symlinkat,
[37] = sys_linkat,
[38] = sys_renameat,
[39] = sys_umount,
[40] = compat_sys_mount,
[41] = sys_pivot_root,
[42] = sys_ni_syscall,
[43] = compat_sys_statfs64_wrapper,	check wrappers, maybe just statfs64
[44] = compat_sys_fstatfs64_wrapper,				   fstatfs64
[45] = sys_truncate,
[46] = sys_ftruncate,
[47] = sys_fallocate,
[48] = sys_faccessat,
[49] = sys_chdir,
[50] = sys_fchdir,
[51] = sys_chroot,
[52] = sys_fchmod,
[53] = sys_fchmodat,
[54] = sys_fchownat,
[55] = sys_fchown,
[56] = sys_openat,			force O_LARGEFILE in asm-generic/unistd.h
					make tile32 to use compat
[57] = sys_close,
[58] = sys_vhangup,
[59] = sys_pipe2,
[60] = sys_quotactl,
[61] = sys_getdents64,			check: glibc uses linux_dirent64 for
					32-bit architectures, so this looks wrong
[62] = sys_llseek,			use 3-argument lseek
[63] = sys_read,
[64] = sys_write,
[65] = compat_sys_readv,
[66] = compat_sys_writev,
[67] = sys_pread64,
[68] = sys_pwrite64,
[69] = compat_sys_preadv64,		use noncompat
[70] = compat_sys_pwritev64,		use noncompat
[71] = sys_sendfile64,
[72] = compat_sys_pselect6,
[73] = compat_sys_ppoll,
[74] = sys_signalfd4,			takes sigset_t, check BE
[75] = compat_sys_vmsplice,
[76] = sys_splice,
[77] = sys_tee,
[78] = sys_readlinkat,
[79] = ilp32_sys_fstatat64,		use compat. fix glibc
[80] = ilp32_sys_fstat64,		use compat. fix glibc
[81] = sys_sync,
[82] = sys_fsync,
[83] = sys_fdatasync,
[84] = sys_sync_file_range,
[85] = sys_timerfd_create,
[86] = compat_sys_timerfd_settime,
[87] = compat_sys_timerfd_gettime,
[88] = compat_sys_utimensat,
[89] = sys_acct,
[90] = sys_capget,
[91] = sys_capset,
[92] = sys_personality,
[93] = sys_exit,
[94] = sys_exit_group,
[95] = sys_waitid,			check: need a separate wrapper
					to convert rusage but not siginfo
[96] = sys_set_tid_address,
[97] = sys_unshare,
[98] = compat_sys_futex,
[99] = compat_sys_set_robust_list,
[100] = compat_sys_get_robust_list,
[101] = compat_sys_nanosleep,
[102] = compat_sys_getitimer,
[103] = compat_sys_setitimer,
[104] = compat_sys_kexec_load,
[105] = sys_init_module,
[106] = sys_delete_module,
[107] = sys_timer_create,		check 64-bit sigevent, maybe use compat
[108] = sys_timer_gettime,		timespec
[109] = sys_timer_getoverrun,
[110] = sys_timer_settime,		timespec
[111] = sys_timer_delete,
[112] = compat_sys_clock_settime,
[113] = compat_sys_clock_gettime,
[114] = sys_clock_getres,		timespec
[115] = sys_clock_nanosleep,		timespec (?)
[116] = sys_syslog,
[117] = sys_ptrace,
[118] = sys_sched_setparam,
[119] = sys_sched_setscheduler,
[120] = sys_sched_getscheduler,
[121] = sys_sched_getparam,
[122] = compat_sys_sched_setaffinity,
[123] = compat_sys_sched_getaffinity,
[124] = sys_sched_yield,
[125] = sys_sched_get_priority_max,
[126] = sys_sched_get_priority_min,
[127] = sys_sched_rr_get_interval,	timespec
[128] = sys_restart_syscall,
[129] = sys_kill,
[130] = sys_tkill,
[131] = sys_tgkill,
[132] = sys_sigaltstack,
[133] = sys_rt_sigsuspend,		takes sigset_t check BE
[134] = sys_rt_sigaction,
[135] = sys_rt_sigprocmask,		takes sigset_t check BE
[136] = sys_rt_sigpending,		takes sigset_t check BE
[137] = sys_rt_sigtimedwait,		wrapper? 64-bit siginfo but
					32-bit sigset and timespec
[138] = sys_rt_sigqueueinfo,		this is OK (?)
[139] = sys_rt_sigreturn_wrapper,
[140] = sys_setpriority,
[141] = sys_getpriority,
[142] = sys_reboot,
[143] = sys_setregid,
[144] = sys_setgid,
[145] = sys_setreuid,
[146] = sys_setuid,
[147] = sys_setresuid,
[148] = sys_getresuid,
[149] = sys_setresgid,
[150] = sys_getresgid,
[151] = sys_setfsuid,
[152] = sys_setfsgid,
[153] = compat_sys_times,
[154] = sys_setpgid,
[155] = sys_getpgid,
[156] = sys_getsid,
[157] = sys_setsid,
[158] = sys_getgroups,
[159] = sys_setgroups,
[160] = sys_newuname,
[161] = sys_sethostname,
[162] = sys_setdomainname,
[163] = compat_sys_getrlimit,
[164] = compat_sys_setrlimit,
[165] = compat_sys_getrusage,
[166] = sys_umask,
[167] = sys_prctl,
[168] = sys_getcpu,
[169] = compat_sys_gettimeofday,
[170] = compat_sys_settimeofday,
[171] = compat_sys_adjtimex,
[172] = sys_getpid,
[173] = sys_getppid,
[174] = sys_getuid,
[175] = sys_geteuid,
[176] = sys_getgid,
[177] = sys_getegid,
[178] = sys_gettid,
[179] = compat_sys_sysinfo,
[180] = sys_mq_open,			wrong struct mq_attr
[181] = sys_mq_unlink,
[182] = sys_mq_timedsend,		wrong struct mq_attr
[183] = sys_mq_timedreceive,		wrong struct mq_attr
[184] = sys_mq_notify,
[185] = sys_mq_getsetattr,		wrong struct mq_attr
[186] = sys_msgget,
[187] = ilp32_sys_msgctl,		as is, waiting for generic fix (?)
[188] = compat_sys_msgrcv,
[189] = compat_sys_msgsnd,
[190] = sys_semget,
[191] = ilp32_sys_semctl,		as is, waiting for generic fix (?)
[192] = sys_semtimedop,			timespec_t
[193] = sys_semop,
[194] = sys_shmget,
[195] = ilp32_sys_shmctl,		as is, waiting for generic fix (?)
[196] = sys_shmat,			check (shmat01 test fails if compat)
[197] = sys_shmdt,
[198] = sys_socket,
[199] = sys_socketpair,
[200] = sys_bind,
[201] = sys_listen,
[202] = sys_accept,
[203] = sys_connect,
[204] = sys_getsockname,
[205] = sys_getpeername,
[206] = sys_sendto,
[207] = compat_sys_recvfrom,
[208] = compat_sys_setsockopt,
[209] = compat_sys_getsockopt,
[210] = sys_shutdown,
[211] = compat_sys_sendmsg,
[212] = compat_sys_recvmsg,
[213] = sys_readahead,
[214] = sys_brk,
[215] = sys_munmap,
[216] = sys_mremap,
[217] = sys_add_key,
[218] = sys_request_key,
[219] = compat_sys_keyctl,
[220] = sys_clone,
[221] = compat_sys_execve,
[222] = compat_sys_mmap2_wrapper, 	check. maybe just mmap2
[223] = sys_fadvise64_64,
[224] = sys_swapon,
[225] = sys_swapoff,
[226] = sys_mprotect,
[227] = sys_msync,
[228] = sys_mlock,
[229] = sys_munlock,
[230] = sys_mlockall,
[231] = sys_munlockall,
[232] = sys_mincore,
[233] = sys_madvise,
[234] = sys_remap_file_pages,
[235] = compat_sys_mbind,
[236] = compat_sys_get_mempolicy,
[237] = compat_sys_set_mempolicy,
[238] = compat_sys_migrate_pages,
[239] = compat_sys_move_pages,
[240] = sys_rt_tgsigqueueinfo,
[241] = sys_perf_event_open,
[242] = sys_accept4,
[243] = compat_sys_recvmmsg,
[260] = compat_sys_wait4,
[261] = sys_prlimit64,
[262] = sys_fanotify_init,
[263] = sys_fanotify_mark,
[264] = sys_name_to_handle_at,
[265] = sys_open_by_handle_at,		force O_LARGEFILE in asm-generic/unistd.h
					make tile32 to use compat
[266] = sys_clock_adjtime,		wrong timex structure
[267] = sys_syncfs,
[268] = sys_setns,
[269] = compat_sys_sendmmsg,
[270] = compat_sys_process_vm_readv,
[271] = compat_sys_process_vm_writev,
[272] = sys_kcmp,
[273] = sys_finit_module,
[274] = sys_sched_setattr,
[275] = sys_sched_getattr,
[276] = sys_renameat2,
[277] = sys_seccomp,
[278] = sys_getrandom,
[279] = sys_memfd_create,
[280] = sys_bpf,
[281] = sys_execveat,			check: wrong struct user_arg_ptr
[282] = sys_userfaultfd,
[283] = sys_membarrier,
Andreas Schwab Dec. 1, 2015, 9:20 a.m. UTC | #10
Yury Norov <ynorov@caviumnetworks.com> writes:

> There's a tricky bug with signal stack, that Andreas also discovered.

That was only a confusion about the compat state of sys_rt_sigaction.
It just requires making sure glibc uses the correct (64bit layout)
struct kernel_sigaction.

Andreas.
Arnd Bergmann Dec. 1, 2015, 10:22 a.m. UTC | #11
On Tuesday 01 December 2015 10:20:59 Andreas Schwab wrote:
> Yury Norov <ynorov@caviumnetworks.com> writes:
> 
> > There's a tricky bug with signal stack, that Andreas also discovered.
> 
> That was only a confusion about the compat state of sys_rt_sigaction.
> It just requires making sure glibc uses the correct (64bit layout)
> struct kernel_sigaction.

I don't think we need to use the 64-bit version of sigaction, both
kernel and libc are simpler if we use the normal 32-bit version.

We should always default to using the generic 32-bit structures
unless there is a strong reason not to.

	Arnd
Arnd Bergmann Dec. 1, 2015, 10:26 a.m. UTC | #12
On Tuesday 01 December 2015 03:40:09 Yury Norov wrote:
> On Mon, Nov 30, 2015 at 10:49:43PM +0100, Arnd Bergmann wrote:
> > 
> > Could we try to get consensus on how the syscall ABI should look
> > before you start adapting glibc to another intermediate version?
> > I think that would also save you duplicate work, as it's always
> > possible that we misunderstand each other in the review. Also,
> > when someone asks you questions during a review, please reply to
> > those questions so we can get a common understanding of the facts
> > and document that in the mail archives.
> > 
> >       Arnd
> 
> This is full syscall table for ILP32, how it looks right now.
> I collected all comments here and do rework according it.
> Any comments?

It might be easier to start the other way round: rather than
annotating the things that might be wrong, remove all the overrides
and add back only the 64-bit syscalls that we want instead of the
32-bit variants when you find that they are absolutely needed.

> [0] = compat_sys_io_setup,                      
> [1] = sys_io_destroy,
> [2] = compat_sys_io_submit,
> [3] = sys_io_cancel,
> [4] = sys_io_getevents,                 wrong timespec_t and aio_context_t here
> [5] = sys_setxattr,
> [6] = sys_lsetxattr,
> [7] = sys_fsetxattr,
> [8] = sys_getxattr,
> [9] = sys_lgetxattr,
> [10] = sys_fgetxattr,
> [11] = sys_listxattr,
> [12] = sys_llistxattr,
> [13] = sys_flistxattr,
> [14] = sys_removexattr,
> [15] = sys_lremovexattr,
> [16] = sys_fremovexattr,
> [17] = sys_getcwd,
> [18] = sys_lookup_dcookie,
> [19] = sys_eventfd2,
> [20] = sys_epoll_create1,
> [21] = sys_epoll_ctl,
> [22] = sys_epoll_pwait,                 way round: takes sigset_t check BE
> [23] = sys_dup,
> [24] = sys_dup3,
> [25] = compat_sys_fcntl,                uses compat_off_t, not compat_loff_t
> [26] = sys_inotify_init1,
> [27] = sys_inotify_add_watch,

I was looking for your latest arch/arm64/kernel/sys_ilp32.c version. I'm
not that interested in the ones that are just using the normal compat API,
just the ones that you still think we have to override, and why that
override is required.

	Arnd
Andreas Schwab Dec. 1, 2015, 11:01 a.m. UTC | #13
Arnd Bergmann <arnd@arndb.de> writes:

> On Tuesday 01 December 2015 10:20:59 Andreas Schwab wrote:
>> Yury Norov <ynorov@caviumnetworks.com> writes:
>> 
>> > There's a tricky bug with signal stack, that Andreas also discovered.
>> 
>> That was only a confusion about the compat state of sys_rt_sigaction.
>> It just requires making sure glibc uses the correct (64bit layout)
>> struct kernel_sigaction.
>
> I don't think we need to use the 64-bit version of sigaction, both
> kernel and libc are simpler if we use the normal 32-bit version.

Since glibc has to do the conversion anyway (due to sigset_t), using the
64bit layout avoids a second conversion in the kernel.

> We should always default to using the generic 32-bit structures
> unless there is a strong reason not to.

The goal should be to avoid conversion layers where it makes sense.

Andreas.
Arnd Bergmann Dec. 1, 2015, 11:30 a.m. UTC | #14
On Tuesday 01 December 2015 12:01:12 Andreas Schwab wrote:
> Arnd Bergmann <arnd@arndb.de> writes:
> 
> > On Tuesday 01 December 2015 10:20:59 Andreas Schwab wrote:
> >> Yury Norov <ynorov@caviumnetworks.com> writes:
> >> 
> >> > There's a tricky bug with signal stack, that Andreas also discovered.
> >> 
> >> That was only a confusion about the compat state of sys_rt_sigaction.
> >> It just requires making sure glibc uses the correct (64bit layout)
> >> struct kernel_sigaction.
> >
> > I don't think we need to use the 64-bit version of sigaction, both
> > kernel and libc are simpler if we use the normal 32-bit version.
> 
> Since glibc has to do the conversion anyway (due to sigset_t), using the
> 64bit layout avoids a second conversion in the kernel.

I don't get the part about sigset_t. Why would glibc want to use the
64-bit layout? This one looks like one of the cases where we absolutely
want to use the 32-bit layout or otherwise get into big trouble if
we ever want to support native ILP32 kernels.

	Arnd
Yury Norov Dec. 1, 2015, 9:29 p.m. UTC | #15
> > +#define compat_sys_shmat               sys_shmat
> 
> What's special about compat_sys_shmat?
> 

It's about SHMLBA definition.
For aarch32 glibc defines it as (__getpagesize () << 2).
For ILP32 there's no definition, and so generic one is used: (__getpagesize ()).

In kernel, for ARM64, COMPAT_SHMLBA defined just as 0x4000. Both
compat and non-compat shmat syscalls pass identical arguments to
do_shmat, except shmlba. Effectively, library expects shmlba to
be 0x1000, as sys_shmat does. And compat_sys_shmat expects 0x4000.

I think, both kernel and library parts are to be fixed. In library
we'd use definition identical to ARM. For kernel we'd use compat
syscall.

My question. Why aarch64 defines COMPAT_SHMLBA as 0x4000? If there's
no specific reason for it, it looks like a bug, and we should
define it like in arch/arm:
        #define SHMLBA  (4 * PAGE_SIZE)          /* attach addr a multiple of this */

Maybe that's why AARCH32 is limited to 4K pages in config.
Arnd Bergmann Dec. 1, 2015, 10:39 p.m. UTC | #16
On Wednesday 02 December 2015 00:29:04 Yury Norov wrote:
> > > +#define compat_sys_shmat               sys_shmat
> > 
> > What's special about compat_sys_shmat?
> > 
> 
> It's about  	 definition.
> For aarch32 glibc defines it as (__getpagesize () << 2).
> For ILP32 there's no definition, and so generic one is used: (__getpagesize ()).

Ok, got it.

> In kernel, for ARM64, COMPAT_SHMLBA defined just as 0x4000. Both
> compat and non-compat shmat syscalls pass identical arguments to
> do_shmat, except shmlba. Effectively, library expects shmlba to
> be 0x1000, as sys_shmat does. And compat_sys_shmat expects 0x4000.
> 
> I think, both kernel and library parts are to be fixed. In library
> we'd use definition identical to ARM. For kernel we'd use compat
> syscall.

I'm not sure I understand this part. What changes specifically do we need?

It sounds like shmat is one of the cases we an override makes sense
and we should use sys_shmat with PAGE_SIZE for aarch64 ilp32 mode.

> My question. Why aarch64 defines COMPAT_SHMLBA as 0x4000? If there's
> no specific reason for it, it looks like a bug, and we should
> define it like in arch/arm:
>         #define SHMLBA  (4 * PAGE_SIZE)          /* attach addr a multiple of this */
> 
> Maybe that's why AARCH32 is limited to 4K pages in config.

The reason why AARCH32 is limited to 4K pages is the alignment of
ELF sections, and with newer binutils versions, that is no longer
a problem, other than the bug you just found.

We normally assume that the page size on ARM is fixed to 4K, so
there might be user space that just hardcodes 16K SHMLBA rather
than using the glibc (__getpagesize () << 2) definition. Changing
the kernel would break those programs, but you can also argue that
they are already broken today.

	Arnd
Yury Norov Dec. 1, 2015, 11:35 p.m. UTC | #17
On Tue, Dec 01, 2015 at 11:39:42PM +0100, Arnd Bergmann wrote:
> On Wednesday 02 December 2015 00:29:04 Yury Norov wrote:
> > > > +#define compat_sys_shmat               sys_shmat
> > > 
> > > What's special about compat_sys_shmat?
> > > 
> > 
> > It's about  	 definition.
> > For aarch32 glibc defines it as (__getpagesize () << 2).
> > For ILP32 there's no definition, and so generic one is used: (__getpagesize ()).
> 
> Ok, got it.
> 
> > In kernel, for ARM64, COMPAT_SHMLBA defined just as 0x4000. Both
> > compat and non-compat shmat syscalls pass identical arguments to
> > do_shmat, except shmlba. Effectively, library expects shmlba to
> > be 0x1000, as sys_shmat does. And compat_sys_shmat expects 0x4000.
> > 
> > I think, both kernel and library parts are to be fixed. In library
> > we'd use definition identical to ARM. For kernel we'd use compat
> > syscall.
> 
> I'm not sure I understand this part. What changes specifically do we need?
> 

For kernel:

        diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h
        index 4df608a..e368a55 100644
        --- a/arch/arm64/include/asm/shmparam.h
        +++ b/arch/arm64/include/asm/shmparam.h
        @@ -21,7 +21,7 @@
          * alignment value. Since we don't have aliasing D-caches, the rest of
          * the time we can safely use PAGE_SIZE.
          */
        -#define COMPAT_SHMLBA	0x4000
        +#define COMPAT_SHMLBA	(4 * PAGE_SIZE)
         
         #include <asm-generic/shmparam.h>
         
        diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c
        index c5bc712..877bedf 100644
        --- a/arch/arm64/kernel/sys_ilp32.c
        +++ b/arch/arm64/kernel/sys_ilp32.c
        @@ -42,15 +42,12 @@ asmlinkage long sys_rt_sigreturn_wrapper(void);
         #define compat_sys_pwrite64            sys_pwrite64
         #define compat_sys_readahead           sys_readahead
         #define compat_sys_rt_sigaction        sys_rt_sigaction
        -#define compat_sys_shmat               sys_shmat
         #define compat_sys_sync_file_range     sys_sync_file_range
         #define compat_sys_truncate64          sys_truncate
         #define compat_sys_sigaltstack         sys_sigaltstack
 
For library - just create a header in ilp32 directory that defines
SHMLBA exactly as arm: __getpagesize () << 2

> It sounds like shmat is one of the cases we an override makes sense
> and we should use sys_shmat with PAGE_SIZE for aarch64 ilp32 mode.
> 

[...]

> We normally assume that the page size on ARM is fixed to 4K, so
> there might be user space that just hardcodes 16K SHMLBA 

It means that we should use compat_sys_shmat because hardcoded
userspace may have a chance to work for 4K pages. If we'll use
sys_shmat (and so 4K SHMLBA), we'll definitely make hardcoded
userspace broken. Non-hardcoded userspace will work anyway.

We can describe in documentation that 4k pages are prefferable.

Yury
Yury Norov Dec. 2, 2015, 12:24 a.m. UTC | #18
On Tue, Dec 01, 2015 at 12:30:56PM +0100, Arnd Bergmann wrote:
> On Tuesday 01 December 2015 12:01:12 Andreas Schwab wrote:
> > Arnd Bergmann <arnd@arndb.de> writes:
> > 
> > > On Tuesday 01 December 2015 10:20:59 Andreas Schwab wrote:
> > >> Yury Norov <ynorov@caviumnetworks.com> writes:
> > >> 
> > >> > There's a tricky bug with signal stack, that Andreas also discovered.
> > >> 
> > >> That was only a confusion about the compat state of sys_rt_sigaction.
> > >> It just requires making sure glibc uses the correct (64bit layout)
> > >> struct kernel_sigaction.
> > >
> > > I don't think we need to use the 64-bit version of sigaction, both
> > > kernel and libc are simpler if we use the normal 32-bit version.
> > 
> > Since glibc has to do the conversion anyway (due to sigset_t), using the
> > 64bit layout avoids a second conversion in the kernel.
> 
> I don't get the part about sigset_t. Why would glibc want to use the
> 64-bit layout? This one looks like one of the cases where we absolutely
> want to use the 32-bit layout or otherwise get into big trouble if
> we ever want to support native ILP32 kernels.
> 
> 	Arnd

So, we drop patch #6, and use 32-bit layout for all signal structures.

Correct?
Arnd Bergmann Dec. 2, 2015, 8:37 a.m. UTC | #19
On Wednesday 02 December 2015 02:35:03 Yury Norov wrote:
> On Tue, Dec 01, 2015 at 11:39:42PM +0100, Arnd Bergmann wrote:
> > On Wednesday 02 December 2015 00:29:04 Yury Norov wrote:
> > I'm not sure I understand this part. What changes specifically do we need?
> > 
> 
> For kernel:
> 
>         diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h
>         index 4df608a..e368a55 100644
>         --- a/arch/arm64/include/asm/shmparam.h
>         +++ b/arch/arm64/include/asm/shmparam.h
>         @@ -21,7 +21,7 @@
>           * alignment value. Since we don't have aliasing D-caches, the rest of
>           * the time we can safely use PAGE_SIZE.
>           */
>         -#define COMPAT_SHMLBA	0x4000
>         +#define COMPAT_SHMLBA	(4 * PAGE_SIZE)
>          
>          #include <asm-generic/shmparam.h>
>          
>         diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c
>         index c5bc712..877bedf 100644
>         --- a/arch/arm64/kernel/sys_ilp32.c
>         +++ b/arch/arm64/kernel/sys_ilp32.c
>         @@ -42,15 +42,12 @@ asmlinkage long sys_rt_sigreturn_wrapper(void);
>          #define compat_sys_pwrite64            sys_pwrite64
>          #define compat_sys_readahead           sys_readahead
>          #define compat_sys_rt_sigaction        sys_rt_sigaction
>         -#define compat_sys_shmat               sys_shmat
>          #define compat_sys_sync_file_range     sys_sync_file_range
>          #define compat_sys_truncate64          sys_truncate
>          #define compat_sys_sigaltstack         sys_sigaltstack
>  
> For library - just create a header in ilp32 directory that defines
> SHMLBA exactly as arm: __getpagesize () << 2

I think we both reversed our positions here ;-)

The 4*PAGE_SIZE on ARM is an architecture specific oddity, I believe
to work around aliasing caches on ARMv6. As no other architecture does
this, we're probably better off not duplicating it for aarch64-ilp32
and just use sys_shmat as your v6 patch does.

> > It sounds like shmat is one of the cases we an override makes sense
> > and we should use sys_shmat with PAGE_SIZE for aarch64 ilp32 mode.
> > 
> 
> [...]
> 
> > We normally assume that the page size on ARM is fixed to 4K, so
> > there might be user space that just hardcodes 16K SHMLBA 
> 
> It means that we should use compat_sys_shmat because hardcoded
> userspace may have a chance to work for 4K pages. If we'll use
> sys_shmat (and so 4K SHMLBA), we'll definitely make hardcoded
> userspace broken. Non-hardcoded userspace will work anyway.
> 
> We can describe in documentation that 4k pages are prefferable.

The hardcoded user space would only apply to old source code that
specifically tries to adapt to what ARM does, but gets it wrong,
If we have source code that is converted from x86 or mips and
hardcodes anything, it's more likely to be the normal __getpagesize().

	Arnd
Yury Norov Dec. 2, 2015, 9:15 a.m. UTC | #20
On Wed, Dec 02, 2015 at 09:37:05AM +0100, Arnd Bergmann wrote:
> On Wednesday 02 December 2015 02:35:03 Yury Norov wrote:
> > On Tue, Dec 01, 2015 at 11:39:42PM +0100, Arnd Bergmann wrote:
> > > On Wednesday 02 December 2015 00:29:04 Yury Norov wrote:
> > > I'm not sure I understand this part. What changes specifically do we need?
> > > 
> > 
> > For kernel:
> > 
> >         diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h
> >         index 4df608a..e368a55 100644
> >         --- a/arch/arm64/include/asm/shmparam.h
> >         +++ b/arch/arm64/include/asm/shmparam.h
> >         @@ -21,7 +21,7 @@
> >           * alignment value. Since we don't have aliasing D-caches, the rest of
> >           * the time we can safely use PAGE_SIZE.
> >           */
> >         -#define COMPAT_SHMLBA	0x4000
> >         +#define COMPAT_SHMLBA	(4 * PAGE_SIZE)
> >          
> >          #include <asm-generic/shmparam.h>
> >          
> >         diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c
> >         index c5bc712..877bedf 100644
> >         --- a/arch/arm64/kernel/sys_ilp32.c
> >         +++ b/arch/arm64/kernel/sys_ilp32.c
> >         @@ -42,15 +42,12 @@ asmlinkage long sys_rt_sigreturn_wrapper(void);
> >          #define compat_sys_pwrite64            sys_pwrite64
> >          #define compat_sys_readahead           sys_readahead
> >          #define compat_sys_rt_sigaction        sys_rt_sigaction
> >         -#define compat_sys_shmat               sys_shmat
> >          #define compat_sys_sync_file_range     sys_sync_file_range
> >          #define compat_sys_truncate64          sys_truncate
> >          #define compat_sys_sigaltstack         sys_sigaltstack
> >  
> > For library - just create a header in ilp32 directory that defines
> > SHMLBA exactly as arm: __getpagesize () << 2
> 
> I think we both reversed our positions here ;-)
> 
> The 4*PAGE_SIZE on ARM is an architecture specific oddity, I believe
> to work around aliasing caches on ARMv6. As no other architecture does
> this, we're probably better off not duplicating it for aarch64-ilp32
> and just use sys_shmat as your v6 patch does.
> 
> > > It sounds like shmat is one of the cases we an override makes sense
> > > and we should use sys_shmat with PAGE_SIZE for aarch64 ilp32 mode.
> > > 
> > 
> > [...]
> > 
> > > We normally assume that the page size on ARM is fixed to 4K, so
> > > there might be user space that just hardcodes 16K SHMLBA 
> > 
> > It means that we should use compat_sys_shmat because hardcoded
> > userspace may have a chance to work for 4K pages. If we'll use
> > sys_shmat (and so 4K SHMLBA), we'll definitely make hardcoded
> > userspace broken. Non-hardcoded userspace will work anyway.
> > 
> > We can describe in documentation that 4k pages are prefferable.
> 
> The hardcoded user space would only apply to old source code that
> specifically tries to adapt to what ARM does, but gets it wrong,
> If we have source code that is converted from x86 or mips and
> hardcodes anything, it's more likely to be the normal __getpagesize().
> 
> 	Arnd

Hmm... OK. Let's have non-compat shmat here...
Yury Norov Dec. 2, 2015, 10:01 a.m. UTC | #21
On Tue, Nov 17, 2015 at 10:57:52PM +0100, Arnd Bergmann wrote:

It looks, all them are needed.

> > +asmlinkage long compat_sys_mmap2_wrapper(void);
> > +#define sys_mmap2                      compat_sys_mmap2_wrapper

This wrapper checks alignement of pgoff, if page sise is greater than
4K

> > +asmlinkage long compat_sys_fstatfs64_wrapper(void);
> > +#define compat_sys_fstatfs64    compat_sys_fstatfs64_wrapper
> > +asmlinkage long compat_sys_statfs64_wrapper(void);
> > +#define compat_sys_statfs64             compat_sys_statfs64_wrapper

This two hacks fix an alignment issue. I didn't check all details but
it looks like sizeof(compat_statfs64) is different in kernel and library.
And this size is passed as 2nd argument to compat syscalls. We can
handle it in userspace but I don't see any advantage. 

All this handlers are shared between ilp32 and aarch32.
This is best we came up, as it doesn't add new hacks, but reuses old
ones...
Arnd Bergmann Dec. 2, 2015, 10:03 a.m. UTC | #22
On Wednesday 02 December 2015 03:24:30 Yury Norov wrote:
> On Tue, Dec 01, 2015 at 12:30:56PM +0100, Arnd Bergmann wrote:
> > On Tuesday 01 December 2015 12:01:12 Andreas Schwab wrote:
> > > Arnd Bergmann <arnd@arndb.de> writes:
> > > 
> > > > On Tuesday 01 December 2015 10:20:59 Andreas Schwab wrote:
> > > >> Yury Norov <ynorov@caviumnetworks.com> writes:
> > > >> 
> > > >> > There's a tricky bug with signal stack, that Andreas also discovered.
> > > >> 
> > > >> That was only a confusion about the compat state of sys_rt_sigaction.
> > > >> It just requires making sure glibc uses the correct (64bit layout)
> > > >> struct kernel_sigaction.
> > > >
> > > > I don't think we need to use the 64-bit version of sigaction, both
> > > > kernel and libc are simpler if we use the normal 32-bit version.
> > > 
> > > Since glibc has to do the conversion anyway (due to sigset_t), using the
> > > 64bit layout avoids a second conversion in the kernel.
> > 
> > I don't get the part about sigset_t. Why would glibc want to use the
> > 64-bit layout? This one looks like one of the cases where we absolutely
> > want to use the 32-bit layout or otherwise get into big trouble if
> > we ever want to support native ILP32 kernels.
> > 
> >       Arnd
> 
> So, we drop patch #6, and use 32-bit layout for all signal structures.
> 
> Correct?

If the 32-bit layout is sane, yes. Andrew can surely judge this better
than I can. The goal should be to have the most straightforward
implementation for both kernel and libc, and reuse either the compat
signal handling or the native one as much as we can.

Looking at the API, I find that ARM fortunately uses all the standard
definitions for sigset_t, struct siginfo/siginfo_t, union sigval/sigval_t,
struct sigaction, struct sigaltstack/stack_t. It took me a while to
figure out that the definitions in the uapi headers are different but
actually unused (probably remaining from libc5 days or earlier).
The signal numbers are all the same, and so are the 

The SA_* flags are all the same too, except for SA_THIRTYTWO, but
that is not implemented by the compat handling because we do not
support 26 bit tasks anyway.

struct sigcontext is obviously different because of the changed
register set, and we will have to deal with that in some form in the
compat code. struct ucontext and struct rt_sigframe includes that,
which I assume is the main part we need to handle correctly
by having ilp32 specific setup_rt_frame/sys_rt_sigreturn functions.

One small difference is the handling of MINSIGSTKSZ, which would
currently work for ilp32, but I noticed that commit c9692657c032
("arm64: Fix MINSIGSTKSZ and SIGSTKSZ") from Manjeet Pawar broke
compat mode: Any 32-bit (aarch32) task that passes an altstack smaller
than 5120 bytes now gets rejected by the kernel, whereas the native
32-bit kernel checks for 2048 bytes instead. We need to fix the
existing compat case here without breaking the ilp32 case.

With all that said, I would assume that just having separate
sigcontext/ucontext/rt_sigframe and reusing the compat code otherwise,
we are overall better off than with using the native 64-bit signal
handling with everything that ties in (waitid, epoll_pwait,
signalfd, ppoll, ...), but we can discuss this further if someone
sees major downsides of that compared to your existing approach.
In particular, I don't know what the difference means for gdb,
glibc or strace.


	Arnd
Arnd Bergmann Dec. 2, 2015, 11:03 a.m. UTC | #23
On Wednesday 02 December 2015 13:01:53 Yury Norov wrote:
> On Tue, Nov 17, 2015 at 10:57:52PM +0100, Arnd Bergmann wrote:
> 
> It looks, all them are needed.
> 
> > > +asmlinkage long compat_sys_mmap2_wrapper(void);
> > > +#define sys_mmap2                      compat_sys_mmap2_wrapper
> 
> This wrapper checks alignement of pgoff, if page sise is greater than
> 4K

Ok.

> > > +asmlinkage long compat_sys_fstatfs64_wrapper(void);
> > > +#define compat_sys_fstatfs64    compat_sys_fstatfs64_wrapper
> > > +asmlinkage long compat_sys_statfs64_wrapper(void);
> > > +#define compat_sys_statfs64             compat_sys_statfs64_wrapper
> 
> This two hacks fix an alignment issue. I didn't check all details but
> it looks like sizeof(compat_statfs64) is different in kernel and library.
> And this size is passed as 2nd argument to compat syscalls. We can
> handle it in userspace but I don't see any advantage. 

Ah, so it's a hack for OABI compatibility. I wonder if we can just
drop the wrapper and the ARCH_PACK_COMPAT_STATFS64 definition
on arm64 as we don't handle OABI user space anyway.

Maybe it's too risky, when there is someone that added the packing
in user space on EABI after all.
 
> All this handlers are shared between ilp32 and aarch32.
> This is best we came up, as it doesn't add new hacks, but reuses old
> ones...

Ok.

	Arnd
Catalin Marinas Dec. 3, 2015, 5:17 p.m. UTC | #24
On Wed, Dec 02, 2015 at 03:24:30AM +0300, Yury Norov wrote:
> On Tue, Dec 01, 2015 at 12:30:56PM +0100, Arnd Bergmann wrote:
> > On Tuesday 01 December 2015 12:01:12 Andreas Schwab wrote:
> > > Arnd Bergmann <arnd@arndb.de> writes:
> > > 
> > > > On Tuesday 01 December 2015 10:20:59 Andreas Schwab wrote:
> > > >> Yury Norov <ynorov@caviumnetworks.com> writes:
> > > >> 
> > > >> > There's a tricky bug with signal stack, that Andreas also discovered.
> > > >> 
> > > >> That was only a confusion about the compat state of sys_rt_sigaction.
> > > >> It just requires making sure glibc uses the correct (64bit layout)
> > > >> struct kernel_sigaction.
> > > >
> > > > I don't think we need to use the 64-bit version of sigaction, both
> > > > kernel and libc are simpler if we use the normal 32-bit version.
> > > 
> > > Since glibc has to do the conversion anyway (due to sigset_t), using the
> > > 64bit layout avoids a second conversion in the kernel.
> > 
> > I don't get the part about sigset_t. Why would glibc want to use the
> > 64-bit layout? This one looks like one of the cases where we absolutely
> > want to use the 32-bit layout or otherwise get into big trouble if
> > we ever want to support native ILP32 kernels.
> > 
> > 	Arnd
> 
> So, we drop patch #6, and use 32-bit layout for all signal structures.
> 
> Correct?

I would vote for this as well. I think it probably removes patch 12 as
well (COMPAT_USE_NATIVE_SIGINFO).
Catalin Marinas Dec. 3, 2015, 5:47 p.m. UTC | #25
On Wed, Dec 02, 2015 at 12:29:04AM +0300, Yury Norov wrote:
> My question. Why aarch64 defines COMPAT_SHMLBA as 0x4000?

This was done to match the arch/arm value of 4 * 4K. The historical
32-bit reason for 4 pages is to cope with aliasing VIPT caches (see
https://git.kernel.org/cgit/linux/kernel/git/tglx/history.git/commit?id=4197692eef113eeb8e3e413cc70993a5e667e5b8)

> If there's
> no specific reason for it, it looks like a bug, and we should
> define it like in arch/arm:
>         #define SHMLBA  (4 * PAGE_SIZE)          /* attach addr a multiple of this */

I guess you meant COMPAT_SHMLBA. I'm not sure there is much value in
keeping 4*PAGE_SIZE for larger page sizes but I agree that the current
16K value doesn't work well with 64K pages.
Yury Norov Dec. 3, 2015, 6:14 p.m. UTC | #26
On Thu, Dec 03, 2015 at 05:47:08PM +0000, Catalin Marinas wrote:
> On Wed, Dec 02, 2015 at 12:29:04AM +0300, Yury Norov wrote:
> > My question. Why aarch64 defines COMPAT_SHMLBA as 0x4000?
> 
> This was done to match the arch/arm value of 4 * 4K. The historical
> 32-bit reason for 4 pages is to cope with aliasing VIPT caches (see
> https://git.kernel.org/cgit/linux/kernel/git/tglx/history.git/commit?id=4197692eef113eeb8e3e413cc70993a5e667e5b8)
> 
> > If there's
> > no specific reason for it, it looks like a bug, and we should
> > define it like in arch/arm:
> >         #define SHMLBA  (4 * PAGE_SIZE)          /* attach addr a multiple of this */
> 
> I guess you meant COMPAT_SHMLBA.

I citated arm code here. In aarch64 it's COMPAT_SHMLBA, of course.

> I'm not sure there is much value in
> keeping 4*PAGE_SIZE for larger page sizes but I agree that the current
> 16K value doesn't work well with 64K pages.

Arnd told there will be a workaround for arm v6 caches. Than this
header will not be needed at all. Until that, this is simpliest
fix as it doesn't affect userspace.

> 
> -- 
> Catalin
Arnd Bergmann Dec. 3, 2015, 8:42 p.m. UTC | #27
On Thursday 03 December 2015 21:14:41 Yury Norov wrote:
> 
> > I'm not sure there is much value in
> > keeping 4*PAGE_SIZE for larger page sizes but I agree that the current
> > 16K value doesn't work well with 64K pages.
> 
> Arnd told there will be a workaround for arm v6 caches. Than this
> header will not be needed at all. Until that, this is simpliest
> fix as it doesn't affect userspace.

I think we should do whatever matches user space: There is no harm
in going to 256KB instead of 64KB if current glibc already uses
4*getpagetsize() for a kernel with native 64K pages.

	Arnd
diff mbox

Patch

diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 4c2cbbc..696e638 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -13,13 +13,16 @@ 
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
+#ifdef CONFIG_ARM64_ILP32
+#define __ARCH_WANT_COMPAT_SYS_PREADV64
+#define __ARCH_WANT_COMPAT_SYS_PWRITEV64
+#endif
 #ifdef CONFIG_AARCH32_EL0
 #define __ARCH_WANT_COMPAT_SYS_GETDENTS64
 #define __ARCH_WANT_COMPAT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
@@ -39,6 +42,8 @@ 
 #define __NR_compat_sigreturn		119
 #define __NR_compat_rt_sigreturn	173
 
+#define __ARCH_WANT_SYS_LLSEEK
+
 /*
  * The following SVCs are ARM private.
  */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 35a59af..837d730 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -24,6 +24,7 @@  arm64-obj-$(CONFIG_AARCH32_EL0)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o entry32.o		\
 					   ../../arm/kernel/opcodes.o
 arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o
+arm64-obj-$(CONFIG_ARM64_ILP32)		+= sys_ilp32.o
 arm64-obj-$(CONFIG_COMPAT)		+= entry32-common.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
 arm64-obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o perf_callchain.o
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 52be5c8..bcd921a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -664,9 +664,13 @@  ENDPROC(ret_from_fork)
  */
 	.align	6
 el0_svc:
-	adrp	stbl, sys_call_table		// load syscall table pointer
 	uxtw	scno, w8			// syscall number in w8
 	mov	sc_nr, #__NR_syscalls
+#ifdef CONFIG_ARM64_ILP32
+	ldr	x16, [tsk, #TI_FLAGS]
+	tbnz	x16, #TIF_32BIT_AARCH64, el0_ilp32_svc // We are using ILP32
+#endif
+	adrp	stbl, sys_call_table		// load syscall table pointer
 el0_svc_naked:					// compat entry point
 	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number
 	enable_dbg_and_irq
@@ -686,6 +690,12 @@  ni_sys:
 	b	ret_fast_syscall
 ENDPROC(el0_svc)
 
+#ifdef CONFIG_ARM64_ILP32
+el0_ilp32_svc:
+	adrp	stbl, sys_call_ilp32_table // load syscall table pointer
+	b el0_svc_naked
+#endif
+
 	/*
 	 * This is the really slow path.  We're going to be doing context
 	 * switches, and waiting for our parent to respond.
diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c
new file mode 100644
index 0000000..c366d92
--- /dev/null
+++ b/arch/arm64/kernel/sys_ilp32.c
@@ -0,0 +1,118 @@ 
+/*
+ * AArch64- ILP32 specific system calls implementation
+ *
+ * Copyright (C) 2015 Cavium Inc.
+ * Author: Andrew Pinski <apinski@cavium.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/msg.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+
+/*
+ * Wrappers to pass the pt_regs argument.
+ */
+asmlinkage long sys_rt_sigreturn_wrapper(void);
+#define compat_sys_rt_sigreturn        sys_rt_sigreturn_wrapper
+
+/* Using non-compat syscalls where necessary */
+#define compat_sys_fadvise64_64        sys_fadvise64_64
+#define compat_sys_fallocate           sys_fallocate
+#define compat_sys_ftruncate64         sys_ftruncate
+#define compat_sys_pread64             sys_pread64
+#define compat_sys_pwrite64            sys_pwrite64
+#define compat_sys_readahead           sys_readahead
+#define compat_sys_rt_sigaction        sys_rt_sigaction
+#define compat_sys_shmat               sys_shmat
+#define compat_sys_sync_file_range     sys_sync_file_range
+#define compat_sys_truncate64          sys_truncate
+#define compat_sys_sigaltstack         sys_sigaltstack
+
+#define compat_sys_io_getevents        sys_io_getevents
+#define compat_sys_lookup_dcookie      sys_lookup_dcookie
+#define compat_sys_epoll_pwait         sys_epoll_pwait
+#define compat_sys_fcntl64             compat_sys_fcntl
+#define compat_sys_preadv              compat_sys_preadv64
+#define compat_sys_signalfd4           sys_signalfd4
+
+#define compat_sys_rt_sigsuspend       sys_rt_sigsuspend
+#define compat_sys_rt_sigprocmask      sys_rt_sigprocmask
+#define compat_sys_rt_sigpending       sys_rt_sigpending
+#define compat_sys_rt_sigqueueinfo     sys_rt_sigqueueinfo
+#define compat_sys_semtimedop          sys_semtimedop
+#define compat_sys_rt_tgsigqueueinfo   sys_rt_tgsigqueueinfo
+
+#define compat_sys_timer_create        sys_timer_create
+#define compat_sys_timer_gettime       sys_timer_gettime
+#define compat_sys_timer_settime       sys_timer_settime
+#define compat_sys_rt_sigtimedwait     sys_rt_sigtimedwait
+
+#define compat_sys_mq_open             sys_mq_open
+#define compat_sys_mq_timedsend        sys_mq_timedsend
+#define compat_sys_mq_timedreceive     sys_mq_timedreceive
+#define compat_sys_mq_getsetattr       sys_mq_getsetattr
+#define compat_sys_mq_open             sys_mq_open
+
+#define compat_sys_open_by_handle_at   sys_open_by_handle_at
+#define compat_sys_clock_adjtime       sys_clock_adjtime
+
+#define compat_sys_openat              sys_openat
+#define compat_sys_getdents64          sys_getdents64
+#define compat_sys_waitid              sys_waitid
+#define compat_sys_timer_settime       sys_timer_settime
+#define compat_sys_sched_rr_get_interval sys_sched_rr_get_interval
+#define compat_sys_execveat            sys_execveat
+
+#define compat_sys_mq_notify           sys_mq_notify
+#define compat_sys_clock_nanosleep     sys_clock_nanosleep
+#define compat_sys_clock_getres        sys_clock_getres
+
+#define sys_lseek                      sys_llseek
+
+asmlinkage long compat_sys_mmap2_wrapper(void);
+#define sys_mmap2                      compat_sys_mmap2_wrapper
+
+asmlinkage long compat_sys_fstatfs64_wrapper(void);
+#define compat_sys_fstatfs64    compat_sys_fstatfs64_wrapper
+asmlinkage long compat_sys_statfs64_wrapper(void);
+#define compat_sys_statfs64             compat_sys_statfs64_wrapper
+
+#define compat_sys_pwritev	       compat_sys_pwritev64
+
+#include <asm/syscall.h>
+
+#undef __SYSCALL
+#undef __SC_COMP
+#undef __SC_3264
+#undef __SC_COMP_3264
+
+#define __SYSCALL_COMPAT
+#define __SYSCALL(nr, sym)	[nr] = sym,
+
+/*
+ * The sys_call_ilp32_table array must be 4K aligned to be accessible from
+ * kernel/entry.S.
+ */
+void *sys_call_ilp32_table[__NR_syscalls] __aligned(4096) = {
+	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};