Linux-aarch64-系统调用-1
Linux系统调用定义相关
相关代码:
- for general arch: sys_*name -> aliased to _se_sys*name -> call _do_sys*name to do actual work
- for arm64 arch: _arm64_sys* -> call _se_sys* -> call _do_sys*name to do actual work
因此,实际的系统调用函数为__do_sys_*()
asmlinkage:声明函数传参不使用寄存器,而使用栈进行。后续为Linux源码中的系统调用
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100// in file : syscalls.h
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
#define SYSCALL_METADATA(sname, nb, ...)
/*
* __MAP - apply a macro to syscall arguments
* __MAP(n, m, t1, a1, t2, a2, ..., tn, an) will expand to
* m(t1, a1), m(t2, a2), ..., m(tn, an)
* The first argument must be equal to the amount of type/name
* pairs given. Note that this list of pairs (i.e. the arguments
* of __MAP starting at the third one) is in the same format as
* for SYSCALL_DEFINE<n>/COMPAT_SYSCALL_DEFINE<n>
*/
#define __MAP0(m,...)
#define __MAP1(m,t,a,...) m(t,a)
#define __MAP2(m,t,a,...) m(t,a), __MAP1(m,__VA_ARGS__)
#define __MAP3(m,t,a,...) m(t,a), __MAP2(m,__VA_ARGS__)
#define __MAP4(m,t,a,...) m(t,a), __MAP3(m,__VA_ARGS__)
#define __MAP5(m,t,a,...) m(t,a), __MAP4(m,__VA_ARGS__)
#define __MAP6(m,t,a,...) m(t,a), __MAP5(m,__VA_ARGS__)
#define __MAP(n,...) __MAP##n(__VA_ARGS__)
#define __SC_DECL(t, a) t a
#define SYSCALL_DEFINEx(x, sname, ...) \
SYSCALL_METADATA(sname, x, __VA_ARGS__) \
__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
// in file:arch\arm64\include\asm\syscall_wrapper.h
#define __SYSCALL_DEFINEx(x, name, ...) \ // arm64定义了自己的__SYSCALL_DEFINEx宏
asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \
{ \
return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \
} \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
__MAP(x,__SC_TEST,__VA_ARGS__); \
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
return ret; \
} \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) // 这一行看着很奇怪,需要结合一个具体的系统调用定义才能看明白
// 这里提取x0-x6
#define SC_ARM64_REGS_TO_ARGS(x, ...) \
__MAP(x,__SC_ARGS \
,,regs->regs[0],,regs->regs[1],,regs->regs[2] \
,,regs->regs[3],,regs->regs[4],,regs->regs[5])
// in file: file.c
SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
{
if (unlikely(newfd == oldfd)) { /* corner case */
struct files_struct *files = current->files;
struct file *f;
int retval = oldfd;
rcu_read_lock();
f = __fget_files_rcu(files, oldfd, 0);
if (!f)
retval = -EBADF;
rcu_read_unlock();
if (f)
fput(f);
return retval;
}
return ksys_dup3(oldfd, newfd, 0);
}
/*
* The asmlinkage stub is aliased to a function named __se_sys_*() which
* sign-extends 32-bit ints to longs whenever needed. The actual work is
* done within __do_sys_*().
*/
#ifndef __SYSCALL_DEFINEx // 不同架构可能定义了自己的__SYSCALL_DEFINEx宏
#define __SYSCALL_DEFINEx(x, name, ...) \
__diag_push(); \
__diag_ignore(GCC, 8, "-Wattribute-alias", \
"Type aliasing is used to sanitize syscall arguments");\
asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_sys##name)))); \
ALLOW_ERROR_INJECTION(sys##name, ERRNO); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\
__MAP(x,__SC_TEST,__VA_ARGS__); \
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
return ret; \
} \
__diag_pop(); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
#endif /* __SYSCALL_DEFINEx */实际使用中会直接调用dup2(),由其触发svc同步异常,最终调用Linux定义的系统调用“服务函数”。其函数原型在相应的头文件[unistd.h]中有声明,但其定义在哪里?
- 显然,在Linux中,它直接使用GNU的C库,因此dup2()的定义在GNU的C库源文件中。下载glibc的源码后,可以看到dup2()的定义
- 可以看到,对于不同的标准/UNIX实现,glibc中也定义了不同的dup2()实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51// in file: sysdeps/posix/dup2.c
int
__dup2 (int fd, int fd2)
{
int save;
if (fd2 < 0
#ifdef OPEN_MAX
|| fd2 >= OPEN_MAX
#endif
)
{
__set_errno (EBADF);
return -1;
}
/* Check if FD is kosher. */
if (fcntl (fd, F_GETFL) < 0)
return -1;
if (fd == fd2)
return fd2;
/* This is not atomic. */
save = errno;
(void) close (fd2);
__set_errno (save);
return fcntl (fd, F_DUPFD, fd2);
}
libc_hidden_def (__dup2)
weak_alias (__dup2, dup2)
// in file: sysdeps/unix/sysv/linux/dup2.c
int
__dup2 (int fd, int fd2)
{
#ifdef __NR_dup2
return INLINE_SYSCALL_CALL (dup2, fd, fd2);
#else
/* For the degenerate case, check if the fd is valid (by trying to
get the file status flags) and return it, or else return EBADF. */
if (fd == fd2)
return __libc_fcntl (fd, F_GETFL, 0) < 0 ? -1 : fd;
return INLINE_SYSCALL_CALL (dup3, fd, fd2, 0);
#endif
}
libc_hidden_def (__dup2)
weak_alias (__dup2, dup2) - INLINE_SYSCALL_CALL 宏展开
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116// in file: sysdeps/unix/sysdep.h
/* Issue a syscall defined by syscall number plus any other argument
required. Any error will be handled using arch defined macros and errno
will be set accordingly.
It is similar to INLINE_SYSCALL macro, but without the need to pass the
expected argument number as second parameter. */
#define INLINE_SYSCALL_CALL(...) \
__INLINE_SYSCALL_DISP (__INLINE_SYSCALL, __VA_ARGS__)
#define __INTERNAL_SYSCALL_DISP(b,...) \
__SYSCALL_CONCAT (b,__INTERNAL_SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)
#define __SYSCALL_CONCAT_X(a,b) a##b
#define __SYSCALL_CONCAT(a,b) __SYSCALL_CONCAT_X (a, b)
// 以上几个宏的作用就是:根据参数个数,展开为宏__INLINE_SYSCALLn()
// 接着将参数展开,如下所示
#define __INLINE_SYSCALL0(name) \
INLINE_SYSCALL (name, 0)
#define __INLINE_SYSCALL1(name, a1) \
INLINE_SYSCALL (name, 1, a1)
#define __INLINE_SYSCALL2(name, a1, a2) \
INLINE_SYSCALL (name, 2, a1, a2)
#define __INLINE_SYSCALL3(name, a1, a2, a3) \
INLINE_SYSCALL (name, 3, a1, a2, a3)
#define __INLINE_SYSCALL4(name, a1, a2, a3, a4) \
INLINE_SYSCALL (name, 4, a1, a2, a3, a4)
#define __INLINE_SYSCALL5(name, a1, a2, a3, a4, a5) \
INLINE_SYSCALL (name, 5, a1, a2, a3, a4, a5)
#define __INLINE_SYSCALL6(name, a1, a2, a3, a4, a5, a6) \
INLINE_SYSCALL (name, 6, a1, a2, a3, a4, a5, a6)
#define __INLINE_SYSCALL7(name, a1, a2, a3, a4, a5, a6, a7) \
INLINE_SYSCALL (name, 7, a1, a2, a3, a4, a5, a6, a7)
// in file: sysdeps/unix/sysdep.h
/* Wrappers around system calls should normally inline the system call code.
But sometimes it is not possible or implemented and we use this code. */
#ifndef INLINE_SYSCALL
#define INLINE_SYSCALL(name, nr, args...) __syscall_##name (args)
#endif
// in file: sysdeps/unix/sysv/linux/sysdep.h
/* Define a macro which expands into the inline wrapper code for a system
call. It sets the errno and returns -1 on a failure, or the syscall
return value otherwise. */
#undef INLINE_SYSCALL
#define INLINE_SYSCALL(name, nr, args...) \
({ \
long int sc_ret = INTERNAL_SYSCALL (name, nr, args); \
__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (sc_ret)) \
? SYSCALL_ERROR_LABEL (INTERNAL_SYSCALL_ERRNO (sc_ret)) \
: sc_ret; \
})
// 宏 INTERNAL_SYSCALL 是专属于**sysv/linux**实现的,而且不同的硬件架构,其具体定义不同
// for x86-64, in file: sysdeps/unix/sysv/linux/x86_64/sysdep.h
#undef INTERNAL_SYSCALL
#define INTERNAL_SYSCALL(name, nr, args...) \
internal_syscall##nr (SYS_ify (name), args)
#undef internal_syscall0
#define internal_syscall0(number, dummy...) \
({ \
unsigned long int resultvar; \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (number) \
: "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; \
})
// 这里可以看到,x86_64使用 syscall触发软中断,调用号应该也是放到寄存器中:"0"(number);参数分别放在r9,r8,r10,rdx,rsi,rdi中
#undef internal_syscall1
#define internal_syscall1(number, arg1) \
({ \
unsigned long int resultvar; \
TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (number), "r" (_a1) \
: "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; \
})
// for aarch64, in file: sysdeps/unix/sysv/linux/aarch64/sysdep.h
# undef INTERNAL_SYSCALL
# define INTERNAL_SYSCALL(name, nr, args...) \
INTERNAL_SYSCALL_RAW(SYS_ify(name), nr, args)
// 这里,SYS_ify将系统调用名,扩展为具体的系统调用号
#undef SYS_ify
#define SYS_ify(syscall_name) (__NR_##syscall_name)
// 这里可以看到使用 svc 0 触发软中断,陷入内核,并在x8寄存器中存储系统调用号
# undef INTERNAL_SYSCALL_RAW
# define INTERNAL_SYSCALL_RAW(name, nr, args...) \
({ long _sys_result; \
{ \
LOAD_ARGS_##nr (args) \
register long _x8 asm ("x8") = (name); \
asm volatile ("svc 0 // syscall " # name \
: "=r" (_x0) : "r"(_x8) ASM_ARGS_##nr : "memory"); \
_sys_result = _x0; \
} \
_sys_result; })
// 可以看到,系统调用使用x0-6这7个寄存器进行参数传递
# define LOAD_ARGS_0() \
register long _x0 asm ("x0");
# define LOAD_ARGS_1(x0) \
long _x0tmp = (long) (x0); \
LOAD_ARGS_0 () \
_x0 = _x0tmp;
Linux-aarch64-系统调用-1
http://example.com/2024/07/10/Linux-aarch64-系统调用-1/