图片 16

而clone()是通过调用do,进程控制块(PCB)

进度描述

进程描述符(task_struct)

用来描述进程的数据结构,可以理解为进程的属性。比如进程的状态、进程的标识(PID)等,都被封装在了进程描述符这个数据结构中,该数据结构被定义为task_struct。

进程控制块(PCB)

是操作系统核心中的一种数据结构,主要表示进程状态。

进程状态

图片 1

fork()

fork()在父、子进程中各返回一次。在父进程中返回子进程的
pid,在子进程中返回0。

fork一个子进程的代码

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/*
 * Minimal fork() demonstration.
 *
 * Forks once.  The child prints a message and returns from main(); the
 * parent prints a message, blocks in wait() until the child has terminated,
 * and then prints a completion message.
 *
 * Returns 0 on success.  If fork() fails, a message is written to stderr
 * and the process exits with status -1.
 */
int main(int argc, char * argv[])
{
  pid_t pid;  /* fork() returns pid_t, which need not be the same as int */

  /* the command line is not used by this demo */
  (void)argc;
  (void)argv;

  /* fork another process */
  pid = fork();
  if (pid < 0)
  {
      /* error occurred: no child was created */
      fprintf(stderr, "Fork Failed!\n");
      exit(-1);
  }
  else if (pid == 0)
  {
      /* child process: fork() returned 0 */
      printf("This is Child Process!\n");
  }
  else
  {
      /* parent process: fork() returned the child's pid */
      printf("This is Parent Process!\n");
      /* parent will wait for the child to complete (declared in <sys/wait.h>) */
      wait(NULL);
      printf("Child Complete!\n");
  }

  return 0;
}

转自:

此文仅用于MOOCLinux内核分析作业

张依依+原创作品转载请注明出处+《Linux内核分析》MOOC课程http://mooc.study.163.com/course/USTC-1000029000

尝试截图

图片 2

安装断点。

图片 3

跟踪到copy_process函数。

图片 4

跟踪到ret_from_fork()。

图片 5

跟踪ret_from_fork()的汇编代码。

阅读创建进程的相关关键代码

进程创建

前言


Unix标准的复制进程的系统调用是fork(即分叉),但是Linux、BSD等操作系统并不止实现这一个,确切的说linux实现了三个:fork、vfork、clone(确切说vfork创造出来的是轻量级进程,也叫线程,是共享资源的进程)

系统调用 描述
fork fork创造的子进程是父进程的完整副本,复制了父亲进程的资源,包括内存的内容task_struct内容
vfork vfork创建的子进程与父进程共享数据段,而且由vfork()创建的子进程将先于父进程运行
clone Linux上创建线程一般使用的是pthread库 实际上linux也给我们提供了创建线程的系统调用,就是clone

关于用户空间使用fork, vfork和clone, 请参见

Linux中fork,vfork和clone详解(区别与联系)

fork, vfork和clone的系统调用的入口地址分别是sys_fork,
sys_vfork和sys_clone, 而他们的定义是依赖于体系结构的,
因为在用户空间和内核空间之间传递参数的方法因体系结构而异

系统调用的参数传递

系统调用的实现与C库不同,
普通C函数通过将参数的值压入到进程的栈中进行参数的传递。由于系统调用是通过中断进程从用户态到内核态的一种特殊的函数调用,没有用户态或者内核态的堆栈可以被用来在调用函数和被调函数之间进行参数传递。系统调用通过CPU的寄存器来进行参数传递。在进行系统调用之前,系统调用的参数被写入CPU的寄存器,而在实际调用系统服务例程之前,内核将CPU寄存器的内容拷贝到内核堆栈中,实现参数的传递。

因此不同的体系结构可能采用不同的方式或者不同的寄存器来传递参数,而上面函数的任务就是从处理器的寄存器中提取用户空间提供的信息,
并调用体系结构无关的_do_fork(或者早期的do_fork)函数,
负责进程的复制

不同的体系结构可能需要采用不同的方式或者寄存器来存储函数调用的参数,
因此linux在设计系统调用的时候,
将其划分成体系结构相关的层次和体系结构无关的层次,
前者负责提取出依赖于体系结构的特定的参数,
后者则依据参数的设置执行特定的真正操作


task_struct数据布局分析

/*
 * Process descriptor as quoted by the article (which refers to
 * /include/linux/sched.h of linux-3.18.6).  One instance describes one task:
 * run state, scheduling data, identifiers, parent/child links, credentials,
 * filesystem/open-file/signal state and many optional, config-dependent
 * fields.  The field order is part of the quoted definition and is kept as is.
 */
struct task_struct {
    volatile long state;/* run state: -1 unrunnable, 0 runnable, >0 stopped */
    void *stack; // kernel stack of the process
    atomic_t usage;
    unsigned int flags; // per-process flag bits (not the process ID; see pid below)
    unsigned int ptrace;// ptrace (process tracing) flags

#ifdef CONFIG_SMP // conditionally compiled: only present when CONFIG_SMP is defined
    struct llist_node wake_entry;
    int on_cpu;
    struct task_struct *last_wakee;
    unsigned long wakee_flips;
    unsigned long wakee_flip_decay_ts;

    int wake_cpu;
#endif
    /* run-queue and scheduling related fields */
    int on_rq;  

    int prio, static_prio, normal_prio;
    unsigned int rt_priority;
    const struct sched_class *sched_class;
    struct sched_entity se;
    struct sched_rt_entity rt;
#ifdef CONFIG_CGROUP_SCHED
    struct task_group *sched_task_group;
#endif
    struct sched_dl_entity dl;

#ifdef CONFIG_PREEMPT_NOTIFIERS
    /* list of struct preempt_notifier: */
    struct hlist_head preempt_notifiers;
#endif

#ifdef CONFIG_BLK_DEV_IO_TRACE
    unsigned int btrace_seq;
#endif

    unsigned int policy;
    int nr_cpus_allowed;
    cpumask_t cpus_allowed;

#ifdef CONFIG_PREEMPT_RCU
    int rcu_read_lock_nesting;
    union rcu_special rcu_read_unlock_special;
    struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
    struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_TASKS_RCU
    unsigned long rcu_tasks_nvcsw;
    bool rcu_tasks_holdout;
    struct list_head rcu_tasks_holdout_list;
    int rcu_tasks_idle_cpu;
#endif /* #ifdef CONFIG_TASKS_RCU */

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    struct sched_info sched_info;
#endif

    struct list_head tasks;  // task list linkage; per the article, all processes are chained into one circular doubly linked list
#ifdef CONFIG_SMP
    struct plist_node pushable_tasks;
    struct rb_node pushable_dl_tasks;
#endif

    struct mm_struct *mm, *active_mm; // data structures describing the process address space
#ifdef CONFIG_COMPAT_BRK
    unsigned brk_randomized:1;
#endif
    /* per-thread vma caching */
    u32 vmacache_seqnum;
    struct vm_area_struct *vmacache[VMACACHE_SIZE];
#if defined(SPLIT_RSS_COUNTING)
    struct task_rss_stat    rss_stat;
#endif
/* task state */
    int exit_state;
    int exit_code, exit_signal;
    int pdeath_signal;  /*  The signal sent when the parent dies  */
    unsigned int jobctl;    /* JOBCTL_*, siglock protected */

    /* Used for emulating ABI behavior of previous Linux versions */
    unsigned int personality;

    unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                 * execve */
    unsigned in_iowait:1;

    /* Revert to default priority/policy when forking */
    unsigned sched_reset_on_fork:1;
    unsigned sched_contributes_to_load:1;

    unsigned long atomic_flags; /* Flags needing atomic access. */

    pid_t pid;  // process identifier
    pid_t tgid; // thread-group identifier (the article's note only said "process identifier")

#ifdef CONFIG_CC_STACKPROTECTOR
    /* Canary value for the -fstack-protector gcc feature */
    unsigned long stack_canary;
#endif
    /*
     * pointers to (original) parent process, youngest child, younger sibling,
     * older sibling, respectively.  (p->father can be replaced with
     * p->real_parent->pid)
     */
/* parent/child relationship fields */
    struct task_struct __rcu *real_parent; /* real parent process */
    struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
    /*
     * children/sibling forms the list of my natural children
     */
    struct list_head children;  /* list of my children */
    struct list_head sibling;   /* linkage in my parent's children list */
    struct task_struct *group_leader;   /* threadgroup leader */

    /*
     * ptraced is the list of tasks this task is using ptrace on.
     * This includes both natural children and PTRACE_ATTACH targets.
     * p->ptrace_entry is p's link on the p->parent->ptraced list.
     */
    struct list_head ptraced;
    struct list_head ptrace_entry;

    /* PID/PID hash table linkage. */
    struct pid_link pids[PIDTYPE_MAX];
    struct list_head thread_group;
    struct list_head thread_node;

    struct completion *vfork_done;      /* for vfork() */
    int __user *set_child_tid;      /* CLONE_CHILD_SETTID */
    int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */

    /* time accounting fields */
    cputime_t utime, stime, utimescaled, stimescaled;
    cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
    struct cputime prev_cputime;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
    seqlock_t vtime_seqlock;
    unsigned long long vtime_snap;
    enum {
        VTIME_SLEEPING = 0,
        VTIME_USER,
        VTIME_SYS,
    } vtime_snap_whence;
#endif
    unsigned long nvcsw, nivcsw; /* context switch counts */
    u64 start_time;     /* monotonic time in nsec */
    u64 real_start_time;    /* boot based time in nsec */
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    unsigned long min_flt, maj_flt;

    struct task_cputime cputime_expires;
    struct list_head cpu_timers[3];

/* process credentials */
    const struct cred __rcu *real_cred; /* objective and real subjective task
                     * credentials (COW) */
    const struct cred __rcu *cred;  /* effective (overridable) subjective task
                     * credentials (COW) */
    char comm[TASK_COMM_LEN]; /* executable name excluding path
                     - access with [gs]et_task_comm (which lock
                       it with task_lock())
                     - initialized normally by setup_new_exec */
/* file system info */
    int link_count, total_link_count;
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
    struct sysv_sem sysvsem;
    struct sysv_shm sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
    unsigned long last_switch_count;
#endif
/* CPU-specific state of this task */
    struct thread_struct thread;
/* filesystem information */
    struct fs_struct *fs;// filesystem related data
/* open file information */
    struct files_struct *files; // open file descriptors
/* namespaces */
    struct nsproxy *nsproxy;
/* signal handling related data */
    struct signal_struct *signal;
    struct sighand_struct *sighand;

    sigset_t blocked, real_blocked;
    sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
    struct sigpending pending;

    unsigned long sas_ss_sp;
    size_t sas_ss_size;
    int (*notifier)(void *priv);
    void *notifier_data;
    sigset_t *notifier_mask;
    struct callback_head *task_works;

    struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
    kuid_t loginuid;
    unsigned int sessionid;
#endif
    struct seccomp seccomp;

/* Thread group tracking */
    u32 parent_exec_id;
    u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
 * mempolicy */
    spinlock_t alloc_lock;

    /* Protection of the PI data structures: */
    raw_spinlock_t pi_lock;

#ifdef CONFIG_RT_MUTEXES  // rt-mutex related fields
    /* PI waiters blocked on a rt_mutex held by this task */
    struct rb_root pi_waiters;
    struct rb_node *pi_waiters_leftmost;
    /* Deadlock detection and priority inheritance handling */
    struct rt_mutex_waiter *pi_blocked_on;
#endif

#ifdef CONFIG_DEBUG_MUTEXES// mutex debugging
    /* mutex deadlock detection */
    struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS // debugging related data
    unsigned int irq_events;
    unsigned long hardirq_enable_ip;
    unsigned long hardirq_disable_ip;
    unsigned int hardirq_enable_event;
    unsigned int hardirq_disable_event;
    int hardirqs_enabled;
    int hardirq_context;
    unsigned long softirq_disable_ip;
    unsigned long softirq_enable_ip;
    unsigned int softirq_disable_event;
    unsigned int softirq_enable_event;
    int softirqs_enabled;
    int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
    u64 curr_chain_key;
    int lockdep_depth;
    unsigned int lockdep_recursion;
    struct held_lock held_locks[MAX_LOCK_DEPTH];
    gfp_t lockdep_reclaim_gfp;
#endif

/* journalling filesystem info */
    void *journal_info;

/* stacked block device info */
    struct bio_list *bio_list;

#ifdef CONFIG_BLOCK
/* stack plugging */
    struct blk_plug *plug;
#endif

/* VM state */
    struct reclaim_state *reclaim_state;

    struct backing_dev_info *backing_dev_info;

    struct io_context *io_context;

    unsigned long ptrace_message;
    siginfo_t *last_siginfo; /* For ptrace use.  */
    struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
    u64 acct_rss_mem1;  /* accumulated rss usage */
    u64 acct_vm_mem1;   /* accumulated virtual memory usage */
    cputime_t acct_timexpd; /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
    nodemask_t mems_allowed;    /* Protected by alloc_lock */
    seqcount_t mems_allowed_seq;    /* Sequence no to catch updates */
    int cpuset_mem_spread_rotor;
    int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
    /* Control Group info protected by css_set_lock */
    struct css_set __rcu *cgroups;
    /* cg_list protected by css_set_lock and tsk->alloc_lock */
    struct list_head cg_list;
#endif
#ifdef CONFIG_FUTEX
    struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
    struct compat_robust_list_head __user *compat_robust_list;
#endif
    struct list_head pi_state_list;
    struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTS
    struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
    struct mutex perf_event_mutex;
    struct list_head perf_event_list;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
    unsigned long preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA
    struct mempolicy *mempolicy;    /* Protected by alloc_lock */
    short il_next;
    short pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
    int numa_scan_seq;
    unsigned int numa_scan_period;
    unsigned int numa_scan_period_max;
    int numa_preferred_nid;
    unsigned long numa_migrate_retry;
    u64 node_stamp;         /* migration stamp  */
    u64 last_task_numa_placement;
    u64 last_sum_exec_runtime;
    struct callback_head numa_work;

    struct list_head numa_entry;
    struct numa_group *numa_group;

    /*
     * Exponential decaying average of faults on a per-node basis.
     * Scheduling placement decisions are made based on the these counts.
     * The values remain static for the duration of a PTE scan
     */
    unsigned long *numa_faults_memory;
    unsigned long total_numa_faults;

    /*
     * numa_faults_buffer records faults per node during the current
     * scan window. When the scan completes, the counts in
     * numa_faults_memory decay and these values are copied.
     */
    unsigned long *numa_faults_buffer_memory;

    /*
     * Track the nodes the process was running on when a NUMA hinting
     * fault was incurred.
     */
    unsigned long *numa_faults_cpu;
    unsigned long *numa_faults_buffer_cpu;

    /*
     * numa_faults_locality tracks if faults recorded during the last
     * scan window were remote/local. The task scan period is adapted
     * based on the locality of the faults with different weights
     * depending on whether they were shared or private faults
     */
    unsigned long numa_faults_locality[2];

    unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */

    struct rcu_head rcu;

    /*
     * pipe related data
     */
    struct pipe_inode_info *splice_pipe; 

    struct page_frag task_frag;

#ifdef  CONFIG_TASK_DELAY_ACCT
    struct task_delay_info *delays;
#endif
#ifdef CONFIG_FAULT_INJECTION
    int make_it_fail;
#endif
    /*
     * when (nr_dirtied >= nr_dirtied_pause), it's time to call
     * balance_dirty_pages() for some dirty throttling pause
     */
    int nr_dirtied;
    int nr_dirtied_pause;
    unsigned long dirty_paused_when; /* start of a write-and-pause period */

#ifdef CONFIG_LATENCYTOP
    int latency_record_count;
    struct latency_record latency_record[LT_SAVECOUNT];
#endif
    /*
     * time slack values; these are used to round up poll() and
     * select() etc timeout values. These are in nanoseconds.
     */
    unsigned long timer_slack_ns;
    unsigned long default_timer_slack_ns;

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
    /* Index of current stored address in ret_stack */
    int curr_ret_stack;
    /* Stack of return addresses for return function tracing */
    struct ftrace_ret_stack *ret_stack;
    /* time stamp for last schedule */
    unsigned long long ftrace_timestamp;
    /*
     * Number of functions that haven't been traced
     * because of depth overrun.
     */
    atomic_t trace_overrun;
    /* Pause for the tracing */
    atomic_t tracing_graph_pause;
#endif
#ifdef CONFIG_TRACING
    /* state flags for use by tracers */
    unsigned long trace;
    /* bitmask and counter of trace recursion */
    unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
    unsigned int memcg_kmem_skip_account;
    struct memcg_oom_info {
        struct mem_cgroup *memcg;
        gfp_t gfp_mask;
        int order;
        unsigned int may_oom:1;
    } memcg_oom;
#endif
#ifdef CONFIG_UPROBES
    struct uprobe_task *utask;
#endif
#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
    unsigned int    sequential_io;
    unsigned int    sequential_io_avg;
#endif
};

看一下do_fork      /linux-3.18.6/kernel/fork.c#do_fork

大致流程

fork
通过0x80中断(系统调用)来陷入内核,由系统提供的相应系统调用来完成进程的创建。

fork.c

/*
 * fork system call (no arguments).  Compiled only when the architecture
 * defines __ARCH_WANT_SYS_FORK.  With CONFIG_MMU it forwards to do_fork()
 * with SIGCHLD as the only flag and zero/NULL for every other argument;
 * without an MMU it returns -EINVAL.
 */
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMU
    return do_fork(SIGCHLD, 0, 0, NULL, NULL);
#else
    /* can not support in nommu mode */
    return -EINVAL;
#endif
}
#endif

/*
 * vfork system call (no arguments).  Compiled only when the architecture
 * defines __ARCH_WANT_SYS_VFORK.  Same call into do_fork() as fork, but with
 * the CLONE_VFORK and CLONE_VM flags added to SIGCHLD.
 */
#ifdef __ARCH_WANT_SYS_VFORK
SYSCALL_DEFINE0(vfork)
{
    return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
            0, NULL, NULL);
}
#endif

/*
 * clone system call.  Compiled only when the architecture defines
 * __ARCH_WANT_SYS_CLONE.  The CONFIG_CLONE_BACKWARDS* options select one of
 * four argument orders (the third variant also takes a stack_size argument);
 * whichever prototype is chosen, the body is the same: the flags, new stack
 * pointer and the two tid pointers are passed straight to do_fork().
 * Note that tls_val (and stack_size in the BACKWARDS3 variant) is not
 * forwarded by this body.
 */
#ifdef __ARCH_WANT_SYS_CLONE
#ifdef CONFIG_CLONE_BACKWARDS
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
         int __user *, parent_tidptr,
         int, tls_val,
         int __user *, child_tidptr)
#elif defined(CONFIG_CLONE_BACKWARDS2)
SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
         int __user *, parent_tidptr,
         int __user *, child_tidptr,
         int, tls_val)
#elif defined(CONFIG_CLONE_BACKWARDS3)
SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
        int, stack_size,
        int __user *, parent_tidptr,
        int __user *, child_tidptr,
        int, tls_val)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
         int __user *, parent_tidptr,
         int __user *, child_tidptr,
         int, tls_val)
#endif
{
    return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
}
#endif

通过看上面的代码,我们可以清楚的看到,无论是使用 fork 还是 vfork
来创建进程,最终都是通过 do_fork() 方法来实现的。接下来我们可以追踪到
do_fork()的代码(部分代码,经过笔者的精简):

/*
 * do_fork() as abridged by the article: common back end of the fork, vfork
 * and clone system calls.
 *
 * Creates the child with copy_process(); on success publishes the child's
 * pid, wakes the child and, for CLONE_VFORK, waits for the vfork completion.
 * Returns the child's pid number (nr) on success, or PTR_ERR(p) when
 * copy_process() returned an error pointer.
 *
 * Note: `nr` and `trace` are used below but their declarations and
 * initialisation are in the part of the function omitted from this excerpt.
 */
long do_fork(unsigned long clone_flags,
          unsigned long stack_start,
          unsigned long stack_size,
          int __user *parent_tidptr,
          int __user *child_tidptr)
{
        // pointer to the new process descriptor
        struct task_struct *p;

        // ... (code omitted in this excerpt)

        // duplicate the process descriptor; copy_process() returns a task_struct pointer
        p = copy_process(clone_flags, stack_start, stack_size,
             child_tidptr, NULL, trace);

        if (!IS_ERR(p)) {
            struct completion vfork;
            struct pid *pid;

            trace_sched_process_fork(current, p);

            // obtain the pid of the newly created process descriptor
            pid = get_task_pid(p, PIDTYPE_PID);
            nr = pid_vnr(pid);

            if (clone_flags & CLONE_PARENT_SETTID)
                put_user(nr, parent_tidptr);

            // called via vfork(): initialise the vfork completion
            if (clone_flags & CLONE_VFORK) {
                p->vfork_done = &vfork;
                init_completion(&vfork);
                get_task_struct(p);
            }

            // hand the child to the scheduler so it gets a CPU and is ready to run
            wake_up_new_task(p);

            // fork is done, the child is about to start running
            if (unlikely(trace))
                ptrace_event_pid(trace, pid);

            // for vfork: the parent waits here until the child completes
            if (clone_flags & CLONE_VFORK) {
                if (!wait_for_vfork_done(p, &vfork))
                    ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
            }

            put_pid(pid);
        } else {
            nr = PTR_ERR(p);
        }
        return nr;
}

fork, vfork, clone系统调用的实现


task_struct数据结构

根据wiki的定义,进程是计算机中已运行程序的实体。在面向线程设计的系统(Linux
2.6及更新的版本)中,进程本身不是基本运行单位,而是线程的容器。

A computer program is a passive collection of instructions; a
process is the actual execution of those instructions. Several
processes may be associated with the same program; for example,
opening up several instances of the same program often means more than
one process is being executed.

在Linux中,task_struct其实就是通常所说的PCB。该结构定义位于:

/include/linux/sched.h

task_struct比较庞大,大致可以分为几个部分:

  • 进程状态(State)
  • 进程调度信息(Scheduling Information)
  • 各种标识符(Identifiers)
  • 进程通信有关信息(IPC:Inter_Process Communication)
  • 时间和定时器信息(Times and Timers)
  • 进程链接信息(Links)
  • 文件系统信息(File System)
  • 虚拟内存信息(Virtual Memory)
  • 页面管理信息(page)
  • 对称多处理器(SMP)信息
  • 和处理器相关的环境(上下文)信息(Processor Specific Context)
  • 其它信息

其中比较重要的几个参数:

  • volatile long state;进程状态,可见/include/linux/sched.h文件中的宏,TASK_RUNNING等
  • unsigned int rt_priority;实时优先级
  • unsigned int policy;调度策略
  • pid_t pid;进程标识符
  • struct task_struct __rcu *real_parent;real parent
  • struct list_head children;list of my children
  • struct files_struct *files;系统打开文件
fork函数创建新进程过程分析

在Linux系统中fork()通过调用clone系统调用实现其功能,而clone()是通过调用do_fork()实现的。

do_fork()定义在kernel/fork.c文件中。
该函数调用copy_process()开始创建新进程。工作过程如下:

1.调用dup_task_struct()为新进程创建一个内核栈、thread_info结构和task_struct(PCB),这些值与当前进程的值相同。此时,子进程和父进程的描述符是完全相同的。

2.检查并确保新创建这个子进程后,当前用户所拥有的进程数目没有超出给它分配的资源的限制。

3.子进程开始使自己与父进程区别开来。进程描述符内的许多成员都要被清0或设为初始值。那些不是继承而来的进程描述符成员,主要是统计信息。task_struct中的大多数数据都依然未被修改。

4.子进程的状态被设置为TASK_UNINTERRUPTIBLE,以保证它不会投入运行。(注:TASK_UNINTERRUPTIBLE使进程只能被wake_up()唤醒,即等待状态。等待状态不可被信号解除。)

5.copy_process()调用copy_flags()以更新task_struct的flags成员。表明进程是否拥有超级用户权限的PF_SUPERPRIV标志被清0。表明进程还没有调用exec()函数的PF_FORKNOEXEC标志被设置。

6.调用alloc_pid()为新进程分配一个有效的PID。

7.根据传递给clone()的参数标志,copy_process()拷贝或共享打开的文件、文件系统信息、信号处理函数、进程地址空间和命名空间等。在一般情况下,这些资源会被给定进程的所有线程共享;否则,这些资源对每个进程是不同的,因此被拷贝到这里。

8.最后,copy_process()做扫尾工作并返回一个指向子进程的指针。

回到do_fork函数,如果copy_process()函数成功返回,新创建的子进程被唤醒并让其投入运行。

1651    p = copy_process(clone_flags, stack_start, stack_size,      // 创建进程的主要代码
1652          child_tidptr, NULL, trace);

do_fork 流程

  • 调用 copy_process 为子进程复制出一份进程信息
  • 如果是 vfork,初始化完成处理信息
  • 调用 wake_up_new_task 将子进程加入调度器,为之分配 CPU
  • 如果是 vfork,父进程等待子进程完成 exec 替换自己的地址空间

关于do_fork和_do_fork


The commit 3033f14ab78c32687 (“clone: support passing tls argument via
C
rather than pt_regs magic”) introduced _do_fork() that allowed to
pass
@tls parameter.

参见

linux2.5.32以后, 添加了TLS(Thread Local Storage)机制,
clone的标识CLONE_SETTLS接受一个参数来设置线程的本地存储区。sys_clone也因此增加了一个int参数来传入相应的tls_val。sys_clone通过do_fork来调用copy_process完成进程的复制,它调用特定的copy_thread把相应的系统调用参数从pt_regs寄存器列表中提取出来,但是会导致意外的情况。

only one code path into copy_thread can pass the CLONE_SETTLS flag,
and
that code path comes from sys_clone with its architecture-specific
argument-passing order.

前面我们说了,
在实现函数调用的时候,sys_clone等将特定体系结构的参数从寄存器中提取出来,
然后到达do_fork这步的时候已经应该是体系结构无关了,
但是我们sys_clone需要设置的CLONE_SETTLS的tls仍然是个依赖于体系结构的参数,
这里就会出现问题。

因此linux-4.2之后选择引入一个新的CONFIG_HAVE_COPY_THREAD_TLS,和一个新的COPY_THREAD_TLS接受TLS参数为
额外的长整型(系统调用参数大小)的参数。改变sys_clone的TLS参数为unsigned
long,并传递到copy_thread_tls。

/* http://lxr.free-electrons.com/source/include/linux/sched.h?v=4.5#L2646  */
extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);


/* Article note: the TLS (Thread Local Storage) mechanism was added after
    linux 2.5.32; linux-4.2 added support for passing the CLONE_SETTLS
    value as an argument, which the underlying _do_fork implements. */
#ifndef CONFIG_HAVE_COPY_THREAD_TLS
/* For compatibility with architectures that call do_fork directly rather than
 * using the syscall entry points below. */
/* Thin wrapper: forwards all five arguments to _do_fork() and passes 0 as
 * the extra sixth (tls) argument.  Only built when
 * CONFIG_HAVE_COPY_THREAD_TLS is not defined. */
long do_fork(unsigned long clone_flags,
              unsigned long stack_start,
              unsigned long stack_size,
              int __user *parent_tidptr,
              int __user *child_tidptr)
{
        return _do_fork(clone_flags, stack_start, stack_size,
                        parent_tidptr, child_tidptr, 0);
}
#endif

 

我们会发现,新版本的系统中clone的TLS设置标识会通过TLS参数传递,
因此_do_fork替代了老版本的do_fork。

老版本的do_fork只有在如下情况才会定义

  • 只有当系统不支持通过TLS参数传递而是使用pt_regs寄存器列表传递时

  • 未定义CONFIG_HAVE_COPY_THREAD_TLS宏

参数 描述
clone_flags 与clone()参数flags相同, 用来控制进程复制过的一些属性信息, 描述你需要从父进程继承那些资源。该标志位的4个字节分为两部分。最低的一个字节为子进程结束时发送给父进程的信号代码,通常为SIGCHLD;剩余的三个字节则是各种clone标志的组合(本文所涉及的标志含义详见下表),也就是若干个标志之间的或运算。通过clone标志可以有选择的对父进程的资源进行复制;
stack_start 与clone()参数stack_start相同, 子进程用户态堆栈的地址
regs 是一个指向了寄存器集合的指针, 其中以原始形式, 保存了调用的参数, 该参数使用的数据类型是特定体系结构的struct pt_regs,其中按照系统调用执行时寄存器在内核栈上的存储顺序, 保存了所有的寄存器, 即指向内核态堆栈通用寄存器值的指针,通用寄存器的值是在从用户态切换到内核态时被保存到内核态堆栈中的(指向pt_regs结构体的指针。当系统发生系统调用,即用户进程从用户态切换到内核态时,该结构体保存通用寄存器中的值,并被存放于内核态的堆栈中)
stack_size 用户状态下栈的大小, 该参数通常是不必要的, 总被设置为0
parent_tidptr 与clone的ptid参数相同, 父进程在用户态下pid的地址,该参数在CLONE_PARENT_SETTID标志被设定时有意义
child_tidptr 与clone的ctid参数相同, 子进程在用户态下pid的地址,该参数在CLONE_CHILD_SETTID标志被设定时有意义

其中clone_flags如下表所示

图片 6

分析内核处理过程sys_clone

fork、vfork和clone三个系统调用实际上都是通过do_fork来实现进程的创建.
见如下语句:

return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
  • do_fork 函数

而do_fork函数真正实现复制的是copy_process

/*
 * Heavily elided view of do_fork(): the "..." lines stand for code the
 * article left out.  The one statement kept shows that the actual
 * duplication is delegated to copy_process(), whose result is stored in p.
 * (`p` and `trace` are declared in the omitted part.)
 */
long do_fork(unsigned long clone_flags,
          unsigned long stack_start,
          unsigned long stack_size,
          int __user *parent_tidptr,
          int __user *child_tidptr)
{

    ...

    p = copy_process(clone_flags, stack_start, stack_size,
             child_tidptr, NULL, trace);
  ...
}
  • copy_process函数

copy_process()主要完成进程数据结构、各种资源的初始化。

p = dup_task_struct(current);
  1. (省略的IF语句)检查clone_flags参数,防止无效的组合进入
  2. p = dup_task_struct(current);调用dup_task_struct()为新进程创建一个内核栈
  3. 判断权限及允许范围的代码
  4. 对子进程的描述符初始化和复制父进程的资源给子进程

- `retval = sched_fork(clone_flags, p);`完成调度相关的设置,将这个task分配给CPU
- `if (retval)`语句群,复制共享进程的的各个部分
- `retval = copy_thread(clone_flags, stack_start, stack_size, p);`复制父进程堆栈的内容到子进程的堆栈中去.这其中,copy_thread()函数中的语句`p->thread.ip = (unsigned long) ret_from_fork;`决定了新进程的**第一条指令地址**.
  • dup_task_struct()

/*
 * Create a new task_struct (plus thread_info) for a child of @orig.
 *
 * Allocates both structures on the node returned by tsk_fork_get_node(),
 * copies @orig into the new task via arch_dup_task_struct(), points
 * tsk->stack at the new thread_info and resets a handful of fields.
 *
 * Returns the new task_struct, or NULL if either allocation or
 * arch_dup_task_struct() fails; on failure everything allocated so far is
 * released through the free_ti/free_tsk labels.
 */
static struct task_struct *dup_task_struct(struct task_struct *orig)
{
    struct task_struct *tsk;
    struct thread_info *ti;
    int node = tsk_fork_get_node(orig);
    int err;

    tsk = alloc_task_struct_node(node);
    if (!tsk)
        return NULL;

    ti = alloc_thread_info_node(tsk, node);
    if (!ti)
        goto free_tsk;

    /* copy the parent's task_struct contents into the new one */
    err = arch_dup_task_struct(tsk, orig);
    if (err)
        goto free_ti;

    /* the copy above also copied orig's stack pointer; point at our own */
    tsk->stack = ti;
# ifdef CONFIG_SECCOMP

    /* the child starts with no seccomp filter pointer set here */
    tsk->seccomp.filter = NULL;
# endif

    setup_thread_stack(tsk, orig);
    clear_user_return_notifier(tsk);
    clear_tsk_need_resched(tsk);
    set_task_stack_end_magic(tsk);

# ifdef CONFIG_CC_STACKPROTECTOR
    /* fresh random stack canary for the new task */
    tsk->stack_canary = get_random_int();
# endif

    /* the usage count of the new task starts at 2 */
    atomic_set(&tsk->usage, 2);
# ifdef CONFIG_BLK_DEV_IO_TRACE
    tsk->btrace_seq = 0;
# endif
    /* these were copied from orig; reset them for the new task */
    tsk->splice_pipe = NULL;
    tsk->task_frag.page = NULL;

    account_kernel_stack(ti, 1);

    return tsk;

    /* error unwinding: release in reverse order of allocation */
free_ti:
    free_thread_info(ti);
free_tsk:
    free_task_struct(tsk);
    return NULL;
}
  1. tsk = alloc_task_struct_node(node);为task_struct开辟内存
  2. ti = alloc_thread_info_node(tsk, node);ti指向thread_info的首地址,同时也是系统为新进程分配的两个连续页面的首地址。
  3. err = arch_dup_task_struct(tsk, orig);复制父进程的task_struct信息到新的task_struct里,
    (*dst = *src;)
  4. tsk->stack = ti;task的对应栈
  5. setup_thread_stack(tsk, orig);初始化thread info结构
  6. set_task_stack_end_magic(tsk);栈结束的地址设置数据为栈结束标示(for
    overflow detection)

子进程是从哪开始执行的?

当执行到

p->thread.ip = (unsigned long) ret_from_fork;
//调度到子进程时的第一条指令地址。

时,即子进程获得CPU时它从这个位置开始执行的。

而执行这条语句

*childregs = *current_pt_regs(); //复制内核堆栈

保证了新进程的执行起点和内核堆栈的一致性。
如下图gdb跟踪所示。

图片 7

看一下copy_process      /linux-3.18.6/kernel/fork.c#copy_process

copy_process 流程

追踪copy_process 代码(部分)

/*
 * copy_process() as abridged by the article: builds the child's
 * task_struct from `current` and returns it.
 *
 * This is a partial listing: "// ..." marks omitted code, the bad_fork_*
 * cleanup labels targeted by the gotos are not shown, and the excerpt ends
 * at `return p;` without the function's closing brace.
 */
static struct task_struct *copy_process(unsigned long clone_flags,
                    unsigned long stack_start,
                    unsigned long stack_size,
                    int __user *child_tidptr,
                    struct pid *pid,
                    int trace)
{
    int retval;

    // pointer to the new process descriptor
    struct task_struct *p;

    // ... (code omitted in this excerpt)

    // duplicate the current task_struct
    p = dup_task_struct(current);

    // ... (code omitted in this excerpt)

    // initialise the rt-mutex related state of the task
    rt_mutex_init_task(p);

    // check the owning user's process count against the RLIMIT_NPROC limit;
    // INIT_USER and callers with CAP_SYS_RESOURCE or CAP_SYS_ADMIN are exempt
    if (atomic_read(&p->real_cred->user->processes) >=
            task_rlimit(p, RLIMIT_NPROC)) {
        if (p->real_cred->user != INIT_USER &&
            !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
            goto bad_fork_free;
    }

    // ... (code omitted in this excerpt)

    // check the thread count against max_threads (per the article, derived from memory size)
    if (nr_threads >= max_threads)
        goto bad_fork_cleanup_count;

    // ... (code omitted in this excerpt)

    // initialise the allocation spinlock
    spin_lock_init(&p->alloc_lock);
    // initialise the pending-signal set
    init_sigpending(&p->pending);
    // initialise the CPU timers
    posix_cpu_timers_init(p);

    // ... (code omitted in this excerpt)

    // scheduler setup for the new task (per the article, this also sets the state to TASK_RUNNING)
    retval = sched_fork(clone_flags, p);

    // from here on every step that fails jumps to the matching cleanup label;
    // the copy_* calls below copy or share files, fs info, signal handlers,
    // signals, memory and namespaces according to clone_flags
    if (retval)
        goto bad_fork_cleanup_policy;

    retval = perf_event_init_task(p);
    if (retval)
        goto bad_fork_cleanup_policy;
    retval = audit_alloc(p);
    if (retval)
        goto bad_fork_cleanup_perf;
    /* copy all the process information */
    shm_init_task(p);
    retval = copy_semundo(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_audit;
    retval = copy_files(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_semundo;
    retval = copy_fs(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_files;
    retval = copy_sighand(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_fs;
    retval = copy_signal(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_sighand;
    retval = copy_mm(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_signal;
    retval = copy_namespaces(clone_flags, p);
    if (retval)
        goto bad_fork_cleanup_mm;
    // NOTE(review): in this excerpt the results of copy_io() and copy_thread()
    // are never tested (copy_io's is overwritten immediately) -- the error
    // checks were presumably dropped when the article abridged the code.
    retval = copy_io(clone_flags, p);

    // initialise the child's kernel stack
    retval = copy_thread(clone_flags, stack_start, stack_size, p);

    // allocate a new pid for the new process unless init_struct_pid was passed in
    if (pid != &init_struct_pid) {
        retval = -ENOMEM;
        pid = alloc_pid(p->nsproxy->pid_ns_for_children);
        if (!pid)
            goto bad_fork_cleanup_io;
    }

    // store the pid number in the child
    p->pid = pid_nr(pid);

    // ... (code omitted in this excerpt)

    // return the new task_struct
    return p;
  • 调用 dup_task_struct 复制当前的 task_struct
  • 检查进程数是否超过限制
  • 初始化自旋锁、挂起信号、CPU 定时器等
  • 调用 sched_fork 初始化进程数据结构,并把进程状态设置为
    TASK_RUNNING
  • 复制所有进程信息,包括文件系统、信号处理函数、信号、内存管理等
  • 调用 copy_thread 初始化子进程内核栈
  • 为新进程分配并设置新的 pid

sys_fork的实现


不同体系结构下的fork实现sys_fork主要是通过标志集合区分,
在大多数体系结构上, 典型的fork实现方式如下

早期实现

架构 实现
arm arch/arm/kernel/sys_arm.c, line 239
i386 arch/i386/kernel/process.c, line 710
x86_64 arch/x86_64/kernel/process.c, line 706
/*
 * Early sys_fork entry point.  Receives the saved register set by value and
 * calls do_fork() with SIGCHLD as the only flag, the rsp value saved in
 * @regs, a pointer to @regs, and 0 as the last argument.
 */
asmlinkage long sys_fork(struct pt_regs regs)
{
    return do_fork(SIGCHLD, regs.rsp, &regs, 0);
}

 

新版本

/*
 * Newer fork system call: same shape as the older version, but it calls
 * _do_fork(), which takes one extra trailing argument (passed as 0 here).
 * SIGCHLD is the only flag.  Returns -EINVAL when built without CONFIG_MMU.
 */
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMU
        return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0);
#else
        /* can not support in nommu mode */
        return -EINVAL;
#endif
}
#endif

 

我们可以看到唯一使用的标志是SIGCHLD。这意味着在子进程终止后将发送信号SIGCHLD通知父进程,

由于写时复制(COW)技术, 最初父子进程的栈地址相同,
但是如果操作栈地址并写入数据,
则COW机制会为每个进程分别创建一个新的栈副本

如果do_fork成功, 则新建进程的pid作为系统调用的结果返回, 否则返回错误码

gdb跟踪sys_clone

  • 用GDB来跟踪sys_clone,设置以下断点:

图片 8

fork1.png

  • 运行后首先停在sys_clone处:

图片 9

  • 然后是do_fork,之后是copy_process:

图片 10

fork3.png

  • 进入copy_thread:

图片 11

fork4.png

  • 在copy_thread中,我们可以查看p的值

图片 12

fork5.png

  • 但是回到copy_process后再查看,将得到一个value optimized out的提示,这是因为Linux内核打开gcc的-O2选项优化导致.如果想要关掉,可以参考:这里

图片 13

fork6.png

  • ret_from_fork按照之前的分析被调用,跟踪到syscall_exit后无法继续.如果想在本机调试system
    call,那么当你进入system
    call时,系统已经在挂起状态了。如果想要跟踪调试system_call,可以使用kgdb等

图片 14

fork7.png

新进程执行起点对应的堆栈状态分析

gdb调试解析

图片 15

1240    p = dup_task_struct(current);      // 复制PCB

dup_task_struct 流程

/*
 * Abridged view of dup_task_struct(): allocate a task_struct and a
 * thread_info for the child and link them through tsk->stack.
 * Returns the new task_struct, or NULL if the task_struct allocation fails.
 * The free_tsk label and the uses of `err` are in the omitted code.
 */
static struct task_struct *dup_task_struct(struct task_struct *orig)
{
    struct task_struct *tsk;
    struct thread_info *ti;
    int node = tsk_fork_get_node(orig);
    int err;

    // allocate a task_struct node
    tsk = alloc_task_struct_node(node);
    if (!tsk)
        return NULL;

    // allocate a thread_info node; per the article it contains the kernel stack and ti is the stack bottom
    ti = alloc_thread_info_node(tsk, node);
    if (!ti)
        goto free_tsk;

    // make the new task's stack pointer refer to that allocation
    tsk->stack = ti;

    // ... (code omitted in this excerpt)

    return tsk;

}

调用alloc_task_struct_node分配一个 task_struct 节点

调用alloc_thread_info_node分配一个 thread_info
节点,其实是分配了一个thread_union联合体,将栈底返回给 ti

/*
 * Overlays a struct thread_info with an array of
 * THREAD_SIZE/sizeof(long) unsigned longs, so that the thread_info and the
 * stack array share the same memory.
 */
union thread_union {
   struct thread_info thread_info;
  unsigned long stack[THREAD_SIZE/sizeof(long)];
};

最后将栈底的值 ti 赋值给新节点的栈

最终执行完dup_task_struct之后,子进程除了tsk->stack指针不同之外,全部都一样!

sys_vfork的实现


早期实现

架构 实现
arm arch/arm/kernel/sys_arm.c, line 254
i386 arch/i386/kernel/process.c, line 737
x86_64 arch/x86_64/kernel/process.c, line 728
/*
 * Early sys_vfork entry point.  Identical in shape to the early sys_fork,
 * except that CLONE_VFORK and CLONE_VM are OR-ed into the flags passed to
 * do_fork() alongside SIGCHLD.
 */
asmlinkage long sys_vfork(struct pt_regs regs)
{
    return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, &regs, 0);
}

 

新版本

/*
 * Newer vfork system call: calls _do_fork() with
 * CLONE_VFORK | CLONE_VM | SIGCHLD and zero/NULL for all remaining
 * arguments, including the extra trailing argument.
 */
#ifdef __ARCH_WANT_SYS_VFORK
SYSCALL_DEFINE0(vfork)
{
        return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
                        0, NULL, NULL, 0);
}
#endif

 

可以见见sys_vfork的落实与sys_fork只是微微差别,
前面叁个使用了额外的标记CLONE_VFORK | CLONE_VM

新历程是从何地开首履行的?

在早前的分析中,聊起copy_process中的copy_thread()函数,就是以此函数决定了子进度从系统调用中返回后的实践.

/*
 * Excerpt of copy_thread(); the "..." lines stand for code omitted by the
 * article (including the declaration of childregs).  The statements kept
 * show how the child's saved registers and first instruction are set up.
 */
int copy_thread(unsigned long clone_flags, unsigned long sp,
    unsigned long arg, struct task_struct *p)
{
    ...

    /* the child's saved registers start as a copy of the current task's */
    *childregs = *current_pt_regs();
    /* ax is zeroed in the child's copy -- presumably why fork() returns 0 in the child */
    childregs->ax = 0;
    /* a non-zero sp replaces the stack pointer inherited from the parent */
    if (sp)
        childregs->sp = sp;

    /* the child's saved instruction pointer is ret_from_fork */
    p->thread.ip = (unsigned long) ret_from_fork;

    ...
}

总结

Linux创建一个新进程的过程:系统通过sys_fork、sys_clone、sys_vfork三个系统调用中的任意一个创建新进程,这三个系统调用都调用do_fork()函数,由do_fork()函数调用其他函数复制父进程的PCB,创建新进程的内核栈,然后根据创建时的参数修改新进程PCB中的信息将其与父进程区分开来,为子进程分配新的PID号,最后将其返回用户态。

看一下dup_task_struct     
/linux-3.18.6/kernel/fork.c#dup_task_struct

sched_fork 流程

core.c

/*
 * sched_fork - abridged excerpt of the scheduler-side setup for a new task.
 *
 * @clone_flags: forwarded to __sched_fork().
 * @p:           the new task being set up.
 *
 * Always returns 0 in the lines shown.  The local `flags` is not used in
 * the visible lines; presumably it belongs to the elided section.
 */
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
    unsigned long flags;
    int cpu = get_cpu();

    __sched_fork(clone_flags, p);

    // Mark the child as TASK_RUNNING.
    p->state = TASK_RUNNING;

    // ... (elided in this excerpt)

    // Assign the child to the CPU obtained from get_cpu() above.
    set_task_cpu(p, cpu);

    put_cpu();
    return 0;
}

我们可以看到sched_fork大致完成了两项重要工作,一是将子进程状态设置为
TASK_RUNNING,二是为其分配 CPU

sys_clone的实现


早期实现

架构 实现
arm arch/arm/kernel/sys_arm.c, line 247
i386 arch/i386/kernel/process.c, line 715
x86_64 arch/x86_64/kernel/process.c, line 711

sys_clone的实现方式与上述系统调用类似, 但实际差别在于do_fork如下调用

/*
 * sys_clone - early i386 clone entry point.
 *
 * @regs: saved register set of the caller.  The clone flags are read from
 *        regs.ebx and the requested child stack pointer from regs.ecx.
 *
 * Returns whatever do_fork() returns.
 */
asmlinkage int sys_clone(struct pt_regs regs)
{
    /*
     * i386-specific: the two locals and the two assignments below exist
     * only in the i386 version; other architectures do not define them
     * here.
     */
    unsigned long clone_flags;
    unsigned long newsp;

    clone_flags = regs.ebx;
    newsp = regs.ecx;

    /* No child stack requested: fall back to the caller's regs.esp. */
    if (!newsp)
        newsp = regs.esp;
    return do_fork(clone_flags, newsp, &regs, 0);
}

 

新版本

/*
 * clone system call.  The argument order depends on the architecture's
 * configuration; every variant ends up in the same body below.
 */
#ifdef __ARCH_WANT_SYS_CLONE
#ifdef CONFIG_CLONE_BACKWARDS
/* Order: clone_flags, newsp, parent_tidptr, tls, child_tidptr. */
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
                 int __user *, parent_tidptr,
                 unsigned long, tls,
                 int __user *, child_tidptr)
#elif defined(CONFIG_CLONE_BACKWARDS2)
/* Order: newsp, clone_flags, parent_tidptr, child_tidptr, tls. */
SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
                 int __user *, parent_tidptr,
                 int __user *, child_tidptr,
                 unsigned long, tls)
#elif defined(CONFIG_CLONE_BACKWARDS3)
/* Six arguments: an extra stack_size sits between newsp and parent_tidptr. */
SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
                int, stack_size,
                int __user *, parent_tidptr,
                int __user *, child_tidptr,
                unsigned long, tls)
#else
/* Default order: clone_flags, newsp, parent_tidptr, child_tidptr, tls. */
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
                 int __user *, parent_tidptr,
                 int __user *, child_tidptr,
                 unsigned long, tls)
#endif
{
        /*
         * The third argument of _do_fork() is always 0 here, so the
         * stack_size accepted by the six-argument variant is not forwarded.
         */
        return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls);
}
#endif

 

我们可以看到sys_clone的标识不再是硬编码的,
而是通过各个寄存器参数传递到系统调用, 因此我们需要提取这些参数。

另外,clone也不再复制进程的栈, 而是可以指定新的栈地址, 在生成线程时,
可能需要这样做, 线程可能与父进程共享地址空间,
但是线程自身的栈可能在另外一个地址空间

另外还指定了用户空间的两个指针(parent_tidptr和child_tidptr),
用于与线程库通信

子进程执行ret_from_fork

/*
 * ret_from_fork: first code executed by a newly created child (its saved
 * ip is set to this label in copy_thread).  It calls schedule_tail, resets
 * the kernel eflags, and then joins the common syscall exit path.
 */
ENTRY(ret_from_fork)
    CFI_STARTPROC
    pushl_cfi %eax          # keep %eax across the call (presumably the previous task - confirm)
    call schedule_tail
    GET_THREAD_INFO(%ebp)   # presumably loads the current thread_info pointer into %ebp
    popl_cfi %eax           # restore the value pushed above
    pushl_cfi $0x0202       # Reset kernel eflags
    popfl_cfi
    jmp syscall_exit        # continue on the normal system-call exit path
    CFI_ENDPROC
END(ret_from_fork)

参考资料

《Linux内核设计与实现》原书第三版

Sawoom原创作品转载请注明出处
《Linux内核分析》MOOC课程http://mooc.study.163.com/course/USTC-1000029000

320    err = arch_dup_task_struct(tsk, orig);      // 执行复制,orig 当前进程

316   ti = alloc_thread_info_node(tsk, node);      // 实际就是alloc一个内核堆栈

324    tsk->stack = ti;      // 把alloc后返回的地址赋给stack

copy_thread 流程

/*
 * copy_thread - abridged excerpt: set up the child's saved registers and
 * the address at which it will start running.
 *
 * @clone_flags: not used in the visible lines.
 * @sp:          for a user task, a non-zero value becomes the child's stack
 *               pointer; for a kernel thread it is stored in bx (function).
 * @arg:         for a kernel thread, stored in bp.
 * @p:           the new task.
 *
 * Returns 0 for a kernel thread.  Otherwise returns err, which is not
 * assigned in the visible lines (presumably set in the elided section).
 */
int copy_thread(unsigned long clone_flags, unsigned long sp,
    unsigned long arg, struct task_struct *p)
{
    // Locate the child's saved-register area.
    struct pt_regs *childregs = task_pt_regs(p);
    struct task_struct *tsk;
    int err;

    // sp points at the register area, sp0 just past it.
    p->thread.sp = (unsigned long) childregs;
    p->thread.sp0 = (unsigned long) (childregs+1);
    memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

    if (unlikely(p->flags & PF_KTHREAD)) {
        // Kernel thread: start from a zeroed register set and begin
        // execution at ret_from_kernel_thread.
        memset(childregs, 0, sizeof(struct pt_regs));
        p->thread.ip = (unsigned long) ret_from_kernel_thread;
        task_user_gs(p) = __KERNEL_STACK_CANARY;
        childregs->ds = __USER_DS;
        childregs->es = __USER_DS;
        childregs->fs = __KERNEL_PERCPU;
        childregs->bx = sp;  /* function */
        childregs->bp = arg;
        childregs->orig_ax = -1;
        childregs->cs = __KERNEL_CS | get_kernel_rpl();
        childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
        p->thread.io_bitmap_ptr = NULL;
        return 0;
    }

    // Copy the current task's saved registers to the child.
    *childregs = *current_pt_regs();

    // Zero the child's ax, which is why fork returns 0 in the child.
    childregs->ax = 0;
    if (sp)
        childregs->sp = sp;

    // Set the child's ip to ret_from_fork, so the child starts executing there.
    p->thread.ip = (unsigned long) ret_from_fork;

    // ... (elided in this excerpt)

    return err;
}

copy_thread 这段代码为我们解释了两个非常重要的问题!

一是,为什么 fork
在子进程中返回0,原因是childregs->ax = 0;这段代码将子进程的 eax
赋值为0

二是,p->thread.ip = (unsigned long) ret_from_fork;将子进程的 ip
设置为 ret_from_fork 的首地址,因此子进程是从 ret_from_fork
开始执行的

创建子进程的流程


执行起点与内核堆栈如何保证一致?

  1. 在ret_from_fork之前,也就是在copy_thread()函数中*childregs = *current_pt_regs();该句将父进程的regs参数赋值到子进程的内核堆栈,
  2. *childregs的类型为pt_regs,里面存放了SAVE ALL中压入栈的参数
  3. 故在之后的RESTORE ALL中能顺利执行下去.

看一下arch_dup_task_struct    
/linux-3.18.6/kernel/fork.c#arch_dup_task_struct

总结

新进程的执行源于以下前提:

  • dup_task_struct中为其分配了新的堆栈
  • 调用了sched_fork,将其置为TASK_RUNNING
  • copy_thread中将父进程的寄存器上下文复制给子进程,保证了父子进程的堆栈信息是一致的
  • 将ret_from_fork的地址设置为eip寄存器的值

最后子进程从ret_from_fork开始执行

_do_fork和早期do_fork的流程


_do_fork和do_fork在进程的复制的时候并没有太大的区别,
他们就只是在进程tls复制的过程中实现有细微差别

所有进程复制(创建)的fork机制最终都调用了kernel/fork.c中的_do_fork(一个体系结构无关的函数),

其定义在

_do_fork以调用copy_process开始, 后者执行生成新的进程的实际工作,
并根据指定的标志复制父进程的数据。在子进程生成后,
内核必须执行下列收尾操作:

  1. 调用 copy_process 为子进程复制出一份进程信息

  2. 如果是 vfork(设置了CLONE_VFORK和ptrace标志)初始化完成处理信息

  3. 调用 wake_up_new_task 将子进程加入调度器,为之分配 CPU

  4. 如果是 vfork,父进程等待子进程完成 exec 替换自己的地址空间

对比,我们从《深入linux内核架构》中找到了早期的do_fork流程图,基本一致,可以用来作为参考学习和对比

图片 16

/*
 * _do_fork - architecture-independent worker behind fork, vfork and clone.
 *
 * @clone_flags:   CLONE_* flags; the bits under CSIGNAL are compared with
 *                 SIGCHLD to tell a fork from a clone for ptrace reporting.
 * @stack_start:   passed through to copy_process().
 * @stack_size:    passed through to copy_process().
 * @parent_tidptr: user pointer that receives the new pid number when
 *                 CLONE_PARENT_SETTID is set.
 * @child_tidptr:  passed through to copy_process().
 * @tls:           passed through to copy_process().
 *
 * Returns the child's pid number (from pid_vnr()) on success, or
 * PTR_ERR() of copy_process()'s result on failure.
 */
long _do_fork(unsigned long clone_flags,
              unsigned long stack_start,
              unsigned long stack_size,
              int __user *parent_tidptr,
              int __user *child_tidptr,
              unsigned long tls)
{
    struct completion vfork;
    struct task_struct *p;
    struct pid *pid;
    int trace = 0;
    long nr;

    /*
     * Determine whether and which event to report to ptracer.  When
     * called from kernel_thread or CLONE_UNTRACED is explicitly
     * requested, no event is reported; otherwise, report if the event
     * for the type of forking is enabled.
     */
    if (!(clone_flags & CLONE_UNTRACED)) {
        if (clone_flags & CLONE_VFORK)
            trace = PTRACE_EVENT_VFORK;
        else if ((clone_flags & CSIGNAL) == SIGCHLD)
            trace = PTRACE_EVENT_FORK;
        else
            trace = PTRACE_EVENT_CLONE;

        if (likely(!ptrace_event_enabled(current, trace)))
            trace = 0;
    }

    /* Duplicate the process descriptor; the result is a task_struct pointer. */
    p = copy_process(clone_flags, stack_start, stack_size,
                     child_tidptr, NULL, trace, tls);
    if (IS_ERR(p))
        return PTR_ERR(p);

    /*
     * Do this prior waking up the new thread - the thread pointer
     * might get invalid after that point, if the thread exits quickly.
     */
    trace_sched_process_fork(current, p);

    /* Fetch the pid information of the newly created task. */
    pid = get_task_pid(p, PIDTYPE_PID);
    nr = pid_vnr(pid);

    if (clone_flags & CLONE_PARENT_SETTID)
        put_user(nr, parent_tidptr);

    /* For vfork, prepare the completion the parent will wait on. */
    if (clone_flags & CLONE_VFORK) {
        p->vfork_done = &vfork;
        init_completion(&vfork);
        get_task_struct(p);
    }

    /* Hand the child to the scheduler so it can be given a CPU and run. */
    wake_up_new_task(p);

    /* forking complete and child started to run, tell ptracer */
    if (unlikely(trace))
        ptrace_event_pid(trace, pid);

    /* For vfork, the parent waits here until the child is done. */
    if ((clone_flags & CLONE_VFORK) && !wait_for_vfork_done(p, &vfork))
        ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);

    put_pid(pid);
    return nr;
}

 

总结

  1. Linux通过复制父进程来创建一个新进程,通过调用do_fork来实现
  2. Linux为每个新创建的进程动态地分配一个task_struct结构.
  3. 为了把内核中的所有进程组织起来,Linux提供了几种组织方式,其中哈希表和双向循环链表方法是针对系统中的所有进程(包括内核线程),而运行队列和等待队列是把处于同一状态的进程组织起来
  4. fork()函数被调用一次,但返回两次
290int __weak arch_dup_task_struct(struct task_struct *dst,
291                          struct task_struct *src)
292{
293   *dst = *src;      // 就是把数据结构加*,原来它是数据结构的指针,加*,表示它的值
294   return 0;
295}

发表评论

电子邮件地址不会被公开。 必填项已用*标注

相关文章