5045-use-atomic_inc_*-atomic_dec_*-instead-of-atomic_add_* Wdiff usr/src/uts/common/os/fork.c

Print this page

5045 use atomic_{inc,dec}_* instead of atomic_add_*

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/os/fork.c
          +++ new/usr/src/uts/common/os/fork.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  27   27  /*        All Rights Reserved   */
  28   28  
  29   29  #include <sys/types.h>
  30   30  #include <sys/param.h>
  31   31  #include <sys/sysmacros.h>
  32   32  #include <sys/signal.h>
  33   33  #include <sys/cred.h>
  34   34  #include <sys/policy.h>
  35   35  #include <sys/user.h>
  36   36  #include <sys/systm.h>
  37   37  #include <sys/cpuvar.h>
  38   38  #include <sys/vfs.h>
  39   39  #include <sys/vnode.h>
  40   40  #include <sys/file.h>
  41   41  #include <sys/errno.h>
  42   42  #include <sys/time.h>
  43   43  #include <sys/proc.h>
  44   44  #include <sys/cmn_err.h>
  45   45  #include <sys/acct.h>
  46   46  #include <sys/tuneable.h>
  47   47  #include <sys/class.h>
  48   48  #include <sys/kmem.h>
  49   49  #include <sys/session.h>
  50   50  #include <sys/ucontext.h>
  51   51  #include <sys/stack.h>
  52   52  #include <sys/procfs.h>
  53   53  #include <sys/prsystm.h>
  54   54  #include <sys/vmsystm.h>
  55   55  #include <sys/vtrace.h>
  56   56  #include <sys/debug.h>
  57   57  #include <sys/shm_impl.h>
  58   58  #include <sys/door_data.h>
  59   59  #include <vm/as.h>
  60   60  #include <vm/rm.h>
  61   61  #include <c2/audit.h>
  62   62  #include <sys/var.h>
  63   63  #include <sys/schedctl.h>
  64   64  #include <sys/utrap.h>
  65   65  #include <sys/task.h>
  66   66  #include <sys/resource.h>
  67   67  #include <sys/cyclic.h>
  68   68  #include <sys/lgrp.h>
  69   69  #include <sys/rctl.h>
  70   70  #include <sys/contract_impl.h>
  71   71  #include <sys/contract/process_impl.h>
  72   72  #include <sys/list.h>
  73   73  #include <sys/dtrace.h>
  74   74  #include <sys/pool.h>
  75   75  #include <sys/zone.h>
  76   76  #include <sys/sdt.h>
  77   77  #include <sys/class.h>
  78   78  #include <sys/corectl.h>
  79   79  #include <sys/brand.h>
  80   80  #include <sys/fork.h>
  81   81  
  82   82  static int64_t cfork(int, int, int);
  83   83  static int getproc(proc_t **, pid_t, uint_t);
           /*
            * Flag values for the uint_t (third) argument of getproc().
            * cfork() passes GETPROC_USER.
            * NOTE(review): GETPROC_KERNEL is not used in the part of the file
            * visible here -- confirm its meaning against getproc() itself.
            */
  84   84  #define GETPROC_USER    0x0
  85   85  #define GETPROC_KERNEL  0x1
  86   86  
           /* Cleanup helpers used by cfork()'s failure paths. */
  87   87  static void fork_fail(proc_t *);
  88   88  static void forklwp_fail(proc_t *);
  89   89  
  90   90  int fork_fail_pending;
  91   91  
  92   92  extern struct kmem_cache *process_cache;
  93   93  
  94   94  /*
  95   95   * The vfork() system call trap is no longer invoked by libc.
  96   96   * It is retained only for the benefit of applications running
  97   97   * within a solaris10 branded zone.  It should be eliminated
  98   98   * when we no longer support solaris10 branded zones.
            *
            * Equivalent to the vforkx case of forksys() with no flags: it sets
            * t_post_sys on the calling thread and returns the result of
            * cfork(isvfork = 1, isfork1 = 1, flags = 0).
  99   99   */
 100  100  int64_t
 101  101  vfork(void)
 102  102  {
 103  103          curthread->t_post_sys = 1;      /* so vfwait() will be called */
 104  104          return (cfork(1, 1, 0));
 105  105  }
 106  106  
 107  107  /*
 108  108   * forksys system call - forkx, forkallx, vforkx.  This is the
 109  109   * interface invoked by libc for fork1(), forkall(), and vfork().
            *
            * subcode selects the variant: 0 is forkx (only the calling lwp is
            * duplicated), 1 is forkallx (all lwps are duplicated) and 2 is
            * vforkx.  Any other subcode fails with EINVAL.  flags is handed to
            * cfork() unchanged; cfork() is what validates it.
 110  110   */
 111  111  int64_t
 112  112  forksys(int subcode, int flags)
 113  113  {
 114  114          switch (subcode) {
 115  115          case 0:
 116  116                  return (cfork(0, 1, flags));    /* forkx(flags) */
 117  117          case 1:
 118  118                  return (cfork(0, 0, flags));    /* forkallx(flags) */
 119  119          case 2:
 120  120                  curthread->t_post_sys = 1;      /* so vfwait() will be called */
 121  121                  return (cfork(1, 1, flags));    /* vforkx(flags) */
 122  122          default:
 123  123                  return ((int64_t)set_errno(EINVAL));
 124  124          }
 125  125  }
 126  126  
 127  127  /* ARGSUSED */
           /*
            * Common implementation behind vfork() and forksys(): create a child
            * process (cp) of the calling process (p).
            *
            * isvfork - nonzero for vfork semantics: the child is handed the
            *           parent's address space and shm segment list and is
            *           flagged SVFORK instead of receiving a copy made by
            *           as_dup(); the parent is flagged SVFWAIT.
            * isfork1 - nonzero to duplicate only the calling lwp; zero
            *           (forkall) to duplicate every entry in the parent's lwp
            *           directory, zombie entries included.
            * flags   - may contain only FORK_NOSIGCHLD and/or FORK_WAITPID,
            *           which set CLDNOSIGCHLD / CLDWAITPID in cp->p_pidflag.
            *
            * On success the parent's return value is r.r_vals, with r_val1 set
            * to the child's pid and r_val2 set to 0.  On failure the result of
            * set_errno(error) is returned, where error is EINVAL (bad flags),
            * ENOTSUP (caller is the /proc agent lwp), whatever
            * secpolicy_basic_fork() returned, EINTR (holdlwps() failed),
            * ENOMEM (preserved from as_dup()) or EAGAIN (everything else).
            */
 128  128  static int64_t
 129  129  cfork(int isvfork, int isfork1, int flags)
 130  130  {
 131  131          proc_t *p = ttoproc(curthread);
 132  132          struct as *as;
 133  133          proc_t *cp, **orphpp;
 134  134          klwp_t *clone;
 135  135          kthread_t *t;
 136  136          task_t *tk;
 137  137          rval_t  r;
 138  138          int error;
 139  139          int i;
 140  140          rctl_set_t *dup_set;
 141  141          rctl_alloc_gp_t *dup_gp;
 142  142          rctl_entity_p_t e;
 143  143          lwpdir_t *ldp;
 144  144          lwpent_t *lep;
 145  145          lwpent_t *clep;
 146  146  
 147  147          /*
 148  148           * Allow only these two flags.
 149  149           */
 150  150          if ((flags & ~(FORK_NOSIGCHLD | FORK_WAITPID)) != 0) {
 151  151                  error = EINVAL;
 152  152                  goto forkerr;
 153  153          }
 154  154  
 155  155          /*
 156  156           * fork is not supported for the /proc agent lwp.
 157  157           */
 158  158          if (curthread == p->p_agenttp) {
 159  159                  error = ENOTSUP;
 160  160                  goto forkerr;
 161  161          }
 162  162  
 163  163          if ((error = secpolicy_basic_fork(CRED())) != 0)
 164  164                  goto forkerr;
 165  165  
 166  166          /*
 167  167           * If the calling lwp is doing a fork1() then the
 168  168           * other lwps in this process are not duplicated and
 169  169           * don't need to be held where their kernel stacks can be
 170  170           * cloned.  If doing forkall(), the process is held with
 171  171           * SHOLDFORK, so that the lwps are at a point where their
 172  172           * stacks can be copied which is on entry or exit from
 173  173           * the kernel.
 174  174           */
 175  175          if (!holdlwps(isfork1 ? SHOLDFORK1 : SHOLDFORK)) {
 176  176                  aston(curthread);
 177  177                  error = EINTR;
 178  178                  goto forkerr;
 179  179          }
 180  180  
 181  181  #if defined(__sparc)
 182  182          /*
 183  183           * Ensure that the user stack is fully constructed
 184  184           * before creating the child process structure.
 185  185           */
 186  186          (void) flush_user_windows_to_stack(NULL);
 187  187  #endif
 188  188  
 189  189          mutex_enter(&p->p_lock);
 190  190          /*
 191  191           * If this is vfork(), cancel any suspend request we might
 192  192           * have gotten from some other thread via lwp_suspend().
 193  193           * Otherwise we could end up with a deadlock on return
 194  194           * from the vfork() in both the parent and the child.
 195  195           */
 196  196          if (isvfork)
 197  197                  curthread->t_proc_flag &= ~TP_HOLDLWP;
 198  198          /*
 199  199           * Prevent our resource set associations from being changed during fork.
 200  200           */
 201  201          pool_barrier_enter();
 202  202          mutex_exit(&p->p_lock);
 203  203  
 204  204          /*
 205  205           * Create a child proc struct. Place a VN_HOLD on appropriate vnodes.
 206  206           */
 207  207          if (getproc(&cp, 0, GETPROC_USER) < 0) {
 208  208                  mutex_enter(&p->p_lock);
 209  209                  pool_barrier_exit();
 210  210                  continuelwps(p);
 211  211                  mutex_exit(&p->p_lock);
 212  212                  error = EAGAIN;
 213  213                  goto forkerr;
 214  214          }
 215  215  
 216  216          TRACE_2(TR_FAC_PROC, TR_PROC_FORK, "proc_fork:cp %p p %p", cp, p);
 217  217  
 218  218          /*
 219  219           * Assign an address space to child
 220  220           */
 221  221          if (isvfork) {
 222  222                  /*
 223  223                   * Clear any watched areas and remember the
 224  224                   * watched pages for restoring in vfwait().
 225  225                   */
 226  226                  as = p->p_as;
 227  227                  if (avl_numnodes(&as->a_wpage) != 0) {
 228  228                          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 229  229                          as_clearwatch(as);
 230  230                          p->p_wpage = as->a_wpage;
 231  231                          avl_create(&as->a_wpage, wp_compare,
 232  232                              sizeof (struct watched_page),
 233  233                              offsetof(struct watched_page, wp_link));
 234  234                          AS_LOCK_EXIT(as, &as->a_lock);
 235  235                  }
 236  236                  cp->p_as = as;
 237  237                  cp->p_flag |= SVFORK;
 238  238  
 239  239                  /*
 240  240                   * Use the parent's shm segment list information for
 241  241                   * the child as it uses its address space till it execs.
 242  242                   */
 243  243                  cp->p_segacct = p->p_segacct;
 244  244          } else {
 245  245                  /*
 246  246                   * We need to hold P_PR_LOCK until the address space has
 247  247                   * been duplicated and we've had a chance to remove from the
 248  248                   * child any DTrace probes that were in the parent. Holding
 249  249                   * P_PR_LOCK prevents any new probes from being added and any
 250  250                   * extant probes from being removed.
 251  251                   */
 252  252                  mutex_enter(&p->p_lock);
 253  253                  sprlock_proc(p);
 254  254                  p->p_flag |= SFORKING;
 255  255                  mutex_exit(&p->p_lock);
 256  256  
 257  257                  error = as_dup(p->p_as, cp);
 258  258                  if (error != 0) {
 259  259                          mutex_enter(&p->p_lock);
 260  260                          sprunlock(p);
 261  261                          fork_fail(cp);
                                   /*
                                    * Under pidlock, unlink the failed child from the
                                    * parent's orphan list and child/sibling chain,
                                    * then detach it from its task and drop its pool
                                    * reference before giving up its pid.
                                    */
 262  262                          mutex_enter(&pidlock);
 263  263                          orphpp = &p->p_orphan;
 264  264                          while (*orphpp != cp)
 265  265                                  orphpp = &(*orphpp)->p_nextorph;
 266  266                          *orphpp = cp->p_nextorph;

↓ open down ↓

266 lines elided

↑ open up ↑

 267  267                          if (p->p_child == cp)
 268  268                                  p->p_child = cp->p_sibling;
 269  269                          if (cp->p_sibling)
 270  270                                  cp->p_sibling->p_psibling = cp->p_psibling;
 271  271                          if (cp->p_psibling)
 272  272                                  cp->p_psibling->p_sibling = cp->p_sibling;
 273  273                          mutex_enter(&cp->p_lock);
 274  274                          tk = cp->p_task;
 275  275                          task_detach(cp);
 276  276                          ASSERT(cp->p_pool->pool_ref > 0);
 277      -                        atomic_add_32(&cp->p_pool->pool_ref, -1);
      277 +                        atomic_dec_32(&cp->p_pool->pool_ref);
 278  278                          mutex_exit(&cp->p_lock);
 279  279                          pid_exit(cp, tk);
 280  280                          mutex_exit(&pidlock);
 281  281                          task_rele(tk);
 282  282  
 283  283                          mutex_enter(&p->p_lock);
 284  284                          p->p_flag &= ~SFORKING;
 285  285                          pool_barrier_exit();
 286  286                          continuelwps(p);
 287  287                          mutex_exit(&p->p_lock);

 288  288                          /*
 289  289                           * Preserve ENOMEM error condition but
 290  290                           * map all others to EAGAIN.
 291  291                           */
 292  292                          error = (error == ENOMEM) ? ENOMEM : EAGAIN;
 293  293                          goto forkerr;
 294  294                  }
 295  295  
 296  296                  /*
 297  297                   * Remove all DTrace tracepoints from the child process. We
 298  298                   * need to do this _before_ duplicating USDT providers since
 299  299                   * any associated probes may be immediately enabled.
 300  300                   */
 301  301                  if (p->p_dtrace_count > 0)
 302  302                          dtrace_fasttrap_fork(p, cp);
 303  303  
 304  304                  mutex_enter(&p->p_lock);
 305  305                  sprunlock(p);
 306  306  
 307  307                  /* Duplicate parent's shared memory */
 308  308                  if (p->p_segacct)
 309  309                          shmfork(p, cp);
 310  310  
 311  311                  /*
 312  312                   * Duplicate any helper actions and providers. The SFORKING
 313  313                   * we set above informs the code that enables USDT probes that
 314  314                   * sprlock() may fail because the child is being forked.
 315  315                   */
 316  316                  if (p->p_dtrace_helpers != NULL) {
 317  317                          ASSERT(dtrace_helpers_fork != NULL);
 318  318                          (*dtrace_helpers_fork)(p, cp);
 319  319                  }
 320  320  
 321  321                  mutex_enter(&p->p_lock);
 322  322                  p->p_flag &= ~SFORKING;
 323  323                  mutex_exit(&p->p_lock);
 324  324          }
 325  325  
 326  326          /*
 327  327           * Duplicate parent's resource controls.
 328  328           */
 329  329          dup_set = rctl_set_create();
 330  330          for (;;) {
 331  331                  dup_gp = rctl_set_dup_prealloc(p->p_rctls);
 332  332                  mutex_enter(&p->p_rctls->rcs_lock);
 333  333                  if (rctl_set_dup_ready(p->p_rctls, dup_gp))
 334  334                          break;
 335  335                  mutex_exit(&p->p_rctls->rcs_lock);
 336  336                  rctl_prealloc_destroy(dup_gp);
 337  337          }
 338  338          e.rcep_p.proc = cp;
 339  339          e.rcep_t = RCENTITY_PROCESS;
 340  340          cp->p_rctls = rctl_set_dup(p->p_rctls, p, cp, &e, dup_set, dup_gp,
 341  341              RCD_DUP | RCD_CALLBACK);
 342  342          mutex_exit(&p->p_rctls->rcs_lock);
 343  343  
 344  344          rctl_prealloc_destroy(dup_gp);
 345  345  
 346  346          /*
 347  347           * Allocate the child's lwp directory and lwpid hash table.
 348  348           */
 349  349          if (isfork1)
 350  350                  cp->p_lwpdir_sz = 2;
 351  351          else
 352  352                  cp->p_lwpdir_sz = p->p_lwpdir_sz;
 353  353          cp->p_lwpdir = cp->p_lwpfree = ldp =
 354  354              kmem_zalloc(cp->p_lwpdir_sz * sizeof (lwpdir_t), KM_SLEEP);
                   /*
                    * Chain the directory slots into the free list headed by
                    * p_lwpfree: every slot but the last points at its successor,
                    * and the last keeps the zero ld_next left by kmem_zalloc().
                    */
 355  355          for (i = 1; i < cp->p_lwpdir_sz; i++, ldp++)
 356  356                  ldp->ld_next = ldp + 1;
 357  357          cp->p_tidhash_sz = (cp->p_lwpdir_sz + 2) / 2;
 358  358          cp->p_tidhash =
 359  359              kmem_zalloc(cp->p_tidhash_sz * sizeof (tidhash_t), KM_SLEEP);
 360  360  
 361  361          /*
 362  362           * Duplicate parent's lwps.
 363  363           * Mutual exclusion is not needed because the process is
 364  364           * in the hold state and only the current lwp is running.
 365  365           */
 366  366          klgrpset_clear(cp->p_lgrpset);
 367  367          if (isfork1) {
 368  368                  clone = forklwp(ttolwp(curthread), cp, curthread->t_tid);
 369  369                  if (clone == NULL)
 370  370                          goto forklwperr;
 371  371                  /*
 372  372                   * Inherit only the lwp_wait()able flag.
 373  373                   * Daemon threads should not call fork1(), but oh well...
 374  374                   */
 375  375                  lwptot(clone)->t_proc_flag |=
 376  376                      (curthread->t_proc_flag & TP_TWAIT);
 377  377          } else {
 378  378                  /* this is forkall(), no one can be in lwp_wait() */
 379  379                  ASSERT(p->p_lwpwait == 0 && p->p_lwpdwait == 0);
 380  380                  /* for each entry in the parent's lwp directory... */
 381  381                  for (i = 0, ldp = p->p_lwpdir; i < p->p_lwpdir_sz; i++, ldp++) {
 382  382                          klwp_t *clwp;
 383  383                          kthread_t *ct;
 384  384  
 385  385                          if ((lep = ldp->ld_entry) == NULL)
 386  386                                  continue;
 387  387  
 388  388                          if ((t = lep->le_thread) != NULL) {
 389  389                                  clwp = forklwp(ttolwp(t), cp, t->t_tid);
 390  390                                  if (clwp == NULL)
 391  391                                          goto forklwperr;
 392  392                                  ct = lwptot(clwp);
 393  393                                  /*
 394  394                                   * Inherit lwp_wait()able and daemon flags.
 395  395                                   */
 396  396                                  ct->t_proc_flag |=
 397  397                                      (t->t_proc_flag & (TP_TWAIT|TP_DAEMON));
 398  398                                  /*
 399  399                                   * Keep track of the clone of curthread to
 400  400                                   * post return values through lwp_setrval().
 401  401                                   * Mark other threads for special treatment
 402  402                                   * by lwp_rtt() / post_syscall().
 403  403                                   */
 404  404                                  if (t == curthread)
 405  405                                          clone = clwp;
 406  406                                  else
 407  407                                          ct->t_flag |= T_FORKALL;
 408  408                          } else {
 409  409                                  /*
 410  410                                   * Replicate zombie lwps in the child.
 411  411                                   */
 412  412                                  clep = kmem_zalloc(sizeof (*clep), KM_SLEEP);
 413  413                                  clep->le_lwpid = lep->le_lwpid;
 414  414                                  clep->le_start = lep->le_start;
 415  415                                  lwp_hash_in(cp, clep,
 416  416                                      cp->p_tidhash, cp->p_tidhash_sz, 0);
 417  417                          }
 418  418                  }
 419  419          }
 420  420  
 421  421          /*
 422  422           * Put new process in the parent's process contract, or put it
 423  423           * in a new one if there is an active process template.  Send a
 424  424           * fork event (if requested) to whatever contract the child is
 425  425           * a member of.  Fails if the parent has been SIGKILLed.
 426  426           */
 427  427          if (contract_process_fork(NULL, cp, p, B_TRUE) == NULL)
 428  428                  goto forklwperr;
 429  429  
 430  430          /*
 431  431           * No fork failures occur beyond this point.
 432  432           */
 433  433  
 434  434          cp->p_lwpid = p->p_lwpid;
 435  435          if (!isfork1) {
 436  436                  cp->p_lwpdaemon = p->p_lwpdaemon;
 437  437                  cp->p_zombcnt = p->p_zombcnt;
 438  438                  /*
 439  439                   * If the parent's lwp ids have wrapped around, so have the
 440  440                   * child's.
 441  441                   */
 442  442                  cp->p_flag |= p->p_flag & SLWPWRAP;
 443  443          }
 444  444  
 445  445          mutex_enter(&p->p_lock);
 446  446          corectl_path_hold(cp->p_corefile = p->p_corefile);
 447  447          corectl_content_hold(cp->p_content = p->p_content);
 448  448          mutex_exit(&p->p_lock);
 449  449  
 450  450          /*
 451  451           * Duplicate process context ops, if any.
 452  452           */
 453  453          if (p->p_pctx)
 454  454                  forkpctx(p, cp);
 455  455  
 456  456  #ifdef __sparc
 457  457          utrap_dup(p, cp);
 458  458  #endif
 459  459          /*
 460  460           * If the child process has been marked to stop on exit
 461  461           * from this fork, arrange for all other lwps to stop in
 462  462           * sympathy with the active lwp.
 463  463           */
 464  464          if (PTOU(cp)->u_systrap &&
 465  465              prismember(&PTOU(cp)->u_exitmask, curthread->t_sysnum)) {
 466  466                  mutex_enter(&cp->p_lock);
 467  467                  t = cp->p_tlist;
 468  468                  do {
 469  469                          t->t_proc_flag |= TP_PRSTOP;
 470  470                          aston(t);       /* so TP_PRSTOP will be seen */
 471  471                  } while ((t = t->t_forw) != cp->p_tlist);
 472  472                  mutex_exit(&cp->p_lock);
 473  473          }
 474  474          /*
 475  475           * If the parent process has been marked to stop on exit
 476  476           * from this fork, and its asynchronous-stop flag has not
 477  477           * been set, arrange for all other lwps to stop before
 478  478           * they return back to user level.
 479  479           */
 480  480          if (!(p->p_proc_flag & P_PR_ASYNC) && PTOU(p)->u_systrap &&
 481  481              prismember(&PTOU(p)->u_exitmask, curthread->t_sysnum)) {
 482  482                  mutex_enter(&p->p_lock);
 483  483                  t = p->p_tlist;
 484  484                  do {
 485  485                          t->t_proc_flag |= TP_PRSTOP;
 486  486                          aston(t);       /* so TP_PRSTOP will be seen */
 487  487                  } while ((t = t->t_forw) != p->p_tlist);
 488  488                  mutex_exit(&p->p_lock);
 489  489          }
 490  490  
                   /*
                    * Post the child's return values (the parent's pid and 1) through
                    * the clone of the calling lwp; a branded process uses its
                    * brand's b_lwp_setrval hook instead of lwp_setrval().
                    */
 491  491          if (PROC_IS_BRANDED(p))
 492  492                  BROP(p)->b_lwp_setrval(clone, p->p_pid, 1);
 493  493          else
 494  494                  lwp_setrval(clone, p->p_pid, 1);
 495  495  
 496  496          /* set return values for parent */
 497  497          r.r_val1 = (int)cp->p_pid;
 498  498          r.r_val2 = 0;
 499  499  
 500  500          /*
 501  501           * pool_barrier_exit() can now be called because the child process has:
 502  502           * - all identifying features cloned or set (p_pid, p_task, p_pool)
 503  503           * - all resource sets associated (p_tlist->*->t_cpupart, p_as->a_mset)
 504  504           * - any other fields set which are used in resource set binding.
 505  505           */
 506  506          mutex_enter(&p->p_lock);
 507  507          pool_barrier_exit();
 508  508          mutex_exit(&p->p_lock);
 509  509  
 510  510          mutex_enter(&pidlock);
 511  511          mutex_enter(&cp->p_lock);
 512  512  
 513  513          /*
 514  514           * Set flags telling the child what (not) to do on exit.
 515  515           */
 516  516          if (flags & FORK_NOSIGCHLD)
 517  517                  cp->p_pidflag |= CLDNOSIGCHLD;
 518  518          if (flags & FORK_WAITPID)
 519  519                  cp->p_pidflag |= CLDWAITPID;
 520  520  
 521  521          /*
 522  522           * Now that there are lwps and threads attached, add the new
 523  523           * process to the process group.
 524  524           */
 525  525          pgjoin(cp, p->p_pgidp);
 526  526          cp->p_stat = SRUN;
 527  527          /*
 528  528           * We are now done with all the lwps in the child process.
 529  529           */
 530  530          t = cp->p_tlist;
 531  531          do {
 532  532                  /*
 533  533                   * Set the lwp_suspend()ed lwps running.
 534  534                   * They will suspend properly at syscall exit.
 535  535                   */
 536  536                  if (t->t_proc_flag & TP_HOLDLWP)
 537  537                          lwp_create_done(t);
 538  538                  else {
 539  539                          /* set TS_CREATE to allow continuelwps() to work */
 540  540                          thread_lock(t);
 541  541                          ASSERT(t->t_state == TS_STOPPED &&
 542  542                              !(t->t_schedflag & (TS_CREATE|TS_CSTART)));
 543  543                          t->t_schedflag |= TS_CREATE;
 544  544                          thread_unlock(t);
 545  545                  }
 546  546          } while ((t = t->t_forw) != cp->p_tlist);
 547  547          mutex_exit(&cp->p_lock);
 548  548  
 549  549          if (isvfork) {
 550  550                  CPU_STATS_ADDQ(CPU, sys, sysvfork, 1);
 551  551                  mutex_enter(&p->p_lock);
 552  552                  p->p_flag |= SVFWAIT;
 553  553                  curthread->t_flag |= T_VFPARENT;
 554  554                  DTRACE_PROC1(create, proc_t *, cp);
 555  555                  cv_broadcast(&pr_pid_cv[p->p_slot]);    /* inform /proc */
 556  556                  mutex_exit(&p->p_lock);
 557  557                  /*
 558  558                   * Grab child's p_lock before dropping pidlock to ensure
 559  559                   * the process will not disappear before we set it running.
 560  560                   */
 561  561                  mutex_enter(&cp->p_lock);
 562  562                  mutex_exit(&pidlock);
 563  563                  sigdefault(cp);
 564  564                  continuelwps(cp);
 565  565                  mutex_exit(&cp->p_lock);
 566  566          } else {
 567  567                  CPU_STATS_ADDQ(CPU, sys, sysfork, 1);
 568  568                  DTRACE_PROC1(create, proc_t *, cp);
 569  569                  /*
 570  570                   * It is CL_FORKRET's job to drop pidlock.
 571  571                   * If we do it here, the process could be set running
 572  572                   * and disappear before CL_FORKRET() is called.
 573  573                   */
 574  574                  CL_FORKRET(curthread, cp->p_tlist);
 575  575                  schedctl_set_cidpri(curthread);
 576  576                  ASSERT(MUTEX_NOT_HELD(&pidlock));
 577  577          }
 578  578  
 579  579          return (r.r_vals);
 580  580  
           /*
            * Failure after the child's address space, rctls and (possibly some)
            * lwps exist: undo all of that, then fall through to forkerr with
            * EAGAIN.
            */
 581  581  forklwperr:
 582  582          if (isvfork) {
 583  583                  if (avl_numnodes(&p->p_wpage) != 0) {
 584  584                          /* restore watchpoints to parent */
 585  585                          as = p->p_as;
 586  586                          AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
 587  587                          as->a_wpage = p->p_wpage;
 588  588                          avl_create(&p->p_wpage, wp_compare,
 589  589                              sizeof (struct watched_page),
 590  590                              offsetof(struct watched_page, wp_link));
 591  591                          as_setwatch(as);
 592  592                          AS_LOCK_EXIT(as, &as->a_lock);
 593  593                  }
 594  594          } else {
 595  595                  if (cp->p_segacct)
 596  596                          shmexit(cp);
 597  597                  as = cp->p_as;
 598  598                  cp->p_as = &kas;
 599  599                  as_free(as);
 600  600          }
 601  601  
 602  602          if (cp->p_lwpdir) {
 603  603                  for (i = 0, ldp = cp->p_lwpdir; i < cp->p_lwpdir_sz; i++, ldp++)
 604  604                          if ((lep = ldp->ld_entry) != NULL)
 605  605                                  kmem_free(lep, sizeof (*lep));
 606  606                  kmem_free(cp->p_lwpdir,
 607  607                      cp->p_lwpdir_sz * sizeof (*cp->p_lwpdir));
 608  608          }
 609  609          cp->p_lwpdir = NULL;
 610  610          cp->p_lwpfree = NULL;
 611  611          cp->p_lwpdir_sz = 0;
 612  612  
 613  613          if (cp->p_tidhash)
 614  614                  kmem_free(cp->p_tidhash,
 615  615                      cp->p_tidhash_sz * sizeof (*cp->p_tidhash));
 616  616          cp->p_tidhash = NULL;
 617  617          cp->p_tidhash_sz = 0;
 618  618  
 619  619          forklwp_fail(cp);
 620  620          fork_fail(cp);

↓ open down ↓

333 lines elided

↑ open up ↑

 621  621          rctl_set_free(cp->p_rctls);
 622  622          mutex_enter(&pidlock);
 623  623  
 624  624          /*
 625  625           * Detach failed child from task.
 626  626           */
 627  627          mutex_enter(&cp->p_lock);
 628  628          tk = cp->p_task;
 629  629          task_detach(cp);
 630  630          ASSERT(cp->p_pool->pool_ref > 0);
 631      -        atomic_add_32(&cp->p_pool->pool_ref, -1);
      631 +        atomic_dec_32(&cp->p_pool->pool_ref);
 632  632          mutex_exit(&cp->p_lock);
 633  633  
                   /*
                    * Still under pidlock: unlink the child from the parent's orphan
                    * list and child/sibling chain before releasing its pid.
                    */
 634  634          orphpp = &p->p_orphan;
 635  635          while (*orphpp != cp)
 636  636                  orphpp = &(*orphpp)->p_nextorph;
 637  637          *orphpp = cp->p_nextorph;
 638  638          if (p->p_child == cp)
 639  639                  p->p_child = cp->p_sibling;
 640  640          if (cp->p_sibling)
 641  641                  cp->p_sibling->p_psibling = cp->p_psibling;

 642  642          if (cp->p_psibling)
 643  643                  cp->p_psibling->p_sibling = cp->p_sibling;
 644  644          pid_exit(cp, tk);
 645  645          mutex_exit(&pidlock);
 646  646  
 647  647          task_rele(tk);
 648  648  
 649  649          mutex_enter(&p->p_lock);
 650  650          pool_barrier_exit();
 651  651          continuelwps(p);
 652  652          mutex_exit(&p->p_lock);
 653  653          error = EAGAIN;
           /*
            * Common failure exit.  Failures detected before a child exists, or
            * already cleaned up inline, jump straight here with error set.
            */
 654  654  forkerr:
 655  655          return ((int64_t)set_errno(error));
 656  656  }
 657  657  
 658  658  /*
 659  659   * Free allocated resources from getproc() if a fork failed.
            *
            * cp is the child proc whose creation is being abandoned.  This
            * calls fcnt_add(fip, -1) on the child's file info, discards the
            * child's queued signals via sigdelq(), decrements the per-uid/zone
            * process count under pidlock, frees the child's cred and file
            * list, releases the directory/exec vnode holds and the cwd refstr,
            * and clears the brand if the child is branded.
            *
            * NOTE(review): the u_cdir, u_rdir and u_cwd releases go through
            * PTOU(curproc) (the caller's user area) rather than PTOU(cp);
            * presumably these are the same objects the child's holds were
            * taken on -- confirm against getproc().
 660  660   */
 661  661  static void
 662  662  fork_fail(proc_t *cp)
 663  663  {
 664  664          uf_info_t *fip = P_FINFO(cp);
 665  665  
 666  666          fcnt_add(fip, -1);
 667  667          sigdelq(cp, NULL, 0);
 668  668  
 669  669          mutex_enter(&pidlock);
 670  670          upcount_dec(crgetruid(cp->p_cred), crgetzoneid(cp->p_cred));
 671  671          mutex_exit(&pidlock);
 672  672  
 673  673          /*
 674  674           * single threaded, so no locking needed here
 675  675           */
 676  676          crfree(cp->p_cred);
 677  677  
 678  678          kmem_free(fip->fi_list, fip->fi_nfiles * sizeof (uf_entry_t));
 679  679  
 680  680          VN_RELE(PTOU(curproc)->u_cdir);
 681  681          if (PTOU(curproc)->u_rdir)
 682  682                  VN_RELE(PTOU(curproc)->u_rdir);
 683  683          if (cp->p_exec)
 684  684                  VN_RELE(cp->p_exec);
 685  685          if (cp->p_execdir)
 686  686                  VN_RELE(cp->p_execdir);
 687  687          if (PTOU(curproc)->u_cwd)
 688  688                  refstr_rele(PTOU(curproc)->u_cwd);
 689  689          if (PROC_IS_BRANDED(cp)) {
 690  690                  brand_clearbrand(cp, B_TRUE);
 691  691          }
 692  692  }
 693  693  
 694  694  /*
 695  695   * Clean up the lwps already created for this child process.
 696  696   * The fork failed while duplicating all the lwps of the parent
 697  697   * and those lwps already created must be freed.
 698  698   * This process is invisible to the rest of the system,
 699  699   * so we don't need to hold p->p_lock to protect the list.
 700  700   */
 701  701  static void
 702  702  forklwp_fail(proc_t *p)
 703  703  {
 704  704          kthread_t *t;
 705  705          task_t *tk;
 706  706          int branded = 0;
 707  707  
 708  708          if (PROC_IS_BRANDED(p))
 709  709                  branded = 1;
 710  710  
 711  711          while ((t = p->p_tlist) != NULL) {
 712  712                  /*
 713  713                   * First remove the lwp from the process's p_tlist.
 714  714                   */
 715  715                  if (t != t->t_forw)
 716  716                          p->p_tlist = t->t_forw;
 717  717                  else
 718  718                          p->p_tlist = NULL;
 719  719                  p->p_lwpcnt--;
 720  720                  t->t_forw->t_back = t->t_back;
 721  721                  t->t_back->t_forw = t->t_forw;
 722  722  
 723  723                  tk = p->p_task;
 724  724                  mutex_enter(&p->p_zone->zone_nlwps_lock);
 725  725                  tk->tk_nlwps--;
 726  726                  tk->tk_proj->kpj_nlwps--;
 727  727                  p->p_zone->zone_nlwps--;
 728  728                  mutex_exit(&p->p_zone->zone_nlwps_lock);
 729  729  
 730  730                  ASSERT(t->t_schedctl == NULL);
 731  731  
 732  732                  if (branded)
 733  733                          BROP(p)->b_freelwp(ttolwp(t));
 734  734  
 735  735                  if (t->t_door != NULL) {
 736  736                          kmem_free(t->t_door, sizeof (door_data_t));
 737  737                          t->t_door = NULL;
 738  738                  }
 739  739                  lwp_ctmpl_clear(ttolwp(t));
 740  740  
 741  741                  /*
 742  742                   * Remove the thread from the all threads list.
 743  743                   * We need to hold pidlock for this.
 744  744                   */
 745  745                  mutex_enter(&pidlock);
 746  746                  t->t_next->t_prev = t->t_prev;
 747  747                  t->t_prev->t_next = t->t_next;
 748  748                  CL_EXIT(t);     /* tell the scheduler that we're exiting */
 749  749                  cv_broadcast(&t->t_joincv);     /* tell anyone in thread_join */
 750  750                  mutex_exit(&pidlock);
 751  751  
 752  752                  /*
 753  753                   * Let the lgroup load averages know that this thread isn't
 754  754                   * going to show up (i.e. un-do what was done on behalf of
 755  755                   * this thread by the earlier lgrp_move_thread()).
 756  756                   */
 757  757                  kpreempt_disable();
 758  758                  lgrp_move_thread(t, NULL, 1);
 759  759                  kpreempt_enable();
 760  760  
 761  761                  /*
 762  762                   * The thread was created TS_STOPPED.
 763  763                   * We change it to TS_FREE to avoid an
 764  764                   * ASSERT() panic in thread_free().
 765  765                   */
 766  766                  t->t_state = TS_FREE;
 767  767                  thread_rele(t);
 768  768                  thread_free(t);
 769  769          }
 770  770  }
 771  771  
 772  772  extern struct as kas;
 773  773  
 774  774  /*
 775  775   * fork a kernel process.
 776  776   */
 777  777  int
 778  778  newproc(void (*pc)(), caddr_t arg, id_t cid, int pri, struct contract **ct,
 779  779      pid_t pid)
 780  780  {
 781  781          proc_t *p;
 782  782          struct user *up;
 783  783          kthread_t *t;
 784  784          cont_process_t *ctp = NULL;
 785  785          rctl_entity_p_t e;
 786  786  
 787  787          ASSERT(cid != sysdccid);
 788  788          ASSERT(cid != syscid || ct == NULL);
 789  789          if (CLASS_KERNEL(cid)) {
 790  790                  rctl_alloc_gp_t *init_gp;
 791  791                  rctl_set_t *init_set;
 792  792  
 793  793                  ASSERT(pid != 1);
 794  794  
 795  795                  if (getproc(&p, pid, GETPROC_KERNEL) < 0)
 796  796                          return (EAGAIN);
 797  797  
 798  798                  /*
 799  799                   * Release the hold on the p_exec and p_execdir, these
 800  800                   * were acquired in getproc()
 801  801                   */
 802  802                  if (p->p_execdir != NULL)
 803  803                          VN_RELE(p->p_execdir);
 804  804                  if (p->p_exec != NULL)
 805  805                          VN_RELE(p->p_exec);
 806  806                  p->p_flag |= SNOWAIT;
 807  807                  p->p_exec = NULL;
 808  808                  p->p_execdir = NULL;
 809  809  
 810  810                  init_set = rctl_set_create();
 811  811                  init_gp = rctl_set_init_prealloc(RCENTITY_PROCESS);
 812  812  
 813  813                  /*
 814  814                   * kernel processes do not inherit /proc tracing flags.
 815  815                   */
 816  816                  sigemptyset(&p->p_sigmask);
 817  817                  premptyset(&p->p_fltmask);
 818  818                  up = PTOU(p);
 819  819                  up->u_systrap = 0;
 820  820                  premptyset(&(up->u_entrymask));
 821  821                  premptyset(&(up->u_exitmask));
 822  822                  mutex_enter(&p->p_lock);
 823  823                  e.rcep_p.proc = p;
 824  824                  e.rcep_t = RCENTITY_PROCESS;
 825  825                  p->p_rctls = rctl_set_init(RCENTITY_PROCESS, p, &e, init_set,
 826  826                      init_gp);
 827  827                  mutex_exit(&p->p_lock);
 828  828  
 829  829                  rctl_prealloc_destroy(init_gp);
 830  830  
 831  831                  t = lwp_kernel_create(p, pc, arg, TS_STOPPED, pri);
 832  832          } else {
 833  833                  rctl_alloc_gp_t *init_gp, *default_gp;
 834  834                  rctl_set_t *init_set;
 835  835                  task_t *tk, *tk_old;
 836  836                  klwp_t *lwp;
 837  837  
 838  838                  if (getproc(&p, pid, GETPROC_USER) < 0)
 839  839                          return (EAGAIN);
 840  840                  /*
 841  841                   * init creates a new task, distinct from the task
 842  842                   * containing kernel "processes".
 843  843                   */
 844  844                  tk = task_create(0, p->p_zone);
 845  845                  mutex_enter(&tk->tk_zone->zone_nlwps_lock);
 846  846                  tk->tk_proj->kpj_ntasks++;
 847  847                  tk->tk_nprocs++;
 848  848                  mutex_exit(&tk->tk_zone->zone_nlwps_lock);
 849  849  
 850  850                  default_gp = rctl_rlimit_set_prealloc(RLIM_NLIMITS);
 851  851                  init_gp = rctl_set_init_prealloc(RCENTITY_PROCESS);
 852  852                  init_set = rctl_set_create();
 853  853  
 854  854                  mutex_enter(&pidlock);
 855  855                  mutex_enter(&p->p_lock);
 856  856                  tk_old = p->p_task;     /* switch to new task */
 857  857  
 858  858                  task_detach(p);
 859  859                  task_begin(tk, p);
 860  860                  mutex_exit(&pidlock);
 861  861  
 862  862                  mutex_enter(&tk_old->tk_zone->zone_nlwps_lock);
 863  863                  tk_old->tk_nprocs--;
 864  864                  mutex_exit(&tk_old->tk_zone->zone_nlwps_lock);
 865  865  
 866  866                  e.rcep_p.proc = p;
 867  867                  e.rcep_t = RCENTITY_PROCESS;
 868  868                  p->p_rctls = rctl_set_init(RCENTITY_PROCESS, p, &e, init_set,
 869  869                      init_gp);
 870  870                  rctlproc_default_init(p, default_gp);
 871  871                  mutex_exit(&p->p_lock);
 872  872  
 873  873                  task_rele(tk_old);
 874  874                  rctl_prealloc_destroy(default_gp);
 875  875                  rctl_prealloc_destroy(init_gp);
 876  876  
 877  877                  if ((lwp = lwp_create(pc, arg, 0, p, TS_STOPPED, pri,
 878  878                      &curthread->t_hold, cid, 1)) == NULL) {
 879  879                          task_t *tk;
 880  880                          fork_fail(p);
 881  881                          mutex_enter(&pidlock);
 882  882                          mutex_enter(&p->p_lock);
 883  883                          tk = p->p_task;
 884  884                          task_detach(p);
 885  885                          ASSERT(p->p_pool->pool_ref > 0);
 886  886                          atomic_add_32(&p->p_pool->pool_ref, -1);
 887  887                          mutex_exit(&p->p_lock);
 888  888                          pid_exit(p, tk);
 889  889                          mutex_exit(&pidlock);
 890  890                          task_rele(tk);
 891  891  
 892  892                          return (EAGAIN);
 893  893                  }
 894  894                  t = lwptot(lwp);
 895  895  
 896  896                  ctp = contract_process_fork(sys_process_tmpl, p, curproc,
 897  897                      B_FALSE);
 898  898                  ASSERT(ctp != NULL);
 899  899                  if (ct != NULL)
 900  900                          *ct = &ctp->conp_contract;
 901  901          }
 902  902  
 903  903          ASSERT3U(t->t_tid, ==, 1);
 904  904          p->p_lwpid = 1;
 905  905          mutex_enter(&pidlock);
 906  906          pgjoin(p, p->p_parent->p_pgidp);
 907  907          p->p_stat = SRUN;
 908  908          mutex_enter(&p->p_lock);
 909  909          t->t_proc_flag &= ~TP_HOLDLWP;
 910  910          lwp_create_done(t);
 911  911          mutex_exit(&p->p_lock);
 912  912          mutex_exit(&pidlock);
 913  913          return (0);
 914  914  }
 915  915  
 916  916  /*
 917  917   * create a child proc struct.
 918  918   */
 919  919  static int
 920  920  getproc(proc_t **cpp, pid_t pid, uint_t flags)
 921  921  {
 922  922          proc_t          *pp, *cp;
 923  923          pid_t           newpid;
 924  924          struct user     *uarea;
 925  925          extern uint_t   nproc;
 926  926          struct cred     *cr;
 927  927          uid_t           ruid;
 928  928          zoneid_t        zoneid;
 929  929          task_t          *task;
 930  930          kproject_t      *proj;
 931  931          zone_t          *zone;
 932  932          int             rctlfail = 0;
 933  933  
 934  934          if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
 935  935                  return (-1);    /* no point in starting new processes */
 936  936  
 937  937          pp = (flags & GETPROC_KERNEL) ? &p0 : curproc;
 938  938          task = pp->p_task;
 939  939          proj = task->tk_proj;
 940  940          zone = pp->p_zone;
 941  941  
 942  942          mutex_enter(&pp->p_lock);
 943  943          mutex_enter(&zone->zone_nlwps_lock);
 944  944          if (proj != proj0p) {
 945  945                  if (task->tk_nprocs >= task->tk_nprocs_ctl)
 946  946                          if (rctl_test(rc_task_nprocs, task->tk_rctls,
 947  947                              pp, 1, 0) & RCT_DENY)
 948  948                                  rctlfail = 1;
 949  949  
 950  950                  if (proj->kpj_nprocs >= proj->kpj_nprocs_ctl)
 951  951                          if (rctl_test(rc_project_nprocs, proj->kpj_rctls,
 952  952                              pp, 1, 0) & RCT_DENY)
 953  953                                  rctlfail = 1;
 954  954  
 955  955                  if (zone->zone_nprocs >= zone->zone_nprocs_ctl)
 956  956                          if (rctl_test(rc_zone_nprocs, zone->zone_rctls,
 957  957                              pp, 1, 0) & RCT_DENY)
 958  958                                  rctlfail = 1;
 959  959  
 960  960                  if (rctlfail) {
 961  961                          mutex_exit(&zone->zone_nlwps_lock);
 962  962                          mutex_exit(&pp->p_lock);
 963  963                          goto punish;
 964  964                  }
 965  965          }
 966  966          task->tk_nprocs++;
 967  967          proj->kpj_nprocs++;
 968  968          zone->zone_nprocs++;
 969  969          mutex_exit(&zone->zone_nlwps_lock);
 970  970          mutex_exit(&pp->p_lock);
 971  971  
 972  972          cp = kmem_cache_alloc(process_cache, KM_SLEEP);
 973  973          bzero(cp, sizeof (proc_t));
 974  974  
 975  975          /*
 976  976           * Make proc entry for child process
 977  977           */
 978  978          mutex_init(&cp->p_splock, NULL, MUTEX_DEFAULT, NULL);
 979  979          mutex_init(&cp->p_crlock, NULL, MUTEX_DEFAULT, NULL);
 980  980          mutex_init(&cp->p_pflock, NULL, MUTEX_DEFAULT, NULL);
 981  981  #if defined(__x86)
 982  982          mutex_init(&cp->p_ldtlock, NULL, MUTEX_DEFAULT, NULL);
 983  983  #endif
 984  984          mutex_init(&cp->p_maplock, NULL, MUTEX_DEFAULT, NULL);
 985  985          cp->p_stat = SIDL;
 986  986          cp->p_mstart = gethrtime();
 987  987          cp->p_as = &kas;
 988  988          /*
 989  989           * p_zone must be set before we call pid_allocate since the process
 990  990           * will be visible after that and code such as prfind_zone will
 991  991           * look at the p_zone field.
 992  992           */
 993  993          cp->p_zone = pp->p_zone;
 994  994          cp->p_t1_lgrpid = LGRP_NONE;
 995  995          cp->p_tr_lgrpid = LGRP_NONE;
 996  996  
 997  997          if ((newpid = pid_allocate(cp, pid, PID_ALLOC_PROC)) == -1) {
 998  998                  if (nproc == v.v_proc) {
 999  999                          CPU_STATS_ADDQ(CPU, sys, procovf, 1);
1000 1000                          cmn_err(CE_WARN, "out of processes");
1001 1001                  }
1002 1002                  goto bad;
1003 1003          }
1004 1004  
1005 1005          mutex_enter(&pp->p_lock);
1006 1006          cp->p_exec = pp->p_exec;
1007 1007          cp->p_execdir = pp->p_execdir;
1008 1008          mutex_exit(&pp->p_lock);
1009 1009  
1010 1010          if (cp->p_exec) {
1011 1011                  VN_HOLD(cp->p_exec);
1012 1012                  /*
1013 1013                   * Each VOP_OPEN() must be paired with a corresponding
1014 1014                   * VOP_CLOSE(). In this case, the executable will be
1015 1015                   * closed for the child in either proc_exit() or gexec().
1016 1016                   */
1017 1017                  if (VOP_OPEN(&cp->p_exec, FREAD, CRED(), NULL) != 0) {
1018 1018                          VN_RELE(cp->p_exec);
1019 1019                          cp->p_exec = NULLVP;
1020 1020                          cp->p_execdir = NULLVP;
1021 1021                          goto bad;
1022 1022                  }
1023 1023          }
1024 1024          if (cp->p_execdir)
1025 1025                  VN_HOLD(cp->p_execdir);
1026 1026  
1027 1027          /*
1028 1028           * If not privileged make sure that this user hasn't exceeded
1029 1029           * v.v_maxup processes, and that users collectively haven't
1030 1030           * exceeded v.v_maxupttl processes.
1031 1031           */
1032 1032          mutex_enter(&pidlock);
1033 1033          ASSERT(nproc < v.v_proc);       /* otherwise how'd we get our pid? */
1034 1034          cr = CRED();
1035 1035          ruid = crgetruid(cr);
1036 1036          zoneid = crgetzoneid(cr);
1037 1037          if (nproc >= v.v_maxup &&       /* short-circuit; usually false */
1038 1038              (nproc >= v.v_maxupttl ||
1039 1039              upcount_get(ruid, zoneid) >= v.v_maxup) &&
1040 1040              secpolicy_newproc(cr) != 0) {
1041 1041                  mutex_exit(&pidlock);
1042 1042                  zcmn_err(zoneid, CE_NOTE,
1043 1043                      "out of per-user processes for uid %d", ruid);
1044 1044                  goto bad;
1045 1045          }
1046 1046  
1047 1047          /*
1048 1048           * Everything is cool, put the new proc on the active process list.
1049 1049           * It is already on the pid list and in /proc.
1050 1050           * Increment the per uid process count (upcount).
1051 1051           */
1052 1052          nproc++;
1053 1053          upcount_inc(ruid, zoneid);
1054 1054  
1055 1055          cp->p_next = practive;
1056 1056          practive->p_prev = cp;
1057 1057          practive = cp;
1058 1058  
1059 1059          cp->p_ignore = pp->p_ignore;
1060 1060          cp->p_siginfo = pp->p_siginfo;
1061 1061          cp->p_flag = pp->p_flag & (SJCTL|SNOWAIT|SNOCD);
1062 1062          cp->p_sessp = pp->p_sessp;
1063 1063          sess_hold(pp);
1064 1064          cp->p_brand = pp->p_brand;
1065 1065          if (PROC_IS_BRANDED(pp))
1066 1066                  BROP(pp)->b_copy_procdata(cp, pp);
1067 1067          cp->p_bssbase = pp->p_bssbase;
1068 1068          cp->p_brkbase = pp->p_brkbase;
1069 1069          cp->p_brksize = pp->p_brksize;
1070 1070          cp->p_brkpageszc = pp->p_brkpageszc;
1071 1071          cp->p_stksize = pp->p_stksize;
1072 1072          cp->p_stkpageszc = pp->p_stkpageszc;
1073 1073          cp->p_stkprot = pp->p_stkprot;
1074 1074          cp->p_datprot = pp->p_datprot;
1075 1075          cp->p_usrstack = pp->p_usrstack;
1076 1076          cp->p_model = pp->p_model;
1077 1077          cp->p_ppid = pp->p_pid;
1078 1078          cp->p_ancpid = pp->p_pid;
1079 1079          cp->p_portcnt = pp->p_portcnt;
1080 1080  
1081 1081          /*
1082 1082           * Initialize watchpoint structures
1083 1083           */
1084 1084          avl_create(&cp->p_warea, wa_compare, sizeof (struct watched_area),
1085 1085              offsetof(struct watched_area, wa_link));
1086 1086  
1087 1087          /*
1088 1088           * Initialize immediate resource control values.
1089 1089           */
1090 1090          cp->p_stk_ctl = pp->p_stk_ctl;
1091 1091          cp->p_fsz_ctl = pp->p_fsz_ctl;
1092 1092          cp->p_vmem_ctl = pp->p_vmem_ctl;
1093 1093          cp->p_fno_ctl = pp->p_fno_ctl;
1094 1094  
1095 1095          /*
1096 1096           * Link up to parent-child-sibling chain.  No need to lock
1097 1097           * in general since only a call to freeproc() (done by the
1098 1098           * same parent as newproc()) diddles with the child chain.
1099 1099           */
1100 1100          cp->p_sibling = pp->p_child;
1101 1101          if (pp->p_child)
1102 1102                  pp->p_child->p_psibling = cp;
1103 1103  
1104 1104          cp->p_parent = pp;
1105 1105          pp->p_child = cp;
1106 1106  
1107 1107          cp->p_child_ns = NULL;
1108 1108          cp->p_sibling_ns = NULL;
1109 1109  
1110 1110          cp->p_nextorph = pp->p_orphan;
1111 1111          cp->p_nextofkin = pp;
1112 1112          pp->p_orphan = cp;
1113 1113  
1114 1114          /*
1115 1115           * Inherit profiling state; do not inherit REALPROF profiling state.
1116 1116           */
1117 1117          cp->p_prof = pp->p_prof;
1118 1118          cp->p_rprof_cyclic = CYCLIC_NONE;
1119 1119  
1120 1120          /*

↓ open down ↓

479 lines elided

↑ open up ↑

1121 1121           * Inherit pool pointer from the parent.  Kernel processes are
1122 1122           * always bound to the default pool.
1123 1123           */
1124 1124          mutex_enter(&pp->p_lock);
1125 1125          if (flags & GETPROC_KERNEL) {
1126 1126                  cp->p_pool = pool_default;
1127 1127                  cp->p_flag |= SSYS;
1128 1128          } else {
1129 1129                  cp->p_pool = pp->p_pool;
1130 1130          }
1131      -        atomic_add_32(&cp->p_pool->pool_ref, 1);
     1131 +        atomic_inc_32(&cp->p_pool->pool_ref);
1132 1132          mutex_exit(&pp->p_lock);
1133 1133  
1134 1134          /*
1135 1135           * Add the child process to the current task.  Kernel processes
1136 1136           * are always attached to task0.
1137 1137           */
1138 1138          mutex_enter(&cp->p_lock);
1139 1139          if (flags & GETPROC_KERNEL)
1140 1140                  task_attach(task0p, cp);
1141 1141          else

1142 1142                  task_attach(pp->p_task, cp);
1143 1143          mutex_exit(&cp->p_lock);
1144 1144          mutex_exit(&pidlock);
1145 1145  
1146 1146          avl_create(&cp->p_ct_held, contract_compar, sizeof (contract_t),
1147 1147              offsetof(contract_t, ct_ctlist));
1148 1148  
1149 1149          /*
1150 1150           * Duplicate any audit information kept in the process table
1151 1151           */
1152 1152          if (audit_active)       /* copy audit data to cp */
1153 1153                  audit_newproc(cp);
1154 1154  
1155 1155          crhold(cp->p_cred = cr);
1156 1156  
1157 1157          /*
1158 1158           * Bump up the counts on the file structures pointed at by the
1159 1159           * parent's file table since the child will point at them too.
1160 1160           */
1161 1161          fcnt_add(P_FINFO(pp), 1);
1162 1162  
1163 1163          if (PTOU(pp)->u_cdir) {
1164 1164                  VN_HOLD(PTOU(pp)->u_cdir);
1165 1165          } else {
1166 1166                  ASSERT(pp == &p0);
1167 1167                  /*
1168 1168                   * We must be at or before vfs_mountroot(); it will take care of
1169 1169                   * assigning our current directory.
1170 1170                   */
1171 1171          }
1172 1172          if (PTOU(pp)->u_rdir)
1173 1173                  VN_HOLD(PTOU(pp)->u_rdir);
1174 1174          if (PTOU(pp)->u_cwd)
1175 1175                  refstr_hold(PTOU(pp)->u_cwd);
1176 1176  
1177 1177          /*
1178 1178           * copy the parent's uarea.
1179 1179           */
1180 1180          uarea = PTOU(cp);
1181 1181          bcopy(PTOU(pp), uarea, sizeof (*uarea));
1182 1182          flist_fork(P_FINFO(pp), P_FINFO(cp));
1183 1183  
1184 1184          gethrestime(&uarea->u_start);
1185 1185          uarea->u_ticks = ddi_get_lbolt();
1186 1186          uarea->u_mem = rm_asrss(pp->p_as);
1187 1187          uarea->u_acflag = AFORK;
1188 1188  
1189 1189          /*
1190 1190           * If inherit-on-fork, copy /proc tracing flags to child.
1191 1191           */
1192 1192          if ((pp->p_proc_flag & P_PR_FORK) != 0) {
1193 1193                  cp->p_proc_flag |= pp->p_proc_flag & (P_PR_TRACE|P_PR_FORK);
1194 1194                  cp->p_sigmask = pp->p_sigmask;
1195 1195                  cp->p_fltmask = pp->p_fltmask;
1196 1196          } else {
1197 1197                  sigemptyset(&cp->p_sigmask);
1198 1198                  premptyset(&cp->p_fltmask);
1199 1199                  uarea->u_systrap = 0;
1200 1200                  premptyset(&uarea->u_entrymask);
1201 1201                  premptyset(&uarea->u_exitmask);
1202 1202          }
1203 1203          /*
1204 1204           * If microstate accounting is being inherited, mark child
1205 1205           */
1206 1206          if ((pp->p_flag & SMSFORK) != 0)
1207 1207                  cp->p_flag |= pp->p_flag & (SMSFORK|SMSACCT);
1208 1208  
1209 1209          /*
1210 1210           * Inherit fixalignment flag from the parent
1211 1211           */
1212 1212          cp->p_fixalignment = pp->p_fixalignment;
1213 1213  
1214 1214          *cpp = cp;
1215 1215          return (0);
1216 1216  
1217 1217  bad:
1218 1218          ASSERT(MUTEX_NOT_HELD(&pidlock));
1219 1219  
1220 1220          mutex_destroy(&cp->p_crlock);
1221 1221          mutex_destroy(&cp->p_pflock);
1222 1222  #if defined(__x86)
1223 1223          mutex_destroy(&cp->p_ldtlock);
1224 1224  #endif
1225 1225          if (newpid != -1) {
1226 1226                  proc_entry_free(cp->p_pidp);
1227 1227                  (void) pid_rele(cp->p_pidp);
1228 1228          }
1229 1229          kmem_cache_free(process_cache, cp);
1230 1230  
1231 1231          mutex_enter(&zone->zone_nlwps_lock);
1232 1232          task->tk_nprocs--;
1233 1233          proj->kpj_nprocs--;
1234 1234          zone->zone_nprocs--;
1235 1235          mutex_exit(&zone->zone_nlwps_lock);
1236 1236  
1237 1237  punish:
1238 1238          /*
1239 1239           * We most likely got into this situation because some process is
1240 1240           * forking out of control.  As punishment, put it to sleep for a
1241 1241           * bit so it can't eat the machine alive.  Sleep interval is chosen
1242 1242           * to allow no more than one fork failure per cpu per clock tick
1243 1243           * on average (yes, I just made this up).  This has two desirable
1244 1244           * properties: (1) it sets a constant limit on the fork failure
1245 1245           * rate, and (2) the busier the system is, the harsher the penalty
1246 1246           * for abusing it becomes.
1247 1247           */
1248 1248          INCR_COUNT(&fork_fail_pending, &pidlock);
1249 1249          delay(fork_fail_pending / ncpus + 1);
1250 1250          DECR_COUNT(&fork_fail_pending, &pidlock);
1251 1251  
1252 1252          return (-1); /* out of memory or proc slots */
1253 1253  }
1254 1254  
1255 1255  /*
1256 1256   * Release virtual memory.
1257 1257   * In the case of vfork(), the child was given exclusive access to its
1258 1258   * parent's address space.  The parent is waiting in vfwait() for the
1259 1259   * child to release its exclusive claim via relvm().
1260 1260   */
1261 1261  void
1262 1262  relvm()
1263 1263  {
1264 1264          proc_t *p = curproc;
1265 1265  
1266 1266          ASSERT((unsigned)p->p_lwpcnt <= 1);
1267 1267  
1268 1268          prrelvm();      /* inform /proc */
1269 1269  
1270 1270          if (p->p_flag & SVFORK) {
1271 1271                  proc_t *pp = p->p_parent;
1272 1272                  /*
1273 1273                   * The child process is either exec'ing or exit'ing.
1274 1274                   * The child is now separated from the parent's address
1275 1275                   * space.  The parent process is made dispatchable.
1276 1276                   *
1277 1277                   * This is a delicate locking maneuver, involving
1278 1278                   * both the parent's p_lock and the child's p_lock.
1279 1279                   * As soon as the SVFORK flag is turned off, the
1280 1280                   * parent is free to run, but it must not run until
1281 1281                   * we wake it up using its p_cv because it might
1282 1282                   * exit and we would be referencing invalid memory.
1283 1283                   * Therefore, we hold the parent with its p_lock
1284 1284                   * while protecting our p_flags with our own p_lock.
1285 1285                   */
1286 1286  try_again:
1287 1287                  mutex_enter(&p->p_lock);        /* grab child's lock first */
1288 1288                  prbarrier(p);           /* make sure /proc is blocked out */
1289 1289                  mutex_enter(&pp->p_lock);
1290 1290  
1291 1291                  /*
1292 1292                   * Check if parent is locked by /proc.
1293 1293                   */
1294 1294                  if (pp->p_proc_flag & P_PR_LOCK) {
1295 1295                          /*
1296 1296                           * Delay until /proc is done with the parent.
1297 1297                           * We must drop our (the child's) p->p_lock, wait
1298 1298                           * via prbarrier() on the parent, then start over.
1299 1299                           */
1300 1300                          mutex_exit(&p->p_lock);
1301 1301                          prbarrier(pp);
1302 1302                          mutex_exit(&pp->p_lock);
1303 1303                          goto try_again;
1304 1304                  }
1305 1305                  p->p_flag &= ~SVFORK;
1306 1306                  kpreempt_disable();
1307 1307                  p->p_as = &kas;
1308 1308  
1309 1309                  /*
1310 1310                   * notify hat of change in thread's address space
1311 1311                   */
1312 1312                  hat_thread_exit(curthread);
1313 1313                  kpreempt_enable();
1314 1314  
1315 1315                  /*
1316 1316                   * child sizes are copied back to parent because
1317 1317                   * child may have grown.
1318 1318                   */
1319 1319                  pp->p_brkbase = p->p_brkbase;
1320 1320                  pp->p_brksize = p->p_brksize;
1321 1321                  pp->p_stksize = p->p_stksize;
1322 1322  
1323 1323                  /*
1324 1324                   * Copy back the shm accounting information
1325 1325                   * to the parent process.
1326 1326                   */
1327 1327                  pp->p_segacct = p->p_segacct;
1328 1328                  p->p_segacct = NULL;
1329 1329  
1330 1330                  /*
1331 1331                   * The parent is no longer waiting for the vfork()d child.
1332 1332                   * Restore the parent's watched pages, if any.  This is
1333 1333                   * safe because we know the parent is not locked by /proc
1334 1334                   */
1335 1335                  pp->p_flag &= ~SVFWAIT;
1336 1336                  if (avl_numnodes(&pp->p_wpage) != 0) {
1337 1337                          pp->p_as->a_wpage = pp->p_wpage;
1338 1338                          avl_create(&pp->p_wpage, wp_compare,
1339 1339                              sizeof (struct watched_page),
1340 1340                              offsetof(struct watched_page, wp_link));
1341 1341                  }
1342 1342                  cv_signal(&pp->p_cv);
1343 1343                  mutex_exit(&pp->p_lock);
1344 1344                  mutex_exit(&p->p_lock);
1345 1345          } else {
1346 1346                  if (p->p_as != &kas) {
1347 1347                          struct as *as;
1348 1348  
1349 1349                          if (p->p_segacct)
1350 1350                                  shmexit(p);
1351 1351  
1352 1352                          /*
1353 1353                           * We grab p_lock for the benefit of /proc
1354 1354                           */
1355 1355                          kpreempt_disable();
1356 1356                          mutex_enter(&p->p_lock);
1357 1357                          prbarrier(p);   /* make sure /proc is blocked out */
1358 1358                          as = p->p_as;
1359 1359                          p->p_as = &kas;
1360 1360                          mutex_exit(&p->p_lock);
1361 1361  
1362 1362                          /*
1363 1363                           * notify hat of change in thread's address space
1364 1364                           */
1365 1365                          hat_thread_exit(curthread);
1366 1366                          kpreempt_enable();
1367 1367  
1368 1368                          as_free(as);
1369 1369                          p->p_tr_lgrpid = LGRP_NONE;
1370 1370                  }
1371 1371          }
1372 1372  }
1373 1373  
1374 1374  /*
1375 1375   * Wait for child to exec or exit.
1376 1376   * Called by parent of vfork'ed process.
1377 1377   * See important comments in relvm(), above.
            *
            * 'pid' is the process ID of the child to wait for.  The calling
            * process (ttoproc(curthread)) sleeps on its own p_cv until one of
            * the following holds:
            *   - prfind(pid) no longer finds a process,
            *   - the process found is not a child of the caller, or
            *   - the child's SVFORK flag is clear.
            *
            * Before returning, watchpoints are re-established on the caller's
            * address space if pr_watch_active() reports any, and
            * continuelwps() is called on the caller with its p_lock held.
            *
            * There is no return value.  The wait is not abandoned when
            * cv_wait_sig() returns zero; later iterations simply use
            * cv_wait() instead.
1378 1378   */
1379 1379  void
1380 1380  vfwait(pid_t pid)
1381 1381  {
1382 1382          int signalled = 0;      /* set once cv_wait_sig() returns 0 */
1383 1383          proc_t *pp = ttoproc(curthread);        /* the caller (parent) */
1384 1384          proc_t *cp;     /* child; only used under pidlock/cp->p_lock */
1385 1385  
1386 1386          /*
1387 1387           * Wait for child to exec or exit.
                    * Each pass looks the child up afresh by pid, because no
                    * reference to it is kept across the sleep below.
1388 1388           */
1389 1389          for (;;) {
1390 1390                  mutex_enter(&pidlock);
1391 1391                  cp = prfind(pid);
1392 1392                  if (cp == NULL || cp->p_parent != pp) {
1393 1393                          /*
1394 1394                           * Child has exit()ed.
                                    * (Either no process was found for pid, or
                                    * the process found is not our child.)
1395 1395                           */
1396 1396                          mutex_exit(&pidlock);
1397 1397                          break;
1398 1398                  }
1399 1399                  /*
1400 1400                   * Grab the child's p_lock before releasing pidlock.
1401 1401                   * Otherwise, the child could exit and we would be
1402 1402                   * referencing invalid memory.
1403 1403                   */
1404 1404                  mutex_enter(&cp->p_lock);
1405 1405                  mutex_exit(&pidlock);
1406 1406                  if (!(cp->p_flag & SVFORK)) {
1407 1407                          /*
1408 1408                           * Child has exec()ed or is exit()ing.
1409 1409                           */
1410 1410                          mutex_exit(&cp->p_lock);
1411 1411                          break;
1412 1412                  }
                           /*
                            * SVFORK is still set.  Take our own p_lock before
                            * dropping the child's, so that the two locks are
                            * never both released between the flag check above
                            * and the sleep on our p_cv below.
                            * NOTE(review): presumably this is what prevents a
                            * missed wakeup from the child; confirm against the
                            * cv_signal() call in relvm().
                            */
1413 1413                  mutex_enter(&pp->p_lock);
1414 1414                  mutex_exit(&cp->p_lock);
1415 1415                  /*
1416 1416                   * We might be waked up spuriously from the cv_wait().
1417 1417                   * We have to do the whole operation over again to be
1418 1418                   * sure the child's SVFORK flag really is turned off.
1419 1419                   * We cannot make reference to the child because it can
1420 1420                   * exit before we return and we would be referencing
1421 1421                   * invalid memory.
1422 1422                   *
1423 1423                   * Because this is potentially a very long-term wait,
1424 1424                   * we call cv_wait_sig() (for its jobcontrol and /proc
1425 1425                   * side-effects) unless there is a current signal, in
1426 1426                   * which case we use cv_wait() because we cannot return
1427 1427                   * from this function until the child has released the
1428 1428                   * address space.  Calling cv_wait_sig() with a current
1429 1429                   * signal would lead to an indefinite loop here because
1430 1430                   * cv_wait_sig() returns immediately in this case.
                            *
                            * 'signalled' latches: once cv_wait_sig() has returned
                            * zero it stays set, so every later pass through the
                            * loop takes the cv_wait() branch.
                            * NOTE(review): a zero return is taken here to mean
                            * that a signal is pending; confirm against the
                            * cv_wait_sig() documentation.
1431 1431                   */
1432 1432                  if (signalled)
1433 1433                          cv_wait(&pp->p_cv, &pp->p_lock);
1434 1434                  else
1435 1435                          signalled = !cv_wait_sig(&pp->p_cv, &pp->p_lock);
1436 1436                  mutex_exit(&pp->p_lock);
1437 1437          }
1438 1438  
1439 1439          /* restore watchpoints to parent */
                   /* as_setwatch() is called with the as lock held as writer. */
1440 1440          if (pr_watch_active(pp)) {
1441 1441                  struct as *as = pp->p_as;
1442 1442                  AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1443 1443                  as_setwatch(as);
1444 1444                  AS_LOCK_EXIT(as, &as->a_lock);
1445 1445          }
1446 1446  
                   /*
                    * With our p_lock held, pass the /proc barrier and then call
                    * continuelwps() on ourselves.
                    * NOTE(review): continuelwps() presumably resumes the parent's
                    * other lwps that were held for the vfork; confirm against the
                    * vfork path in this file.
                    */
1447 1447          mutex_enter(&pp->p_lock);
1448 1448          prbarrier(pp);  /* barrier against /proc locking */
1449 1449          continuelwps(pp);
1450 1450          mutex_exit(&pp->p_lock);
1451 1451  }

↓ open down ↓

310 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX