Introduction
When using binder, the general flow is to first obtain the servicemanager binder, then use it to obtain the binder of the target service, and finally call interfaces on that binder. This article covers the first step, from native code down to the driver; the framework (fwk) part is left out for now. The goal is to fully understand how the servicemanager binder is obtained.
Walkthrough
The starting point of the whole flow is IServiceManager::defaultServiceManager():
sp<IServiceManager> defaultServiceManager()
{
std::call_once(gSmOnce, []() {
sp<AidlServiceManager> sm = nullptr;
while (sm == nullptr) {
sm = interface_cast<AidlServiceManager>(ProcessState::self()->getContextObject(nullptr)); // get the servicemanager binder
if (sm == nullptr) {
ALOGE("Waiting 1s on context object on %s.", ProcessState::self()->getDriverName().c_str());
sleep(1);
}
}
gDefaultServiceManager = sp<ServiceManagerShim>::make(sm);
});
return gDefaultServiceManager;
}
As you can see, this is a singleton: on the first call it initializes itself by obtaining the servicemanager binder and wrapping it in a smart pointer; the std::call_once pattern it relies on is sketched right below.
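A minimal, self-contained sketch of that pattern (Service and getInstance are hypothetical names, not libbinder code):
#include <memory>
#include <mutex>
struct Service {};
static std::once_flag gOnce;                     // identifies this one-time init flow
static std::shared_ptr<Service> gInstance;
std::shared_ptr<Service> getInstance() {
    // The lambda runs exactly once, even if many threads race into getInstance().
    std::call_once(gOnce, [] { gInstance = std::make_shared<Service>(); });
    return gInstance;
}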
std::call_once is a C++11 feature used to guarantee that a piece of code runs only once even in multi-threaded scenarios; it is used together with a std::once_flag, which identifies the flow being guarded. Next, let's look at what ProcessState::self()->getContextObject(nullptr) does:
#ifdef __ANDROID_VNDK__
const char* kDefaultDriver = "/dev/vndbinder";
#else
const char* kDefaultDriver = "/dev/binder";
#endif
sp<ProcessState> ProcessState::self()
{
return init(kDefaultDriver, false /*requireDefault*/);
}
sp<ProcessState> ProcessState::init(const char *driver, bool requireDefault)
{
[[clang::no_destroy]] static sp<ProcessState> gProcess;
[[clang::no_destroy]] static std::mutex gProcessMutex;
if (driver == nullptr) {
std::lock_guard<std::mutex> l(gProcessMutex);
return gProcess;
}
[[clang::no_destroy]] static std::once_flag gProcessOnce;
std::call_once(gProcessOnce, [&](){
if (access(driver, R_OK) == -1) {
ALOGE("Binder driver %s is unavailable. Using /dev/binder instead.", driver);
driver = "/dev/binder";
}
std::lock_guard<std::mutex> l(gProcessMutex);
gProcess = sp<ProcessState>::make(driver); // create the ProcessState
});
if (requireDefault) {
// Detect if we are trying to initialize with a different driver, and
// consider that an error. ProcessState will only be initialized once above.
LOG_ALWAYS_FATAL_IF(gProcess->getDriverName() != driver,
"ProcessState was already initialized with %s,"
" can't initialize with %s.",
gProcess->getDriverName().c_str(), driver);
}
return gProcess;
}
This is again a singleton. For any process participating in binder IPC, the interaction with the binder driver is implemented through ProcessState. Android has three binder driver nodes; their differences are listed below (a driver-selection sketch follows the table):
Binder driver node | Description |
---|---|
/dev/binder | Used by the Android fwk when using AIDL interfaces |
/dev/hwbinder | Used by processes that use HIDL interfaces, mainly between fwk and vendor processes and between vendor processes |
/dev/vndbinder | Used between vendor processes that use AIDL interfaces |
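For example, a vendor native process that wants its AIDL traffic to go over /dev/vndbinder has to select the driver before anything else in the process touches ProcessState. A sketch, assuming ProcessState::initWithDriver is available in your libbinder version (it is the entry point vndservicemanager clients typically use):
#include <binder/IServiceManager.h>
#include <binder/ProcessState.h>
using namespace android;
int main() {
    // Must run before defaultServiceManager()/ProcessState::self(),
    // otherwise the process is already bound to the default driver.
    ProcessState::initWithDriver("/dev/vndbinder");
    sp<IServiceManager> sm = defaultServiceManager();
    // ... addService()/getService() for vendor services via sm ...
    ProcessState::self()->startThreadPool();
    return 0;
}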
Next, the ProcessState constructor:
ProcessState::ProcessState(const char *driver)
: mDriverName(String8(driver))
, mDriverFD(open_driver(driver)) // open the binder node
, mVMStart(MAP_FAILED)
, mThreadCountLock(PTHREAD_MUTEX_INITIALIZER)
, mThreadCountDecrement(PTHREAD_COND_INITIALIZER)
, mExecutingThreadsCount(0)
, mWaitingForThreads(0)
, mMaxThreads(DEFAULT_MAX_BINDER_THREADS) // thread pool size
, mStarvationStartTimeMs(0)
, mThreadPoolStarted(false)
, mThreadPoolSeq(1)
, mCallRestriction(CallRestriction::NONE)
{
// TODO(b/166468760): enforce in build system
#if defined(__ANDROID_APEX__)
LOG_ALWAYS_FATAL("Cannot use libbinder in APEX (only system.img libbinder) since it is not stable.");
#endif
if (mDriverFD >= 0) {
// mmap the binder, providing a chunk of virtual address space to receive transactions.
mVMStart = mmap(nullptr, BINDER_VM_SIZE, PROT_READ, MAP_PRIVATE | MAP_NORESERVE, mDriverFD, 0); // memory mapping
if (mVMStart == MAP_FAILED) {
// *sigh*
ALOGE("Using %s failed: unable to mmap transaction memory.n", mDriverName.c_str());
close(mDriverFD);
mDriverFD = -1;
mDriverName.clear();
}
}
#ifdef __ANDROID__
LOG_ALWAYS_FATAL_IF(mDriverFD < 0, "Binder driver '%s' could not be opened. Terminating.", driver);
#endif
}
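For reference, the mapping size and default thread-pool size used above come from two defines near the top of ProcessState.cpp; the values below are what AOSP uses at the time of writing and may differ between Android versions:
#define BINDER_VM_SIZE ((1 * 1024 * 1024) - sysconf(_SC_PAGE_SIZE) * 2) // ~1 MB minus two pages
#define DEFAULT_MAX_BINDER_THREADS 15 // extra binder threads the driver may ask the process to spawn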
There are two interactions with the driver here: open_driver and mmap. Let's look at open_driver first:
static int open_driver(const char *driver)
{
int fd = open(driver, O_RDWR | O_CLOEXEC); // open the binder node
if (fd >= 0) {
int vers = 0;
status_t result = ioctl(fd, BINDER_VERSION, &vers); // query the binder protocol version
if (result == -1) {
ALOGE("Binder ioctl to obtain version failed: %s", strerror(errno));
close(fd);
fd = -1;
}
if (result != 0 || vers != BINDER_CURRENT_PROTOCOL_VERSION) {
ALOGE("Binder driver protocol(%d) does not match user space protocol(%d)! ioctl() return value: %d",
vers, BINDER_CURRENT_PROTOCOL_VERSION, result);
close(fd);
fd = -1;
}
size_t maxThreads = DEFAULT_MAX_BINDER_THREADS;
result = ioctl(fd, BINDER_SET_MAX_THREADS, &maxThreads); // set the thread pool size
if (result == -1) {
ALOGE("Binder ioctl to set max threads failed: %s", strerror(errno));
}
uint32_t enable = DEFAULT_ENABLE_ONEWAY_SPAM_DETECTION;
result = ioctl(fd, BINDER_ENABLE_ONEWAY_SPAM_DETECTION, &enable);
if (result == -1) {
ALOGI("Binder ioctl to enable oneway spam detection failed: %s", strerror(errno));
}
} else {
ALOGW("Opening '%s' failed: %sn", driver, strerror(errno));
}
return fd;
}
The flow here is fairly direct: open the node, use ioctl to query the protocol version, set the thread pool size, and enable oneway (async call) spam detection. Next let's follow open(); at this point we enter the kernel, because the binder driver defines its own open, ioctl and mmap handlers.
static int __init binder_init(void)
{
int ret;
char *device_name, *device_tmp;
struct binder_device *device;
struct hlist_node *tmp;
char *device_names = NULL;
ret = binder_alloc_shrinker_init(); // initialize cache management (shrinker)
if (ret)
return ret;
atomic_set(&binder_transaction_log.cur, ~0U);
atomic_set(&binder_transaction_log_failed.cur, ~0U);
binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); // debugfs directory; on Android this is /sys/kernel/debug, usually with a symlink /d pointing to it
if (binder_debugfs_dir_entry_root)
binder_debugfs_dir_entry_proc = debugfs_create_dir("proc",
binder_debugfs_dir_entry_root);
if (binder_debugfs_dir_entry_root) {
debugfs_create_file("state",
0444,
binder_debugfs_dir_entry_root,
NULL,
&binder_state_fops);
debugfs_create_file("stats",
0444,
binder_debugfs_dir_entry_root,
NULL,
&binder_stats_fops);
debugfs_create_file("transactions",
0444,
binder_debugfs_dir_entry_root,
NULL,
&binder_transactions_fops);
debugfs_create_file("transaction_log",
0444,
binder_debugfs_dir_entry_root,
&binder_transaction_log,
&binder_transaction_log_fops);
debugfs_create_file("failed_transaction_log",
0444,
binder_debugfs_dir_entry_root,
&binder_transaction_log_failed,
&binder_transaction_log_fops);
}
if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) &&
strcmp(binder_devices_param, "") != 0) {
/*
* Copy the module_parameter string, because we don't want to
* tokenize it in-place.
*/
device_names = kstrdup(binder_devices_param, GFP_KERNEL);
if (!device_names) {
ret = -ENOMEM;
goto err_alloc_device_names_failed;
}
device_tmp = device_names;
while ((device_name = strsep(&device_tmp, ","))) {
ret = init_binder_device(device_name); // initialize a binder device; device_name here covers /dev/binder, /dev/hwbinder and /dev/vndbinder
if (ret)
goto err_init_binder_device_failed;
}
}
ret = init_binderfs();
if (ret)
goto err_init_binder_device_failed;
return ret;
err_init_binder_device_failed:
hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
misc_deregister(&device->miscdev);
hlist_del(&device->hlist);
kfree(device);
}
kfree(device_names);
err_alloc_device_names_failed:
debugfs_remove_recursive(binder_debugfs_dir_entry_root);
return ret;
}
device_initcall(binder_init);
Next, the implementation of init_binder_device:
static int __init init_binder_device(const char *name)
{
int ret;
struct binder_device *binder_device;
binder_device = kzalloc(sizeof(*binder_device), GFP_KERNEL);
if (!binder_device)
return -ENOMEM;
binder_device->miscdev.fops = &binder_fops;
binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
binder_device->miscdev.name = name;
refcount_set(&binder_device->ref, 1);
binder_device->context.binder_context_mgr_uid = INVALID_UID;
binder_device->context.name = name;
mutex_init(&binder_device->context.context_mgr_node_lock);
ret = misc_register(&binder_device->miscdev); // register the binder device as a misc device
if (ret < 0) {
kfree(binder_device);
return ret;
}
hlist_add_head(&binder_device->hlist, &binder_devices);
return ret;
}
const struct file_operations binder_fops = {
.owner = THIS_MODULE,
.poll = binder_poll,
.unlocked_ioctl = binder_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.mmap = binder_mmap,
.open = binder_open,
.flush = binder_flush,
.release = binder_release,
};
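Before going into binder_open, here is a purely illustrative userspace analogy of what this file_operations table does: the driver registers a table of callbacks, and the VFS forwards each system call on the device node to the matching entry, which is how open()/ioctl()/mmap() on /dev/binder end up in binder_open/binder_ioctl/binder_mmap (all names below are hypothetical, this is not kernel code):
#include <cstdio>
#include <map>
#include <string>
struct FileOps {
    int  (*open)(const char* node);
    long (*ioctl)(unsigned int cmd, void* arg);
};
static int  demo_open(const char* node)         { std::printf("open %s\n", node); return 0; }
static long demo_ioctl(unsigned int cmd, void*) { std::printf("ioctl 0x%x\n", cmd); return 0; }
static std::map<std::string, FileOps> gRegistry; // stands in for the misc-device registry
int main() {
    gRegistry["/dev/binder"] = {demo_open, demo_ioctl}; // ~ misc_register()
    FileOps& ops = gRegistry["/dev/binder"];            // ~ the VFS resolving the node
    ops.open("/dev/binder");                            // open("/dev/binder") -> .open
    ops.ioctl(0, nullptr);                              // ioctl(fd, ...)      -> .unlocked_ioctl
    return 0;
}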
So this registers the binder device as a misc device and specifies its operation handlers. Since the upper layer calls open first, let's look at the binder_open implementation:
static int binder_open(struct inode *nodp, struct file *filp)
{
struct binder_proc *proc, *itr;
struct binder_device *binder_dev;
struct binderfs_info *info;
struct dentry *binder_binderfs_dir_entry_proc = NULL;
bool existing_pid = false;
binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
current->group_leader->pid, current->pid);
proc = kzalloc(sizeof(*proc), GFP_KERNEL); // each process using binder gets one binder_proc
if (proc == NULL)
return -ENOMEM;
spin_lock_init(&proc->inner_lock);
spin_lock_init(&proc->outer_lock);
get_task_struct(current->group_leader);
proc->tsk = current->group_leader;
INIT_LIST_HEAD(&proc->todo);
proc->default_priority = task_nice(current);
/* binderfs stashes devices in i_private */
if (is_binderfs_device(nodp)) {
binder_dev = nodp->i_private;
info = nodp->i_sb->s_fs_info;
binder_binderfs_dir_entry_proc = info->proc_log_dir;
} else {
binder_dev = container_of(filp->private_data,
struct binder_device, miscdev);
}
refcount_inc(&binder_dev->ref);
proc->context = &binder_dev->context;
binder_alloc_init(&proc->alloc);
binder_stats_created(BINDER_STAT_PROC);
proc->pid = current->group_leader->pid;
INIT_LIST_HEAD(&proc->delivered_death);
INIT_LIST_HEAD(&proc->waiting_threads);
filp->private_data = proc;
mutex_lock(&binder_procs_lock);
hlist_for_each_entry(itr, &binder_procs, proc_node) { // add to the binder_procs list (first check whether this pid is already present)
if (itr->pid == proc->pid) {
existing_pid = true;
break;
}
}
hlist_add_head(&proc->proc_node, &binder_procs);
mutex_unlock(&binder_procs_lock);
if (binder_debugfs_dir_entry_proc && !existing_pid) {
char strbuf[11];
snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
/*
* proc debug entries are shared between contexts.
* Only create for the first PID to avoid debugfs log spamming
* The printing code will anyway print all contexts for a given
* PID so this is not a problem.
*/
proc->debugfs_entry = debugfs_create_file(strbuf, 0444,
binder_debugfs_dir_entry_proc,
(void *)(unsigned long)proc->pid,
&proc_fops);
}
if (binder_binderfs_dir_entry_proc && !existing_pid) {
char strbuf[11];
struct dentry *binderfs_entry;
snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
/*
* Similar to debugfs, the process specific log file is shared
* between contexts. Only create for the first PID.
* This is ok since same as debugfs, the log file will contain
* information on all contexts of a given PID.
*/
binderfs_entry = binderfs_create_file(binder_binderfs_dir_entry_proc,
strbuf, &proc_fops, (void *)(unsigned long)proc->pid);
if (!IS_ERR(binderfs_entry)) {
proc->binderfs_entry = binderfs_entry;
} else {
int error;
error = PTR_ERR(binderfs_entry);
pr_warn("Unable to create file %s in binderfs (error %d)n",
strbuf, error);
}
}
return 0;
}
The main thing here is creating a binder_proc structure and associating it with the current userspace process (current). Next, ioctl:
static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int ret;
struct binder_proc *proc = filp->private_data;
struct binder_thread *thread;
unsigned int size = _IOC_SIZE(cmd);
void __user *ubuf = (void __user *)arg;
/*pr_info("binder_ioctl: %d:%d %x %lxn",
proc->pid, current->pid, cmd, arg);*/
binder_selftest_alloc(&proc->alloc);
trace_binder_ioctl(cmd, arg);
ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
if (ret)
goto err_unlocked;
thread = binder_get_thread(proc); // get a binder thread
if (thread == NULL) {
ret = -ENOMEM;
goto err;
}
switch (cmd) {
case BINDER_WRITE_READ: // by far the most frequently used command
ret = binder_ioctl_write_read(filp, cmd, arg, thread);
if (ret)
goto err;
break;
case BINDER_SET_MAX_THREADS: { // set the binder thread pool size
int max_threads;
if (copy_from_user(&max_threads, ubuf,
sizeof(max_threads))) {
ret = -EINVAL;
goto err;
}
binder_inner_proc_lock(proc);
proc->max_threads = max_threads; // this is where the setting takes effect
binder_inner_proc_unlock(proc);
break;
}
case BINDER_SET_CONTEXT_MGR_EXT: { // set the binder context manager; servicemanager is not necessarily it, another service could also be made the context manager
struct flat_binder_object fbo;
if (copy_from_user(&fbo, ubuf, sizeof(fbo))) {
ret = -EINVAL;
goto err;
}
ret = binder_ioctl_set_ctx_mgr(filp, &fbo);
if (ret)
goto err;
break;
}
case BINDER_SET_CONTEXT_MGR: // set the binder context manager
ret = binder_ioctl_set_ctx_mgr(filp, NULL);
if (ret)
goto err;
break;
case BINDER_THREAD_EXIT:
binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n",
proc->pid, thread->pid);
binder_thread_release(proc, thread);
thread = NULL;
break;
case BINDER_VERSION: { // query the binder version
struct binder_version __user *ver = ubuf;
if (size != sizeof(struct binder_version)) {
ret = -EINVAL;
goto err;
}
if (put_user(BINDER_CURRENT_PROTOCOL_VERSION,
&ver->protocol_version)) {
ret = -EINVAL;
goto err;
}
break;
}
case BINDER_GET_NODE_INFO_FOR_REF: {
struct binder_node_info_for_ref info;
if (copy_from_user(&info, ubuf, sizeof(info))) {
ret = -EFAULT;
goto err;
}
ret = binder_ioctl_get_node_info_for_ref(proc, &info);
if (ret < 0)
goto err;
if (copy_to_user(ubuf, &info, sizeof(info))) {
ret = -EFAULT;
goto err;
}
break;
}
case BINDER_GET_NODE_DEBUG_INFO: {
struct binder_node_debug_info info;
if (copy_from_user(&info, ubuf, sizeof(info))) {
ret = -EFAULT;
goto err;
}
ret = binder_ioctl_get_node_debug_info(proc, &info);
if (ret < 0)
goto err;
if (copy_to_user(ubuf, &info, sizeof(info))) {
ret = -EFAULT;
goto err;
}
break;
}
default:
ret = -EINVAL;
goto err;
}
ret = 0;
err:
if (thread)
thread->looper_need_return = false;
wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
if (ret && ret != -ERESTARTSYS)
pr_info("%d:%d ioctl %x %lx returned %dn", proc->pid, current->pid, cmd, arg, ret);
err_unlocked:
trace_binder_ioctl_done(ret);
return ret;
}
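To tie the userspace and kernel sides together, here is a minimal standalone program that performs the same BINDER_VERSION handshake we saw in open_driver, handled by the BINDER_VERSION case above (a sketch; it assumes the kernel uapi header is available as <linux/android/binder.h>):
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <cstdio>
#include <linux/android/binder.h> // BINDER_VERSION, struct binder_version
int main() {
    int fd = open("/dev/binder", O_RDWR | O_CLOEXEC);
    if (fd < 0) { perror("open /dev/binder"); return 1; }
    struct binder_version ver {};
    if (ioctl(fd, BINDER_VERSION, &ver) == -1) { // dispatched to binder_ioctl() in the driver
        perror("BINDER_VERSION");
    } else {
        std::printf("binder protocol version: %d (expected %d)\n",
                    ver.protocol_version, BINDER_CURRENT_PROTOCOL_VERSION);
    }
    close(fd);
    return 0;
}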
With this, the three ioctl commands we issued earlier are clear. The next key piece is mmap, implemented by binder_mmap:
static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct binder_proc *proc = filp->private_data;
if (proc->tsk != current->group_leader)
return -EINVAL;
...
vma->vm_flags |= VM_DONTCOPY | VM_MIXEDMAP;
vma->vm_flags &= ~VM_MAYWRITE;
vma->vm_ops = &binder_vm_ops;
vma->vm_private_data = proc;
return binder_alloc_mmap_handler(&proc->alloc, vma);
}
Not much happens here; let's look at binder_alloc_mmap_handler:
int binder_alloc_mmap_handler(struct binder_alloc *alloc,
struct vm_area_struct *vma)
{
int ret;
const char *failure_string;
struct binder_buffer *buffer;
mutex_lock(&binder_alloc_mmap_lock);
if (alloc->buffer_size) {
ret = -EBUSY;
failure_string = "already mapped";
goto err_already_mapped;
}
alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start,
SZ_4M); // update the binder buffer size (capped at SZ_4M)
mutex_unlock(&binder_alloc_mmap_lock);
alloc->buffer = (void __user *)vma->vm_start; // record the start address
alloc->pages = kcalloc(alloc->buffer_size / PAGE_SIZE,
sizeof(alloc->pages[0]),
GFP_KERNEL); // allocate the page bookkeeping structures only; no physical memory is allocated yet
if (alloc->pages == NULL) {
ret = -ENOMEM;
failure_string = "alloc page array";
goto err_alloc_pages_failed;
}
buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); // allocate the buffer struct
if (!buffer) {
ret = -ENOMEM;
failure_string = "alloc buffer struct";
goto err_alloc_buf_struct_failed;
}
buffer->user_data = alloc->buffer;
list_add(&buffer->entry, &alloc->buffers); // add to the buffers list
buffer->free = 1;
binder_insert_free_buffer(alloc, buffer); // insert into the free-buffer list
alloc->free_async_space = alloc->buffer_size / 2; // reserve half the space for async (oneway) transactions
binder_alloc_set_vma(alloc, vma); // save the vma into alloc
mmgrab(alloc->vma_vm_mm);
return 0;
}
So at this point the driver simply saves the userspace address into proc's own allocation structure. That wraps up mmap for now. Back in userspace, we continue the ProcessState::self()->getContextObject(nullptr) path for obtaining the servicemanager binder; the first part is done, so the next step is getContextObject itself:
sp<IBinder> ProcessState::getContextObject(const sp<IBinder>& /*caller*/)
{
sp<IBinder> context = getStrongProxyForHandle(0);
if (context) {
// The root object is special since we get it directly from the driver, it is never
// written by Parcel::writeStrongBinder.
internal::Stability::markCompilationUnit(context.get());
} else {
ALOGW("Not able to get context object on %s.", mDriverName.c_str());
}
return context;
}
For servicemanager the handle is 0. Let's look at getStrongProxyForHandle:
sp<IBinder> ProcessState::getStrongProxyForHandle(int32_t handle)
{
sp<IBinder> result;
AutoMutex _l(mLock);
handle_entry* e = lookupHandleLocked(handle); // local lookup; at the beginning there is certainly nothing here, so handle_entry->binder is nullptr
if (e != nullptr) {
// We need to create a new BpBinder if there isn't currently one, OR we
// are unable to acquire a weak reference on this current one. The
// attemptIncWeak() is safe because we know the BpBinder destructor will always
// call expungeHandle(), which acquires the same lock we are holding now.
// We need to do this because there is a race condition between someone
// releasing a reference on this BpBinder, and a new reference on its handle
// arriving from the driver.
IBinder* b = e->binder;
if (b == nullptr || !e->refs->attemptIncWeak(this)) {
if (handle == 0) {
// Special case for context manager...
// The context manager is the only object for which we create
// a BpBinder proxy without already holding a reference.
// Perform a dummy transaction to ensure the context manager
// is registered before we create the first local reference
// to it (which will occur when creating the BpBinder).
// If a local reference is created for the BpBinder when the
// context manager is not present, the driver will fail to
// provide a reference to the context manager, but the
// driver API does not return status.
//
// Note that this is not race-free if the context manager
// dies while this code runs.
//
// TODO: add a driver API to wait for context manager, or
// stop special casing handle 0 for context manager and add
// a driver API to get a handle to the context manager with
// proper reference counting.
IPCThreadState* ipc = IPCThreadState::self();
CallRestriction originalCallRestriction = ipc->getCallRestriction();
ipc->setCallRestriction(CallRestriction::NONE);
Parcel data;
status_t status = ipc->transact(
0, IBinder::PING_TRANSACTION, data, nullptr, 0);
ipc->setCallRestriction(originalCallRestriction);
if (status == DEAD_OBJECT)
return nullptr;
}
sp<BpBinder> b = BpBinder::create(handle);
e->binder = b.get();
if (b) e->refs = b->getWeakRefs();
result = b;
} else {
// This little bit of nastyness is to allow us to add a primary
// reference to the remote proxy when this team doesn't have one
// but another team is sending the handle to us.
result.force_set(b);
e->refs->decWeak(this);
}
}
return result;
}
If there is no local proxy binder for servicemanager, it has to be obtained via the driver. Let's follow that flow:
IPCThreadState* IPCThreadState::self()
{
if (gHaveTLS.load(std::memory_order_acquire)) {
restart:
const pthread_key_t k = gTLS;
IPCThreadState* st = (IPCThreadState*)pthread_getspecific(k); // thread-local storage (TLS)
if (st) return st;
return new IPCThreadState;
}
...
}
IPCThreadState::IPCThreadState()
: mProcess(ProcessState::self()),
mServingStackPointer(nullptr),
mWorkSource(kUnsetWorkSource),
mPropagateWorkSource(false),
mIsLooper(false),
mStrictModePolicy(0),
mLastTransactionBinderFlags(0),
mCallRestriction(mProcess->mCallRestriction)
{
pthread_setspecific(gTLS, this); // store as a thread-local variable, i.e. one IPCThreadState per thread
clearCaller();
mIn.setDataCapacity(256);
mOut.setDataCapacity(256);
}
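The per-thread singleton pattern used by IPCThreadState can be sketched in isolation like this (hypothetical names; the real class additionally handles TLS key setup and teardown for the whole process):
#include <pthread.h>
class ThreadState {
public:
    // Returns the calling thread's instance, creating it on first use.
    static ThreadState* self() {
        pthread_once(&sKeyOnce, makeKey);
        if (void* st = pthread_getspecific(sKey))
            return static_cast<ThreadState*>(st);
        ThreadState* st = new ThreadState();
        pthread_setspecific(sKey, st); // one instance per calling thread
        return st;
    }
private:
    static void makeKey() { pthread_key_create(&sKey, freeState); }
    static void freeState(void* p) { delete static_cast<ThreadState*>(p); }
    static pthread_key_t sKey;
    static pthread_once_t sKeyOnce;
};
pthread_key_t ThreadState::sKey;
pthread_once_t ThreadState::sKeyOnce = PTHREAD_ONCE_INIT;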
Now transact:
status_t IPCThreadState::transact(int32_t handle,
uint32_t code, const Parcel& data,
Parcel* reply, uint32_t flags)
{
status_t err;
flags |= TF_ACCEPT_FDS;
err = writeTransactionData(BC_TRANSACTION, flags, handle, code, data, nullptr); // package the transaction data
if (err != NO_ERROR) {
if (reply) reply->setError(err);
return (mLastError = err);
}
...
if (reply) {
err = waitForResponse(reply); // talk to the driver
} else {
Parcel fakeReply;
err = waitForResponse(&fakeReply);
}
...
} else {
err = waitForResponse(nullptr, nullptr);
}
return err;
}
As you can see, the flow inside transact is packaging the data and talking to the driver; if there is a return value, it is written directly into reply. First, how the data is packaged:
status_t IPCThreadState::writeTransactionData(int32_t cmd, uint32_t binderFlags,
int32_t handle, uint32_t code, const Parcel& data, status_t* statusBuffer)
{
binder_transaction_data tr;
tr.target.ptr = 0; /* Don't pass uninitialized stack data to a remote process */
tr.target.handle = handle; // servicemanager's handle, i.e. 0
tr.code = code; // PING_TRANSACTION
tr.flags = binderFlags;
tr.cookie = 0;
tr.sender_pid = 0;
tr.sender_euid = 0;
const status_t err = data.errorCheck();
if (err == NO_ERROR) {
tr.data_size = data.ipcDataSize(); // all of this should still be empty at this point
tr.data.ptr.buffer = data.ipcData();
tr.offsets_size = data.ipcObjectsCount()*sizeof(binder_size_t);
tr.data.ptr.offsets = data.ipcObjects();
} else if (statusBuffer) {
tr.flags |= TF_STATUS_CODE;
*statusBuffer = err;
tr.data_size = sizeof(status_t);
tr.data.ptr.buffer = reinterpret_cast<uintptr_t>(statusBuffer);
tr.offsets_size = 0;
tr.data.ptr.offsets = 0;
} else {
return (mLastError = err);
}
mOut.writeInt32(cmd); // BC_TRANSACTION
mOut.write(&tr, sizeof(tr)); // write the data into the parcel
return NO_ERROR;
}
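Conceptually, after writeTransactionData() the mOut parcel holds the command word followed by the transaction descriptor. A rough sketch of building that same byte layout by hand (illustrative only, assuming the kernel uapi header is available as <linux/android/binder.h>; normally the Parcel does this for you):
#include <cstdint>
#include <cstring>
#include <vector>
#include <linux/android/binder.h> // binder_transaction_data, BC_TRANSACTION
// Layout written into mOut for one transaction: [ uint32_t cmd ][ binder_transaction_data tr ]
std::vector<uint8_t> packTransaction(const binder_transaction_data& tr) {
    std::vector<uint8_t> out(sizeof(uint32_t) + sizeof(tr));
    const uint32_t cmd = BC_TRANSACTION;
    std::memcpy(out.data(), &cmd, sizeof(cmd));
    std::memcpy(out.data() + sizeof(cmd), &tr, sizeof(tr));
    return out;
}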
At this point everything related to the request has been written into the parcel. Next, waitForResponse:
status_t IPCThreadState::waitForResponse(Parcel *reply, status_t *acquireResult)
{
uint32_t cmd;
int32_t err;
while (1) {
if ((err=talkWithDriver()) < NO_ERROR) break; // write the request into the kernel and read the driver's reply
err = mIn.errorCheck();
if (err < NO_ERROR) break;
if (mIn.dataAvail() == 0) continue;
cmd = (uint32_t)mIn.readInt32();
...
switch (cmd) { // parse the driver's reply
case BR_ONEWAY_SPAM_SUSPECT:
ALOGE("Process seems to be sending too many oneway calls.");
CallStack::logStack("oneway spamming", CallStack::getCurrent().get(),
ANDROID_LOG_ERROR);
[[fallthrough]];
case BR_TRANSACTION_COMPLETE:
if (!reply && !acquireResult) goto finish;
break;
case BR_DEAD_REPLY:
err = DEAD_OBJECT;
goto finish;
case BR_FAILED_REPLY:
err = FAILED_TRANSACTION;
goto finish;
case BR_FROZEN_REPLY:
err = FAILED_TRANSACTION;
goto finish;
case BR_ACQUIRE_RESULT:
{
ALOG_ASSERT(acquireResult != NULL, "Unexpected brACQUIRE_RESULT");
const int32_t result = mIn.readInt32();
if (!acquireResult) continue;
*acquireResult = result ? NO_ERROR : INVALID_OPERATION;
}
goto finish;
case BR_REPLY:
{
binder_transaction_data tr;
err = mIn.read(&tr, sizeof(tr));
ALOG_ASSERT(err == NO_ERROR, "Not enough command data for brREPLY");
if (err != NO_ERROR) goto finish;
if (reply) {
if ((tr.flags & TF_STATUS_CODE) == 0) {
reply->ipcSetDataReference(
reinterpret_cast<const uint8_t*>(tr.data.ptr.buffer),
tr.data_size,
reinterpret_cast<const binder_size_t*>(tr.data.ptr.offsets),
tr.offsets_size/sizeof(binder_size_t),
freeBuffer);
} else {
err = *reinterpret_cast<const status_t*>(tr.data.ptr.buffer);
freeBuffer(nullptr,
reinterpret_cast<const uint8_t*>(tr.data.ptr.buffer),
tr.data_size,
reinterpret_cast<const binder_size_t*>(tr.data.ptr.offsets),
tr.offsets_size/sizeof(binder_size_t));
}
} else {
freeBuffer(nullptr,
reinterpret_cast<const uint8_t*>(tr.data.ptr.buffer),
tr.data_size,
reinterpret_cast<const binder_size_t*>(tr.data.ptr.offsets),
tr.offsets_size/sizeof(binder_size_t));
continue;
}
}
goto finish;
default:
err = executeCommand(cmd);
if (err != NO_ERROR) goto finish;
break;
}
}
finish:
if (err != NO_ERROR) {
if (acquireResult) *acquireResult = err;
if (reply) reply->setError(err);
mLastError = err;
}
return err;
}
Now talkWithDriver:
status_t IPCThreadState::talkWithDriver(bool doReceive)
{
if (mProcess->mDriverFD < 0) {
return -EBADF;
}
binder_write_read bwr; // the structure the driver will read
// Is the read buffer empty?
const bool needRead = mIn.dataPosition() >= mIn.dataSize();
// We don't want to write anything if we are still reading
// from data left in the input buffer and the caller
// has requested to read the next data.
const size_t outAvail = (!doReceive || needRead) ? mOut.dataSize() : 0;
bwr.write_size = outAvail;
bwr.write_buffer = (uintptr_t)mOut.data();
// This is what we'll read.
if (doReceive && needRead) {
bwr.read_size = mIn.dataCapacity();
bwr.read_buffer = (uintptr_t)mIn.data();
} else {
bwr.read_size = 0;
bwr.read_buffer = 0;
}
...
// Return immediately if there is nothing to do.
if ((bwr.write_size == 0) && (bwr.read_size == 0)) return NO_ERROR;
bwr.write_consumed = 0;
bwr.read_consumed = 0;
status_t err;
...
#if defined(__ANDROID__)
if (ioctl(mProcess->mDriverFD, BINDER_WRITE_READ, &bwr) >= 0) // talk to the driver
err = NO_ERROR;
else
err = -errno;
#else
err = INVALID_OPERATION;
#endif
...
} while (err == -EINTR);
if (err >= NO_ERROR) {
if (bwr.write_consumed > 0) {
...
else {
mOut.setDataSize(0);
processPostWriteDerefs();
}
}
if (bwr.read_consumed > 0) {
mIn.setDataSize(bwr.read_consumed);
mIn.setDataPosition(0);
}
...
return NO_ERROR;
}
return err;
}
So this writes the request into a binder_write_read and exchanges it with the driver. Before going into the driver, here is the binder_write_read structure:
struct binder_write_read {
binder_size_t write_size; /* bytes to write */
binder_size_t write_consumed; /* bytes consumed by driver */
binder_uintptr_t write_buffer;
binder_size_t read_size; /* bytes to read */
binder_size_t read_consumed; /* bytes consumed by driver */
binder_uintptr_t read_buffer;
};
In the driver, the ioctl handling is as follows:
static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int ret;
struct binder_proc *proc = filp->private_data;
struct binder_thread *thread;
unsigned int size = _IOC_SIZE(cmd);
void __user *ubuf = (void __user *)arg;
binder_selftest_alloc(&proc->alloc);
ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
if (ret)
goto err_unlocked;
thread = binder_get_thread(proc);
if (thread == NULL) {
ret = -ENOMEM;
goto err;
}
switch (cmd) {
case BINDER_WRITE_READ:
ret = binder_ioctl_write_read(filp, cmd, arg, thread);
if (ret)
goto err;
break;
...
First, binder_get_thread:
static struct binder_thread *binder_get_thread(struct binder_proc *proc)
{
struct binder_thread *thread;
struct binder_thread *new_thread;
binder_inner_proc_lock(proc);
thread = binder_get_thread_ilocked(proc, NULL); // look up the binder thread in the red-black tree; every userspace thread maps to one binder_thread
binder_inner_proc_unlock(proc);
if (!thread) {
new_thread = kzalloc(sizeof(*thread), GFP_KERNEL);
if (new_thread == NULL)
return NULL;
binder_inner_proc_lock(proc);
thread = binder_get_thread_ilocked(proc, new_thread); // if not found, add new_thread to the red-black tree and initialize it
binder_inner_proc_unlock(proc);
if (thread != new_thread)
kfree(new_thread);
}
return thread;
}
The logic of binder_get_thread_ilocked is as follows:
static struct binder_thread *binder_get_thread_ilocked(
struct binder_proc *proc, struct binder_thread *new_thread)
{
struct binder_thread *thread = NULL;
struct rb_node *parent = NULL;
struct rb_node **p = &proc->threads.rb_node;
// walk the red-black tree; the key is the thread's pid (effectively its tid)
while (*p) {
parent = *p;
thread = rb_entry(parent, struct binder_thread, rb_node);
if (current->pid < thread->pid)
p = &(*p)->rb_left;
else if (current->pid > thread->pid)
p = &(*p)->rb_right;
else
return thread;
}
if (!new_thread)
return NULL;
thread = new_thread; // initialize the thread
binder_stats_created(BINDER_STAT_THREAD);
thread->proc = proc;
thread->pid = current->pid;
atomic_set(&thread->tmp_ref, 0);
init_waitqueue_head(&thread->wait);
INIT_LIST_HEAD(&thread->todo);
rb_link_node(&thread->rb_node, parent, p); // insert into the red-black tree
rb_insert_color(&thread->rb_node, &proc->threads);
thread->looper_need_return = true;
thread->return_error.work.type = BINDER_WORK_RETURN_ERROR;
thread->return_error.cmd = BR_OK;
thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR;
thread->reply_error.cmd = BR_OK;
INIT_LIST_HEAD(&new_thread->waiting_thread_node);
return thread;
}
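Conceptually, proc->threads behaves like a map keyed by the calling thread's tid, with entries created lazily on first use. A userspace analogue of the same lookup-or-create logic (illustrative only; the kernel uses an rb-tree and its own locking):
#include <sys/syscall.h>
#include <unistd.h>
#include <map>
#include <mutex>
struct BinderThread { pid_t tid; /* wait queue, todo list, ... */ };
static std::mutex gLock;
static std::map<pid_t, BinderThread> gThreads; // stands in for proc->threads
BinderThread& getThread() {
    pid_t tid = static_cast<pid_t>(syscall(SYS_gettid)); // current->pid in kernel terms
    std::lock_guard<std::mutex> l(gLock);
    auto it = gThreads.find(tid);
    if (it == gThreads.end())
        it = gThreads.emplace(tid, BinderThread{tid}).first; // create lazily on first ioctl
    return it->second;
}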
With the thread in hand, the next step is binder_ioctl_write_read:
static int binder_ioctl_write_read(struct file *filp,
unsigned int cmd, unsigned long arg,
struct binder_thread *thread)
{
int ret = 0;
struct binder_proc *proc = filp->private_data;
unsigned int size = _IOC_SIZE(cmd);
void __user *ubuf = (void __user *)arg;
struct binder_write_read bwr;
if (size != sizeof(struct binder_write_read)) {
ret = -EINVAL;
goto out;
}
// copy the data from userspace
if (copy_from_user(&bwr, ubuf, sizeof(bwr))) {
ret = -EFAULT;
goto out;
}
if (bwr.write_size > 0) {
ret = binder_thread_write(proc, thread, // read the data, which contains the command codes etc.
bwr.write_buffer,
bwr.write_size,
&bwr.write_consumed);
trace_binder_write_done(ret);
if (ret < 0) {
bwr.read_consumed = 0;
if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
ret = -EFAULT;
goto out;
}
}
...
binder_thread_write parses the command stream. We only need to follow our case, where cmd is BC_TRANSACTION and the code is PING_TRANSACTION:
static int binder_thread_write(struct binder_proc *proc,
struct binder_thread *thread,
binder_uintptr_t binder_buffer, size_t size,
binder_size_t *consumed)
{
uint32_t cmd;
struct binder_context *context = proc->context;
void __user *buffer = (void __user *)(uintptr_t)binder_buffer;
void __user *ptr = buffer + *consumed;
void __user *end = buffer + size;
...
case BC_TRANSACTION:
case BC_REPLY: {
struct binder_transaction_data tr;
if (copy_from_user(&tr, ptr, sizeof(tr)))
return -EFAULT;
ptr += sizeof(tr);
binder_transaction(proc, thread, &tr,
cmd == BC_REPLY, 0);
break;
}
...
Here the content is read and the command is parsed out; in our case the command is BC_TRANSACTION, which leads to binder_transaction:
static void binder_transaction(struct binder_proc *proc,
struct binder_thread *thread,
struct binder_transaction_data *tr, int reply,
binder_size_t extra_buffers_size)
{
if (tr->target.handle) {
struct binder_ref *ref;
/*
* There must already be a strong ref
* on this node. If so, do a strong
* increment on the node to ensure it
* stays alive until the transaction is
* done.
*/
binder_proc_lock(proc);
ref = binder_get_ref_olocked(proc, tr->target.handle,
true);
if (ref) {
target_node = binder_get_node_refs_for_txn(
ref->node, &target_proc,
&return_error);
} else {
binder_user_error("%d:%d got transaction to invalid handlen",
proc->pid, thread->pid);
return_error = BR_FAILED_REPLY;
}
binder_proc_unlock(proc);
} else {
// the code is long; parts this flow does not reach are omitted. The handle is 0, so the target obtained is servicemanager
mutex_lock(&context->context_mgr_node_lock);
target_node = context->binder_context_mgr_node;
if (target_node)
target_node = binder_get_node_refs_for_txn(
target_node, &target_proc,
&return_error);
else
return_error = BR_DEAD_REPLY;
mutex_unlock(&context->context_mgr_node_lock);
if (target_node && target_proc->pid == proc->pid) {
binder_user_error("%d:%d got transaction to context manager from process owning itn",
proc->pid, thread->pid);
return_error = BR_FAILED_REPLY;
return_error_param = -EINVAL;
return_error_line = __LINE__;
goto err_invalid_target_handle;
}
}
...
// For a synchronous call where the current thread already has a binder transaction in flight, we can try an optimization when picking the target thread.
// The optimization is mainly about improving concurrency: if thread B sends a request to thread A, B is blocked until A finishes; if A meanwhile needs the binder process that B lives in to handle a request,
// that request can simply be handled on thread B directly, with no need to spin up a new thread.
if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) {
struct binder_transaction *tmp;
tmp = thread->transaction_stack;
if (tmp->to_thread != thread) { // a transaction on this thread's stack must of course target this thread
spin_lock(&tmp->lock);
binder_user_error("%d:%d got new transaction with bad transaction stack, transaction %d has target %d:%dn",
proc->pid, thread->pid, tmp->debug_id,
tmp->to_proc ? tmp->to_proc->pid : 0,
tmp->to_thread ?
tmp->to_thread->pid : 0);
spin_unlock(&tmp->lock);
binder_inner_proc_unlock(proc);
return_error = BR_FAILED_REPLY;
return_error_param = -EPROTO;
return_error_line = __LINE__;
goto err_bad_call_stack;
}
while (tmp) {
struct binder_thread *from; // the target-thread optimization; see the comment above
spin_lock(&tmp->lock);
from = tmp->from;
if (from && from->proc == target_proc) {
atomic_inc(&from->tmp_ref);
target_thread = from;
spin_unlock(&tmp->lock);
break;
}
spin_unlock(&tmp->lock);
tmp = tmp->from_parent;
}
}
...
} else if (!(t->flags & TF_ONE_WAY)) {
BUG_ON(t->buffer->async_transaction != 0);
binder_inner_proc_lock(proc);
/*
* Defer the TRANSACTION_COMPLETE, so we don't return to
* userspace immediately; this allows the target process to
* immediately start processing this transaction, reducing
* latency. We will then return the TRANSACTION_COMPLETE when
* the target replies (or there is an error).
*/
binder_enqueue_deferred_thread_work_ilocked(thread, tcomplete);
t->need_reply = 1;
t->from_parent = thread->transaction_stack;
thread->transaction_stack = t;
binder_inner_proc_unlock(proc);
if (!binder_proc_transaction(t, target_proc, target_thread)) { // add the binder transaction to the todo queue of the target thread or process
binder_inner_proc_lock(proc);
binder_pop_transaction_ilocked(thread, t);
binder_inner_proc_unlock(proc);
goto err_dead_proc_or_thread;
At this point, the flow for obtaining the servicemanager binder is essentially complete. Finally, a quick look at binder_proc_transaction:
static bool binder_proc_transaction(struct binder_transaction *t,
struct binder_proc *proc,
struct binder_thread *thread)
{
struct binder_node *node = t->buffer->target_node;
bool oneway = !!(t->flags & TF_ONE_WAY);
bool pending_async = false;
BUG_ON(!node);
binder_node_lock(node);
if (oneway) {
BUG_ON(thread);
if (node->has_async_transaction)
pending_async = true;
else
node->has_async_transaction = true;
}
binder_inner_proc_lock(proc);
if (proc->is_dead || (thread && thread->is_dead)) {
binder_inner_proc_unlock(proc);
binder_node_unlock(node);
return false;
}
if (!thread && !pending_async)
thread = binder_select_thread_ilocked(proc); // returns an idle thread
if (thread)
binder_enqueue_thread_work_ilocked(thread, &t->work); // add to the idle thread's work queue
else if (!pending_async)
binder_enqueue_work_ilocked(&t->work, &proc->todo); // add to the target process's todo queue
else
binder_enqueue_work_ilocked(&t->work, &node->async_todo); // add to the async queue
if (!pending_async)
binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */); // wake up the target thread
binder_inner_proc_unlock(proc);
binder_node_unlock(node);
return true;
}
That covers the whole flow behind defaultServiceManager. Finally, a flow chart to sum it up:
(flow chart: binder.png)
Summary
This article walked through how the servicemanager proxy is obtained, covering ProcessState (a per-process singleton), IPCThreadState (a per-thread singleton), and the open, mmap and ioctl parts of the binder driver. Compared with other call flows this one is a bit simpler, but it is still very helpful for getting familiar with how binder works.