深入浅出MMC子系统

前言

本文基于内核版本4.1.15分析，随着内核版本升级，部分数据结构会发生变化，但是整体流程没有发生变化。

先附上读写eMMC大体经过，目的在于了解过程，具体细节不作过多分析。

文件系统缓存层

_submit_bh (fs/buffer.c)

文件系统层的处理最终会调用到_submit_bh函数，在这里完成将buffer_head描述的数据转换成bio描述（block I/O），传到general block layer，所以对于general block layer接收到的就是一个初始化好的bio，里面描述了需要读写数据的信息。

代码语言：javascript复制

/*
 * Descriptor for one buffer inside a page. _submit_bh() reads b_blocknr,
 * b_size, b_page, b_bdev (and the in-page offset of b_data) from it to
 * build the bio that is handed to the block layer.
 */
struct buffer_head {
  unsigned long b_state;    /* buffer state bitmap (BH_* bits) */
  struct buffer_head *b_this_page;/* circular list of page's buffers */
  struct page *b_page;    /* the page this bh is mapped to */

  sector_t b_blocknr;    /* start block number */
  size_t b_size;      /* size of mapping */
  char *b_data;      /* pointer to data within the page */

  struct block_device *b_bdev;  /* block device the buffer belongs to */
  bh_end_io_t *b_end_io;    /* I/O completion */
   void *b_private;    /* reserved for b_end_io */
  struct list_head b_assoc_buffers; /* associated with another mapping */
  struct address_space *b_assoc_map;  /* mapping this buffer is
               associated with */
  atomic_t b_count;    /* users using this buffer_head */
};

代码语言：javascript复制

/*
 * Wrap a single buffer_head in a one-segment bio and submit it.
 *
 * @rw:        read/write direction plus request flags; REQ_META/REQ_PRIO
 *             are OR-ed in below depending on the buffer state.
 * @bh:        buffer to transfer; also stored in bio->bi_private.
 * @bio_flags: extra flags OR-ed into bio->bi_flags.
 *
 * Returns 0, or -EOPNOTSUPP when the bio carries BIO_EOPNOTSUPP after
 * submit_bio() returns. (Part of the original body is elided in this
 * excerpt.)
 */
int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
  struct bio *bio;
  int ret = 0;
···············省略······················
  /*
   * Only clear out a write error when rewriting
   */
  if (test_set_buffer_req(bh) && (rw & WRITE))
    clear_buffer_write_io_error(bh);

  /*
   * from here on down, it's all bio -- do the initial mapping,
   * submit_bio -> generic_make_request may further map this bio around
   */
  bio = bio_alloc(GFP_NOIO, 1);

  /* b_size >> 9 is the block size in 512-byte sectors */
  bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
  bio->bi_bdev = bh->b_bdev;
  /* the bio has exactly one segment: the bh's slice of its page */
  bio->bi_io_vec[0].bv_page = bh->b_page;
  bio->bi_io_vec[0].bv_len = bh->b_size;
  bio->bi_io_vec[0].bv_offset = bh_offset(bh);
  bio->bi_vcnt = 1;
  bio->bi_iter.bi_size = bh->b_size;

  /* completion callback gets the bh back through bi_private */
  bio->bi_end_io = end_bio_bh_io_sync;
  bio->bi_private = bh;
  bio->bi_flags |= bio_flags;

  /* Take care of bh's that straddle the end of the device */
  guard_bio_eod(rw, bio);

  if (buffer_meta(bh))
    rw |= REQ_META;
  if (buffer_prio(bh))
    rw |= REQ_PRIO;

  /* extra reference so the bio can still be inspected after submission */
  bio_get(bio);
  submit_bio(rw, bio); // submit the bio; execution enters the generic block layer

  if (bio_flagged(bio, BIO_EOPNOTSUPP))
    ret = -EOPNOTSUPP;

  bio_put(bio);
  return ret;
}

general block layer

submit_bio (block/blk-core.c)

该函数主要判断读还是写，然后调用generic_make_request，将bio加入请求队列中。

内核希望同一时刻只有一个make_request_fn处于活动状态，否则在堆叠设备（stacked device）的场景下，层层递归调用可能导致内核栈占用过大。因此，使用current->bio_list保存由make_request_fn函数提交的bio链表。current->bio_list同时也用作一个标志，表示generic_make_request当前是否在这个任务中处于活动状态：如果它是NULL，说明没有活动的make_request_fn；如果它非NULL，说明make_request_fn正在运行，新的bio应该添加到链表尾部。

代码语言：javascript复制

/*
 * Hand a bio to the make_request_fn of the queue that owns its device.
 *
 * If this task is already inside generic_make_request()
 * (current->bio_list is non-NULL), the bio is only appended to that list
 * and the function returns; the outermost invocation drains the list in
 * its loop. Bios rejected by generic_make_request_checks() are dropped
 * here without being queued.
 */
void generic_make_request(struct bio *bio)
{
  struct bio_list bio_list_on_stack;

  if (!generic_make_request_checks(bio))
    return;

  /* nested call: queue the bio for the outer invocation and return */
  if (current->bio_list) {
    bio_list_add(current->bio_list, bio);
    return;
  }

  /* following loop may be a bit non-obvious, and so deserves some
   * explanation.
   * Before entering the loop, bio->bi_next is NULL (as all callers
   * ensure that) so we have a list with a single bio.
   * We pretend that we have just taken it off a longer list, so
   * we assign bio_list to a pointer to the bio_list_on_stack,
   * thus initialising the bio_list of new bios to be
   * added. ->make_request() may indeed add some more bios
   * through a recursive call to generic_make_request. If it
   * did, we find a non-NULL value in bio_list and re-enter the loop
   * from the top. In this case we really did just take the bio
   * of the top of the list (no pretending) and so remove it from
   * bio_list, and call into ->make_request() again.
   */
  BUG_ON(bio->bi_next);
  bio_list_init(&bio_list_on_stack);
  current->bio_list = &bio_list_on_stack;
  do {
    struct request_queue *q = bdev_get_queue(bio->bi_bdev);

    q->make_request_fn(q, bio);

    /* pick up any bio queued by a nested call, NULL when drained */
    bio = bio_list_pop(current->bio_list);
  } while (bio);
  current->bio_list = NULL; /* deactivate */
}

make_request_fn在blk_queue_make_request函数里赋值，指向blk_queue_bio函数，blk_queue_bio函数使用电梯算法对bio尝试合并，然后调用__blk_run_queue运行队列。

随后在__blk_run_queue_uncond中调用q->request_fn(q)，request_fn在blk_init_queue函数中初始化，指向mmc_request_fn函数。mmc_request_fn通过wake_up_process唤醒内核线程mq->thread，该线程在mmc_init_queue函数中创建，线程函数为mmc_queue_thread。mmc_queue_thread位于drivers/mmc/card/queue.c，执行到这里代表着从此进入了MMC子系统card层。内核4.1以后的某个版本将card层合并到core层去了，所以这里暂且把card层也当成MMC的core核心层去看吧。

MMC core层

在mmc_queue_thread函数，通过blk_fetch_request从mmc queue中取出请求，然后执行mq->issue_fn下发请求，mq->issue_fn在mmc_blk_alloc_req函数完成初始化，即执行mmc_blk_issue_rq下发请求。

代码语言：javascript复制

/*
 * Dispatch one block-layer request to the matching MMC issue routine.
 *
 * @mq:  MMC queue the request came from.
 * @req: request to issue; may be NULL, in which case cmd_flags is
 *       treated as 0 and the read/write path is taken with a NULL
 *       request.
 *
 * Discard and flush requests first complete any outstanding async
 * transfer (host->areq) before being issued. Returns the result of the
 * selected issue routine, or 0 when the partition switch failed (the
 * request, if any, is then ended with -EIO).
 */
static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
{
  int ret;
  struct mmc_blk_data *md = mq->data;
  struct mmc_card *card = md->queue.card;
  struct mmc_host *host = card->host;
  unsigned long flags;
  unsigned int cmd_flags = req ? req->cmd_flags : 0;

  if (req && !mq->mqrq_prev->req)
    /* claim host only for the first request */
    mmc_get_card(card);

  ret = mmc_blk_part_switch(card, md);
  if (ret) {
    if (req) {
      blk_end_request_all(req, -EIO);
    }
    /* the failure was already reported through the request itself */
    ret = 0;
    goto out;
  }

  mq->flags &= ~MMC_QUEUE_NEW_REQUEST;
  if (cmd_flags & REQ_DISCARD) {
    /* complete ongoing async transfer before issuing discard */
    if (card->host->areq)
      mmc_blk_issue_rw_rq(mq, NULL);
    if (req->cmd_flags & REQ_SECURE)
      ret = mmc_blk_issue_secdiscard_rq(mq, req);
    else
      ret = mmc_blk_issue_discard_rq(mq, req);
  } else if (cmd_flags & REQ_FLUSH) {
    /* complete ongoing async transfer before issuing flush */
    if (card->host->areq)
      mmc_blk_issue_rw_rq(mq, NULL);
    ret = mmc_blk_issue_flush(mq, req);
  } else {
    /* no new request but one still in flight: mark it as the last one */
    if (!req && host->areq) {
      spin_lock_irqsave(&host->context_info.lock, flags);
      host->context_info.is_waiting_last_req = true;
      spin_unlock_irqrestore(&host->context_info.lock, flags);
    }
    ret = mmc_blk_issue_rw_rq(mq, req);
  }

out:
  if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) ||
       (cmd_flags & MMC_REQ_SPECIAL_MASK))
    /*
     * Release host when there are no more requests
     * and after special request(discard, flush) is done.
     * In case sepecial request, there is no reentry to
     * the 'mmc_blk_issue_rq' with 'mqrq_prev->req'.
     */
    mmc_put_card(card);
  return ret;
}

暂且仅关注读写请求，通过判断request的成员cmd_flags，执行mmc_blk_issue_rw_rq下发读写请求。

经过一系列的调用，最终都会调到host->ops->request，这个十分重要，调用该函数代表着从MMC的core层进入到host层了，request定义在mmc_host_ops结构体里，host层是与平台紧密相关的，drivers/mmc/host的源文件并不是所有都会编译，所以这里还需结合你的芯片平台。

进入drivers/mmc/host，可以通过以下方法确认使用的host是哪个。

四 MMC host层

sdhci_request函数主要做的事情是判断host_flags，是否设置了SDHCI_AUTO_CMD12和SDHCI_AUTO_CMD23。

因为对于MMC读有两种方式，一种是先发送CMD23设置要读的长度（单位：block）；另一种是开放式读，直到card接收到CMD12才停止读card。所以这里对SDHCI_AUTO_CMD12的判断，目的是避免重复发送CMD12，后面有个地方会提到。

对于SDHCI_AUTO_CMD23的处理，区别就是在于使用mmc_request里的sbc还是cmd，事实上并没什么区别，他们的类型都是struct mmc_command。

代码语言：javascript复制

/*
 * Entry point from the MMC core into the SDHCI host driver: start the
 * given request on the controller.
 *
 * @mmc: host the request is for; the sdhci_host is its private data.
 * @mrq: request to start. Its stop command is dropped when Auto-CMD12 is
 *       enabled and no sbc is present.
 *
 * If no card is present or the device is marked dead, the request is
 * failed with -ENOMEDIUM through the finish tasklet instead of being
 * sent.
 */
static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
{
  struct sdhci_host *host;
  int present;
  unsigned long flags;

  host = mmc_priv(mmc);

  sdhci_runtime_pm_get(host);

  /* Firstly check card presence */
  present = sdhci_do_get_cd(host);

  spin_lock_irqsave(&host->lock, flags);

  /* a previous request must not still be in flight */
  WARN_ON(host->mrq != NULL);

#ifndef SDHCI_USE_LEDS_CLASS
  sdhci_activate_led(host);
#endif

  /*
   * Ensure we don't send the STOP for non-SET_BLOCK_COUNTED
   * requests if Auto-CMD12 is enabled.
   */
  if (!mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) {
    if (mrq->stop) {
      mrq->data->stop = NULL;
      mrq->stop = NULL;
    }
  }

  host->mrq = mrq;

  if (!present || host->flags & SDHCI_DEVICE_DEAD) {
    host->mrq->cmd->error = -ENOMEDIUM;
    tasklet_schedule(&host->finish_tasklet);
  } else {
    /* send sbc ourselves only when Auto-CMD23 is not enabled */
    if (mrq->sbc && !(host->flags & SDHCI_AUTO_CMD23))
      sdhci_send_command(host, mrq->sbc);
    else
      sdhci_send_command(host, mrq->cmd);
  }

  mmiowb();
  spin_unlock_irqrestore(&host->lock, flags);
}

在sdhci_send_command已经是接近最底层的操作了，大部分是位运算和写寄存器操作。

代码语言：javascript复制

void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
{
  int flags;
  u32 mask;
  unsigned long timeout;

  WARN_ON(host->cmd);

  /* Wait max 10 ms */
  timeout = 10;

  mask = SDHCI_CMD_INHIBIT;
  if ((cmd->data != NULL) || (cmd->flags & MMC_RSP_BUSY))
    mask |= SDHCI_DATA_INHIBIT;

  /* We shouldn't wait for data inihibit for stop commands, even
     though they might use busy signaling */
  if (host->mrq->data && (cmd == host->mrq->data->stop))
    mask &= ~SDHCI_DATA_INHIBIT;

  while (sdhci_readl(host, SDHCI_PRESENT_STATE) & mask) {
    if (timeout == 0) {
      pr_err("%s: Controller never released "
        "inhibit bit(s).n", mmc_hostname(host->mmc));
      sdhci_dumpregs(host);
      cmd->error = -EIO;
      tasklet_schedule(&host->finish_tasklet);
      return;
    }
    timeout--;
    mdelay(1);
  }

  timeout = jiffies;
  if (!cmd->data && cmd->busy_timeout > 9000)
    timeout  = DIV_ROUND_UP(cmd->busy_timeout, 1000) * HZ   HZ;
  else
    timeout  = 10 * HZ;
  mod_timer(&host->timer, timeout);

  host->cmd = cmd;
  host->busy_handle = 0;

  sdhci_prepare_data(host, cmd);

  sdhci_writel(host, cmd->arg, SDHCI_ARGUMENT);

  sdhci_set_transfer_mode(host, cmd);

  if ((cmd->flags & MMC_RSP_136) && (cmd->flags & MMC_RSP_BUSY)) {
    pr_err("%s: Unsupported response type!n",
      mmc_hostname(host->mmc));
    cmd->error = -EINVAL;
    tasklet_schedule(&host->finish_tasklet);
    return;
  }

  if (!(cmd->flags & MMC_RSP_PRESENT))
    flags = SDHCI_CMD_RESP_NONE;
  else if (cmd->flags & MMC_RSP_136)
    flags = SDHCI_CMD_RESP_LONG;
  else if (cmd->flags & MMC_RSP_BUSY)
    flags = SDHCI_CMD_RESP_SHORT_BUSY;
  else
    flags = SDHCI_CMD_RESP_SHORT;

  if (cmd->flags & MMC_RSP_CRC)
    flags |= SDHCI_CMD_CRC;
  if (cmd->flags & MMC_RSP_OPCODE)
    flags |= SDHCI_CMD_INDEX;

  /* CMD19 is special in that the Data Present Select should be set */
  if (cmd->data || cmd->opcode == MMC_SEND_TUNING_BLOCK ||
      cmd->opcode == MMC_SEND_TUNING_BLOCK_HS200)
    flags |= SDHCI_CMD_DATA;

  sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND);
}

中断处理

mmc在注册中断时使用的中断标志是IRQF_SHARED，所以内核并不判断究竟是共享中断线上的哪个设备产生了中断，它会循环执行所有该中断线上注册的中断处理函数（即irqaction->handler函数）。因此irqaction->handler（即sdhci_irq）函数有责任识别出是否是自己的硬件设备产生了中断，然后再执行该中断处理函数。通常是通过读取该硬件设备提供的中断flag标志位进行判断。

可以看到当中断发生时，handler函数sdhci_irq首先对mmc controller中断寄存器读取，判断自己的设备是否发生了中断。

sdhci_irq主要做了三件事：读取中断状态寄存器、清中断标志、判断是命令中断还是数据中断并分别调用对应的处理函数，这里是中断的上半部。以发生数据中断时分析，调用sdhci_data_irq处理。

代码语言：javascript复制

/*
 * Hard-IRQ handler (top half) for the SDHCI controller.
 *
 * @irq:    interrupt number (unused in this function).
 * @dev_id: the sdhci_host registered with the interrupt.
 *
 * Reads SDHCI_INT_STATUS and returns IRQ_NONE when the value is 0 or
 * 0xffffffff, or when the host is runtime-suspended without SDIO IRQs
 * enabled. Otherwise it acknowledges and dispatches the pending sources
 * in a loop bounded by max_loops. Returns IRQ_WAKE_THREAD when card
 * insert/remove or a threaded SDIO interrupt needs the IRQ thread,
 * IRQ_HANDLED otherwise.
 */
static irqreturn_t sdhci_irq(int irq, void *dev_id)
{
  irqreturn_t result = IRQ_NONE;
  struct sdhci_host *host = dev_id;
  u32 intmask, mask, unexpected = 0;
  int max_loops = 16;
  int cardint = 0;

  spin_lock(&host->lock);

  if (host->runtime_suspended && !sdhci_sdio_irq_enabled(host)) {
    spin_unlock(&host->lock);
    return IRQ_NONE;
  }

  intmask = sdhci_readl(host, SDHCI_INT_STATUS);
  if (!intmask || intmask == 0xffffffff) {
    result = IRQ_NONE;
    goto out;
  }

  do {
    /* Clear selected interrupts. */
    mask = intmask & (SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK |
          SDHCI_INT_BUS_POWER);
    sdhci_writel(host, mask, SDHCI_INT_STATUS);

    DBG("*** %s got interrupt: 0x%08x\n",
      mmc_hostname(host->mmc), intmask);

    if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
      u32 present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
              SDHCI_CARD_PRESENT;

      /*
       * There is a observation on i.mx esdhc. INSERT
       * bit will be immediately set again when it gets
       * cleared, if a card is inserted. We have to mask
       * the irq to prevent interrupt storm which will
       * freeze the system. And the REMOVE gets the
       * same situation.
       *
       * More testing are needed here to ensure it works
       * for other platforms though.
       */
      host->ier &= ~(SDHCI_INT_CARD_INSERT |
               SDHCI_INT_CARD_REMOVE);
      host->ier |= present ? SDHCI_INT_CARD_REMOVE :
                 SDHCI_INT_CARD_INSERT;
      sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
      sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);

      sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
             SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);

      /* remember the event for the threaded handler */
      host->thread_isr |= intmask & (SDHCI_INT_CARD_INSERT |
                   SDHCI_INT_CARD_REMOVE);
      result = IRQ_WAKE_THREAD;
    }

    if (intmask & SDHCI_INT_CMD_MASK)
      sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK,
              &intmask);

    if (intmask & SDHCI_INT_DATA_MASK)
      sdhci_data_irq(host, intmask & SDHCI_INT_DATA_MASK);

    if (intmask & SDHCI_INT_BUS_POWER)
      pr_err("%s: Card is consuming too much power!\n",
        mmc_hostname(host->mmc));

    if (intmask & SDHCI_INT_RETUNE)
      mmc_retune_needed(host->mmc);

    if (intmask & SDHCI_INT_CARD_INT) {
      if (host->mmc->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD) {
        sdhci_enable_sdio_irq_nolock(host, false);
        host->thread_isr |= SDHCI_INT_CARD_INT;
        result = IRQ_WAKE_THREAD;
      } else {
        /* signalled after the lock is dropped, see below */
        cardint = 1;
      }
    }

    /* whatever is left after masking the handled sources is unexpected */
    intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE |
           SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK |
           SDHCI_INT_ERROR | SDHCI_INT_BUS_POWER |
           SDHCI_INT_CARD_INT);

    if (intmask) {
      unexpected |= intmask;
      sdhci_writel(host, intmask, SDHCI_INT_STATUS);
    }

    if (result == IRQ_NONE)
      result = IRQ_HANDLED;

    /* re-read: new sources may have been raised while handling */
    intmask = sdhci_readl(host, SDHCI_INT_STATUS);
  } while (intmask && --max_loops);
out:
  spin_unlock(&host->lock);

  if (unexpected) {
    pr_err("%s: Unexpected interrupt 0x%08x.\n",
         mmc_hostname(host->mmc), unexpected);
    sdhci_dumpregs(host);
  }

  if (cardint && host->mmc->sdio_irqs)
    mmc_signal_sdio_irq(host->mmc);

  return result;
}

sdhci_data_irq

当数据传输结束中断（SDHCI_INT_DATA_END）到来、且命令已经处理完毕（host->cmd为空）时，sdhci_data_irq调用sdhci_finish_data(host)，随后执行tasklet_schedule(&host->finish_tasklet)，待处理器调度执行mmc的tasklet（即中断下半部）。

代码语言：javascript复制

/*
 * Handle the data-related interrupt bits passed in @intmask.
 *
 * Only the SDHCI_INT_DATA_END branch is shown in this excerpt; the rest
 * of the body (including the opening of the enclosing block) is elided.
 * In that branch, if a command is still outstanding (host->cmd set) the
 * completion is recorded in host->data_early; otherwise
 * sdhci_finish_data() is called.
 */
static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
{
··············省略·················
    if (intmask & SDHCI_INT_DATA_END) {
      if (host->cmd) {
        /*
         * Data managed to finish before the
         * command completed. Make sure we do
         * things in the proper order.
         */
        host->data_early = 1;
      } else {
        sdhci_finish_data(host);
      }
    }
  }
}

sdhci_tasklet_finish

该函数在sdhci_add_host初始化，当中断下半部任务完成后，也就是mmc驱动程序完成了一个请求的处理，随后执行mmc_request_done。

代码语言：javascript复制

/*
 * Tasklet (bottom half) that completes the request currently held in
 * host->mrq.
 *
 * @param: the sdhci_host pointer, cast to unsigned long.
 *
 * Stops the timeout timer, resets the command and data state machines if
 * the request ended in error (or the RESET_AFTER_REQUEST quirk is set),
 * clears the per-request host state and finally reports completion to
 * the MMC core with mmc_request_done() after the lock is released.
 */
static void sdhci_tasklet_finish(unsigned long param)
{
  struct sdhci_host *host;
  unsigned long flags;
  struct mmc_request *mrq;

  host = (struct sdhci_host*)param;

  spin_lock_irqsave(&host->lock, flags);

        /*
         * If this tasklet gets rescheduled while running, it will
         * be run again afterwards but without any active request.
         */
  if (!host->mrq) {
    spin_unlock_irqrestore(&host->lock, flags);
    return;
  }

  del_timer(&host->timer);

  /* keep a local copy: host->mrq is cleared before completion is reported */
  mrq = host->mrq;

  /*
   * The controller needs a reset of internal state machines
   * upon error conditions.
   */
  if (!(host->flags & SDHCI_DEVICE_DEAD) &&
      ((mrq->cmd && mrq->cmd->error) ||
       (mrq->sbc && mrq->sbc->error) ||
       (mrq->data && ((mrq->data->error && !mrq->data->stop) ||
          (mrq->data->stop && mrq->data->stop->error))) ||
       (host->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST))) {

    /* Some controllers need this kick or reset won't work here */
    if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET)
      /* This is to force an update */
      host->ops->set_clock(host, host->clock);

    /* Spec says we should do both at the same time, but Ricoh
       controllers do not like that. */
    sdhci_do_reset(host, SDHCI_RESET_CMD);
    sdhci_do_reset(host, SDHCI_RESET_DATA);
  }

  host->mrq = NULL;
  host->cmd = NULL;
  host->data = NULL;

#ifndef SDHCI_USE_LEDS_CLASS
  sdhci_deactivate_led(host);
#endif

  mmiowb();
  spin_unlock_irqrestore(&host->lock, flags);

  /* completion is reported outside the host lock */
  mmc_request_done(host->mmc, mrq);
  sdhci_runtime_pm_put(host);
}

wake_up_interruptible

最后通过mrq->done，即mmc_wait_data_done（在__mmc_start_data_req函数赋值），唤醒mmc上下文，作为回调传递给主控制器驱动程序到此完成一个闭环。

代码语言：javascript复制

/*
 * Completion callback for a data request: flag the request as done in
 * the host's context_info and wake whoever sleeps on context_info->wait.
 *
 * @mrq: the request that has completed; only mrq->host is used here.
 */
static void mmc_wait_data_done(struct mmc_request *mrq)
{
  struct mmc_context_info *context_info = &mrq->host->context_info;

  /* set the flag before waking the waiter */
  context_info->is_done_rcv = true;
  wake_up_interruptible(&context_info->wait);
}

六 总结

忽略掉大部分细节，提炼出主要流程，用一张图概括MMC读写的大概流程。

还有很多很多细节无法全部展示，只能身处那个情景才能分析，对于MMC驱动，内核已经实现，做产品的更多的关注host层以及card的初始化（由于篇幅过长，本文暂未介绍）。

对于mmc controller，对于做产品的来说其实是一个黑盒，eMMC内部也是有firmware去控制的，更多的细节eMMC或者SD卡厂商才知道。但是厂商也是根据JEDEC标准去设计的，他们的芯片手册类似但更简洁。

http 命令行工具 android

0 人点赞