step 0阶段,创建一个列表
// Step 0: prepare.
// Build one LoadTask per requested library name. Every task records
// `start_with` as the soinfo that needs it and `ns` as the namespace to
// start searching from.
LoadTaskList load_tasks;
for (size_t i = 0; i < library_names_count; ++i) {
  const char* name = library_names[i];
  // The format string must contain a %s conversion; without it the `name`
  // argument is silently dropped and the log never shows which library
  // is being queued.
  LD_LOG(kLogDlopen,"[linker.cpp] step 1 ,so_name %s",name);
  load_tasks.push_back(LoadTask::create(name, start_with, ns, &readers_map));
}
// If soinfos array is null allocate one on stack.
// The array is needed in case of failure; for example
// when library_names[] = {libone.so, libtwo.so} and libone.so
// is loaded correctly but libtwo.so failed for some reason.
// In this case libone.so should be unloaded on return.
// See also implementation of failure_guard below.
if (soinfos == nullptr) {
  size_t soinfos_size = sizeof(soinfo*)*library_names_count;
  soinfos = reinterpret_cast<soinfo**>(alloca(soinfos_size));
  memset(soinfos, 0, soinfos_size);
}
// list of libraries to link - see step 2.
size_t soinfos_count = 0;
// Releases every LoadTask when this scope is left, on success and on failure.
auto scope_guard = android::base::make_scope_guard([&]() {
  for (LoadTask* t : load_tasks) {
    LD_LOG(kLogDlopen,"[linker.cpp] before call deleter %s",t->get_name());
    LoadTask::deleter(t);
  }
});
// Unloads the libraries collected in soinfos so far; the guard is disabled
// further down once linking has succeeded.
auto failure_guard = android::base::make_scope_guard([&]() {
  // Housekeeping
  soinfo_unload(soinfos, soinfos_count);
});
ZipArchiveCache zip_archive_cache;
我们跟踪创建Loadtask过程。
该函数先通过 TypeBasedAllocator 分配内存,再用 placement new 调用 LoadTask 的构造函数。
// Factory for LoadTask objects: obtains storage from the type-based allocator
// and constructs the task in place with the given arguments.
static LoadTask* create(const char* name,
                        soinfo* needed_by,
                        android_namespace_t* start_from,
                        std::unordered_map<const soinfo*, ElfReader>* readers_map) {
  LoadTask* storage = TypeBasedAllocator<LoadTask>::alloc();
  LoadTask* task = new (storage) LoadTask(name, needed_by, start_from, readers_map);
  return task;
}
该构造函数完成各个成员的赋值。简单看下由参数赋值的成员:name_、needed_by_、elf_readers_map_、start_from_;其余成员被设置为默认值。
// Constructs a task for the library `name`.
// Members taken from the arguments: name_, needed_by_, elf_readers_map_ and
// start_from_. Everything else starts from a fixed default: no soinfo yet,
// no file descriptor (-1), zero file offset, and both flags false.
LoadTask(const char* name,
         soinfo* needed_by,
         android_namespace_t* start_from,
         std::unordered_map<const soinfo*, ElfReader>* readers_map)
    : name_(name),
      needed_by_(needed_by),
      si_(nullptr),
      fd_(-1),
      close_fd_(false),
      file_offset_(0),
      elf_readers_map_(readers_map),
      is_dt_needed_(false),
      start_from_(start_from) {
}
追溯needed_by 来历。
查看 __builtin_return_address() 的说明:它是 GCC/Clang 的内建函数,返回当前函数的返回地址,也就是调用者中紧随调用指令之后的地址。
在该函数将其转换为soinfo 。
find_containing_library 返回调用函数所在模块的soinfo
// Returns the soinfo from the solist whose range [base, base + size) contains
// the address `p`, or nullptr when no entry covers it.
soinfo* find_containing_library(const void* p) {
  const ElfW(Addr) target = reinterpret_cast<ElfW(Addr)>(p);
  soinfo* candidate = solist_get_head();
  while (candidate != nullptr) {
    // The subtraction is only evaluated once target >= base holds,
    // so the offset is never computed for addresses below the base.
    const bool in_range =
        target >= candidate->base && target - candidate->base < candidate->size;
    if (in_range) {
      return candidate;
    }
    candidate = candidate->next;
  }
  return nullptr;
}
Step 1 拓展列表来包含所有DT_NEEDED 库
该步骤给列表中增加so。
// Step 1: expand the list of load_tasks to include
// all DT_NEEDED libraries (do not load them just yet)
// NOTE(review): &load_tasks is handed to find_library_internal below, so the
// list presumably grows while it is being walked; that would explain the
// index loop that re-reads size() on every iteration - confirm.
for (size_t i = 0; i<load_tasks.size(); ++i) {
LoadTask* task = load_tasks[i];
soinfo* needed_by = task->get_needed_by();
// A task counts as a DT_NEEDED dependency when something needs it and that
// something is not the original caller, unless add_as_children is set.
bool is_dt_needed = needed_by != nullptr && (needed_by != start_with || add_as_children);
// extinfo is only passed on to tasks that are not DT_NEEDED dependencies.
task->set_extinfo(is_dt_needed ? nullptr : extinfo);
task->set_dt_needed(is_dt_needed);
/*
Try to find the library. Note: start from the namespace that is stored in the LoadTask. This namespace is different from the current namespace when the LoadTask is for a transitive dependency and the lib that created the LoadTask is not found in the current namespace but in one of the linked namespaces.
*/
if (!find_library_internal(const_cast<android_namespace_t*>(task->get_start_from()),
task,
&zip_archive_cache,
&load_tasks,
rtld_flags,
search_linked_namespaces || is_dt_needed)) {
return false;
}
soinfo* si = task->get_soinfo();
if (is_dt_needed) {
// Record the dependency edge; an already linked library additionally
// gets its reference count bumped.
needed_by->add_child(si);
if (si->is_linked()) {
si->increment_ref_count();
}
}
// When ld_preloads is not null, the first
// ld_preloads_count libs are in fact ld_preloads.
if (ld_preloads != nullptr && soinfos_count < ld_preloads_count) {
ld_preloads->push_back(si);
}
// Only the first library_names_count results are stored in soinfos.
if (soinfos_count < library_names_count) {
soinfos[soinfos_count++] = si;
}
}
筛选要载入的load_list 并解析elf文件的so
// Step 2: Load libraries in random order (see b/24047022)
// Collect the tasks whose soinfo is not linked yet, keeping at most one task
// per soinfo.
LoadTaskList load_list;
for (auto&& task : load_tasks) {
soinfo* si = task->get_soinfo();
// Matches a task already queued for the same soinfo.
auto pred = [&](const LoadTask* t) {
return t->get_soinfo() == si;
};
if (!si->is_linked() &&
std::find_if(load_list.begin(), load_list.end(), pred) == load_list.end() ) {
load_list.push_back(task);
}
}
shuffle(&load_list);
for (auto&& task : load_list) {
// *** Take a look at this load() function.
if (!task->load()) {
return false;
}
}
这个elf_reader 就是对应so用来解析elf文件的。
// Loads the ELF file of this task through its ElfReader and copies the
// resulting mapping information (base, size, load bias, program headers)
// into the task's soinfo.
// Returns false when ElfReader::Load fails, true otherwise.
bool load() {
  ElfReader& elf_reader = get_elf_reader();
  // elf_reader.Load reserves the address space and parses the ELF file;
  // the library does not count as fully loaded yet at this point.
  if (!elf_reader.Load(extinfo_)) {
    return false;
  }
  si_->base = elf_reader.load_start();
  si_->size = elf_reader.load_size();
  si_->set_mapped_by_caller(elf_reader.is_mapped_by_caller());
  si_->load_bias = elf_reader.load_bias();
  // load_bias is an integer address; %p requires a pointer argument,
  // so convert explicitly instead of passing the integer through varargs.
  LD_LOG(kLogDlopen,"[linker.cpp] Step2 load_bias %p ",reinterpret_cast<void*>(si_->load_bias));
  si_->phnum = elf_reader.phdr_count();
  si_->phdr = elf_reader.loaded_phdr();
  return true;
}
Step3
Step3过程解析ELF文件获取相应字段的值
预连接所有DT_NEEDED 库, 逻辑简单,没有被连接过,则调用prelink_image()。
// Step 3: pre-link all DT_NEEDED libraries in breadth first order.
// Every soinfo that is not linked yet gets prelink_image() called on it;
// the first failure aborts with false.
for (auto&& task : load_tasks) {
soinfo* si = task->get_soinfo();
if (!si->is_linked() && !si->prelink_image()) {
return false;
}
}
prelink_image,读取各类型的段 , 依次看全太肝了 , 后期在这里查字段。
Step4
略
linkerStep5-linkerStep6
// Step 5: link libraries that are not destined to this namespace.
// Do this by recursively calling find_libraries on the namespace where the lib
// was found during Step 1.
for (auto&& task : load_tasks) {
soinfo* si = task->get_soinfo();
if (si->get_primary_namespace() != ns) {
const char* name = task->get_name();
if (find_libraries(si->get_primary_namespace(), task->get_needed_by(), &name, 1,
nullptr /* soinfos */, nullptr /* ld_preloads */, 0 /* ld_preload_count */,
rtld_flags, nullptr /* extinfo */, false /* add_as_children */,
false /* search_linked_namespaces */, readers_map, namespaces)) {
// If this lib is directly needed by one of the libs in this namespace,
// then increment the count
soinfo* needed_by = task->get_needed_by();
if (needed_by != nullptr && needed_by->get_primary_namespace() == ns && si->is_linked()) {
si->increment_ref_count();
}
} else {
return false;
}
}
}
// Link the libraries that belong to this namespace.
// Step 6: link libraries in this namespace
// The local group is gathered by walking the dependency tree from either
// start_with (when add_as_children is set) or the collected soinfos,
// keeping only the libraries accessible from ns.
soinfo_list_t local_group;
walk_dependencies_tree(
(start_with != nullptr && add_as_children) ? &start_with : soinfos,
(start_with != nullptr && add_as_children) ? 1 : soinfos_count,
[&] (soinfo* si) {
if (ns->is_accessible(si)) {
local_group.push_back(si);
return kWalkContinue;
} else {
return kWalkSkip;
}
});
soinfo_list_t global_group = ns->get_global_group();
// Link every not-yet-linked member of the local group; the callback returns
// false as soon as link_image or the CFI shadow update fails.
bool linked = local_group.visit([&](soinfo* si) {
if (!si->is_linked()) {
LD_LOG(kLogDlopen,"so %s is not linked , now try to link ",si->get_soname());
if (!si->link_image(global_group, local_group, extinfo) ||
!get_cfi_shadow()->AfterLoad(si, solist_get_head())) {
return false;
}
}
return true;
});
if (linked) {
// Mark the whole group as linked and cancel the unload-on-failure guard.
local_group.for_each([](soinfo* si) {
LD_LOG(kLogDlopen,"travser local_group list %s" , si->get_soname());
if (!si->is_linked()) {
si->set_linked();
}
});
failure_guard.Disable();
}
return linked;
涉及到重定位的镜像连接。
// Excerpt of soinfo::link_image: applies the relocation tables of this image.
// With USE_RELA the RELA tables (rela_, plt_rela_) are processed, otherwise
// the REL tables (rel_, plt_rel_). Each table is handed to relocate() through
// a plain_reloc_iterator; the first failing table makes the function return
// false.
// NOTE(review): the excerpt is abridged - version_tracker is set up in a part
// that is not shown, and the remainder of the function is cut off.
bool soinfo::link_image(const soinfo_list_t& global_group, const soinfo_list_t& local_group,
                        const android_dlextinfo* extinfo) {
  // Only the familiar table kinds are kept in this excerpt; REL is the usual case.
#if defined(USE_RELA)
  if (rela_ != nullptr) {
    DEBUG("[ relocating %s ]", get_realpath());
    if (!relocate(version_tracker,
                  plain_reloc_iterator(rela_, rela_count_), global_group, local_group)) {
      return false;
    }
  }
  if (plt_rela_ != nullptr) {
    DEBUG("[ relocating %s plt ]", get_realpath());
    if (!relocate(version_tracker,
                  plain_reloc_iterator(plt_rela_, plt_rela_count_), global_group, local_group)) {
      return false;
    }
  }
#else
  // Focus on this branch.
  // The guard tests rel_, the same table that is passed to relocate() below.
  if (rel_ != nullptr) {
    DEBUG("[ relocating %s ]", get_realpath());
    if (!relocate(version_tracker,
                  plain_reloc_iterator(rel_, rel_count_), global_group, local_group)) {
      return false;
    }
  }
  if (plt_rel_ != nullptr) {
    LD_LOG(kLogDlopen,"[ relocating %s plt ]", get_realpath());
    if (!relocate(version_tracker,
                  plain_reloc_iterator(plt_rel_, plt_rel_count_), global_group, local_group)) {
      return false;
    }
  }
#endif
在Step3中找到 rel_ 和 plt_rel_ 的赋值:rel_ 来自 DT_REL,对应 .rel.dyn 段;plt_rel_ 来自 DT_JMPREL,对应 .rel.plt 段。
// This entry holds the address of the relocation entries that belong solely
// to the PLT; it is rebased by load_bias before being stored.
/* ... */
case DT_JMPREL:
#if defined(USE_RELA)
plt_rela_ = reinterpret_cast<ElfW(Rela)*>(load_bias + d->d_un.d_ptr);
#else
plt_rel_ = reinterpret_cast<ElfW(Rel)*>(load_bias + d->d_un.d_ptr);
#endif
break;
// Size of the PLT relocation table; dividing by the entry size yields the
// number of entries.
case DT_PLTRELSZ:
#if defined(USE_RELA)
plt_rela_count_ = d->d_un.d_val / sizeof(ElfW(Rela));
#else
plt_rel_count_ = d->d_un.d_val / sizeof(ElfW(Rel));
#endif
break;
/* ... */
// Address of the REL relocation table, rebased by load_bias.
case DT_REL:
rel_ = reinterpret_cast<ElfW(Rel)*>(load_bias + d->d_un.d_ptr);
break;
// Size of the REL relocation table, converted to an entry count.
case DT_RELSZ:
rel_count_ = d->d_un.d_val / sizeof(ElfW(Rel));
break;
参数中有 plain_reloc_iterator ,传入rel的迭代器 。
relocate(version_tracker,plain_reloc_iterator(rel_, rel_count_), global_group, local_group)
Elf32_Rel的结构体。
/* 32-bit relocation entry: an address plus the packed relocation type and
 * symbol index; the entry has no addend field. */
typedef struct
{
Elf32_Addr r_offset; /* Address */
Elf32_Word r_info; /* Relocation type and symbol index */
} Elf32_Rel;
对于DT_REL,该函数获取到rel.dyn 段, 到这里重定位的内容就很简单了。
// Walks every entry produced by rel_iterator and applies it to this image.
// For entries that reference a symbol, the symbol is resolved through
// soinfo_do_lookup against the global and local groups; afterwards the value
// is written to the relocation target according to the relocation type.
// Returns false on the first entry that cannot be processed, true otherwise.
// NOTE(review): this excerpt is abridged. The handling of undefined weak
// symbols has lost the opening of its switch and of its #if chain, and the
// "definition found" branch is elided, so the text does not compile as shown.
template<typename ElfRelIteratorT>
bool soinfo::relocate(const VersionTracker& version_tracker, ElfRelIteratorT&& rel_iterator,
const soinfo_list_t& global_group, const soinfo_list_t& local_group){
for (size_t idx = 0; rel_iterator.has_next(); ++idx) {
const auto rel = rel_iterator.next();
if (rel == nullptr) {
return false;
}
// Extract the relocation type and the symbol index.
ElfW(Word) type = ELFW(R_TYPE)(rel->r_info);
ElfW(Word) sym = ELFW(R_SYM)(rel->r_info);
// In-memory address the relocation applies to.
ElfW(Addr) reloc = static_cast<ElfW(Addr)>(rel->r_offset + load_bias);
ElfW(Addr) sym_addr = 0;
const char* sym_name = nullptr;
ElfW(Addr) addend = get_addend(rel, reloc);
DEBUG("Processing \"%s\" relocation at index %zd", get_realpath(), idx);
if (type == R_GENERIC_NONE) {
continue;
}
const ElfW(Sym)* s = nullptr;
soinfo* lsi = nullptr;
if (sym != 0) {
// Fetch the symbol name through the symbol table.
sym_name = get_string(symtab_[sym].st_name);
const version_info* vi = nullptr;
if (!lookup_version_info(version_tracker, sym, sym_name, &vi)) {
return false;
}
// This lookup explains how the linker treats identical symbol names
// when LD_PRELOAD is in play.
if (!soinfo_do_lookup(this, sym_name, vi, &lsi, global_group, local_group, &s)) {
return false;
}
if (s == nullptr) {
// We only allow an undefined symbol if this is a weak reference...
s = &symtab_[sym];
if (ELF_ST_BIND(s->st_info) != STB_WEAK) {
DL_ERR("cannot locate symbol \"%s\" referenced by \"%s\"...", sym_name, get_realpath());
return false;
}
#elif defined(__arm__)
case R_ARM_ABS32:
#endif
/*
* The sym_addr was initialized to be zero above, or the relocation
* code below does not care about value of sym_addr.
* No need to do anything.
*/
break;
#if defined(__x86_64__)
case R_X86_64_PC32:
sym_addr = reloc;
break;
#elif defined(__i386__)
case R_386_PC32:
sym_addr = reloc;
break;
#endif
default:
DL_ERR("unknown weak reloc type %d @ %p (%zu)", type, rel, idx);
return false;
}
} else { // We got a definition.
/*
.......
*/
}
count_relocation(kRelocSymbol);
}
// Apply the relocation.
switch (type) {
case R_GENERIC_JUMP_SLOT:
count_relocation(kRelocAbsolute);
MARK(rel->r_offset);
TRACE_TYPE(RELO, "RELO JMP_SLOT %16p <- %16p %s\n",
reinterpret_cast<void*>(reloc),
reinterpret_cast<void*>(sym_addr + addend), sym_name);
*reinterpret_cast<ElfW(Addr)*>(reloc) = (sym_addr + addend);
break;
case R_GENERIC_GLOB_DAT:
count_relocation(kRelocAbsolute);
MARK(rel->r_offset);
TRACE_TYPE(RELO, "RELO GLOB_DAT %16p <- %16p %s\n",
reinterpret_cast<void*>(reloc),
reinterpret_cast<void*>(sym_addr + addend), sym_name);
*reinterpret_cast<ElfW(Addr)*>(reloc) = (sym_addr + addend);
break;
case R_GENERIC_RELATIVE:
// The target receives load_bias plus the addend; no symbol is involved.
count_relocation(kRelocRelative);
MARK(rel->r_offset);
TRACE_TYPE(RELO, "RELO RELATIVE %16p <- %16p\n",
reinterpret_cast<void*>(reloc),
reinterpret_cast<void*>(load_bias + addend));
*reinterpret_cast<ElfW(Addr)*>(reloc) = (load_bias + addend);
break;
case R_GENERIC_IRELATIVE:
count_relocation(kRelocRelative);
MARK(rel->r_offset);
TRACE_TYPE(RELO, "RELO IRELATIVE %16p <- %16p\n",
reinterpret_cast<void*>(reloc),
reinterpret_cast<void*>(load_bias + addend));
{
/* ... */
/* ... */
*reinterpret_cast<ElfW(Addr)*>(reloc) = ifunc_addr;
}
break;
#elif defined(__arm__)
case R_ARM_ABS32:
// The symbol address is added to the value already stored at the target.
count_relocation(kRelocAbsolute);
MARK(rel->r_offset);
TRACE_TYPE(RELO, "RELO ABS %08x <- %08x %s", reloc, sym_addr, sym_name);
*reinterpret_cast<ElfW(Addr)*>(reloc) += sym_addr;
break;
case R_ARM_REL32:
count_relocation(kRelocRelative);
MARK(rel->r_offset);
TRACE_TYPE(RELO, "RELO REL32 %08x <- %08x - %08x %s",
reloc, sym_addr, rel->r_offset, sym_name);
*reinterpret_cast<ElfW(Addr)*>(reloc) += sym_addr - rel->r_offset;
break;
case R_ARM_COPY:
/*
* ET_EXEC is not supported so this should not happen.
*
* http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044d/IHI0044D_aaelf.pdf
*
* Section 4.6.1.10 "Dynamic relocations"
* R_ARM_COPY may only appear in executable objects where e_type is
* set to ET_EXEC.
*/
DL_ERR("%s R_ARM_COPY relocations are not supported", get_realpath());
return false;
#endif
default:
DL_ERR("unknown reloc type %d @ %p (%zu)", type, rel, idx);
return false;
}
}
return true;
}
- 追溯 get_addend():如果 rel 类型为 R_GENERIC_RELATIVE(R_ARM_RELATIVE)或 R_GENERIC_IRELATIVE(R_ARM_IRELATIVE),则返回第二个参数(重定位地址)处当前存放的值,否则返回 0。
// Returns the addend for a REL-format relocation entry.
// For R_GENERIC_RELATIVE and R_GENERIC_IRELATIVE the addend is the value
// currently stored at reloc_addr; for every other type it is 0 and the
// memory at reloc_addr is not read.
static ElfW(Addr) get_addend(ElfW(Rel)* rel, ElfW(Addr) reloc_addr) {
  const auto reloc_type = ELFW(R_TYPE)(rel->r_info);
  const bool reads_stored_value =
      reloc_type == R_GENERIC_RELATIVE || reloc_type == R_GENERIC_IRELATIVE;
  if (!reads_stored_value) {
    return 0;
  }
  return *reinterpret_cast<ElfW(Addr)*>(reloc_addr);
}
- load_bias_ 的计算方法
load_bias_ = reinterpret_cast<uint8_t*>(start) - addr;
start: =
start = ReserveAligned(mmap_hint, load_size_, kLibraryAlignment);
addr : = 0
size_t phdr_table_get_load_size(const ElfW(Phdr)* phdr_table, size_t phdr_count, ElfW(Addr)* out_min_vaddr, ElfW(Addr)* out_max_vaddr){ ...... ElfW(Addr) min_vaddr = 0xffffffffffffffff; ElfW(Addr) max_vaddr = 0; //遍历程序头表所有的字段 for (size_t i = 0; i < phdr_count; ++i) { const ElfW(Phdr)* phdr = &phdr_table[i]; if (phdr->p_type != PT_LOAD) { continue; } found_pt_load = true; if (phdr->p_vaddr < min_vaddr) { min_vaddr = phdr->p_vaddr; } if (phdr->p_vaddr + phdr->p_memsz > max_vaddr) { max_vaddr = phdr->p_vaddr + phdr->p_memsz; } } if (!found_pt_load) { min_vaddr = 0; } min_vaddr = PAGE_START(min_vaddr); max_vaddr = PAGE_END(max_vaddr); ...... if (out_max_vaddr != nullptr) { *out_max_vaddr = max_vaddr; } ...... } load_size_ = phdr_table_get_load_size(phdr_table_, phdr_num_, &min_vaddr); uint8_t* addr = reinterpret_cast<uint8_t*>(min_vaddr);
// 如果rel类型为R_ARM_RELATIVE 或者R_ARM_IRELATIVE 则返回第二个参数指向的值。 static ElfW(Addr) get_addend(ElfW(Rel)* rel, ElfW(Addr) reloc_addr) { if (ELFW(R_TYPE)(rel->r_info) == R_GENERIC_RELATIVE || ELFW(R_TYPE)(rel->r_info) == R_GENERIC_IRELATIVE) { return *reinterpret_cast<ElfW(Addr)*>(reloc_addr); } return 0; }
重定位的类型:
#define R_GENERIC_JUMP_SLOT R_ARM_JMP_SLOT //用于提供延迟绑定。此重定位偏移成员可指定过程链接表项的位置。运行时链接程序会修改过程链接表项,以将控制权转移到指定的符号地址 #define R_GENERIC_GLOB_DAT R_ARM_GLOB_DAT //用于将 GOT 项设置为所指定符号的地址 #define R_GENERIC_RELATIVE R_ARM_RELATIVE //此类型的重定位项必须为符号表索引指定值零,此重定位偏移成员可指定共享库中包含表示相对地址的值的位置
重定位运算方法:
#define R_GENERIC_JUMP_SLOT R_ARM_JMP_SLOT // #define R_GENERIC_GLOB_DAT R_ARM_GLOB_DAT // 修正offset为符号地址。 #define R_GENERIC_RELATIVE R_ARM_RELATIVE // 重定位offset为基地址 + 偏移
验证重定位过程。
以类型为R_ARM_RELATIVE 为例。
offset type 0001a610 00000017 R_ARM_RELATIVE
偏移为1a610 类型为R_ARM_RELATIVE ,该偏移的值为19758 , 则修正 base+1a610 的地址的值为 19758 + base。
重定位结果。
libnative-lib.so RELO RELATIVE 0xcfa22610 <-0xcfa21758 load_bias 0xcfa08000 addend 0x19758 si->base 0xcfa08000
重定位的算法已经清楚了,但不同类型各自的含义是什么?可以确定 R_ARM_RELATIVE 对应内部符号的重定位,R_ARM_GLOB_DAT 和 R_ARM_JMP_SLOT 对应外部符号;至于 R_ARM_GLOB_DAT 和 R_ARM_JMP_SLOT 的差别,可参考上面“重定位的类型”:前者把 GOT 项设置为符号地址,后者修正过程链接表(PLT)对应的项,用于延迟绑定。
连接过程结束后,再依次返回到上层函数,就可以明白一个 so 的加载过程为:分配空间 -> 解析 elf -> 重定位 -> 调用 init -> 若存在则调用 JNI_OnLoad。
装载Segment
bionic/linker/linker_phdr.cpp ElfReader::LoadSegments
https://docs.oracle.com/cd/E19120-01/open.solaris/819-0690/chapter6-26/index.html 重定位类型
http://nicephil.blinkenshell.org/my_book/ch07s04.html ELF 相关知识
0 条评论