zygisk注入原理 – Azureusの小屋

Magisk原理

一切的一切先从Magisk出发。

都知道，安卓的root权限是通过一个su的可执行文件来获取的，但是手机厂商一般不会把su给你，而自己往系统里放置su又需要先有root权限，于是就会陷入一个死循环。

之前的root方式

利用系统漏洞提权注入。这个方式很多，早期版本的系统用的多。现在系统越来越安全基本难以实现。
定制内核修改system分区的方式。
厂商会定制开发版本自带root权限。这是因为AOSP本身就支持以userdebug模式去编译。

现在的root更多是systemless root，即不修改system文件夹，去应对高版本的系统检查，magisk就是实现的一种方式。

magisk刷入过程

刷入patch_boot.img开机即可

安装magisk apk

提取boot.img

制作修改的patch_boot.img

系统在正常加载的时候，BootLoader会决定加载boot.img。然后执行里面的init程序，程序会帮忙实现挂载文件系统。magisk就是通过替换这个init程序，使得magisk能够在init阶段实现任意事情。

https://github.com/topjohnwu/Magisk

https://cs.android.com/android/platform/superproject/+/android-latest-release:system/core/init/first_stage_init.cpp

上面的链接安卓版本疑似有点高了（

    const char* path = "/system/bin/init";
    const char* args[] = {path, "selinux_setup", nullptr};
    auto fd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);
    dup2(fd, STDOUT_FILENO);
    dup2(fd, STDERR_FILENO);
    close(fd);
    execv(path, const_cast<char**>(args));

    // execv() only returns if an error happened, in which case we
    // panic and never fall through this conditional.
    PLOG(FATAL) << "execv(\"" << path << "\") failed";

    return 1;

    const char* path = "/system/bin/init";
    const char* args[] = {path, "selinux_setup", nullptr};
    auto fd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);
    dup2(fd, STDOUT_FILENO);
    dup2(fd, STDERR_FILENO);
    close(fd);
    execv(path, const_cast<char**>(args));

    // execv() only returns if an error happened, in which case we
    // panic and never fall through this conditional.
    PLOG(FATAL) << "execv(\"" << path << "\") failed";

    return 1;

安卓的init程序主要做了两件事情：

调用FirstStageMain进行一堆文件目录的挂载，挂载 /system ，/vendor ，/product 和 /metadata等
调用exec去执行/system/bin/init，参数是selinux_setup，进入二阶段

回看magisk的init：

void start() override {
    prepare();      
    exec_init();    // 执行原本的init，不是/system/bin/init
}

void start() override {
    prepare();      
    exec_init();    // 执行原本的init，不是/system/bin/init
}

执行的是FirstStageInit::start函数：

// Prepares the first boot stage: mounts a tmpfs on /data, stashes a copy of
// magiskinit at REDIR_PATH, restores the stock init to /init and patches it so
// that the INIT_PATH string inside it is replaced by REDIR_PATH.
void FirstStageInit::prepare() {
    xmkdirs("/data", 0755);
    xmount("tmpfs", "/data", "tmpfs", 0, "mode=755");   // Mount a tmpfs on /data
    cp_afc("/init" /* magiskinit */, REDIR_PATH);       // Copy the current /init (magiskinit) to REDIR_PATH

    restore_ramdisk_init();  // NOTE(review): presumably copies the original init saved under "/.backup" back to "/init" -- helper not shown, confirm
    {
        auto init = mmap_data("/init", true);           // Map the init file into memory
        // Redirect original init to magiskinit
        init.patch({ make_pair(INIT_PATH, REDIR_PATH) });     // Replace INIT_PATH with REDIR_PATH in the init binary (presumably "/system/bin/init" -> "/data/magiskinit"; constants not shown here)
    }

    // Copy files to tmpfs
    cp_afc(".backup", "/data/.backup");
    cp_afc("overlay.d", "/data/overlay.d");
}

// Prepares the first boot stage: mounts a tmpfs on /data, stashes a copy of
// magiskinit at REDIR_PATH, restores the stock init to /init and patches it so
// that the INIT_PATH string inside it is replaced by REDIR_PATH.
void FirstStageInit::prepare() {
    xmkdirs("/data", 0755);
    xmount("tmpfs", "/data", "tmpfs", 0, "mode=755");   // Mount a tmpfs on /data
    cp_afc("/init" /* magiskinit */, REDIR_PATH);       // Copy the current /init (magiskinit) to REDIR_PATH

    restore_ramdisk_init();  // NOTE(review): presumably copies the original init saved under "/.backup" back to "/init" -- helper not shown, confirm
    {
        auto init = mmap_data("/init", true);           // Map the init file into memory
        // Redirect original init to magiskinit
        init.patch({ make_pair(INIT_PATH, REDIR_PATH) });     // Replace INIT_PATH with REDIR_PATH in the init binary (presumably "/system/bin/init" -> "/data/magiskinit"; constants not shown here)
    }

    // Copy files to tmpfs
    cp_afc(".backup", "/data/.backup");
    cp_afc("overlay.d", "/data/overlay.d");
}

主要通过去替换init中的字符串，从而使得原本的init变成执行/data/magiskinit

然后看magisk的源码：

虽然magisk后面用rust重构了，但是大体过程没太大变化：

    fn start(&mut self) -> LoggedResult<()> {
         //一些检查挂载..

        setup_klog();

        self.config.init();

        let argv1 = unsafe { *self.argv.offset(1) };
        if !argv1.is_null() && unsafe { CStr::from_ptr(argv1) == c"selinux_setup" } {
            self.second_stage();  //根据传参
        } else if self.config.skip_initramfs {
            self.legacy_system_as_root();
        } else if self.config.force_normal_boot {
            self.first_stage();
        } else if cstr!("/sbin/recovery").exists() || cstr!("/system/bin/recovery").exists() {
            self.recovery();
        } else if self.check_two_stage() {
            self.first_stage();
        } else {
            self.rootfs();
        }

        // Finally execute the original init
        self.exec_init();

        Ok(())
    }

    fn start(&mut self) -> LoggedResult<()> {
         //一些检查挂载..

        setup_klog();

        self.config.init();

        let argv1 = unsafe { *self.argv.offset(1) };
        if !argv1.is_null() && unsafe { CStr::from_ptr(argv1) == c"selinux_setup" } {
            self.second_stage();  //根据传参
        } else if self.config.skip_initramfs {
            self.legacy_system_as_root();
        } else if self.config.force_normal_boot {
            self.first_stage();
        } else if cstr!("/sbin/recovery").exists() || cstr!("/system/bin/recovery").exists() {
            self.recovery();
        } else if self.check_two_stage() {
            self.first_stage();
        } else {
            self.rootfs();
        }

        // Finally execute the original init
        self.exec_init();

        Ok(())
    }

接着执行second_stage:

    fn second_stage(&mut self) {
        // ...
        if is_rootfs() {
            // We are still on rootfs, so make sure we will execute the init of the 2nd stage
            let init_path = cstr!("/init");
            init_path.remove().ok();
            init_path
                .create_symlink_to(cstr!("/system/bin/init"))
                .log_ok();
            self.patch_rw_root();
        } else {
            self.patch_ro_root();
        }
    }

    fn second_stage(&mut self) {
        // ...
        if is_rootfs() {
            // We are still on rootfs, so make sure we will execute the init of the 2nd stage
            let init_path = cstr!("/init");
            init_path.remove().ok();
            init_path
                .create_symlink_to(cstr!("/system/bin/init"))
                .log_ok();
            self.patch_rw_root();
        } else {
            self.patch_ro_root();
        }
    }

在根目录不是 rootfs（is_rootfs() 返回 false）的情况下，就去执行patch_ro_root

void MagiskInit::patch_ro_root() noexcept {
    //....
    setup_tmp(tmp_dir.data()); // 挂载tmofs到sbin，这里有su
    chdir(tmp_dir.data());    //切换到 /sbin

    if (tmp_dir == "/sbin") {
        // Recreate original sbin structure
        xmkdir(MIRRDIR, 0755);    // 创建目录".magisk" "/mirror" "/system_root"
        xmount("/", MIRRDIR, nullptr, MS_BIND, nullptr);
        recreate_sbin(MIRRDIR "/sbin", true);
        xumount2(MIRRDIR, MNT_DETACH);
    } else {
         //....
    }

    xrename("overlay.d", ROOTOVL);

    extern bool avd_hack;
    // Handle avd hack
    if (avd_hack) {
        int src = xopen("/init", O_RDONLY | O_CLOEXEC);
        mmap_data init("/init");
        // Force disable early mount on original init
        for (size_t off : init.patch("android,fstab", "xxx")) {
            LOGD("Patch @ %08zX [android,fstab] -> [xxx]\n", off);
        }
        int dest = xopen(ROOTOVL "/init", O_CREAT | O_WRONLY | O_CLOEXEC, 0);
        xwrite(dest, init.data(), init.size());
        fclone_attr(src, dest);
        close(src);
        close(dest);
    }

    load_overlay_rc(ROOTOVL);
    if (access(ROOTOVL "/sbin", F_OK) == 0) {
        // Move files in overlay.d/sbin into tmp_dir
        mv_path(ROOTOVL "/sbin", ".");
    }

    // Patch init.rc
    bool p;
    if (access(NEW_INITRC_DIR "/" INIT_RC, F_OK) == 0) {
        // Android 11's new init.rc
        p = patch_rc_scripts(NEW_INITRC_DIR, tmp_dir.data(), false);
    } else {
        p = patch_rc_scripts("/", tmp_dir.data(), false);
    }
    if (p) patch_fissiond(tmp_dir.data());

    // Extract overlay archives
    extract_files(false);
    //...
}

void MagiskInit::patch_ro_root() noexcept {
    //....
    setup_tmp(tmp_dir.data()); // 挂载tmofs到sbin，这里有su
    chdir(tmp_dir.data());    //切换到 /sbin

    if (tmp_dir == "/sbin") {
        // Recreate original sbin structure
        xmkdir(MIRRDIR, 0755);    // 创建目录".magisk" "/mirror" "/system_root"
        xmount("/", MIRRDIR, nullptr, MS_BIND, nullptr);
        recreate_sbin(MIRRDIR "/sbin", true);
        xumount2(MIRRDIR, MNT_DETACH);
    } else {
         //....
    }

    xrename("overlay.d", ROOTOVL);

    extern bool avd_hack;
    // Handle avd hack
    if (avd_hack) {
        int src = xopen("/init", O_RDONLY | O_CLOEXEC);
        mmap_data init("/init");
        // Force disable early mount on original init
        for (size_t off : init.patch("android,fstab", "xxx")) {
            LOGD("Patch @ %08zX [android,fstab] -> [xxx]\n", off);
        }
        int dest = xopen(ROOTOVL "/init", O_CREAT | O_WRONLY | O_CLOEXEC, 0);
        xwrite(dest, init.data(), init.size());
        fclone_attr(src, dest);
        close(src);
        close(dest);
    }

    load_overlay_rc(ROOTOVL);
    if (access(ROOTOVL "/sbin", F_OK) == 0) {
        // Move files in overlay.d/sbin into tmp_dir
        mv_path(ROOTOVL "/sbin", ".");
    }

    // Patch init.rc
    bool p;
    if (access(NEW_INITRC_DIR "/" INIT_RC, F_OK) == 0) {
        // Android 11's new init.rc
        p = patch_rc_scripts(NEW_INITRC_DIR, tmp_dir.data(), false);
    } else {
        p = patch_rc_scripts("/", tmp_dir.data(), false);
    }
    if (p) patch_fissiond(tmp_dir.data());

    // Extract overlay archives
    extract_files(false);
    //...
}

然后去patch init.rc，把自己的脚本注入进去，/system/bin/init 会解析执行init.rc

其中一段脚本：

"service %2$s %1$s/magisk --post-fs-data\n"
"    user root\n"
"    seclabel u:r:" SEPOL_PROC_DOMAIN ":s0\n"
"    oneshot\n"
"\n"

"service %2$s %1$s/magisk --post-fs-data\n"
"    user root\n"
"    seclabel u:r:" SEPOL_PROC_DOMAIN ":s0\n"
"    oneshot\n"
"\n"

这会拉起一个magisk服务，然后传入--post-fs-data参数。

magisk 服务代码入口在 native/jni/core/magisk.cpp 的 magisk_main

int magisk_main(int argc, char *argv[]) {
    if (argv[1] == "-c"sv) {
    // ...
    }
    // ...
    else if (argv[1] == "--post-fs-data"sv) {
        int fd = connect_daemon(+RequestCode::POST_FS_DATA, true);
        struct pollfd pfd = { fd, POLLIN, 0 };
        poll(&pfd, 1, 1000 * POST_FS_DATA_WAIT_TIME);
        return 0;
    // ...
}

int magisk_main(int argc, char *argv[]) {
    if (argv[1] == "-c"sv) {
    // ...
    }
    // ...
    else if (argv[1] == "--post-fs-data"sv) {
        int fd = connect_daemon(+RequestCode::POST_FS_DATA, true);
        struct pollfd pfd = { fd, POLLIN, 0 };
        poll(&pfd, 1, 1000 * POST_FS_DATA_WAIT_TIME);
        return 0;
    // ...
}

connect_daemon 函数与 magiskd 通信

magiskd 的 handle 代码在 native/jni/core/daemon.cpp 的 handle_request_async

static void handle_request_async(int client, int code, const sock_cred &cred) {
    switch (code) {
    // ...
    case MainRequest::POST_FS_DATA:
        post_fs_data(client);
        break;
    // ...
    }
}

//native/jni/core/bootstages.cpp
void post_fs_data(int client) {
    // ...
    // We still do magic mount because root itself might need it
    magic_mount();
    // ...
}

static void handle_request_async(int client, int code, const sock_cred &cred) {
    switch (code) {
    // ...
    case MainRequest::POST_FS_DATA:
        post_fs_data(client);
        break;
    // ...
    }
}

//native/jni/core/bootstages.cpp
void post_fs_data(int client) {
    // ...
    // We still do magic mount because root itself might need it
    magic_mount();
    // ...
}

接下来调用 magic_mount 开始进入 zygisk 的流程。

magisk 的启动流程大概就是

替换 boot.img 的 init 劫持启动的一阶段
一阶段中 patch 原 init 程序启动 magiskinit 达到劫持二阶段的效果
二阶段中 patch init.rc，之后 /system/bin/init 会把 magisk 自己的服务跑起来，同时还挂载自己的目录

Zygisk原理

补个知识点：安卓每个应用进程都是从一个名为 Zygote 的现有进程fork得到的。系统启动并加载通用框架代码和资源时，Zygote 进程随之启动。为启动新的应用进程，系统会fork Zygote进程，然后在新进程中加载并运行应用代码。Zygote 是 Android 所有其他应用进程的父进程。

Zygisk的意思其实是Magisk注入Zygote

先从Zygisk怎么注入说起吧

前面说到的magic_mount会先检测zygisk是否开启，来决定是否挂载zygisk；如果开启，就调用mount_zygisk()

int app_process_32 = -1;
int app_process_64 = -1;

#define mount_zygisk(bit)                                                            
if (access("/system/bin/app_process" #bit, F_OK) == 0) {                             
    app_process_##bit = xopen("/system/bin/app_process" #bit, O_RDONLY | O_CLOEXEC);    // 全局变量保存app_process文件描述符
    string zbin = zygisk_bin + "/app_process" #bit;                                  
    string dbin = zygisk_bin + "/magisk" #bit;                                       
    string mbin = MAGISKTMP + "/magisk" #bit;                                        
    int src = xopen(mbin.data(), O_RDONLY | O_CLOEXEC);                              
    int out = xopen(zbin.data(), O_CREAT | O_WRONLY | O_CLOEXEC, 0);                 
    xsendfile(out, src, nullptr, INT_MAX);       // 相当于拷贝了一份magisk，重命名成app_process                                    
    close(out);                                                                      
    out = xopen(dbin.data(), O_CREAT | O_WRONLY | O_CLOEXEC, 0);                     
    lseek(src, 0, SEEK_SET);                                                         
    xsendfile(out, src, nullptr, INT_MAX);                                           
    close(out);                                                                      
    close(src);                                                                      
    clone_attr("/system/bin/app_process" #bit, zbin.data());                         
    clone_attr("/system/bin/app_process" #bit, dbin.data());                         
    bind_mount(zbin.data(), "/system/bin/app_process" #bit);        // 这里用magisk文件替换/system/bin/app_process                    
}

int app_process_32 = -1;
int app_process_64 = -1;

#define mount_zygisk(bit)                                                            
if (access("/system/bin/app_process" #bit, F_OK) == 0) {                             
    app_process_##bit = xopen("/system/bin/app_process" #bit, O_RDONLY | O_CLOEXEC);    // 全局变量保存app_process文件描述符
    string zbin = zygisk_bin + "/app_process" #bit;                                  
    string dbin = zygisk_bin + "/magisk" #bit;                                       
    string mbin = MAGISKTMP + "/magisk" #bit;                                        
    int src = xopen(mbin.data(), O_RDONLY | O_CLOEXEC);                              
    int out = xopen(zbin.data(), O_CREAT | O_WRONLY | O_CLOEXEC, 0);                 
    xsendfile(out, src, nullptr, INT_MAX);       // 相当于拷贝了一份magisk，重命名成app_process                                    
    close(out);                                                                      
    out = xopen(dbin.data(), O_CREAT | O_WRONLY | O_CLOEXEC, 0);                     
    lseek(src, 0, SEEK_SET);                                                         
    xsendfile(out, src, nullptr, INT_MAX);                                           
    close(out);                                                                      
    close(src);                                                                      
    clone_attr("/system/bin/app_process" #bit, zbin.data());                         
    clone_attr("/system/bin/app_process" #bit, dbin.data());                         
    bind_mount(zbin.data(), "/system/bin/app_process" #bit);        // 这里用magisk文件替换/system/bin/app_process                    
}

主要是干了两件事情：

打开原本的 app_process 32/64，把文件描述符保存到全局变量 app_process_32/64 里
把MAGISKTMP下面的 magisk 32/64 拷贝到zygisk_bin目录下，分别命名为 app_process32/64 和 magisk32/64

然后通过挂载实现替换执行，原本要执行的app_process变成了magisk。

用的老的版本：https://github.com/topjohnwu/Magisk/tree/76ddfeb93a8b3612cd68988323f422e996751e16

再看/native/jni/zygisk/main.cpp中的app_process_main

// Entrypoint for app_process overlay
int app_process_main(int argc, char *argv[]) {
  // ...
  
   if (int socket = connect_daemon(); socket >= 0) {
        do {
            write_int(socket, ZYGISK_REQUEST);
            write_int(socket, ZYGISK_SETUP);  //发送SETUP请求，由守护进程处理

            if (read_int(socket) != 0)
                break;

            int app_proc_fd = recv_fd(socket);  //接收到原本app_process的fd
            if (app_proc_fd < 0)
                break;
                string tmp = read_string(socket);
#if defined(__LP64__)
            string lib = tmp + "/" ZYGISKBIN "/zygisk.app_process64.1.so";
#else
            string lib = tmp + "/" ZYGISKBIN "/zygisk.app_process32.1.so";
#endif
            if (char *ld = getenv("LD_PRELOAD")) {
                char env[256];
                sprintf(env, "%s:%s", ld, lib.data());
                setenv("LD_PRELOAD", env, 1);    //写环境变量
            } else {
                setenv("LD_PRELOAD", lib.data(), 1);
            }
            setenv(INJECT_ENV_1, "1", 1);
            setenv("MAGISKTMP", tmp.data(), 1);  //写环境变量

            close(socket);

            snprintf(buf, sizeof(buf), "/proc/self/fd/%d", app_proc_fd);
            fcntl(app_proc_fd, F_SETFD, FD_CLOEXEC);
            execve(buf, argv, environ);         // 执行原本的app_process
        } while (false);

        close(socket);
    }
  // ...

}

// Entrypoint for app_process overlay
int app_process_main(int argc, char *argv[]) {
  // ...
  
   if (int socket = connect_daemon(); socket >= 0) {
        do {
            write_int(socket, ZYGISK_REQUEST);
            write_int(socket, ZYGISK_SETUP);  //发送SETUP请求，由守护进程处理

            if (read_int(socket) != 0)
                break;

            int app_proc_fd = recv_fd(socket);  //接收到原本app_process的fd
            if (app_proc_fd < 0)
                break;
                string tmp = read_string(socket);
#if defined(__LP64__)
            string lib = tmp + "/" ZYGISKBIN "/zygisk.app_process64.1.so";
#else
            string lib = tmp + "/" ZYGISKBIN "/zygisk.app_process32.1.so";
#endif
            if (char *ld = getenv("LD_PRELOAD")) {
                char env[256];
                sprintf(env, "%s:%s", ld, lib.data());
                setenv("LD_PRELOAD", env, 1);    //写环境变量
            } else {
                setenv("LD_PRELOAD", lib.data(), 1);
            }
            setenv(INJECT_ENV_1, "1", 1);
            setenv("MAGISKTMP", tmp.data(), 1);  //写环境变量

            close(socket);

            snprintf(buf, sizeof(buf), "/proc/self/fd/%d", app_proc_fd);
            fcntl(app_proc_fd, F_SETFD, FD_CLOEXEC);
            execve(buf, argv, environ);         // 执行原本的app_process
        } while (false);

        close(socket);
    }
  // ...

}

守护进程处理完了之后，app_process_main会设置LD_PRELOAD 和MAGISKTMP_ENV（即上面代码里的"MAGISKTMP"）环境变量。
LD_PRELOAD在Linux系统中用于指定要在程序运行前加载的共享库。实际上这里添加的是后面提到的loader32.so和loader64.so。而MAGISKTMP_ENV 实际就是/debug_ramdisk/.magisk 。
设置环境变量后执行execve("/proc/self/fd/<app_proc_fd>", argv, environ)（效果等同于fexecve(app_proc_fd, argv, environ)）；这里的文件描述符实际是原来的app_process_##bit，因此会执行原本的app_process

现在看这个守护进程：

void zygisk_handler(int client, const sock_cred *cred) {
    int code = read_int(client);
    char buf[256];
    switch (code) {
    case ZYGISK_SETUP:
        setup_files(client, cred);
        break;
    // ....
    }
    close(client);
}

// Daemon-side handler for ZYGISK_SETUP. Replies to `client` with a status int,
// then (on success) the fd of the original app_process, and finally the
// MAGISKTMP path; it also makes two copies of the requester's executable under
// MAGISKTMP/ZYGISKBIN for use as the stage 1 / stage 2 libraries.
static void setup_files(int client, const sock_cred *cred) {
    LOGD("zygisk: setup files for pid=[%d]\n", cred->pid);

    char buf[256];    // Executable path of the requester (presumably read from /proc/<pid>/exe by get_exe), usually /system/bin/app_process[32|64]
    if (!get_exe(cred->pid, buf, sizeof(buf))) {
        // Report failure to the client
        write_int(client, 1);
        return;
    }
    bool is_64_bit = str_ends(buf, "64");
    // ....

    write_int(client, 0);
    send_fd(client, is_64_bit ? app_process_64 : app_process_32);// Send the fd held for the real app_process binary

    string path = MAGISKTMP + "/" ZYGISKBIN "/zygisk." + basename(buf);
    cp_afc(buf, (path + ".1.so").data());// Copy the file at `buf` to MAGISKTMP/ZYGISKBIN/zygisk.app_process[32|64].1.so
    cp_afc(buf, (path + ".2.so").data());// Same file again, as the ".2.so" copy
    write_string(client, MAGISKTMP);// Send the MAGISKTMP path
}

void zygisk_handler(int client, const sock_cred *cred) {
    int code = read_int(client);
    char buf[256];
    switch (code) {
    case ZYGISK_SETUP:
        setup_files(client, cred);
        break;
    // ....
    }
    close(client);
}

// Daemon-side handler for ZYGISK_SETUP. Replies to `client` with a status int,
// then (on success) the fd of the original app_process, and finally the
// MAGISKTMP path; it also makes two copies of the requester's executable under
// MAGISKTMP/ZYGISKBIN for use as the stage 1 / stage 2 libraries.
static void setup_files(int client, const sock_cred *cred) {
    LOGD("zygisk: setup files for pid=[%d]\n", cred->pid);

    char buf[256];    // Executable path of the requester (presumably read from /proc/<pid>/exe by get_exe), usually /system/bin/app_process[32|64]
    if (!get_exe(cred->pid, buf, sizeof(buf))) {
        // Report failure to the client
        write_int(client, 1);
        return;
    }
    bool is_64_bit = str_ends(buf, "64");
    // ....

    write_int(client, 0);
    send_fd(client, is_64_bit ? app_process_64 : app_process_32);// Send the fd held for the real app_process binary

    string path = MAGISKTMP + "/" ZYGISKBIN "/zygisk." + basename(buf);
    cp_afc(buf, (path + ".1.so").data());// Copy the file at `buf` to MAGISKTMP/ZYGISKBIN/zygisk.app_process[32|64].1.so
    cp_afc(buf, (path + ".2.so").data());// Same file again, as the ".2.so" copy
    write_string(client, MAGISKTMP);// Send the MAGISKTMP path
}

用原始的 app_process 的fd去执行exec

接着把 MAGISKTMP/zygisk/zygisk.app_process[32|64].1.so 写到了环境变量 LD_PRELOAD 里面，看来 magisk 本体注入到 app_process 里面就是通过它。

不像 riru 用 zygote 的 native bridge ，或者是以前那样替换某个原生库，也不像 xposed 直接修改了 app_process ，Zygisk用 LD_PRELOAD 注入进去。

Zygisk的加载

// Library constructor: dispatches on which injection marker is present in the
// environment. With INJECT_ENV_2 set it publishes the address of
// second_stage_entry through SECOND_STAGE_PTR; otherwise, with INJECT_ENV_1
// set, it runs the first stage.
__attribute__((constructor))  // Runs when the shared object is loaded
static void zygisk_init() {
    if (getenv(INJECT_ENV_2)) {
        // Return function pointer to first stage
        char buf[128];
        snprintf(buf, sizeof(buf), "%p", &second_stage_entry);
        setenv(SECOND_STAGE_PTR, buf, 1);
    } else if (getenv(INJECT_ENV_1)) {    // Set earlier by app_process_main
        first_stage_entry();
    }
}

// Library constructor: dispatches on which injection marker is present in the
// environment. With INJECT_ENV_2 set it publishes the address of
// second_stage_entry through SECOND_STAGE_PTR; otherwise, with INJECT_ENV_1
// set, it runs the first stage.
__attribute__((constructor))  // Runs when the shared object is loaded
static void zygisk_init() {
    if (getenv(INJECT_ENV_2)) {
        // Return function pointer to first stage
        char buf[128];
        snprintf(buf, sizeof(buf), "%p", &second_stage_entry);
        setenv(SECOND_STAGE_PTR, buf, 1);
    } else if (getenv(INJECT_ENV_1)) {    // Set earlier by app_process_main
        first_stage_entry();
    }
}

// First injection stage, called from the library constructor when INJECT_ENV_1
// is set. It removes the injection-related environment variables, dlopen()s
// the ".2.so" copy of the library, remaps that copy's mappings and finally
// calls the second-stage entry point whose address was published through
// SECOND_STAGE_PTR.
static void first_stage_entry() {
    android_logging();
    ZLOGD("inject 1st stage\n");

    char *ld = getenv("LD_PRELOAD");
    char tmp[128];
    strlcpy(tmp, getenv("MAGISKTMP"), sizeof(tmp));  // Keep a private copy of MAGISKTMP; the variable is unset below
    char *path;
    if (char *c = strrchr(ld, ':')) {
        // Our library is the last ':'-separated entry: cut it off, keep the rest
        *c = '\0';
        setenv("LD_PRELOAD", ld, 1);  // Restore original LD_PRELOAD
        path = strdup(c + 1);
    } else {
        // Our library was the only entry
        unsetenv("LD_PRELOAD");
        path = strdup(ld);
    }
    unsetenv(INJECT_ENV_1);
    unsetenv("MAGISKTMP");
    sanitize_environ();

    // Points at the character right before the last '.', i.e. the stage digit in "...1.so"
    char *num = strrchr(path, '.') - 1;

    // Update path to 2nd stage lib
    *num = '2';

    // Load second stage
    setenv(INJECT_ENV_2, "1", 1);
    void *handle = dlopen(path, RTLD_LAZY); // zygisk.app_process[32|64].2.so
    remap_all(path);

    // Revert path to 1st stage lib
    *num = '1';

    // Run second stage entry
    char *env = getenv(SECOND_STAGE_PTR);
    decltype(&second_stage_entry) second_stage;
    sscanf(env, "%p", &second_stage);  // Parse the pointer text written by the second-stage constructor
    second_stage(handle, tmp, path);
}

// First injection stage, called from the library constructor when INJECT_ENV_1
// is set. It removes the injection-related environment variables, dlopen()s
// the ".2.so" copy of the library, remaps that copy's mappings and finally
// calls the second-stage entry point whose address was published through
// SECOND_STAGE_PTR.
static void first_stage_entry() {
    android_logging();
    ZLOGD("inject 1st stage\n");

    char *ld = getenv("LD_PRELOAD");
    char tmp[128];
    strlcpy(tmp, getenv("MAGISKTMP"), sizeof(tmp));  // Keep a private copy of MAGISKTMP; the variable is unset below
    char *path;
    if (char *c = strrchr(ld, ':')) {
        // Our library is the last ':'-separated entry: cut it off, keep the rest
        *c = '\0';
        setenv("LD_PRELOAD", ld, 1);  // Restore original LD_PRELOAD
        path = strdup(c + 1);
    } else {
        // Our library was the only entry
        unsetenv("LD_PRELOAD");
        path = strdup(ld);
    }
    unsetenv(INJECT_ENV_1);
    unsetenv("MAGISKTMP");
    sanitize_environ();

    // Points at the character right before the last '.', i.e. the stage digit in "...1.so"
    char *num = strrchr(path, '.') - 1;

    // Update path to 2nd stage lib
    *num = '2';

    // Load second stage
    setenv(INJECT_ENV_2, "1", 1);
    void *handle = dlopen(path, RTLD_LAZY); // zygisk.app_process[32|64].2.so
    remap_all(path);

    // Revert path to 1st stage lib
    *num = '1';

    // Run second stage entry
    char *env = getenv(SECOND_STAGE_PTR);
    decltype(&second_stage_entry) second_stage;
    sscanf(env, "%p", &second_stage);  // Parse the pointer text written by the second-stage constructor
    second_stage(handle, tmp, path);
}

一阶段的作用似乎只有一个：dlopen 加载二阶段

除此之外，一阶段还重置了 LD_PRELOAD 和 INJECT_ENV_1 ，并调用一个 sanitize_environ 函数。

// Make sure /proc/self/environ is sanitized
// Filter env and reset MM_ENV_END
//
// Packs every remaining environment string back-to-back, starting at the
// address of environ[0], repoints each environ[] slot at its new location and
// then passes the end of the packed region to prctl(PR_SET_MM_ENV_END).
static void sanitize_environ() {
    char *cur = environ[0];

    for (int i = 0; environ[i]; ++i) {
        // Copy all env onto the original stack
        int len = strlen(environ[i]);
        memmove(cur, environ[i], len + 1);  // len + 1 includes the terminating NUL
        environ[i] = cur;
        cur += len + 1;
    }

    // cur now points just past the last packed string
    prctl(PR_SET_MM, PR_SET_MM_ENV_END, cur, 0, 0);
}

// Make sure /proc/self/environ is sanitized
// Filter env and reset MM_ENV_END
//
// Packs every remaining environment string back-to-back, starting at the
// address of environ[0], repoints each environ[] slot at its new location and
// then passes the end of the packed region to prctl(PR_SET_MM_ENV_END).
static void sanitize_environ() {
    char *cur = environ[0];

    for (int i = 0; environ[i]; ++i) {
        // Copy all env onto the original stack
        int len = strlen(environ[i]);
        memmove(cur, environ[i], len + 1);  // len + 1 includes the terminating NUL
        environ[i] = cur;
        cur += len + 1;
    }

    // cur now points just past the last packed string
    prctl(PR_SET_MM, PR_SET_MM_ENV_END, cur, 0, 0);
}

环境变量存储在进程的内存中，由指针数组 environ[] 保存每个环境变量的位置（0 终止）。环境变量的字符串都是连续放置的，形如 NAME=value\0 。
这个函数看起来是要让环境变量对齐，避免检测到被删除的环境变量留下的空白

清理完成 env 后又注入了 INJECT_ENV_2 ，此时加载的就是 zygisk.app_process[32|64].2.so 了

// For every mapping returned by find_maps(name), replaces it in place with an
// anonymous private mapping holding the same bytes and the same permissions.
// NOTE(review): find_maps presumably selects the process mappings backed by
// the file `name` -- helper not shown, confirm.
void remap_all(const char *name) {
    vector<map_info> maps = find_maps(name);
    for (map_info &info : maps) { // Walk the mappings found for the given file name
        void *addr = reinterpret_cast<void *>(info.start);
        size_t size = info.end - info.start;
        void *copy = xmmap(nullptr, size, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); // Writable anonymous mapping of the same size as the target
        if ((info.perms & PROT_READ) == 0) {
            mprotect(addr, size, PROT_READ); // Make the target readable if it is not, so it can be copied
        }
        memcpy(copy, addr, size); // Copy the target's contents into the new mapping
        mremap(copy, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, addr); // Move the new mapping over the target's address
        mprotect(addr, size, info.perms); // Restore the target's original permissions
    }
}

// For every mapping returned by find_maps(name), replaces it in place with an
// anonymous private mapping holding the same bytes and the same permissions.
// NOTE(review): find_maps presumably selects the process mappings backed by
// the file `name` -- helper not shown, confirm.
void remap_all(const char *name) {
    vector<map_info> maps = find_maps(name);
    for (map_info &info : maps) { // Walk the mappings found for the given file name
        void *addr = reinterpret_cast<void *>(info.start);
        size_t size = info.end - info.start;
        void *copy = xmmap(nullptr, size, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); // Writable anonymous mapping of the same size as the target
        if ((info.perms & PROT_READ) == 0) {
            mprotect(addr, size, PROT_READ); // Make the target readable if it is not, so it can be copied
        }
        memcpy(copy, addr, size); // Copy the target's contents into the new mapping
        mremap(copy, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, addr); // Move the new mapping over the target's address
        mprotect(addr, size, info.perms); // Restore the target's original permissions
    }
}

加载完成后，调用了一个 remap_all ，传入的是它的 path 。看上去是把对应 path 的映射全都重新映射成匿名的，目的是为了从 maps 中隐藏自身。

load 二阶段同样会调用 constructor ，只不过这回走 INJECT_ENV_2 的分支，把 second_stage_entry 的地址放到了环境变量中，供一阶段调用。

// Second injection stage, invoked by the first stage through the pointer
// published in SECOND_STAGE_PTR.
//   handle - dlopen() handle of this (second-stage) library, stored in self_handle
//   tmp    - MAGISKTMP directory path, stored in MAGISKTMP
//   path   - path of the first-stage library, stored in first_stage_path
static void second_stage_entry(void *handle, const char *tmp, char *path) {
    self_handle = handle;
    MAGISKTMP = tmp;
    // Remove the markers used for the stage hand-off
    unsetenv(INJECT_ENV_2);
    unsetenv(SECOND_STAGE_PTR);

    zygisk_logging();
    ZLOGD("inject 2nd stage\n");
    hook_functions();        //  Key step; defined in /native/jni/zygisk/hook.cpp

    // First stage will be unloaded before the first fork
    first_stage_path = path;
}

// Second injection stage, invoked by the first stage through the pointer
// published in SECOND_STAGE_PTR.
//   handle - dlopen() handle of this (second-stage) library, stored in self_handle
//   tmp    - MAGISKTMP directory path, stored in MAGISKTMP
//   path   - path of the first-stage library, stored in first_stage_path
static void second_stage_entry(void *handle, const char *tmp, char *path) {
    self_handle = handle;
    MAGISKTMP = tmp;
    // Remove the markers used for the stage hand-off
    unsetenv(INJECT_ENV_2);
    unsetenv(SECOND_STAGE_PTR);

    zygisk_logging();
    ZLOGD("inject 2nd stage\n");
    hook_functions();        //  Key step; defined in /native/jni/zygisk/hook.cpp

    // First stage will be unloaded before the first fork
    first_stage_path = path;
}

进入二阶段后，zygisk 的 so 已经在 maps 中隐藏了。二阶段的入口接收了从一阶段传入的自身的 handle ，magisk tmp 目录和一阶段的 path 。

void hook_functions() {
#if MAGISK_DEBUG
    // xhook_enable_debug(1);
    xhook_enable_sigsegv_protection(0);
#endif
    default_new(xhook_list);
    default_new(jni_hook_list);
    default_new(jni_method_map);

    XHOOK_REGISTER(ANDROID_RUNTIME, fork);
    XHOOK_REGISTER(ANDROID_RUNTIME, unshare);
    XHOOK_REGISTER(ANDROID_RUNTIME, jniRegisterNativeMethods);
    XHOOK_REGISTER(ANDROID_RUNTIME, selinux_android_setcontext);
    XHOOK_REGISTER_SYM(ANDROID_RUNTIME, "__android_log_close", android_log_close);
    hook_refresh();

    // Remove unhooked methods
    xhook_list->erase(
            std::remove_if(xhook_list->begin(), xhook_list->end(),
            [](auto &t) { return *std::get<2>(t) == nullptr;}),
            xhook_list->end());

    if (old_jniRegisterNativeMethods == nullptr) {
        ZLOGD("jniRegisterNativeMethods not hooked, using fallback\n");

        // android::AndroidRuntime::setArgv0(const char*, bool)
        XHOOK_REGISTER_SYM(APP_PROCESS, "_ZN7android14AndroidRuntime8setArgv0EPKcb", setArgv0);
        hook_refresh();

        // We still need old_jniRegisterNativeMethods as other code uses it
        // android::AndroidRuntime::registerNativeMethods(_JNIEnv*, const char*, const JNINativeMethod*, int)
        constexpr char sig[] = "_ZN7android14AndroidRuntime21registerNativeMethodsEP7_JNIEnvPKcPK15JNINativeMethodi";
        *(void **) &old_jniRegisterNativeMethods = dlsym(RTLD_DEFAULT, sig);
    }
}

void hook_functions() {
#if MAGISK_DEBUG
    // xhook_enable_debug(1);
    xhook_enable_sigsegv_protection(0);
#endif
    default_new(xhook_list);
    default_new(jni_hook_list);
    default_new(jni_method_map);

    XHOOK_REGISTER(ANDROID_RUNTIME, fork);
    XHOOK_REGISTER(ANDROID_RUNTIME, unshare);
    XHOOK_REGISTER(ANDROID_RUNTIME, jniRegisterNativeMethods);
    XHOOK_REGISTER(ANDROID_RUNTIME, selinux_android_setcontext);
    XHOOK_REGISTER_SYM(ANDROID_RUNTIME, "__android_log_close", android_log_close);
    hook_refresh();

    // Remove unhooked methods
    xhook_list->erase(
            std::remove_if(xhook_list->begin(), xhook_list->end(),
            [](auto &t) { return *std::get<2>(t) == nullptr;}),
            xhook_list->end());

    if (old_jniRegisterNativeMethods == nullptr) {
        ZLOGD("jniRegisterNativeMethods not hooked, using fallback\n");

        // android::AndroidRuntime::setArgv0(const char*, bool)
        XHOOK_REGISTER_SYM(APP_PROCESS, "_ZN7android14AndroidRuntime8setArgv0EPKcb", setArgv0);
        hook_refresh();

        // We still need old_jniRegisterNativeMethods as other code uses it
        // android::AndroidRuntime::registerNativeMethods(_JNIEnv*, const char*, const JNINativeMethod*, int)
        constexpr char sig[] = "_ZN7android14AndroidRuntime21registerNativeMethodsEP7_JNIEnvPKcPK15JNINativeMethodi";
        *(void **) &old_jniRegisterNativeMethods = dlsym(RTLD_DEFAULT, sig);
    }
}

先看第一个fork的hook：

#define DCL_HOOK_FUNC(ret, func, ...) \
ret (*old_##func)(__VA_ARGS__);       \
ret new_##func(__VA_ARGS__)

// Skip actual fork and return cached result if applicable
// Also unload first stage zygisk if necessary
DCL_HOOK_FUNC(int, fork) {
    unload_first_stage();
    return (g_ctx && g_ctx->pid >= 0) ? g_ctx->pid : old_fork();
}


--->//上面的代码相当于下面的代码，一个宏同时完成了两个定义（old_fork 函数指针和 new_fork 函数）。

int (*old_fork)();
int new_fork() {
    unload_first_stage();
    return (g_ctx && g_ctx->pid >= 0) ? g_ctx->pid : old_fork();
}

#define DCL_HOOK_FUNC(ret, func, ...) \
ret (*old_##func)(__VA_ARGS__);       \
ret new_##func(__VA_ARGS__)

// Skip actual fork and return cached result if applicable
// Also unload first stage zygisk if necessary
DCL_HOOK_FUNC(int, fork) {
    unload_first_stage();
    return (g_ctx && g_ctx->pid >= 0) ? g_ctx->pid : old_fork();
}


--->//上面的代码相当于下面的代码，一个宏同时完成了两个定义（old_fork 函数指针和 new_fork 函数）。

int (*old_fork)();
int new_fork() {
    unload_first_stage();
    return (g_ctx && g_ctx->pid >= 0) ? g_ctx->pid : old_fork();
}

看hook的fork：如果g_ctx里已经缓存了有效的pid（pid >= 0），就直接返回这个pid，跳过真正的fork；否则才调用原来的fork。

g_ctx是HookContext 类型的全局变量：

#define DCL_PRE_POST(name) \
void name##_pre();         \
void name##_post();

struct HookContext {
    JNIEnv *env;
    union {
        AppSpecializeArgsImpl *args;
        ServerSpecializeArgsImpl *server_args;
        void *raw_args;
    };
    const char *process;
    int pid;
    bitset<FLAG_MAX> flags;
    AppInfo info;
    vector<ZygiskModule> modules;

    HookContext() : pid(-1), info{} {}

    static void close_fds();
    void unload_zygisk();

    DCL_PRE_POST(fork)
    void run_modules_pre(const vector<int> &fds);
    void run_modules_post();
    DCL_PRE_POST(nativeForkAndSpecialize)
    DCL_PRE_POST(nativeSpecializeAppProcess)
    DCL_PRE_POST(nativeForkSystemServer)
};

#undef DCL_PRE_POST

// Helper used only inside HookContext: declares the member function pair
// `<name>_pre()` and `<name>_post()`. It is #undef'd right after the struct.
#define DCL_PRE_POST(name) \
void name##_pre();         \
void name##_post();

// State shared by the pre/post hooks of one fork/specialize call.
struct HookContext {
    JNIEnv *env;
    // Three typed views of the same argument pointer.
    union {
        AppSpecializeArgsImpl *args;
        ServerSpecializeArgsImpl *server_args;
        void *raw_args;
    };
    // Process name; nativeSpecializeAppProcess_pre() fills it from args->nice_name.
    const char *process;
    // Result of the early fork done in fork_pre(); -1 until then.
    int pid;
    // One bit per flag, indexed by constants such as FORK_AND_SPECIALIZE.
    bitset<FLAG_MAX> flags;
    // Passed by address to remote_get_info(), which is expected to fill it.
    AppInfo info;
    // NOTE(review): presumably the Zygisk modules loaded for this process — confirm.
    vector<ZygiskModule> modules;

    // Only pid and info are initialized here; env, the union and process
    // are left for the caller/hooks to set.
    HookContext() : pid(-1), info{} {}

    static void close_fds();
    void unload_zygisk();

    // Declares fork_pre() / fork_post().
    DCL_PRE_POST(fork)
    void run_modules_pre(const vector<int> &fds);
    void run_modules_post();
    // Declare the *_pre() / *_post() pairs for each hooked native method.
    DCL_PRE_POST(nativeForkAndSpecialize)
    DCL_PRE_POST(nativeSpecializeAppProcess)
    DCL_PRE_POST(nativeForkSystemServer)
};

#undef DCL_PRE_POST

DCL_PRE_POST(fork)声明了两个函数，一个fork_pre，一个fork_post。先看pre：

// Do our own fork before loading any 3rd party code
// First block SIGCHLD, unblock after original fork is done
void HookContext::fork_pre() {
    // Publish this context globally; the hooked fork reads g_ctx->pid.
    g_ctx = this;
    sigmask(SIG_BLOCK, SIGCHLD);
    // Call the original fork directly and cache its result.
    pid = old_fork(); // this->pid, i.e. g_ctx->pid
}

// Do our own fork before loading any 3rd party code
// First block SIGCHLD, unblock after original fork is done
void HookContext::fork_pre() {
    // Publish this context globally; the hooked fork reads g_ctx->pid.
    g_ctx = this;
    sigmask(SIG_BLOCK, SIGCHLD);
    // Call the original fork directly and cache its result.
    pid = old_fork(); // this->pid, i.e. g_ctx->pid
}

做了一个事前准备，赋值然后屏蔽 SIGCHLD 信号，并主动调用了原先的 fork 函数。
但是Zygisk没有提供这个接口，因为这个是在forkAndSpecialize 和 forkSystemServer 的 fork 之前主动调用的。

// Pre-hook for forkSystemServer: fork early through fork_pre(), mark the
// context as a server specialization, then run the module pre-step in the
// child only.
void HookContext::nativeForkSystemServer_pre() {
    fork_pre();
    flags[SERVER_SPECIALIZE] = true;

    // Anything other than pid == 0 is not the forked child: nothing more to do.
    if (pid != 0) {
        return;
    }

    ZLOGV("pre  forkSystemServer\n");
    auto module_fds = remote_get_info(1000, "system_server", &info);
    run_modules_pre(module_fds);
    close_fds();
    android_logging();
}

// Pre-hook for forkAndSpecialize: fork early through fork_pre(), record that
// this is a fork-and-specialize call, and continue with the app specialize
// pre-step in the child only.
void HookContext::nativeForkAndSpecialize_pre() {
    fork_pre();
    flags[FORK_AND_SPECIALIZE] = true;

    // Anything other than pid == 0 is not the forked child: stop here.
    if (pid != 0) {
        return;
    }

    nativeSpecializeAppProcess_pre();
}

// Pre-hook for forkSystemServer: fork early through fork_pre(), mark the
// context as a server specialization, then run the module pre-step in the
// child only.
void HookContext::nativeForkSystemServer_pre() {
    fork_pre();
    flags[SERVER_SPECIALIZE] = true;

    // Anything other than pid == 0 is not the forked child: nothing more to do.
    if (pid != 0) {
        return;
    }

    ZLOGV("pre  forkSystemServer\n");
    auto module_fds = remote_get_info(1000, "system_server", &info);
    run_modules_pre(module_fds);
    close_fds();
    android_logging();
}

// Pre-hook for forkAndSpecialize: fork early through fork_pre(), record that
// this is a fork-and-specialize call, and continue with the app specialize
// pre-step in the child only.
void HookContext::nativeForkAndSpecialize_pre() {
    fork_pre();
    flags[FORK_AND_SPECIALIZE] = true;

    // Anything other than pid == 0 is not the forked child: stop here.
    if (pid != 0) {
        return;
    }

    nativeSpecializeAppProcess_pre();
}

preFork() 是一个钩子方法，它会在 Zygote fork 新进程之前被调用，进行一些 fork 前的操作，例如：通过在 Java 层记录 fork 事件来跟踪进程创建；预加载新进程可能需要的类或资源；申请和预热新进程可能需要的内存。

实际上，fork 之前的工作主要是做一些 fd 检查，防止不合法的 fd 泄露到 fork 后的进程，因此，先 fork 实际上是合理的。并且这样就达到了 Zygisk 的目的：在 Specialize pre 的时候加载模块，而不必在 Zygote 进程中加载。因为这样 fork 之后，我们得到了一个处于 pre specialize 状态且与原 zygote 隔离开的进程，此时即可安全地根据 denylist 决定是否加载模块。

既然已经「预 fork」了，那么原本流程中的「fork」就不需要了。因此 pre fork 的时候会缓存「预 fork」的结果——原进程得到子进程的 pid，子进程得到 0——等到真正调用 fork 的时候，实际上不做 fork，直接返回这个缓存的值即可。

无论是哪个安卓版本，Magisk 对于 nativeForkAndSpecialize 的 Hook 都是三步策略：ctx.nativeForkAndSpecialize_pre -> nativeForkAndSpecialize -> ctx.nativeForkAndSpecialize_post。而 Android 源码中 forkAndSpecialize 也是三步走：ZygoteHooks.preFork -> nativeForkAndSpecialize -> ZygoteHooks.postForkCommon，其中第二步才调用到了 nativeForkAndSpecialize。

// Pre-hook for forkAndSpecialize: fork early through fork_pre(), record that
// this is a fork-and-specialize call, and continue with the app specialize
// pre-step in the child only.
void HookContext::nativeForkAndSpecialize_pre() {
    fork_pre();
    flags[FORK_AND_SPECIALIZE] = true;

    // Anything other than pid == 0 is not the forked child: stop here.
    if (pid != 0) {
        return;
    }

    nativeSpecializeAppProcess_pre();
}

//  --->

// Pre-hook for app process specialization: resolves the process name, asks
// remote_get_info() about the process, and either marks it for unmounting
// (denylisted) or runs the module pre-step.
void HookContext::nativeSpecializeAppProcess_pre() {
    g_ctx = this;
    flags[APP_SPECIALIZE] = true;
    // Read the process nice name from the specialize arguments via JNI.
    process = env->GetStringUTFChars(args->nice_name, nullptr);
    // Log which entry point led here (fork+specialize vs. specialize only).
    if (flags[FORK_AND_SPECIALIZE]) {
        ZLOGV("pre  forkAndSpecialize [%s]\n", process);
    } else {
        ZLOGV("pre  specialize [%s]\n", process);
    }

    // Fills `info` for this uid/process; the return value is handed to
    // run_modules_pre() as its fd list.
    auto module_fds = remote_get_info(args->uid, process, &info);
    if (info.on_denylist) {
        // TODO: Handle MOUNT_EXTERNAL_NONE on older platforms
        ZLOGI("[%s] is on the denylist\n", process);
        // Denylisted: modules are not run; only the unmount flag is set.
        flags[UNMOUNT_FLAG] = true;
    } else {
        run_modules_pre(module_fds);    // load modules
    }

    close_fds();
    android_logging();
}

// Pre-hook for forkAndSpecialize: fork early through fork_pre(), record that
// this is a fork-and-specialize call, and continue with the app specialize
// pre-step in the child only.
void HookContext::nativeForkAndSpecialize_pre() {
    fork_pre();
    flags[FORK_AND_SPECIALIZE] = true;

    // Anything other than pid == 0 is not the forked child: stop here.
    if (pid != 0) {
        return;
    }

    nativeSpecializeAppProcess_pre();
}

//  --->

// Pre-hook for app process specialization: resolves the process name, asks
// remote_get_info() about the process, and either marks it for unmounting
// (denylisted) or runs the module pre-step.
void HookContext::nativeSpecializeAppProcess_pre() {
    g_ctx = this;
    flags[APP_SPECIALIZE] = true;
    // Read the process nice name from the specialize arguments via JNI.
    process = env->GetStringUTFChars(args->nice_name, nullptr);
    // Log which entry point led here (fork+specialize vs. specialize only).
    if (flags[FORK_AND_SPECIALIZE]) {
        ZLOGV("pre  forkAndSpecialize [%s]\n", process);
    } else {
        ZLOGV("pre  specialize [%s]\n", process);
    }

    // Fills `info` for this uid/process; the return value is handed to
    // run_modules_pre() as its fd list.
    auto module_fds = remote_get_info(args->uid, process, &info);
    if (info.on_denylist) {
        // TODO: Handle MOUNT_EXTERNAL_NONE on older platforms
        ZLOGI("[%s] is on the denylist\n", process);
        // Denylisted: modules are not run; only the unmount flag is set.
        flags[UNMOUNT_FLAG] = true;
    } else {
        run_modules_pre(module_fds);    // load modules
    }

    close_fds();
    android_logging();
}

Magisk原理

Zygisk原理

Zygisk的加载

发送评论 编辑评论

发送评论编辑评论