/// Decompiled function. Decompilation result is kept here. structcfunc_t { ea_t entry_ea; ///< function entry address mba_t *mba; ///< underlying microcode cinsn_t body; ///< function body, must be a block intvec_t &argidx; ///< list of arguments (indexes into vars) ctree_maturity_t maturity; ///< maturity level // The following maps must be accessed using helper functions. // Example: for user_labels_t, see functions starting with "user_labels_". user_labels_t *user_labels;///< user-defined labels. user_cmts_t *user_cmts; ///< user-defined comments. user_numforms_t *numforms; ///< user-defined number formats. user_iflags_t *user_iflags;///< user-defined item flags \\ref CIT_ user_unions_t *user_unions;///< user-defined union field selections. /// \\defgroup CIT_ ctree item iflags bits //@{ #define CIT_COLLAPSED 0x0001 ///< display element in collapsed form //@} int refcnt; ///< reference count to this object. use cfuncptr_t int statebits; ///< current cfunc_t state. see \\ref CFS_ /// \\defgroup CFS_ cfunc state bits #define CFS_BOUNDS 0x0001 ///< 'eamap' and 'boundaries' are ready #define CFS_TEXT 0x0002 ///< 'sv' is ready (and hdrlines) #define CFS_LVARS_HIDDEN 0x0004 ///< local variable definitions are collapsed #define CFS_LOCKED 0x0008 ///< cfunc is temporarily locked eamap_t *eamap; ///< ea->insn map. use \\ref get_eamap boundaries_t *boundaries; ///< map of instruction boundaries. use \\ref get_boundaries strvec_t sv; ///< decompilation output: function text. use \\ref get_pseudocode int hdrlines; ///< number of lines in the declaration area mutablectree_items_t treeitems; ///< vector of ctree items
// the exact size of this class is not documented, there may be more fields char reserved[];
/// Generate the function body. /// This function (re)generates the function body from the underlying microcode. void hexapi build_c_tree(void);
/// Verify the ctree. /// This function verifies the ctree. If the ctree is malformed, an internal error /// is generated. Use it to verify the ctree after your modifications. /// \\param aul Are unused labels acceptable? /// \\param even_without_debugger if false and there is no debugger, the verification will be skipped void hexapi verify(allow_unused_labels_t aul, bool even_without_debugger)const;
/// Print function text. /// \\param vp printer helper class to receive the generated text. void hexapi print_func(vc_printer_t &vp)const;
/// Get the function type. /// \\param type variable where the function type is returned /// \\return false if failure bool hexapi get_func_type(tinfo_t *type)const;
/// Get vector of local variables. /// \\return pointer to the vector of local variables. If you modify this vector, /// the ctree must be regenerated in order to have correct cast operators. /// Use build_c_tree() for that. /// Removing lvars should be done carefully: all references in ctree /// and microcode must be corrected after that. lvars_t *hexapi get_lvars(void);
/// Get stack offset delta. /// The local variable stack offsets retrieved by v.location.stkoff() /// should be adjusted before being used as stack frame offsets in IDA. /// \\return the delta to apply. /// example: ida_stkoff = v.location.stkoff() - f->get_stkoff_delta() sval_t hexapi get_stkoff_delta(void);
/// Find the label. /// \\return pointer to the ctree item with the specified label number. citem_t *hexapi find_label(int label);
/// Remove unused labels. /// This function check what labels are really used by the function and /// removes the unused ones. void hexapi remove_unused_labels(void);
/// Retrieve a user defined comment. /// \\param loc ctree location /// \\param rt should already retrieved comments retrieved again? /// \\return pointer to the comment string or NULL constchar *hexapi get_user_cmt(consttreeloc_t &loc, cmt_retrieval_type_t rt)const;
/// Set a user defined comment. /// This function stores the specified comment in the cfunc_t structure. /// The save_user_cmts() function must be called after it. /// \\param loc ctree location /// \\param cmt new comment. if empty or NULL, then an existing comment is deleted. void hexapi set_user_cmt(consttreeloc_t &loc, constchar *cmt);
/// Retrieve citem iflags. /// \\param loc citem locator /// \\return \\ref CIT_ or 0 int32 hexapi get_user_iflags(constcitem_locator_t &loc)const;
/// Set citem iflags. /// \\param loc citem locator /// \\param iflags new iflags void hexapi set_user_iflags(constcitem_locator_t &loc, int32 iflags);
/// Check if there are orphan comments. bool hexapi has_orphan_cmts(void)const;
/// Delete all orphan comments. /// The save_user_cmts() function must be called after this call. int hexapi del_orphan_cmts(void);
/// Retrieve a user defined union field selection. /// \\param ea address /// \\param path out: path describing the union selection. /// \\return pointer to the path or NULL bool hexapi get_user_union_selection(ea_t ea, intvec_t *path);
/// Set a union field selection. /// The save_user_unions() function must be called after calling this function. /// \\param ea address /// \\param path in: path describing the union selection. void hexapi set_user_union_selection(ea_t ea, constintvec_t &path);
/// Save user-defined labels into the database void hexapi save_user_labels(void)const; /// Save user-defined comments into the database void hexapi save_user_cmts(void)const; /// Save user-defined number formats into the database void hexapi save_user_numforms(void)const; /// Save user-defined iflags into the database void hexapi save_user_iflags(void)const; /// Save user-defined union field selections into the database void hexapi save_user_unions(void)const;
/// Get ctree item for the specified cursor position. /// \\return false if failed to get the current item /// \\param line line of decompilation text (element of \\ref sv) /// \\param x x cursor coordinate in the line /// \\param is_ctree_line does the line belong to statement area? (if not, it is assumed to belong to the declaration area) /// \\param phead ptr to the first item on the line (used to attach block comments). May be NULL /// \\param pitem ptr to the current item. May be NULL /// \\param ptail ptr to the last item on the line (used to attach indented comments). May be NULL /// \\sa vdui_t::get_current_item() bool hexapi get_line_item( constchar *line, int x, bool is_ctree_line, ctree_item_t *phead, ctree_item_t *pitem, ctree_item_t *ptail);
/// Get information about decompilation warnings. /// \\return reference to the vector of warnings hexwarns_t &hexapi get_warnings(void);
/// Get pointer to ea->insn map. /// This function initializes eamap if not done yet. eamap_t &hexapi get_eamap(void);
/// Get pointer to map of instruction boundaries. /// This function initializes the boundary map if not done yet. boundaries_t &hexapi get_boundaries(void);
/// Get pointer to decompilation output: the pseudocode. /// This function generates pseudocode if not done yet. conststrvec_t &hexapi get_pseudocode(void);
/// Refresh ctext after a ctree modification. /// This function informs the decompiler that ctree (\\ref body) have been /// modified and ctext (\\ref sv) does not correspond to it anymore. /// It also refreshes the pseudocode windows if there is any. void hexapi refresh_func_ctext(void);
bool hexapi gather_derefs(constctree_item_t &ci, udt_type_data_t *udm=NULL)const; bool hexapi find_item_coords(constcitem_t *item, int *px, int *py); boollocked(void)const{ return (statebits & CFS_LOCKED) != 0; } private: /// Cleanup. /// Properly delete all children and free memory. void hexapi cleanup(void); DECLARE_UNCOPYABLE(cfunc_t) }; typedefqrefcnt_t<cfunc_t> cfuncptr_t;
vds2
这个插件将指针中的 0 替换成 Null。 IDA 的反编译结果经常是动态变化的,为了实现这个任务,这个插件通过注册 IDA 的 hexrays 事件回调,当 ctree 生成完成后立即对 ctree 中的 0 进行替换。
// This callback will detect when the ctree is ready to be displayed // and call convert_zeroes() to create NULLs ssize_t idaapi plugin_ctx_t::hr_callback( void*, hexrays_event_t event, va_list va) { if (event == hxe_maturity) { cfunc_t* cfunc = va_arg(va, cfunc_t*); ctree_maturity_t mat = va_argi(va, ctree_maturity_t); if (mat == CMAT_FINAL) // ctree is ready, time to convert zeroes to NULLs convert_zeroes(cfunc); } return0; }
event 参数由 IDA 传入回调类型,全部的回调类型与说明可以查看 hexrays.hpp 中 enum hexrays_event_t 定义代码与注释。 hxe_maturity 回调会在 Ctree maturity 改变时调用,并且变参列表 va 的参数是:
// Convert zeroes of the ctree to NULLs staticvoidconvert_zeroes(cfunc_t* cfunc) { // To represent NULLs, we will use the MACRO_NULL enumeration // Normally it is present in the loaded tils but let's verify it if (!get_named_type(NULL, null_type, NTF_TYPE)) { msg("%s type is missing, cannot convert zeroes to NULLs\\n", null_type); return; }
// We derive a helper class from ctree_visitor_t // The ctree_visitor_t is a base class to derive // ctree walker classes. // You have to redefine some virtual functions // to do the real job. Here we redefine visit_expr() since we want // to examine and modify expressions. structida_localzero_converter_t :publicctree_visitor_t { zero_converter_t(void) : ctree_visitor_t(CV_FAST) {} int idaapi visit_expr(cexpr_t* e)override { // verify if the current expression has pointer expressions // we handle the following patterns: // A. ptr = 0; // B. func(0); where argument is a pointer // C. ptr op 0 where op is a comparison switch (e->op) { case cot_asg: // A cot_asg: x = y if (e->x->type.is_ptr()) // e->x first operand make_null_if_zero(e->y); break;
case cot_call: // B { carglist_t& args = *e->a; for (int i = 0; i < args.size(); i++) // check all arguments { carg_t& a = args[i]; if (a.formal_type.is_ptr_or_array()) make_null_if_zero(&a); } } break;
case cot_eq: // C case cot_ne: case cot_sge: case cot_uge: case cot_sle: case cot_ule: case cot_sgt: case cot_ugt: case cot_slt: case cot_ult: // check both sides for zeroes if (e->y->type.is_ptr()) // e->y second operand make_null_if_zero(e->x); if (e->x->type.is_ptr()) make_null_if_zero(e->y); break;
default: break;
} return0; // continue walking the tree } }; zero_converter_t zc; // walk the whole function body zc.apply_to(&cfunc->body, NULL); }
识别这三种语句的方法依赖于 Ctree 节点的 op 属性,匹配成功后调用 make_null_if_zero 进行进一步处理。 make_null_if_zero 代码如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14
// If the expression is zero, convert it to NULL staticvoidmake_null_if_zero(cexpr_t* e) { if (e->is_zero_const() && !e->type.is_ptr()) { // this is plain zero, convert it number_format_t& nf = e->n->nf; nf.flags = enum_flag(); // nf.flags ida flags, which describe number radix, enum, etc nf.serial = 0; // for enums: constant serial number nf.props |= NF_VALID; nf.type_name = null_type; e->type.get_named_type(nullptr, null_type, BTF_ENUM); } }
def convert_zeroes(cfunc: idaapi.cfunc_t):
    """Convert zero constants used in pointer contexts of ``cfunc`` to NULL.

    Python counterpart of the C++ ``convert_zeroes``: a ctree visitor looks at
    every expression and passes candidate operands to ``make_null_if_zero``.
    """

    class zero_converter_t(idaapi.ctree_visitor_t):
        def __init__(self, *args):
            super().__init__(idaapi.CV_FAST)

        def visit_expr(self, *args) -> "int":
            e = args[0]
            if e.op == idaapi.cot_asg:  # A. ptr = 0
                if e.x.type.is_ptr():
                    make_null_if_zero(e.y)
            elif e.op == idaapi.cot_call:  # B. func(0) with a pointer argument
                for arg in e.a:
                    if arg.formal_type.is_ptr_or_array():
                        make_null_if_zero(arg)
            elif e.op in (
                idaapi.cot_eq, idaapi.cot_ne,
                idaapi.cot_sge, idaapi.cot_uge,
                idaapi.cot_sle, idaapi.cot_ule,
                idaapi.cot_sgt, idaapi.cot_ugt,
                idaapi.cot_slt, idaapi.cot_ult,
            ):  # C. ptr <comparison> 0
                # Check both sides independently, as the C++ version does.
                if e.y.type.is_ptr():
                    make_null_if_zero(e.x)
                if e.x.type.is_ptr():
                    make_null_if_zero(e.y)
            return 0  # continue walking the tree

    # Walk the whole function body, mirroring zc.apply_to(&cfunc->body, NULL).
    zc = zero_converter_t()
    zc.apply_to(cfunc.body, None)
----------------------------- // This callback handles various hexrays events. staticssize_t idaapi callback(void* ud, hexrays_event_t event, va_list va) { vds3_t* plugmod = (vds3_t*)ud; switch (event) { case hxe_populating_popup: { // If the current item is an if-statement, then add the menu item TWidget* widget = va_arg(va, TWidget*); TPopupMenu* popup = va_arg(va, TPopupMenu*); vdui_t& vu = *va_arg(va, vdui_t*); if (plugmod->find_if_statement(vu) != NULL) attach_action_to_popup(widget, popup, ACTION_NAME); } break;
case hxe_maturity: if (!plugmod->inverted_ifs.empty()) { // If the ctree is ready, invert marked ifs cfunc_t* cfunc = va_arg(va, cfunc_t*); ctree_maturity_t new_maturity = va_argi(va, ctree_maturity_t); if (new_maturity == CMAT_FINAL) // ctree is ready plugmod->convert_marked_ifs(cfunc); } break;
// Check if the item under the cursor is 'if' or 'else' keyword // If yes, return pointer to the corresponding ctree item cinsn_t* vds3_t::find_if_statement(constvdui_t& vu) { // 'if' keyword: straightforward check if (vu.item.is_citem()) { cinsn_t* i = vu.item.i; // we can handle only if-then-else statements, so check that the 'else' // clause exists if (i->op == cit_if && i->cif->ielse != NULL) return i; } // check for 'else' line. The else lines do not correspond // to any ctree item. That's why we have to check for them separately. // we could extract the corresponding text line but this would be a bad approach // a line with single 'else' would not give us enough information to locate // the corresponding 'if'. That's why we use the line tail marks. // All 'else' line will have the ITP_ELSE mark if (vu.tail.citype == VDI_TAIL && vu.tail.loc.itp == ITP_ELSE) { // for tail marks, we know only the corresponding ea, // not the pointer to if-statement // find it by walking the whole ctree structida_localif_finder_t :publicctree_visitor_t { ea_t ea; cinsn_t* found; if_finder_t(ea_t e) : ctree_visitor_t(CV_FAST | CV_INSNS), ea(e), found(NULL) {} int idaapi visit_insn(cinsn_t* i)override { if (i->op == cit_if && i->ea == ea) { found = i; return1; // stop enumeration } return0; } }; if_finder_tiff(vu.tail.loc.ea); if (iff.apply_to(&vu.cfunc->body, NULL)) return iff.found; } returnNULL; }
如果用户选中的是 if 且该 if 存在 else 就直接返回其对应的 ctree 指令对象指针。若用户选中的是 else,就有点复杂了。 else 在 ctree 中没有任何对应的项,只能通过 vdui_t 中提供的信息来判断,判断代码如下
voidvds3_t::convert_marked_ifs(cfunc_t* cfunc) { // we walk the ctree and for each if-statement check if has to be inverted structida_localif_inverter_t :publicctree_visitor_t { vds3_t* self; if_inverter_t(vds3_t* _self) : ctree_visitor_t(CV_FAST | CV_INSNS), self(_self) {} int idaapi visit_insn(cinsn_t* i)override { if (i->op == cit_if && self->inverted_ifs.has(i->ea)) self->do_invert_if(i); return0; // continue enumeration } }; if_inverter_tifi(this); ifi.apply_to(&cfunc->body, NULL); // go! }
// The user has selected to invert the if statement. Update ctree // and refresh the view. voidvds3_t::do_invert_if(cinsn_t* i)//lint !e818 could be declared as const* { QASSERT(30198, i->op == cit_if); cif_t& cif = *i->cif; // create an inverted condition and swap it with the if-condition cexpr_t* notcond = lnot(newcexpr_t(cif.expr)); notcond->swap(cif.expr); delete notcond; // swap if branches qswap(cif.ielse, cif.ithen); }
该函数将输入的 if-statement 中的 expr 替换为 not expr,并交换 then 与 else 指令。
//------------------------------------------------------------------------- /// Generic microcode generator class. /// An instance of a derived class can be registered to be used for /// non-standard microcode generation. Before microcode generation for an /// instruction all registered object will be visited by the following way: /// if ( filter->match(cdg) ) /// code = filter->apply(cdg); /// if ( code == MERR_OK ) /// continue; // filter generated microcode, go to the next instruction structmicrocode_filter_t { /// check if the filter object is to be appied /// \\return success virtualboolmatch(codegen_t &cdg)= 0;
/// generate microcode for an instruction /// \\return MERR_... code: /// MERR_OK - user-defined call generated, go to the next instruction /// MERR_INSN - not generated - the caller should try the standard way /// else - error virtualmerror_tapply(codegen_t &cdg)= 0; };
// Generate microcode. This call returns fully optimized microcode. // If desired, we could hook to decompiler events and return MERR_STOP // to return microcode from previous analysis stages. Another and easier // way of obtaining microcode of earlier stages is to explicitly specify // the required maturity level in the gen_mircocode() call. hexrays_failure_t hf; func_t *pfn = get_func(get_screen_ea()); mba_t *mba = gen_microcode(pfn, &hf, NULL, DECOMP_WARNINGS); // Dump the microcode to the output window vd_printer_t vp; mba->print(vp); delete mba; // 需要用户清理内存
gen_microcode 函数定义如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/// Generate microcode of an arbitrary code snippet /// \\param mbr snippet ranges /// \\param hf extended error information (if failed) /// \\param retlist list of registers the snippet returns /// \\param decomp_flags bitwise combination of \\ref DECOMP_... bits /// \\param reqmat required microcode maturity /// \\return pointer to the microcode, NULL if failed.
/// Microcode maturity levels
enum mba_maturity_t
{
  MMAT_ZERO,         ///< microcode does not exist
  MMAT_GENERATED,    ///< generated microcode
  MMAT_PREOPTIMIZED, ///< preoptimized pass is complete
  MMAT_LOCOPT,       ///< local optimization of each basic block is complete.
                     ///< control flow graph is ready too.
  MMAT_CALLS,        ///< detected call arguments
  MMAT_GLBOPT1,      ///< performed the first pass of global optimization
  MMAT_GLBOPT2,      ///< most global optimization passes are done
  MMAT_GLBOPT3,      ///< completed all global optimization. microcode is fixed now.
  MMAT_LVARS,        ///< allocated local variables
};
关于 vd_printer_t 的一些注释
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
// Notes: // 1. You may derive your own class based on vd_printer_t and redirect the // output anywhere you want. // 2. There is also mblock_t::print() that prints one basic block. // 3. There are also mba_t::dump() and mblock_t::dump() functions // that create a file in the directory pointed by IDA_DUMPDIR environment // variable. These function work only under debugger (they are convenient // to use under debugger: dump the current microcode and study it). // The decompiler itself will dump its internal state if run under // debugger, so that all microcode transformations can be tracked. // 4. Printing individual instructions with minsn_t::print() and omitting // SHINS_SHORT is supported only while decompiling the function or immediately // after it because minsn_t::print() uses a global variable that points // to the current mba_t. However, printing mblock_t and mba_t // is ok any time from the main thread. Decompiler is not thread-safe // and must be used only from the main thread.
/// User defined callback to optimize individual microcode instructions structoptinsn_t { /// Optimize an instruction. /// \\param blk current basic block. maybe NULL, which means that /// the instruction must be optimized without context /// \\param ins instruction to optimize; it is always a top-level instruction. /// the callback may not delete the instruction but may /// convert it into nop (see mblock_t::make_nop). to optimize /// sub-instructions, visit them using minsn_visitor_t. /// sub-instructions may not be converted into nop but /// can be converted to "mov x,x". for example: /// add x,0,x => mov x,x /// this callback may change other instructions in the block, /// but should do this with care, e.g. to no break the /// propagation algorithm if called with OPTI_NO_LDXOPT. /// \\param optflags combination of \\ref OPTI_ bits /// \\return number of changes made to the instruction. /// if after this call the instruction's use/def lists have changed, /// you must mark the block level lists as dirty (see mark_lists_dirty) virtualint idaapi func(mblock_t *blk, minsn_t *ins, int optflags)= 0; };
/// User defined callback to optimize microcode blocks structoptblock_t { /// Optimize a block. /// This function usually performs the optimizations that require analyzing /// the entire block and/or its neighbors. For example it can recognize /// patterns and perform conversions like: /// b0: b0: /// ... ... /// jnz x, 0, @b2 => jnz x, 0, @b2 /// b1: b1: /// add x, 0, y mov x, y /// ... ... /// \\param blk Basic block to optimize as a whole. /// \\return number of changes made to the block. See also mark_lists_dirty. virtualint idaapi func(mblock_t *blk)= 0; };
structgoto_optimizer_t :publicoptblock_t { virtualint idaapi func(mblock_t *blk)override { if ( handle_goto_chain(blk) ) return1; return0; } //lint -e{818} ins could be made const boolhandle_goto_chain(mblock_t *blk)const { minsn_t *mgoto = blk->tail; if ( mgoto == NULL || mgoto->opcode != m_goto ) returnfalse;
intvec_t visited; int t0 = mgoto->l.b; int i = t0; mba_t *mba = blk->mba;
// follow the goto chain while ( true ) { if ( !visited.add_unique(i) ) returnfalse; // an endless loop, prefer to keep things as is mblock_t *b = mba->get_mblock(i); // skip assertion instructions and find first regular instruction minsn_t *m2 = getf_reginsn(b->head); if ( m2 == NULL || m2->opcode != m_goto ) break; // not a goto i = m2->l.b; } if ( i == t0 ) returnfalse; // not a chain
// all ok, found a goto chain mgoto->l.b = i; // jump directly to the end of the chain
// fix the successor/predecessor lists blk->succset[0] = i; mba->get_mblock(i)->predset.add(blk->serial); mba->get_mblock(t0)->predset.del(blk->serial);
// since we changed the control flow graph, invalidate the use/def chains. // stricly speaking it is not really necessary in our plugin because // we did not move around any microcode operands. mba->mark_chains_dirty();
// it is a good idea to verify microcode after each change // however, it may be time consuming, so comment it out eventually mba->verify(true); returntrue; } };
7. gco_info_t::append_to_list 把用户选中的寄存器转成 mlist_t (因为后面的 API 要求 mlist_t)
这一步是非常迷惑的操作, 先看代码
1 2 3 4 5 6 7 8 9 10 11 12 13
// prepare mlist for the current operand. we will use to to find references // to the current operand in the microcode. usually we do not use operands // (processor instruction operands nor microcode instruction operands) // for searches. instead, we build a 'mlist_t' instance and use it. mlist_tlist; if (!gco.append_to_list(&list, mba)) { warning("Failed to represent %s as microcode list", gco.name.c_str()); delete mba; returnfalse; } // list 中只有选中的寄存器对应的 micro 寄存器,例如 eax.4
{
  // get use-def chains. do it inside a block in order to release
  // the chains immediately after using them
  mbl_graph_t* graph = mba->get_graph();
  chain_keeper_t ud = graph->get_ud(GC_REGS_AND_STKVARS);
  chain_keeper_t du = graph->get_du(GC_REGS_AND_STKVARS);
}
ud 和 du 这两个 chain_keeper_t 对象会在退出该代码块(C++ 作用域,而不是微码基本块)时自动释放其持有的 use-def/def-use 链。
10. 根据选中寄存器 use/def 情况调用 collect_xrefs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
if (gco.is_use())
{
  // collect definitions: use the ud chains and search backwards
  // (find_uses == false)
  collect_xrefs(&xrefs, ctx, mop, list, ud, false);
  ndefs = xrefs.size();
  // register is used by the current instruction - add 'ea' as use-addr
  xrefs.add_unique(ea);
}

if (gco.is_def())
{
  // register is defined by the current instruction - add 'ea' as def-addr
  if (xrefs.add_unique(ea))
    ndefs = xrefs.size();
  // collect uses: use the du chains and search forwards (find_uses == true)
  collect_xrefs(&xrefs, ctx, mop, list, du, true);
}
如果选中的寄存器是 use,则使用 ud 链调用 collect_xrefs,且最后一个参数 find_uses = false(在基本块内向前搜索定义);如果选中的寄存器是 def,则使用 du 链调用 collect_xrefs,且最后一个参数 find_uses = true(在基本块内向后搜索使用)。find_uses 决定的是基本块内搜索的方向。
collect_xrefs 分析
collect_xrefs 主要的作用是调用 collect_block_xrefs 函数收集基本块内的交叉引用信息, 先收集当前基本块的交叉引用,再通过 du 链查找其它 use 或 def 基本块,并收集其信息。 ud/du 链中只存储了use或def的基本块信息,没有具体到某一条指令。
staticvoidcollect_xrefs( eavec_t* out, constop_parent_info_t& ctx, constmop_t* mop, mlist_tlist, constgraph_chains_t& du, bool find_uses) { // first collect the references in the current block minsn_t* start = find_uses ? ctx.topins->next : ctx.topins->prev; collect_block_xrefs(out, &list, ctx.blk, start, find_uses);
// then find references in other blocks int serial = ctx.blk->serial; // block number of the operand constblock_chains_t& bc = du[serial]; // chains of that block constchain_t* ch = bc.get_chain(*mop); // chain of the operand if (ch == NULL) return; // odd for (int i = 0; i < ch->size(); i++) { int bn = ch->at(i); mblock_t* b = ctx.mba->get_mblock(bn); // block that uses the instruction minsn_t* ins = find_uses ? b->head : b->tail; mlist_t tmp = list; // 注意这里每次循环都重新赋值,不是传址 collect_block_xrefs(out, &tmp, b, ins, find_uses); } }
从这段代码我们可以窥探 IDA 的 du/ud 链条的设计, IDA 用 graph_chains_t 数据类型来表示整个函数的du/ud 链,它其实是一个 vector 存储的是 block_chains_t 其下标与基本块的 serial 相对应。 block_chains_t 是一个 map 结构,将基本块内的所有 mop 操作数与对应的 ud/du 链关联。 chain_keeper_t 其实是一个三级结构,第一级由基本块下标索引,第二级由操作数索引,第三级才是操作数对应的 du 链。
staticvoidcollect_block_xrefs( eavec_t* out, mlist_t* list, constmblock_t* blk, constminsn_t* ins, bool find_uses) { for (constminsn_t* p = ins; p != NULL && !list->empty(); p = find_uses ? p->next : p->prev) { mlist_t use = blk->build_use_list(*p, MUST_ACCESS); // things used by the insn mlist_t def = blk->build_def_list(*p, MUST_ACCESS); // things defined by the insn mlist_t& plst = find_uses ? use : def; if (list->has_common(plst)) out->add_unique(p->ea); // this microinstruction seems to use our operand list->sub(def); } }
msg("Successfully generated microcode for %a..%a\\n", ea1, ea2); vd_printer_t vp; mba->print(vp);
// We must explicitly delete the microcode array delete mba;
主要演示了 mba_ranges_t 的构造。
vds14
略
vds15
演示使用 get_valranges() 获取寄存器的值域.
这个插件也受到诸多的限制
1 2 3 4 5 6 7 8
* Unfortunately this plugin is of limited use because: * - simple cases where a single value is assigned to a register * are automatically handled by the decompiler and the register * is replaced by the value * - too complex cases where the register gets its value from untrackable * sources, it fails * - only value ranges at the basic block start are shown
// Print the value ranges of the register/stkvar under the cursor.
// NOTE(review): 'mba' is used below but its creation is not part of this
// excerpt; it is presumably generated between get_current_operand() and
// append_to_list() - confirm against the full sample.
bool idaapi plugin_ctx_t::run(size_t)
{
  ea_t ea = get_screen_ea();
  func_t* pfn = get_func(ea);
  if (pfn == NULL)
  {
    msg("Please position the cursor within a function\n");
    return true;
  }

  flags_t F = get_flags(ea);
  if (!is_code(F))
  {
    msg("Please position the cursor on an instruction\n\n");
    return true;
  }

  gco_info_t gco;
  if (!get_current_operand(&gco))
  {
    msg("Could not find a register or stkvar in the current operand\n");
    return true;
  }

  // prepare mlist for the current operand
  mlist_t list;
  if (!gco.append_to_list(&list, mba))
  {
    msg("Failed to represent %s as microcode list\n", gco.name.c_str());
    delete mba;
    return false;
  }

  // find micro-insn nearest to EA
  const mblock_t* b;
  const minsn_t* ins;
  if (!find_insn_with_list(&b, &ins, mba, ea, list, gco.is_def()))
  {
    msg("Could not find %s after %a in the microcode, sorry\n"
        "Probably it has been optimized away\n",
        gco.name.c_str(), ea);
    delete mba;
    return false;
  }

  valrng_t vr;
  int vrflags = VR_AT_START | VR_EXACT;
  if (b->get_valranges(&vr, gco.cvt_to_ivl(), ins, vrflags))
  {
    qstring vrstr;
    vr.print(&vrstr);
    msg("Value ranges of %s at %a: %s\n",
        gco.name.c_str(),
        ins->ea,
        vrstr.c_str());
  }
  else
  {
    msg("Cannot find value ranges of %s\n", gco.name.c_str());
  }

  // We must explicitly delete the microcode array
  delete mba;
  return true;
}
#define VR_AT_START 0x0000    ///< get value ranges before the instruction or
                              ///< at the block start (if M is NULL)
#define VR_AT_END   0x0001    ///< get value ranges after the instruction or
                              ///< at the block end, just after the last
                              ///< instruction (if M is NULL)
#define VR_EXACT    0x0002    ///< find exact match. if not set, the returned
                              ///< valrng size will be >= vivl.size
具体调用代码如下:
1 2 3 4 5 6 7 8 9 10 11 12
valrng_t vr;
// value ranges before INS, exact match only
int vrflags = VR_AT_START | VR_EXACT;
if (b->get_valranges(&vr, gco.cvt_to_ivl(), ins, vrflags))
{
  qstring vrstr;
  vr.print(&vrstr);
  msg("Value ranges of %s at %a: %s\n",
      gco.name.c_str(),
      ins->ea,
      vrstr.c_str());
}
// find the first top micro-instruction after EA that uses or defines LIST staticboolfind_insn_with_list( constmblock_t** blk, constminsn_t** ins, mba_t* mba, ea_t _ea, constmlist_t& _list, bool _is_dest) { structida_localtop_visitor_t :publicminsn_visitor_t { constmblock_t* b = nullptr; constminsn_t* ins = nullptr; ea_t ea; constmlist_t& list; bool is_dest; top_visitor_t(ea_t e, constmlist_t& l, bool d) : ea(e), list(l), is_dest(d) {} int idaapi visit_minsn(void)override { if (topins->ea == ea) // 当前指令刚好与目标 ea 匹配 { // exact match b = blk; // The context info used by visitors ins = topins; returntrue; } if (blk->start <= ea && topins->ea > ea) // blk-start <= ea < topIns->ea { mlist_t defuse = is_dest ? blk->build_def_list(*topins, MUST_ACCESS) : blk->build_use_list(*topins, MUST_ACCESS); if (defuse.has_common(list) && (ins == nullptr || topins->ea < ins->ea)) { // nearest use/def to EA b = blk; ins = topins; } } returnfalse; } }; top_visitor_ttv(_ea, _list, _is_dest);
* mov #N, var.4 mov #N, var.4 * xor var@1.1, #M, var@1.1 => mov #NM, var@1.1 * where NM == (N>>8)^M * * We need this rule because the decompiler cannot propagate the second * byte of VAR into the xor instruction. * * The XOR opcode can be replaced by any other, we donot rely on it. * Also operand sizes can vary.
//-------------------------------------------------------------------------- structglbprop_t :publicoptinsn_t { virtualint idaapi func(mblock_t* blk, minsn_t* ins, int/*optflags*/)override { if (ins->r.t != mop_n) // 判断第二个操作数必须为常数 xor var@1.1, #M, var@1.1 return0; // we want a constant as the second operand
if (ins->r.size > 2) // 只有长度为1时,decompiler 无法处理 return0; // bigger sizes are handled by the decompiler without problems
// build list of data used by INS mlist_t use = blk->build_use_list(*ins, MAY_ACCESS);
// find the instruction that defines anything from USE constminsn_t* di = find_prev_def(blk, use, ins); // 寻找对当前操作数的定值指令 if (di == NULL) return0; // not found
if (di->opcode != m_mov || di->l.t != mop_n)// 判断定值指令的第一个操作数是否为立即数 return0; // must be 'mov #N, ...'
// compare the destination of DI and the left operand of INS mop_t v1 = ins->l; constmop_t& v2 = di->d; if (v1.t != v2.t) return0; // operand types are different
// if operand sizes are the same, hexrays can handle it without our help // if the size of INS->L is bigger than the size of DI->D, may not propagate // we handle only the case where the size of INS->L is less than the size // of DI->D because the hexrays sometimes has problems with it. if (v1.size >= v2.size) return0;
// this is not very efficient... but acceptable int off = 0;//v1 -> var@1.1 不是很理解这个循环在寻找什么东西? while (!v1.equal_mops(v2, EQ_IGNSIZE)) // EQ_IGNSIZE:ignore source operand sizes { if (++off >= v2.size) return0; if (!v1.shift_mop(-1)) return0; }
// found a match! shift N in order to propagate the correct part of it // we don't truncate the high bits, it will happen in make_number() uint64 N = di->l.value(false); N >>= (off * 8);
// store the new value in INS ins->l.make_number(N, ins->l.size, di->l.nnn->ea, di->l.nnn->opnum);
// optimize the instruction, it is highly likely that we will get // a much simpler instruction like 'mov' ins->optimize_solo();
return1; // success, we made one change } };
向前(低地址)寻找引用的定值指令
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// find backwards the instruction that defines anything from LST staticconstminsn_t* find_prev_def( constmblock_t* blk, constmlist_t& lst, constminsn_t* ins) { constminsn_t* p = ins; while ((p = p->prev) != NULL) { mlist_t def = blk->build_def_list(*p, MAY_ACCESS | FULL_XDSU); if (def.has_common(lst)) break; } return p; }
//-------------------------------------------------------------------------- bool idaapi plugin_ctx_t::run(size_t) { // Currently the user can only add new regvals. Since the main goal of // this plugin to illustrate how to modify the microcode, deleting or showing // fixed regvals is left as an exercise to the reader. staticconstchar form[] = "Specify known register value\\n" "<~A~ddress :$::16::>\\n" "<~R~egister:q::16::>\\n" "<~V~alue :L::16::>\\n" "\\n"; static qstring regname; staticfixed_regval_info_t fri; CASSERT(sizeof(fri.ea) == sizeof(ea_t)); CASSERT(sizeof(fri.value) == sizeof(uint64)); while (ask_form(form, &fri.ea, ®name, &fri.value)) { reg_info_t ri; if (!parse_reg_name(&ri, regname.c_str())) { warning("Sorry, bad register name: %s", regname.c_str()); continue; } fri.nbytes = ri.size; fri.reg = reg2mreg(ri.reg); if (fri.reg == mr_none) { warning("Failed to convert to microregister: %s", regname.c_str()); continue; // apparently this register is not supported by the decompiler } bool found = false; for (auto& rv : user_regvals) { if (rv.ea == fri.ea && rv.reg == fri.reg) { rv.nbytes = fri.nbytes; rv.value = fri.value; found = true; break; } } if (!found) user_regvals.push_back(fri); staticconstchar fmt[] = "Register %s at %a is considered to be equal to 0x%" FMT_64 "X\\n"; info(fmt, regname.c_str(), fri.ea, fri.value); msg(fmt, regname.c_str(), fri.ea, fri.value); returntrue; } returnfalse; }
// This callback intercepts control as soon microcode is generated // and adds necessary assertions to it. These assertions will inform // the decompiler about the user-specifed register values. ssize_t idaapi plugin_ctx_t::hr_callback( void* ud, hexrays_event_t event, va_list va) { plugin_ctx_t& ctx = *(plugin_ctx_t*)ud; if (event == hxe_microcode) { mba_t* mba = va_arg(va, mba_t*); ctx.insert_assertions(mba); } return0; }
voidplugin_ctx_t::insert_assertions(mba_t* mba)const { func_t* pfn = mba->get_curfunc(); if (pfn == NULL) return; // currently only functions are supported, not snippets
// filter out the addresses outside of the decompiled function fixed_regvals_t regvals; for (constauto& rv : user_regvals) // 遍历已经添加的修改项 { if (func_contains(pfn, rv.ea)) // 判断是否为当前函数中的修改项 regvals.push_back(rv); } if (regvals.empty()) return; // no addresses inside our function
structida_localassertion_inserter_t :publicminsn_visitor_t { fixed_regvals_t& regvals; virtualint idaapi visit_minsn(void)override { for (size_t i = 0; i < regvals.size(); i++) { fixed_regval_info_t& fri = regvals[i]; if (curins->ea == fri.ea) // 当前指令的地址与修改项的地址匹配 { // create "mov #value, reg" minsn_t* m = create_mov(fri); // insert it before the current instruction blk->insert_into_block(m, curins->prev); // remove this fixed regval from consideration regvals.erase(regvals.begin() + i); --i; } } return regvals.empty(); // stop if regvals becomes empty } assertion_inserter_t(fixed_regvals_t& fr) : regvals(fr) {} }; assertion_inserter_tai(regvals);
// find the specified addresses in mba and insert assertions. // note: if the address specified by the user has the 'nop' instruction, it // won't be translated into mircocode. we may fail to add an assertion because // of this. the user should not specify the address of a 'nop' instruction // or the logic in visit_minsn() should be improved to handle the situation // when the specified address is not present in the microcode. mba->for_all_topinsns(ai);
// This will work if IDA_DUMPDIR envvar points to a directory mba->dump();
// it is a good idea to ensure that we did not break anything // call the verifier for that mba->verify(true); }
上面这段代码中最核心的代码就是插入指令的代码
1 2 3 4 5 6 7
// create "mov #value, reg" minsn_t* m = create_mov(fri); // insert it before the current instruction blk->insert_into_block(m, curins->prev); // remove this fixed regval from consideration regvals.erase(regvals.begin() + i); --i;
create_mov 的代码如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14
staticminsn_t* create_mov(constfixed_regval_info_t& fri) { minsn_t* m = newminsn_t(fri.ea); m->opcode = m_mov; m->l.make_number(fri.value, fri.nbytes, fri.ea); m->d.make_reg(fri.reg, fri.nbytes); // declare this 'mov' as an assertion. // assertions are deleted before generating ctree and don't // appear in the output m->iprops |= IPROP_ASSERT; // Just for debugging let us print the constructed assertion: msg("Created insn: %s\\n", m->dstr()); return m; }
// recognize "x | ~x" and replace by -1 structsubinsn_optimizer_t :publicminsn_visitor_t { int cnt = 0; int idaapi visit_minsn()override// for each instruction... { // THE CORE OF THE PLUGIN IS HERE: // check the pattern "x | ~x" if ( curins->opcode == m_or && curins->r.is_insn(m_bnot) && curins->l == curins->r.d->l ) { if ( !curins->l.has_side_effects() ) // avoid destroying side effects { // pattern matched, convert to "mov -1, ..." curins->opcode = m_mov; curins->l.make_number(-1, curins->r.size); curins->r.erase(); cnt = cnt + 1; // number of changes we made } } return0; // continue traversal } };
逻辑比较简单,遍历指令过程中若遇到 x | ~x 表达式,则将其替换成 mov -1 指令。
通过代码可以看出,这种优化只能处理 not 是 or 的子指令的情况,IDA 设计的指令嵌套确实很方便这种优化,可以不要考虑 删除掉 not 或 or 对于其它指令的影响。