1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 """
32 Module instrumentation.
33
34 @group Instrumentation:
35 Module
36
37 @group Warnings:
38 DebugSymbolsWarning
39 """
40
41 from __future__ import with_statement
42
43 __revision__ = "$Id: module.py 1299 2013-12-20 09:30:55Z qvasimodo $"
44
45 __all__ = ['Module', 'DebugSymbolsWarning']
46
47 import win32
48 from textio import HexInput, HexDump
49 from util import PathOperations
50
51
52 Process = None
53
54 import os
55 import warnings
56 import traceback
61 """
62 This warning is issued if the support for debug symbols
63 isn't working properly.
64 """
65
69 """
70 Interface to a DLL library loaded in the context of another process.
71
72 @group Properties:
73 get_base, get_filename, get_name, get_size, get_entry_point,
74 get_process, set_process, get_pid,
75 get_handle, set_handle, open_handle, close_handle
76
77 @group Labels:
78 get_label, get_label_at_address, is_address_here,
79 resolve, resolve_label, match_name
80
81 @group Symbols:
82 load_symbols, unload_symbols, get_symbols, iter_symbols,
83 resolve_symbol, get_symbol_at_address
84
85 @group Modules snapshot:
86 clear
87
88 @type unknown: str
89 @cvar unknown: Suggested tag for unknown modules.
90
91 @type lpBaseOfDll: int
92 @ivar lpBaseOfDll: Base of DLL module.
93 Use L{get_base} instead.
94
95 @type hFile: L{FileHandle}
96 @ivar hFile: Handle to the module file.
97 Use L{get_handle} instead.
98
99 @type fileName: str
100 @ivar fileName: Module filename.
101 Use L{get_filename} instead.
102
103 @type SizeOfImage: int
104 @ivar SizeOfImage: Size of the module.
105 Use L{get_size} instead.
106
107 @type EntryPoint: int
108 @ivar EntryPoint: Entry point of the module.
109 Use L{get_entry_point} instead.
110
111 @type process: L{Process}
112 @ivar process: Process where the module is loaded.
113 Use the L{get_process} method instead.
114 """
115
116 unknown = '<unknown>'
117
119 """
120 Internally used by L{Module} to enumerate symbols in a module.
121 """
122
def __init__(self, undecorate = False):
    """
    Prepares an empty symbol list.

    @type  undecorate: bool
    @param undecorate: Flag stored as-is in C{self.undecorate}.
        Defaults to C{False}.
    """
    self.undecorate = undecorate
    self.symbols = []
126
def __call__(self, SymbolName, SymbolAddress, SymbolSize, UserContext):
    """
    Callback that receives symbols and stores them in a Python list.

    @type  SymbolName: str
    @param SymbolName: Name of the symbol. When C{self.undecorate} is set
        the name is passed through C{win32.UnDecorateSymbolName} first.

    @type  SymbolAddress: int
    @param SymbolAddress: Memory address of the symbol.

    @type  SymbolSize: int
    @param SymbolSize: Size of the symbol.

    @param UserContext: Not used by this callback.

    @return: Always C{win32.TRUE}.
        NOTE(review): presumably this tells the enumeration to continue;
        confirm against the DbgHelp callback contract.
    """
    if self.undecorate:
        # Best effort: if the name can't be undecorated, store it as
        # it was received instead of dropping the symbol.
        try:
            SymbolName = win32.UnDecorateSymbolName(SymbolName)
        except Exception:
            pass
    self.symbols.append( (SymbolName, SymbolAddress, SymbolSize) )
    return win32.TRUE
138
def __init__(self, lpBaseOfDll, hFile = None, fileName = None,
                   SizeOfImage = None,
                   EntryPoint = None,
                   process = None):
    """
    Stores the known details of a module. Only the base address is
    required, every other value defaults to C{None}.

    @type  lpBaseOfDll: int
    @param lpBaseOfDll: Base address of the module.

    @type  hFile: L{FileHandle}
    @param hFile: (Optional) Handle to the module file.
        It's assigned through L{set_handle}.

    @type  fileName: str
    @param fileName: (Optional) Module filename.

    @type  SizeOfImage: int
    @param SizeOfImage: (Optional) Size of the module.

    @type  EntryPoint: int
    @param EntryPoint: (Optional) Entry point of the module.

    @type  process: L{Process}
    @param process: (Optional) Process where the module is loaded.
        It's assigned through L{set_process}.
    """

    # The symbols list starts out empty.
    self.__symbols = []

    # Plain attributes are stored exactly as given.
    self.lpBaseOfDll = lpBaseOfDll
    self.fileName    = fileName
    self.SizeOfImage = SizeOfImage
    self.EntryPoint  = EntryPoint

    # The file handle and the parent process go through their setters.
    self.set_handle(hFile)
    self.set_process(process)
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
190 """
191 @rtype: L{Handle}
192 @return: File handle.
193 Returns C{None} if unknown.
194 """
195
196 return self.__hFile
197
206
207 hFile = property(get_handle, set_handle, doc="")
208
210 """
211 @rtype: L{Process}
212 @return: Parent Process object.
213 Returns C{None} if unknown.
214 """
215
216 return self.__process
217
219 """
220 Manually set the parent process. Use with care!
221
222 @type process: L{Process}
223 @param process: (Optional) Process object. Use C{None} for no process.
224 """
225 if process is None:
226 self.__process = None
227 else:
228 global Process
229 if Process is None:
230 from process import Process
231 if not isinstance(process, Process):
232 msg = "Parent process must be a Process instance, "
233 msg += "got %s instead" % type(process)
234 raise TypeError(msg)
235 self.__process = process
236
237 process = property(get_process, set_process, doc="")
238
240 """
241 @rtype: int or None
242 @return: Parent process global ID.
243 Returns C{None} on error.
244 """
245 process = self.get_process()
246 if process is not None:
247 return process.get_pid()
248
250 """
251 @rtype: int or None
252 @return: Base address of the module.
253 Returns C{None} if unknown.
254 """
255 return self.lpBaseOfDll
256
266
def get_entry_point(self):
    """
    Retrieves the entry point of the module. When no entry point has been
    stored yet, an attempt is made to query it before returning.

    @rtype:  int or None
    @return: Entry point of the module.
        Returns C{None} if unknown.
    """
    entry_point_is_known = bool(self.EntryPoint)
    if not entry_point_is_known:
        # This call may update self.EntryPoint (and self.SizeOfImage).
        self.__get_size_and_entry_point()
    return self.EntryPoint
276
278 "Get the size and entry point of the module using the Win32 API."
279 process = self.get_process()
280 if process:
281 try:
282 handle = process.get_handle( win32.PROCESS_VM_READ |
283 win32.PROCESS_QUERY_INFORMATION )
284 base = self.get_base()
285 mi = win32.GetModuleInformation(handle, base)
286 self.SizeOfImage = mi.SizeOfImage
287 self.EntryPoint = mi.EntryPoint
288 except WindowsError, e:
289 warnings.warn(
290 "Cannot get size and entry point of module %s, reason: %s"\
291 % (self.get_name(), e.strerror), RuntimeWarning)
292
306
326
328 """
329 @rtype: str
330 @return: Module name, as used in labels.
331
332 @warning: Names are B{NOT} guaranteed to be unique.
333
334 If you need unique identification for a loaded module,
335 use the base address instead.
336
337 @see: L{get_label}
338 """
339 pathname = self.get_filename()
340 if pathname:
341 modName = self.__filename_to_modname(pathname)
342 if isinstance(modName, unicode):
343 try:
344 modName = modName.encode('cp1252')
345 except UnicodeEncodeError, e:
346 warnings.warn(str(e))
347 else:
348 modName = "0x%x" % self.get_base()
349 return modName
350
352 """
353 @rtype: bool
354 @return:
355 C{True} if the given name could refer to this module.
356 It may not be exactly the same returned by L{get_name}.
357 """
358
359
360
361 my_name = self.get_name().lower()
362 if name.lower() == my_name:
363 return True
364
365
366 try:
367 base = HexInput.integer(name)
368 except ValueError:
369 base = None
370 if base is not None and base == self.get_base():
371 return True
372
373
374
375 modName = self.__filename_to_modname(name)
376 if modName.lower() == my_name:
377 return True
378
379
380 return False
381
382
383
406
408 """
409 Closes the handle to the module.
410
411 @note: Normally you don't need to call this method. All handles
412 created by I{WinAppDbg} are automatically closed when the garbage
413 collector claims them. So unless you've been tinkering with it,
414 setting L{hFile} to C{None} should be enough.
415 """
416 try:
417 if hasattr(self.hFile, 'close'):
418 self.hFile.close()
419 elif self.hFile not in (None, win32.INVALID_HANDLE_VALUE):
420 win32.CloseHandle(self.hFile)
421 finally:
422 self.hFile = None
423
432
434 """
435 Clears the resources held by this object.
436 """
437 try:
438 self.set_process(None)
439 finally:
440 self.close_handle()
441
442
443
444
445
446
447
449 """
450 Loads the debugging symbols for a module.
451 Automatically called by L{get_symbols}.
452 """
453 if win32.PROCESS_ALL_ACCESS == win32.PROCESS_ALL_ACCESS_VISTA:
454 dwAccess = win32.PROCESS_QUERY_LIMITED_INFORMATION
455 else:
456 dwAccess = win32.PROCESS_QUERY_INFORMATION
457 hProcess = self.get_process().get_handle(dwAccess)
458 hFile = self.hFile
459 BaseOfDll = self.get_base()
460 SizeOfDll = self.get_size()
461 Enumerator = self._SymbolEnumerator()
462 try:
463 win32.SymInitialize(hProcess)
464 SymOptions = win32.SymGetOptions()
465 SymOptions |= (
466 win32.SYMOPT_ALLOW_ZERO_ADDRESS |
467 win32.SYMOPT_CASE_INSENSITIVE |
468 win32.SYMOPT_FAVOR_COMPRESSED |
469 win32.SYMOPT_INCLUDE_32BIT_MODULES |
470 win32.SYMOPT_UNDNAME
471 )
472 SymOptions &= ~(
473 win32.SYMOPT_LOAD_LINES |
474 win32.SYMOPT_NO_IMAGE_SEARCH |
475 win32.SYMOPT_NO_CPP |
476 win32.SYMOPT_IGNORE_NT_SYMPATH
477 )
478 win32.SymSetOptions(SymOptions)
479 try:
480 win32.SymSetOptions(
481 SymOptions | win32.SYMOPT_ALLOW_ABSOLUTE_SYMBOLS)
482 except WindowsError:
483 pass
484 try:
485 try:
486 success = win32.SymLoadModule64(
487 hProcess, hFile, None, None, BaseOfDll, SizeOfDll)
488 except WindowsError:
489 success = 0
490 if not success:
491 ImageName = self.get_filename()
492 success = win32.SymLoadModule64(
493 hProcess, None, ImageName, None, BaseOfDll, SizeOfDll)
494 if success:
495 try:
496 win32.SymEnumerateSymbols64(
497 hProcess, BaseOfDll, Enumerator)
498 finally:
499 win32.SymUnloadModule64(hProcess, BaseOfDll)
500 finally:
501 win32.SymCleanup(hProcess)
502 except WindowsError, e:
503 msg = "Cannot load debug symbols for process ID %d, reason:\n%s"
504 msg = msg % (self.get_pid(), traceback.format_exc(e))
505 warnings.warn(msg, DebugSymbolsWarning)
506 self.__symbols = Enumerator.symbols
507
509 """
510 Unloads the debugging symbols for a module.
511 """
512 self.__symbols = list()
513
515 """
516 Returns the debugging symbols for a module.
517 The symbols are automatically loaded when needed.
518
519 @rtype: list of tuple( str, int, int )
520 @return: List of symbols.
521 Each symbol is represented by a tuple that contains:
522 - Symbol name
523 - Symbol memory address
524 - Symbol size in bytes
525 """
526 if not self.__symbols:
527 self.load_symbols()
528 return list(self.__symbols)
529
531 """
532 Returns an iterator for the debugging symbols in a module,
533 in no particular order.
534 The symbols are automatically loaded when needed.
535
536 @rtype: iterator of tuple( str, int, int )
537 @return: Iterator of symbols.
538 Each symbol is represented by a tuple that contains:
539 - Symbol name
540 - Symbol memory address
541 - Symbol size in bytes
542 """
543 if not self.__symbols:
544 self.load_symbols()
545 return self.__symbols.__iter__()
546
548 """
549 Resolves a debugging symbol's address.
550
551 @type symbol: str
552 @param symbol: Name of the symbol to resolve.
553
554 @type bCaseSensitive: bool
555 @param bCaseSensitive: C{True} for case sensitive matches,
556 C{False} for case insensitive.
557
558 @rtype: int or None
559 @return: Memory address of symbol. C{None} if not found.
560 """
561 if bCaseSensitive:
562 for (SymbolName, SymbolAddress, SymbolSize) in self.iter_symbols():
563 if symbol == SymbolName:
564 return SymbolAddress
565 for (SymbolName, SymbolAddress, SymbolSize) in self.iter_symbols():
566 try:
567 SymbolName = win32.UnDecorateSymbolName(SymbolName)
568 except Exception, e:
569 continue
570 if symbol == SymbolName:
571 return SymbolAddress
572 else:
573 symbol = symbol.lower()
574 for (SymbolName, SymbolAddress, SymbolSize) in self.iter_symbols():
575 if symbol == SymbolName.lower():
576 return SymbolAddress
577 for (SymbolName, SymbolAddress, SymbolSize) in self.iter_symbols():
578 try:
579 SymbolName = win32.UnDecorateSymbolName(SymbolName)
580 except Exception, e:
581 continue
582 if symbol == SymbolName.lower():
583 return SymbolAddress
584
586 """
587 Tries to find the closest matching symbol for the given address.
588
589 @type address: int
590 @param address: Memory address to query.
591
592 @rtype: None or tuple( str, int, int )
593 @return: Returns a tuple consisting of:
594 - Name
595 - Address
596 - Size (in bytes)
597 Returns C{None} if no symbol could be matched.
598 """
599 found = None
600 for (SymbolName, SymbolAddress, SymbolSize) in self.iter_symbols():
601 if SymbolAddress > address:
602 continue
603 if SymbolAddress + SymbolSize > address:
604 if not found or found[1] < SymbolAddress:
605 found = (SymbolName, SymbolAddress, SymbolSize)
606 return found
607
608
609
def get_label(self, function = None, offset = None):
    """
    Builds the label for a function of this module, or for the module
    itself when no function name is given.

    @type  function: str
    @param function: (Optional) Exported function name.

    @type  offset: int
    @param offset: (Optional) Offset value. It's handed over unchanged to
        L{_ModuleContainer.parse_label}.

    @rtype:  str
    @return: Label built by L{_ModuleContainer.parse_label} from the name
        of this module, the function name and the offset.
    """
    module_name = self.get_name()
    return _ModuleContainer.parse_label(module_name, function, offset)
625
627 """
628 Creates a label from the given memory address.
629
630 If the address belongs to the module, the label is made relative to
631 its base address.
632
633 @type address: int
634 @param address: Memory address.
635
636 @type offset: None or int
637 @param offset: (Optional) Offset value.
638
639 @rtype: str
640 @return: Label pointing to the given address.
641 """
642
643
644 if offset:
645 address = address + offset
646
647
648 module = self.get_name()
649 function = None
650 offset = address - self.get_base()
651
652
653
654 start = self.get_entry_point()
655 if start and start <= address:
656 function = "start"
657 offset = address - start
658
659
660
661 try:
662 symbol = self.get_symbol_at_address(address)
663 if symbol:
664 (SymbolName, SymbolAddress, SymbolSize) = symbol
665 new_offset = address - SymbolAddress
666 if new_offset <= offset:
667 function = SymbolName
668 offset = new_offset
669 except WindowsError, e:
670 pass
671
672
673 return _ModuleContainer.parse_label(module, function, offset)
674
676 """
677 Tries to determine if the given address belongs to this module.
678
679 @type address: int
680 @param address: Memory address.
681
682 @rtype: bool or None
683 @return: C{True} if the address belongs to the module,
684 C{False} if it doesn't,
685 and C{None} if it can't be determined.
686 """
687 base = self.get_base()
688 size = self.get_size()
689 if base and size:
690 return base <= address < (base + size)
691 return None
692
735
737 """
738 Resolves a label for this module only. If the label refers to another
739 module, an exception is raised.
740
741 @type label: str
742 @param label: Label to resolve.
743
744 @rtype: int
745 @return: Memory address pointed to by the label.
746
747 @raise ValueError: The label is malformed or impossible to resolve.
748 @raise RuntimeError: Cannot resolve the module or function.
749 """
750
751
752
753 aProcess = self.get_process()
754 if aProcess is not None:
755 (module, procedure, offset) = aProcess.split_label(label)
756 else:
757 (module, procedure, offset) = _ModuleContainer.split_label(label)
758
759
760
761 if module and not self.match_name(module):
762 raise RuntimeError("Label does not belong to this module")
763
764
765 if procedure:
766 address = self.resolve(procedure)
767 if address is None:
768
769
770 address = self.resolve_symbol(procedure)
771
772
773 if address is None and procedure == "start":
774 address = self.get_entry_point()
775
776
777 if address is None:
778 if not module:
779 module = self.get_name()
780 msg = "Can't find procedure %s in module %s"
781 raise RuntimeError(msg % (procedure, module))
782
783
784 else:
785 address = self.get_base()
786
787
788 if offset:
789 address = address + offset
790 return address
791
801 """
802 Encapsulates the capability to contain Module objects.
803
804 @note: Labels are an approximated way of referencing memory locations
805 across different executions of the same process, or different processes
806 with common modules. They are not meant to be perfectly unique, and
807 some errors may occur when multiple modules with the same name are
808 loaded, or when module filenames can't be retrieved.
809
810 @group Modules snapshot:
811 scan_modules,
812 get_module, get_module_bases, get_module_count,
813 get_module_at_address, get_module_by_name,
814 has_module, iter_modules, iter_module_addresses,
815 clear_modules
816
817 @group Labels:
818 parse_label, split_label, sanitize_label, resolve_label,
819 resolve_label_components, get_label_at_address, split_label_strict,
820 split_label_fuzzy
821
822 @group Symbols:
823 load_symbols, unload_symbols, get_symbols, iter_symbols,
824 resolve_symbol, get_symbol_at_address
825
826 @group Debugging:
827 is_system_defined_breakpoint, get_system_breakpoint,
828 get_user_breakpoint, get_breakin_breakpoint,
829 get_wow64_system_breakpoint, get_wow64_user_breakpoint,
830 get_wow64_breakin_breakpoint, get_break_on_error_ptr
831 """
832
839
841 """
842 Private method to automatically initialize the snapshot
843 when you try to use it without calling any of the scan_*
844 methods first. You don't need to call this yourself.
845 """
846 if not self.__moduleDict:
847 try:
848 self.scan_modules()
849 except WindowsError:
850 pass
851
853 """
854 @type anObject: L{Module}, int
855 @param anObject:
856 - C{Module}: Module object to look for.
857 - C{int}: Base address of the DLL to look for.
858
859 @rtype: bool
860 @return: C{True} if the snapshot contains
861 a L{Module} object with the same base address.
862 """
863 if isinstance(anObject, Module):
864 anObject = anObject.lpBaseOfDll
865 return self.has_module(anObject)
866
868 """
869 @see: L{iter_modules}
870 @rtype: dictionary-valueiterator
871 @return: Iterator of L{Module} objects in this snapshot.
872 """
873 return self.iter_modules()
874
876 """
877 @see: L{get_module_count}
878 @rtype: int
879 @return: Count of L{Module} objects in this snapshot.
880 """
881 return self.get_module_count()
882
884 """
885 @type lpBaseOfDll: int
886 @param lpBaseOfDll: Base address of the DLL to look for.
887
888 @rtype: bool
889 @return: C{True} if the snapshot contains a
890 L{Module} object with the given base address.
891 """
892 self.__initialize_snapshot()
893 return lpBaseOfDll in self.__moduleDict
894
896 """
897 @type lpBaseOfDll: int
898 @param lpBaseOfDll: Base address of the DLL to look for.
899
900 @rtype: L{Module}
901 @return: Module object with the given base address.
902 """
903 self.__initialize_snapshot()
904 if lpBaseOfDll not in self.__moduleDict:
905 msg = "Unknown DLL base address %s"
906 msg = msg % HexDump.address(lpBaseOfDll)
907 raise KeyError(msg)
908 return self.__moduleDict[lpBaseOfDll]
909
911 """
912 @see: L{iter_modules}
913 @rtype: dictionary-keyiterator
914 @return: Iterator of DLL base addresses in this snapshot.
915 """
916 self.__initialize_snapshot()
917 return self.__moduleDict.iterkeys()
918
920 """
921 @see: L{iter_module_addresses}
922 @rtype: dictionary-valueiterator
923 @return: Iterator of L{Module} objects in this snapshot.
924 """
925 self.__initialize_snapshot()
926 return self.__moduleDict.itervalues()
927
929 """
930 @see: L{iter_module_addresses}
931 @rtype: list( int... )
932 @return: List of DLL base addresses in this snapshot.
933 """
934 self.__initialize_snapshot()
935 return self.__moduleDict.keys()
936
938 """
939 @rtype: int
940 @return: Count of L{Module} objects in this snapshot.
941 """
942 self.__initialize_snapshot()
943 return len(self.__moduleDict)
944
945
946
948 """
949 @type modName: str
950 @param modName:
951 Name of the module to look for, as returned by L{Module.get_name}.
952 If two or more modules with the same name are loaded, only one
953 of the matching modules is returned.
954
955 You can also pass a full pathname to the DLL file.
956 This works correctly even if two modules with the same name
957 are loaded from different paths.
958
959 @rtype: L{Module}
960 @return: C{Module} object that best matches the given name.
961 Returns C{None} if no C{Module} can be found.
962 """
963
964
965
966 modName = modName.lower()
967
968
969 if PathOperations.path_is_absolute(modName):
970 for lib in self.iter_modules():
971 if modName == lib.get_filename().lower():
972 return lib
973 return None
974
975
976
977
978 modDict = [ ( lib.get_name(), lib ) for lib in self.iter_modules() ]
979 modDict = dict(modDict)
980
981
982 if modName in modDict:
983 return modDict[modName]
984
985
986 filepart, extpart = PathOperations.split_extension(modName)
987 if filepart and extpart:
988 if filepart in modDict:
989 return modDict[filepart]
990
991
992 try:
993 baseAddress = HexInput.integer(modName)
994 except ValueError:
995 return None
996 if self.has_module(baseAddress):
997 return self.get_module(baseAddress)
998
999
1000 return None
1001
1003 """
1004 @type address: int
1005 @param address: Memory address to query.
1006
1007 @rtype: L{Module}
1008 @return: C{Module} object that best matches the given address.
1009 Returns C{None} if no C{Module} can be found.
1010 """
1011 bases = self.get_module_bases()
1012 bases.sort()
1013 bases.append(0x10000000000000000L)
1014 if address >= bases[0]:
1015 i = 0
1016 max_i = len(bases) - 1
1017 while i < max_i:
1018 begin, end = bases[i:i+2]
1019 if begin <= address < end:
1020 module = self.get_module(begin)
1021 here = module.is_address_here(address)
1022 if here is False:
1023 break
1024 else:
1025 return module
1026 i = i + 1
1027 return None
1028
1029
1087
1089 """
1090 Clears the modules snapshot.
1091 """
1092 for aModule in self.__moduleDict.itervalues():
1093 aModule.clear()
1094 self.__moduleDict = dict()
1095
1096
1097
1098 @staticmethod
def parse_label(module = None, function = None, offset = None):
    """
    Builds a label string out of a module name, a function name or
    ordinal, and an offset. All three components are optional.

    @warning: This method only creates the label, it doesn't make sure the
        label actually points to a valid memory location.

    @type  module: None or str
    @param module: (Optional) Module name.

    @type  function: None, str or int
    @param function: (Optional) Function name or ordinal.

    @type  offset: None or int
    @param offset: (Optional) Offset value.

        If C{function} is specified, offset from the function.

        If C{function} is C{None}, offset from the module.

    @rtype:  str
    @return:
        Label representing the given function in the given module.
        When nothing at all is given the label is C{"0x0"}.

    @raise ValueError:
        The module or function name contain invalid characters.
    """

    # Ordinals are written as "#0x...". Anything that can't be formatted
    # as a hexadecimal number (a name, or None) is left untouched.
    try:
        function = "#0x%x" % function
    except TypeError:
        pass

    # The characters '!' and '+' are used as separators inside labels,
    # so neither name is allowed to contain them.
    module_is_invalid = module is not None and \
                        ('!' in module or '+' in module)
    if module_is_invalid:
        raise ValueError("Invalid module name: %s" % module)
    function_is_invalid = function is not None and \
                          ('!' in function or '+' in function)
    if function_is_invalid:
        raise ValueError("Invalid function name: %s" % function)

    # Module and function, with or without an offset.
    if module and function:
        if offset:
            return "%s!%s+0x%x" % (module, function, offset)
        return "%s!%s" % (module, function)

    # Module only: the offset (if any) goes right after the '!' sign.
    if module:
        if offset:
            return "%s!0x%x" % (module, offset)
        return "%s!" % module

    # Function only, with or without an offset.
    if function:
        if offset:
            return "!%s+0x%x" % (function, offset)
        return "!%s" % function

    # Neither module nor function: the label is just a number.
    if offset:
        return "0x%x" % offset
    return "0x0"
1168
1169 @staticmethod
1171 """
1172 Splits a label created with L{parse_label}.
1173
1174 To parse labels with a less strict syntax, use the L{split_label_fuzzy}
1175 method instead.
1176
1177 @warning: This method only parses the label, it doesn't make sure the
1178 label actually points to a valid memory location.
1179
1180 @type label: str
1181 @param label: Label to split.
1182
1183 @rtype: tuple( str or None, str or int or None, int or None )
1184 @return: Tuple containing the C{module} name,
1185 the C{function} name or ordinal, and the C{offset} value.
1186
1187 If the label doesn't specify a module,
1188 then C{module} is C{None}.
1189
1190 If the label doesn't specify a function,
1191 then C{function} is C{None}.
1192
1193 If the label doesn't specify an offset,
1194 then C{offset} is C{None}.
1195
1196 @raise ValueError: The label is malformed.
1197 """
1198 module = function = None
1199 offset = 0
1200
1201
1202 if not label:
1203 label = "0x0"
1204 else:
1205
1206
1207 label = label.replace(' ', '')
1208 label = label.replace('\t', '')
1209 label = label.replace('\r', '')
1210 label = label.replace('\n', '')
1211
1212
1213 if not label:
1214 label = "0x0"
1215
1216
1217 if '!' in label:
1218 try:
1219 module, function = label.split('!')
1220 except ValueError:
1221 raise ValueError("Malformed label: %s" % label)
1222
1223
1224 if function:
1225 if '+' in module:
1226 raise ValueError("Malformed label: %s" % label)
1227
1228
1229 if '+' in function:
1230 try:
1231 function, offset = function.split('+')
1232 except ValueError:
1233 raise ValueError("Malformed label: %s" % label)
1234 try:
1235 offset = HexInput.integer(offset)
1236 except ValueError:
1237 raise ValueError("Malformed label: %s" % label)
1238 else:
1239
1240
1241 try:
1242 offset = HexInput.integer(function)
1243 function = None
1244 except ValueError:
1245 pass
1246 else:
1247
1248
1249 if '+' in module:
1250 try:
1251 module, offset = module.split('+')
1252 except ValueError:
1253 raise ValueError("Malformed label: %s" % label)
1254 try:
1255 offset = HexInput.integer(offset)
1256 except ValueError:
1257 raise ValueError("Malformed label: %s" % label)
1258
1259 else:
1260
1261
1262 try:
1263 offset = HexInput.integer(module)
1264 module = None
1265
1266
1267 except ValueError:
1268 pass
1269
1270 if not module:
1271 module = None
1272 if not function:
1273 function = None
1274
1275
1276 else:
1277
1278
1279 try:
1280 offset = HexInput.integer(label)
1281
1282
1283 except ValueError:
1284 if label.startswith('#'):
1285 function = label
1286 try:
1287 HexInput.integer(function[1:])
1288
1289
1290
1291 except ValueError:
1292 raise ValueError("Ambiguous label: %s" % label)
1293
1294
1295
1296 else:
1297 raise ValueError("Ambiguous label: %s" % label)
1298
1299
1300 if function and function.startswith('#'):
1301 try:
1302 function = HexInput.integer(function[1:])
1303 except ValueError:
1304 pass
1305
1306
1307 if not offset:
1308 offset = None
1309
1310 return (module, function, offset)
1311
1313 """
1314 Splits a label entered as user input.
1315
1316 It's more flexible in its syntax parsing than the L{split_label_strict}
1317 method, as it allows the exclamation mark (B{C{!}}) to be omitted. The
1318 ambiguity is resolved by searching the modules in the snapshot to guess
1319 if a label refers to a module or a function. It also tries to rebuild
1320 labels when they contain hardcoded addresses.
1321
1322 @warning: This method only parses the label, it doesn't make sure the
1323 label actually points to a valid memory location.
1324
1325 @type label: str
1326 @param label: Label to split.
1327
1328 @rtype: tuple( str or None, str or int or None, int or None )
1329 @return: Tuple containing the C{module} name,
1330 the C{function} name or ordinal, and the C{offset} value.
1331
1332 If the label doesn't specify a module,
1333 then C{module} is C{None}.
1334
1335 If the label doesn't specify a function,
1336 then C{function} is C{None}.
1337
1338 If the label doesn't specify an offset,
1339 then C{offset} is C{None}.
1340
1341 @raise ValueError: The label is malformed.
1342 """
1343 module = function = None
1344 offset = 0
1345
1346
1347 if not label:
1348 label = "0x0"
1349 else:
1350
1351
1352 label = label.replace(' ', '')
1353 label = label.replace('\t', '')
1354 label = label.replace('\r', '')
1355 label = label.replace('\n', '')
1356
1357
1358 if not label:
1359 label = "0x0"
1360
1361
1362 if '!' in label:
1363 return self.split_label_strict(label)
1364
1365
1366
1367
1368
1369
1370
1371
1372 if '+' in label:
1373 try:
1374 prefix, offset = label.split('+')
1375 except ValueError:
1376 raise ValueError("Malformed label: %s" % label)
1377 try:
1378 offset = HexInput.integer(offset)
1379 except ValueError:
1380 raise ValueError("Malformed label: %s" % label)
1381 label = prefix
1382
1383
1384 modobj = self.get_module_by_name(label)
1385 if modobj:
1386
1387
1388
1389 module = modobj.get_name()
1390
1391 else:
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402 try:
1403 address = HexInput.integer(label)
1404
1405 if offset:
1406
1407
1408
1409
1410
1411
1412 offset = address + offset
1413 else:
1414
1415 offset = address
1416
1417
1418
1419
1420
1421
1422 try:
1423 new_label = self.get_label_at_address(offset)
1424 module, function, offset = \
1425 self.split_label_strict(new_label)
1426 except ValueError:
1427 pass
1428
1429
1430
1431 except ValueError:
1432 function = label
1433
1434
1435 if function and function.startswith('#'):
1436 try:
1437 function = HexInput.integer(function[1:])
1438 except ValueError:
1439 pass
1440
1441
1442 if not offset:
1443 offset = None
1444
1445 return (module, function, offset)
1446
1447 @classmethod
1449 """
1450 Splits a label into its C{module}, C{function} and C{offset}
1451 components, as used in L{parse_label}.
1452
1453 When called as a static method, the strict syntax mode is used::
1454
1455 winappdbg.Process.split_label( "kernel32!CreateFileA" )
1456
1457 When called as an instance method, the fuzzy syntax mode is used::
1458
1459 aProcessInstance.split_label( "CreateFileA" )
1460
1461 @see: L{split_label_strict}, L{split_label_fuzzy}
1462
1463 @type label: str
1464 @param label: Label to split.
1465
1466 @rtype: tuple( str or None, str or int or None, int or None )
1467 @return:
1468 Tuple containing the C{module} name,
1469 the C{function} name or ordinal, and the C{offset} value.
1470
1471 If the label doesn't specify a module,
1472 then C{module} is C{None}.
1473
1474 If the label doesn't specify a function,
1475 then C{function} is C{None}.
1476
1477 If the label doesn't specify an offset,
1478 then C{offset} is C{None}.
1479
1480 @raise ValueError: The label is malformed.
1481 """
1482
1483
1484
1485
1486
1487
1488
1489 return cls.split_label_strict(label)
1490
1491
1495
1496
1498 """
1499 Converts a label taken from user input into a well-formed label.
1500
1501 @type label: str
1502 @param label: Label taken from user input.
1503
1504 @rtype: str
1505 @return: Sanitized label.
1506 """
1507 (module, function, offset) = self.split_label_fuzzy(label)
1508 label = self.parse_label(module, function, offset)
1509 return label
1510
1512 """
1513 Resolve the memory address of the given label.
1514
1515 @note:
1516 If multiple modules with the same name are loaded,
1517 the label may be resolved at any of them. For a more precise
1518 way to resolve functions use the base address to get the L{Module}
1519 object (see L{Process.get_module}) and then call L{Module.resolve}.
1520
1521 If no module name is specified in the label, the function may be
1522 resolved in any loaded module. If you want to resolve all functions
1523 with that name in all processes, call L{Process.iter_modules} to
1524 iterate through all loaded modules, and then try to resolve the
1525 function in each one of them using L{Module.resolve}.
1526
1527 @type label: str
1528 @param label: Label to resolve.
1529
1530 @rtype: int
1531 @return: Memory address pointed to by the label.
1532
1533 @raise ValueError: The label is malformed or impossible to resolve.
1534 @raise RuntimeError: Cannot resolve the module or function.
1535 """
1536
1537
1538 module, function, offset = self.split_label_fuzzy(label)
1539
1540
1541 address = self.resolve_label_components(module, function, offset)
1542
1543
1544 return address
1545
def resolve_label_components(self, module = None,
                                   function = None,
                                   offset = None):
    """
    Resolve the memory address of the given module, function and/or offset.

    @note:
        If multiple modules with the same name are loaded,
        the label may be resolved at any of them. For a more precise
        way to resolve functions use the base address to get the L{Module}
        object (see L{Process.get_module}) and then call L{Module.resolve}.

        If no module name is specified in the label, the function may be
        resolved in any loaded module. If you want to resolve all functions
        with that name in all modules, call L{Process.iter_modules} to
        iterate through all loaded modules, and then try to resolve the
        function in each one of them using L{Module.resolve}.

    @type  module: None or str
    @param module: (Optional) Module name.
        The name C{"main"} falls back to the main module when no loaded
        module has that name.

    @type  function: None, str or int
    @param function: (Optional) Function name or ordinal.
        The name C{"start"} falls back to the module entry point when
        it cannot be resolved otherwise. When no module is given, the
        name C{"main"} falls back to the base of the main module.

    @type  offset: None or int
    @param offset: (Optional) Offset value.

        If C{function} is specified, offset from the function.

        If C{function} is C{None}, offset from the module.

        If neither is specified, the offset itself is returned.

    @rtype:  int
    @return: Memory address pointed to by the label.

    @raise ValueError: The label is malformed or impossible to resolve.
    @raise RuntimeError: Cannot resolve the module or function.
    """

    # Default address when neither a module nor a function is given.
    address = 0

    # If a module was given, look it up by name.
    if module:
        modobj = self.get_module_by_name(module)
        if not modobj:
            if module == "main":
                modobj = self.get_main_module()
            else:
                raise RuntimeError("Module %r not found" % module)

        # Resolve the function within that module: exported names first,
        # then debugging symbols, then the special "start" alias.
        if function:
            address = modobj.resolve(function)
            if address is None:
                address = modobj.resolve_symbol(function)
                if address is None:
                    if function == "start":
                        address = modobj.get_entry_point()
                    if address is None:
                        msg = "Symbol %r not found in module %s"
                        raise RuntimeError(msg % (function, module))

        # No function given: the label points at the module base.
        else:
            address = modobj.get_base()

    # Only a function was given: search every loaded module for it.
    elif function:
        # Start from "not found". Otherwise an empty module list would
        # leave the default 0 in place and a bogus address would be
        # returned instead of trying the fallbacks or raising.
        address = None
        for modobj in self.iter_modules():
            address = modobj.resolve(function)
            if address is not None:
                break
        if address is None:
            if function == "start":
                modobj = self.get_main_module()
                address = modobj.get_entry_point()
            elif function == "main":
                modobj = self.get_main_module()
                address = modobj.get_base()
            else:
                msg = "Function %r not found in any module" % function
                raise RuntimeError(msg)

    # Apply the offset, if any, and return the resulting address.
    if offset:
        address = address + offset
    return address
1636
def get_label_at_address(self, address, offset = None):
    """
    Creates a label from the given memory address.

    If a module is found at the (offset adjusted) address, that module
    builds the label. Otherwise the label is built by L{parse_label}
    from the bare address.

    @warning: This method uses the name of the nearest currently loaded
        module. If that module is unloaded later, the label becomes
        impossible to resolve.

    @type  address: int
    @param address: Memory address.

    @type  offset: None or int
    @param offset: (Optional) Offset value, added to C{address}.

    @rtype:  str
    @return: Label pointing to the given address.
    """
    target = address
    if offset:
        target = target + offset

    owner = self.get_module_at_address(target)
    if not owner:
        # No module found at that address: use the raw address as label.
        return self.parse_label(None, None, target)
    return owner.get_label_at_address(target)
1662
1663
1664
1665
1666
1667
1668
def __get_system_breakpoint(self, label):
    """
    Private method to resolve a label, remembering successful results.

    Labels already present in the internal cache are answered from it.
    Otherwise the label is resolved with L{resolve_label}; a successful
    result is stored in the cache, a failure is not.

    @type  label: str
    @param label: Label to resolve.

    @rtype:  int or None
    @return: Memory address pointed to by the label.
        Returns C{None} if the label could not be resolved.
    """
    if label in self.__system_breakpoints:
        return self.__system_breakpoints[label]

    # Best effort: any failure to resolve the label is reported as None.
    try:
        resolved = self.resolve_label(label)
    except Exception:
        return None

    self.__system_breakpoints[label] = resolved
    return resolved
1679
1680
1681
def get_break_on_error_ptr(self):
    """
    Get the address of the C{g_dwLastErrorToBreakOn} global variable.

    The variable is looked up in C{ntdll} first and in C{kernel32}
    if that lookup gives no address.

    @rtype:  int
    @return:
        If present, returns the address of the C{g_dwLastErrorToBreakOn}
        global variable for this process. If not, returns C{None}.
    """
    primary = "ntdll!g_dwLastErrorToBreakOn"
    address = self.__get_system_breakpoint(primary)
    if address:
        return address

    address = self.__get_system_breakpoint(
                                "kernel32!g_dwLastErrorToBreakOn")

    # Store the outcome under the first label as well, so that later
    # calls find it there.
    self.__system_breakpoints[primary] = address
    return address
1696
def is_system_defined_breakpoint(self, address):
    """
    Tell if an address may belong to a system defined breakpoint.

    The check is based on the module found at the address: the result
    is positive when that module matches the name C{ntdll} or
    C{kernel32}.

    @type  address: int
    @param address: Memory address.

    @rtype:  bool
    @return: C{True} if the given address points to a system defined
        breakpoint. System defined breakpoints are hardcoded into
        system libraries.
    """
    # A null address can't belong to any module.
    if not address:
        return False

    module = self.get_module_at_address(address)
    if not module:
        return False

    return module.match_name("ntdll") or module.match_name("kernel32")
1713
1714
1715
def get_system_breakpoint(self):
    """
    Get the address of the system breakpoint, C{ntdll!DbgBreakPoint}.

    @rtype:  int or None
    @return: Memory address of the system breakpoint
        within the process address space.
        Returns C{None} on error.
    """
    label = "ntdll!DbgBreakPoint"
    return self.__get_system_breakpoint(label)
1724
1725
def get_user_breakpoint(self):
    """
    Get the address of the user breakpoint, C{ntdll!DbgUserBreakPoint}.

    @rtype:  int or None
    @return: Memory address of the user breakpoint
        within the process address space.
        Returns C{None} on error.
    """
    label = "ntdll!DbgUserBreakPoint"
    return self.__get_system_breakpoint(label)
1734
1735
1736
def get_breakin_breakpoint(self):
    """
    Get the address of the remote breakin breakpoint,
    C{ntdll!DbgUiRemoteBreakin}.

    @rtype:  int or None
    @return: Memory address of the remote breakin breakpoint
        within the process address space.
        Returns C{None} on error.
    """
    label = "ntdll!DbgUiRemoteBreakin"
    return self.__get_system_breakpoint(label)
1745
1746
def get_wow64_system_breakpoint(self):
    """
    Get the address of the Wow64 system breakpoint,
    C{ntdll32!DbgBreakPoint}.

    @rtype:  int or None
    @return: Memory address of the Wow64 system breakpoint
        within the process address space.
        Returns C{None} on error.
    """
    label = "ntdll32!DbgBreakPoint"
    return self.__get_system_breakpoint(label)
1755
1756
def get_wow64_user_breakpoint(self):
    """
    Get the address of the Wow64 user breakpoint,
    C{ntdll32!DbgUserBreakPoint}.

    @rtype:  int or None
    @return: Memory address of the Wow64 user breakpoint
        within the process address space.
        Returns C{None} on error.
    """
    label = "ntdll32!DbgUserBreakPoint"
    return self.__get_system_breakpoint(label)
1765
1766
def get_wow64_breakin_breakpoint(self):
    """
    Get the address of the Wow64 remote breakin breakpoint,
    C{ntdll32!DbgUiRemoteBreakin}.

    @rtype:  int or None
    @return: Memory address of the Wow64 remote breakin breakpoint
        within the process address space.
        Returns C{None} on error.
    """
    label = "ntdll32!DbgUiRemoteBreakin"
    return self.__get_system_breakpoint(label)
1775
1776
1777
def load_symbols(self):
    """
    Loads the debugging symbols for all modules in this snapshot,
    by calling C{load_symbols} on each one of them.
    """
    modules = self.iter_modules()
    for module in modules:
        module.load_symbols()
1785
def unload_symbols(self):
    """
    Unloads the debugging symbols for all modules in this snapshot,
    by calling C{unload_symbols} on each one of them.
    """
    modules = self.iter_modules()
    for module in modules:
        module.unload_symbols()
1792
def get_symbols(self):
    """
    Returns the debugging symbols for all modules in this snapshot.

    The symbols are gathered from the C{iter_symbols} iterator of each
    module, in the order the modules are iterated.

    @rtype:  list of tuple( str, int, int )
    @return: List of symbols.
        Each symbol is represented by a tuple that contains:
            - Symbol name
            - Symbol memory address
            - Symbol size in bytes
    """
    return [entry
            for module in self.iter_modules()
            for entry in module.iter_symbols()]
1810
def iter_symbols(self):
    """
    Returns an iterator for the debugging symbols in all modules in this
    snapshot.

    The symbols are taken from the C{iter_symbols} iterator of each
    module, one module after another.

    @rtype:  iterator of tuple( str, int, int )
    @return: Iterator of symbols.
        Each symbol is represented by a tuple that contains:
            - Symbol name
            - Symbol memory address
            - Symbol size in bytes
    """
    for module in self.iter_modules():
        module_symbols = module.iter_symbols()
        for entry in module_symbols:
            yield entry
1827
def resolve_symbol(self, symbol, bCaseSensitive = False):
    """
    Resolves a debugging symbol's address.

    The symbols are searched in the order given by L{iter_symbols},
    and the first one with a matching name is used.

    @type  symbol: str
    @param symbol: Name of the symbol to resolve.

    @type  bCaseSensitive: bool
    @param bCaseSensitive: C{True} for case sensitive matches,
        C{False} for case insensitive.

    @rtype:  int or None
    @return: Memory address of symbol. C{None} if not found.
    """
    if not bCaseSensitive:
        # Compare both names in lowercase.
        wanted = symbol.lower()
        for (name, where, _size) in self.iter_symbols():
            if name.lower() == wanted:
                return where
        return None

    for (name, where, _size) in self.iter_symbols():
        if name == symbol:
            return where
    return None
1851
def get_symbol_at_address(self, address):
    """
    Tries to find the closest matching symbol for the given address.

    Only symbols whose range (address plus size) contains the given
    address are considered. Among those, the one with the highest
    starting address is chosen.

    @type  address: int
    @param address: Memory address to query.

    @rtype: None or tuple( str, int, int )
    @return: Returns a tuple consisting of:
         - Name
         - Address
         - Size (in bytes)
        Returns C{None} if no symbol could be matched.
    """
    best = None
    for (name, start, size) in self.iter_symbols():
        # Skip symbols that don't contain the address.
        if not (start <= address < start + size):
            continue
        # Prefer the symbol that starts closest to the address.
        if not best or best[1] < start:
            best = (name, start, size)
    return best
1875
1876
1877
1878
1879
def _add_module(self, aModule):
    """
    Private method to add a module object to the snapshot.

    The module is bound to this object through its C{set_process}
    method and stored under its base address. A module previously
    stored under the same base address is replaced.

    @type  aModule: L{Module}
    @param aModule: Module object.
    """
    # Read the base address before binding the module to this object.
    base = aModule.get_base()
    aModule.set_process(self)
    self.__moduleDict[base] = aModule
1900
def _del_module(self, lpBaseOfDll):
    """
    Private method to remove a module object from the snapshot.

    If no module is known at the given base address, a
    C{RuntimeWarning} is issued and nothing else is done.
    Otherwise the module is removed from the snapshot and its
    C{clear} method is called.

    @type  lpBaseOfDll: int
    @param lpBaseOfDll: Module base address.
    """
    try:
        aModule = self.__moduleDict[lpBaseOfDll]
        del self.__moduleDict[lpBaseOfDll]
    except KeyError:
        aModule = None
        # HexDump.address() returns text, so it has to be formatted
        # with %s. Using %d here raised a TypeError instead of
        # issuing the warning.
        msg = "Unknown base address %s" % HexDump.address(lpBaseOfDll)
        warnings.warn(msg, RuntimeWarning)
    if aModule:
        aModule.clear()
1917
1954
def _notify_create_process(self, event):
    """
    Notify the load of the main module.

    This is done automatically by the L{Debug} class, you shouldn't need
    to call it yourself.

    @type  event: L{CreateProcessEvent}
    @param event: Create process event.

    @rtype:  bool
    @return: C{True} to call the user-defined handle, C{False} otherwise.
        This implementation always returns C{True}.
    """
    register = self.__add_loaded_module
    register(event)
    return True
1970
def _notify_load_dll(self, event):
    """
    Notify the load of a new module.

    This is done automatically by the L{Debug} class, you shouldn't need
    to call it yourself.

    @type  event: L{LoadDLLEvent}
    @param event: Load DLL event.

    @rtype:  bool
    @return: C{True} to call the user-defined handle, C{False} otherwise.
        This implementation always returns C{True}.
    """
    register = self.__add_loaded_module
    register(event)
    return True
1986
def _notify_unload_dll(self, event):
    """
    Notify the release of a loaded module.

    This is done automatically by the L{Debug} class, you shouldn't need
    to call it yourself.

    The module is removed from the snapshot only if its base address
    is currently known; unknown base addresses are silently ignored.

    @type  event: L{UnloadDLLEvent}
    @param event: Unload DLL event.

    @rtype:  bool
    @return: C{True} to call the user-defined handle, C{False} otherwise.
        This implementation always returns C{True}.
    """
    base = event.get_module_base()
    is_known = base in self.__moduleDict
    if is_known:
        self._del_module(base)
    return True
2005