diff --git a/.gitignore b/.gitignore index a56f419f..03e5ba34 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ cabal.project stack.yaml dist-newstyle +dist +*.DS_STORE diff --git a/refurbish/tests/binaries/test-switch-jump-table.c b/refurbish/tests/binaries/test-switch-jump-table.c new file mode 100644 index 00000000..c1f1fcd1 --- /dev/null +++ b/refurbish/tests/binaries/test-switch-jump-table.c @@ -0,0 +1,46 @@ +#include "util.h" + +int gin = 1; +int g0 = 0; +int g1; +int g2; +int g3; +int g4; + +int res; + +long mod_5(long n, long i0, long i1, long i2, long i3, long i4){ +beginning: + switch(n) + { + case 0: res = res + i0; break; + case 1: res = res + i1; break; + case 2: res = res + i2; break; + case 3: res = res + i3; break; + case 4: res = res + i4; break; + default: n = n - 5; goto beginning; + } +} + + +void entry() { + long in = (long)&gin; + long i0 = (long)&g0; + long i1 = (long)&g1; + long i2 = (long)&g2; + long i3 = (long)&g3; + long i4 = (long)&g4; + res = mod_5(gin, i0, i1, i2, i3, i4); +} + +#if defined(NOSTDLIB) +void _start() { + entry(); + EXIT(0); +} +#else +int main() { + entry(); + return 0; +} +#endif diff --git a/refurbish/tests/binaries/test-switch-jump-table.clang.glibc.x86_64.exe b/refurbish/tests/binaries/test-switch-jump-table.clang.glibc.x86_64.exe new file mode 100755 index 00000000..40d53ea5 Binary files /dev/null and b/refurbish/tests/binaries/test-switch-jump-table.clang.glibc.x86_64.exe differ diff --git a/refurbish/tests/binaries/test-switch-jump-table.clang.nostdlib.x86_64.exe b/refurbish/tests/binaries/test-switch-jump-table.clang.nostdlib.x86_64.exe new file mode 100755 index 00000000..19d645cd Binary files /dev/null and b/refurbish/tests/binaries/test-switch-jump-table.clang.nostdlib.x86_64.exe differ diff --git a/refurbish/tests/binaries/test-switch-jump-table.glibc.x86_64.exe b/refurbish/tests/binaries/test-switch-jump-table.glibc.x86_64.exe new file mode 100755 index 00000000..7b16d19c Binary files /dev/null and 
b/refurbish/tests/binaries/test-switch-jump-table.glibc.x86_64.exe differ diff --git a/refurbish/tests/binaries/test-switch-jump-table.musl.x86_64.exe b/refurbish/tests/binaries/test-switch-jump-table.musl.x86_64.exe new file mode 100755 index 00000000..2fde04b2 Binary files /dev/null and b/refurbish/tests/binaries/test-switch-jump-table.musl.x86_64.exe differ diff --git a/refurbish/tests/binaries/test-switch-jump-table.nostdlib.ppc64.exe b/refurbish/tests/binaries/test-switch-jump-table.nostdlib.ppc64.exe new file mode 100755 index 00000000..8ded8e0c Binary files /dev/null and b/refurbish/tests/binaries/test-switch-jump-table.nostdlib.ppc64.exe differ diff --git a/refurbish/tests/binaries/test-switch-jump-table.nostdlib.x86_64.exe b/refurbish/tests/binaries/test-switch-jump-table.nostdlib.x86_64.exe new file mode 100755 index 00000000..aa7bd9c6 Binary files /dev/null and b/refurbish/tests/binaries/test-switch-jump-table.nostdlib.x86_64.exe differ diff --git a/refurbish/tests/binaries/test-switch-jump-table.stdlib.ppc64.exe b/refurbish/tests/binaries/test-switch-jump-table.stdlib.ppc64.exe new file mode 100755 index 00000000..cf19b512 Binary files /dev/null and b/refurbish/tests/binaries/test-switch-jump-table.stdlib.ppc64.exe differ diff --git a/renovate-ppc/src/Renovate/Arch/PPC/ISA.hs b/renovate-ppc/src/Renovate/Arch/PPC/ISA.hs index 67fee7cf..e80012ee 100644 --- a/renovate-ppc/src/Renovate/Arch/PPC/ISA.hs +++ b/renovate-ppc/src/Renovate/Arch/PPC/ISA.hs @@ -93,6 +93,7 @@ isa = , R.isaMakeSymbolicJump = ppcMakeSymbolicJump , R.isaConcretizeAddresses = ppcConcretizeAddresses , R.isaSymbolizeAddresses = ppcSymbolizeAddresses + , R.isaSymbolizeLookupJump = ppcSymbolizeLookupJump } ppcPrettyInstruction :: Instruction a -> String @@ -151,6 +152,13 @@ ppcMakeSymbolicJump symAddr = [R.tagInstruction (Just symAddr) i] jmp = D.Instruction D.B (D.Directbrtarget (D.BT 0) D.:< D.Nil) i = annotateInstr (fromInst jmp) NoAddress + +ppcSymbolizeLookupJump :: + 
R.SymbolicLookupTableInfo arch + -> Maybe [R.TaggedInstruction arch (TargetAddress arch)] +ppcSymbolizeLookupJump _ = Nothing -- not implemented yet + + -- | This function converts symbolic address references in operands back to -- concrete values. As with 'ppcSymbolizeAddresses', it is a no-op on PowerPC. ppcConcretizeAddresses :: (MM.MemWidth (MM.ArchAddrWidth arch)) diff --git a/renovate-x86/renovate-x86.cabal b/renovate-x86/renovate-x86.cabal index cf5111d4..62bbecff 100644 --- a/renovate-x86/renovate-x86.cabal +++ b/renovate-x86/renovate-x86.cabal @@ -23,6 +23,7 @@ library -- other-extensions: build-depends: base >=4.10 && <5, bytestring, + vector, flexdis86 >=0.1.4, macaw-x86, macaw-x86-symbolic, diff --git a/renovate-x86/src/Renovate/Arch/X86_64/ISA.hs b/renovate-x86/src/Renovate/Arch/X86_64/ISA.hs index 55012dab..00ddc4ab 100644 --- a/renovate-x86/src/Renovate/Arch/X86_64/ISA.hs +++ b/renovate-x86/src/Renovate/Arch/X86_64/ISA.hs @@ -2,6 +2,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE ScopedTypeVariables #-} -- | The 'ISA' for x86_64 module Renovate.Arch.X86_64.ISA ( isa, @@ -13,6 +14,7 @@ import qualified GHC.Err.Located as L import qualified Control.Monad.Catch as C import Data.Bits ( bit ) +import qualified Data.Vector as Vec import qualified Data.ByteString as B import qualified Data.ByteString.Lazy.Builder as B import qualified Data.ByteString.Lazy as LB @@ -23,10 +25,15 @@ import qualified Data.List.NonEmpty as DLN import Data.Maybe import Data.Parameterized.Some import Data.Parameterized.NatRepr +import Data.Parameterized.Classes ( testEquality ) +import Data.Parameterized.Pair ( Pair(..) 
) +import qualified Data.Parameterized.Map as MapF + import qualified Data.Text.Prettyprint.Doc as PD import Data.Word ( Word8, Word64 ) import qualified Data.Macaw.Memory as MM +import qualified Data.Macaw.CFG as MC import qualified Data.Macaw.Types as MT import qualified Data.Macaw.X86 as X86 import qualified Flexdis86 as D @@ -78,7 +85,8 @@ isa = R.ISA , R.isaStoreImmediate = x86StoreImmediate , R.isaAddImmediate = x86AddImmediate , R.isaSubtractImmediate = x86SubtractImmediate - } + , R.isaSymbolizeLookupJump = x64SymbolizeLookupJump + } x86StackAddress :: R.StackAddress X86.X86_64 -> (Some MT.TypeRepr) -> Value x86StackAddress addr (Some tp) = do @@ -432,6 +440,42 @@ x64SymbolizeAddresses mem _lookup insnAddr mSymbolicTarget xi@(XI ii) jmpInstr = XI (jmpInstr0 { D.iiArgs = fmap (saveAbsoluteRipAddresses mem insnAddr xi) (D.iiArgs jmpInstr0) }) + +x64SymbolizeLookupJump :: + R.SymbolicLookupTableInfo X86.X86_64 + -> Maybe [R.TaggedInstruction X86.X86_64 TargetAddress] +x64SymbolizeLookupJump R.SymbolicLookupTableInfo + { R.symbolicLookupRegs = regState + , R.symbolicLookupIdx = rawIdx + , R.symbolicLookupAddrs = tgts + } = + case mapMaybe computeJumpInsns regs of + [insns] -> Just insns + _ -> Nothing + where + regs = MapF.toList $ MC.regStateMap regState + n = Vec.length tgts + computeJumpInsns (Pair idxReg v) + | Just Refl <- testEquality v rawIdx + , X86.X86_GP idxGpReg <- idxReg = + Just $ concat $ + [ [ cmpIdx -- cmp i idx + , jmpTgt -- je target[i] + ] + | i <- [0..n-2] + , let iTgt = tgts Vec.! i + cmpIdx = + R.tagInstruction Nothing + $ noAddr + $ makeInstr + "cmp" [ D.QWordReg idxGpReg + , D.DWordSignedImm (fromIntegral i) + ] + jmpTgt = x64MakeSymbolicJumpOrCall "je" iTgt + ] -- otherwise just jump to the final target + ++ [[x64MakeSymbolicJumpOrCall "jmp" (tgts Vec.! 
(n-1))]] + | otherwise = Nothing + saveAbsoluteRipAddresses :: MM.Memory 64 -> R.ConcreteAddress X86.X86_64 -> Instruction () -> AnnotatedOperand () -> AnnotatedOperand TargetAddress saveAbsoluteRipAddresses mem insnAddr i AnnotatedOperand { aoOperand = (v, ty) } = AnnotatedOperand { aoOperand = (I.runIdentity (mapAddrRef promoteRipDisp8 I.Identity v), ty) diff --git a/renovate/src/Renovate.hs b/renovate/src/Renovate.hs index 6eac502d..ac4d3862 100644 --- a/renovate/src/Renovate.hs +++ b/renovate/src/Renovate.hs @@ -90,6 +90,7 @@ module Renovate A.concreteFromAbsolute, A.addressAddOffset, A.addressDiff, + A.SymbolicLookupTableInfo(..), -- * Analysis -- ** Function Recovery FR.recoverFunctions, diff --git a/renovate/src/Renovate/Address.hs b/renovate/src/Renovate/Address.hs index 47609c1c..1d4ab27a 100644 --- a/renovate/src/Renovate/Address.hs +++ b/renovate/src/Renovate/Address.hs @@ -2,6 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE ExistentialQuantification #-} -- | This module defines opaque concrete and symbolic address types. module Renovate.Address ( SymbolicAddress(..), @@ -11,13 +12,15 @@ module Renovate.Address ( concreteAsSegmentOff, absoluteAddress, addressAddOffset, - addressDiff + addressDiff, + SymbolicLookupTableInfo(..) ) where import qualified GHC.Err.Located as L import qualified Data.Text.Prettyprint.Doc as PD import Data.Word ( Word64 ) +import Data.Vector ( Vector ) import qualified Numeric as N import qualified Data.Macaw.Memory as MM @@ -126,3 +129,14 @@ addressDiff :: (MM.MemWidth (MM.ArchAddrWidth arch)) => ConcreteAddress arch -> addressDiff (ConcreteAddress memAddr1) (ConcreteAddress memAddr2) = diff where Just diff = MM.diffAddr memAddr1 memAddr2 + + +-- | Stores the jump-table lookup info found in Macaw's +-- @ParsedLookupTable@ so we can unpack in each backend and +-- rewrite the jumps. +data SymbolicLookupTableInfo arch = + forall ids . 
SymbolicLookupTableInfo + { symbolicLookupRegs :: MM.RegState (MM.ArchReg arch) (MM.Value arch ids) + , symbolicLookupIdx :: MM.ArchAddrValue arch ids + , symbolicLookupAddrs :: Vector (SymbolicAddress arch) + } diff --git a/renovate/src/Renovate/BasicBlock/Types.hs b/renovate/src/Renovate/BasicBlock/Types.hs index b8a0124e..b91139d3 100644 --- a/renovate/src/Renovate/BasicBlock/Types.hs +++ b/renovate/src/Renovate/BasicBlock/Types.hs @@ -77,6 +77,7 @@ import qualified SemMC.Architecture as SA import Renovate.Address + -- | The type of instructions for an architecture -- -- Instructions are parameterized by an annotation type that is usually either diff --git a/renovate/src/Renovate/ISA.hs b/renovate/src/Renovate/ISA.hs index b3fc272e..a10fc28d 100644 --- a/renovate/src/Renovate/ISA.hs +++ b/renovate/src/Renovate/ISA.hs @@ -22,7 +22,12 @@ import qualified Data.Macaw.CFG as MM import qualified Data.Macaw.Types as MT import Renovate.Address -import Renovate.BasicBlock.Types ( ConcreteFallthrough, Instruction, InstructionAnnotation, RegisterType, TaggedInstruction ) +import Renovate.BasicBlock.Types ( ConcreteFallthrough + , Instruction + , InstructionAnnotation + , RegisterType + , TaggedInstruction + ) -- | The variety of a jump: either conditional or unconditional. This -- is used as a tag for 'JumpType's. One day, we could model the type @@ -98,6 +103,18 @@ data ISA arch = ISA -- has the worst case size behavior. , isaConcretizeAddresses :: MM.Memory (MM.ArchAddrWidth arch) -> ConcreteAddress arch -> Instruction arch (InstructionAnnotation arch) -> Instruction arch () -- ^ Remove the annotation, with possible post-processing. + + + , isaSymbolizeLookupJump :: + SymbolicLookupTableInfo arch + -> Maybe [TaggedInstruction arch (InstructionAnnotation arch)] + -- ^ Attempts to return a series of instructions which + -- mimic a jump-table-lookup indirect jump (described by + -- the given @SymbolicLookupTableInfo@) via a series of + -- comparisons and direct jumps. 
This allows the related + -- blocks to be safely relocated and the corresponding + -- jumps can be updated to reflect the new location(s). + , isaJumpType :: forall t . Instruction arch t -> MM.Memory (MM.ArchAddrWidth arch) -> ConcreteAddress arch -> JumpType arch -- ^ Test if an instruction is a jump; if it is, return some -- metadata about the jump (destination or offset). diff --git a/renovate/src/Renovate/Redirect.hs b/renovate/src/Renovate/Redirect.hs index a5b68642..ac3098eb 100644 --- a/renovate/src/Renovate/Redirect.hs +++ b/renovate/src/Renovate/Redirect.hs @@ -1,4 +1,5 @@ {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} -- | This module is the entry point for binary code redirection module Renovate.Redirect ( redirect, @@ -31,10 +32,13 @@ import qualified Data.List as L import qualified Data.List.NonEmpty as DLN import Data.Ord ( comparing ) import qualified Data.Traversable as T +import Data.Parameterized.Some import Prelude import qualified Data.Macaw.CFG as MM +import qualified Data.Macaw.CFG as MC +import qualified Data.Macaw.Discovery.State as MDS import Renovate.Address import Renovate.BasicBlock @@ -67,7 +71,11 @@ import Renovate.Rewrite ( HasInjectedFunctions, getInjectedFunctions ) -- The function runs in an arbitrary 'Monad' to allow instrumentors to -- carry around their own state. 
-- -redirect :: (MonadIO m, InstructionConstraints arch, HasInjectedFunctions m arch) +redirect :: (MonadIO m + , InstructionConstraints arch + , HasInjectedFunctions m arch + , MC.ArchConstraints arch + ) => ISA arch -- ^ Information about the ISA in use -> BlockInfo arch @@ -102,7 +110,7 @@ redirect isa blockInfo (textStart, textEnd) instrumentor mem strat layoutAddr ba -- Also, see Note [PIC Jump Tables] case and [ textStart <= concreteBlockAddress cb , concreteBlockAddress cb < textEnd - , isRelocatableTerminatorType (terminatorType isa mem cb) + , hasRelocatableTerminatorType isa mem cb , not (isIncompleteBlockAddress blockInfo (concreteBlockAddress cb)) , disjoint isa (biOverlap blockInfo) cb ] of @@ -116,7 +124,7 @@ redirect isa blockInfo (textStart, textEnd) instrumentor mem strat layoutAddr ba Nothing -> return $! WithProvenance cb sb Unmodified False -> do - when (not (isRelocatableTerminatorType (terminatorType isa mem cb))) $ do + when (not (hasRelocatableTerminatorType isa mem cb)) $ do RM.recordUnrelocatableTermBlock when (isIncompleteBlockAddress blockInfo (concreteBlockAddress cb)) $ do RM.recordIncompleteBlock @@ -152,11 +160,19 @@ toBlockMapping wps = , let concBlock = withoutProvenance wp ] -isRelocatableTerminatorType :: JumpType arch -> Bool -isRelocatableTerminatorType jt = - case jt of - IndirectJump {} -> False - _ -> True +hasRelocatableTerminatorType :: + (MC.ArchConstraints arch) + => ISA arch + -> MC.Memory (MC.ArchAddrWidth arch) + -> ConcreteBlock arch + -> Bool +hasRelocatableTerminatorType isa mem cb + | isRelocatableTerminatorType (terminatorType isa mem cb) = True + | Some parsedBlock <- concreteDiscoveryBlock cb + , MDS.ParsedLookupTable{} <- MDS.pblockTermStmt parsedBlock = True + | otherwise = False + where isRelocatableTerminatorType IndirectJump{} = False + isRelocatableTerminatorType _ = True {- Note [Redirection] diff --git a/renovate/src/Renovate/Redirect/Symbolize.hs b/renovate/src/Renovate/Redirect/Symbolize.hs index 
2349dea8..51f63e86 100644 --- a/renovate/src/Renovate/Redirect/Symbolize.hs +++ b/renovate/src/Renovate/Redirect/Symbolize.hs @@ -1,6 +1,9 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE TypeApplications #-} + -- | Lift concrete blocks into relocatable symbolic blocks module Renovate.Redirect.Symbolize ( SymbolicAddressAllocator, @@ -12,10 +15,14 @@ module Renovate.Redirect.Symbolize ( import qualified Data.Foldable as F import qualified Data.List.NonEmpty as DLN import qualified Data.Macaw.CFG as MM +import qualified Data.Macaw.Discovery as MC import qualified Data.Map as M -import Data.Maybe ( fromMaybe ) +import Data.Maybe ( fromMaybe, mapMaybe, listToMaybe ) import qualified Data.Traversable as T import Data.Word ( Word64 ) +import qualified Data.Vector as Vec +import Data.Parameterized.Some + import Prelude @@ -85,6 +92,7 @@ symbolizeJumps isa mem symAddrMap (cb, symAddr) = where lookupSymAddr ca = M.lookup ca symAddrMap insns = fmap symbolize (instructionAddresses isa cb) + pblock = concreteDiscoveryBlock cb symbolize (i, addr) = case isaJumpType isa i mem addr of @@ -95,9 +103,20 @@ symbolizeJumps isa mem symAddrMap (cb, symAddr) = let symTarget = lookupSymbolicAddress (addr `addressAddOffset` offset) in isaSymbolizeAddresses isa mem lookupSymAddr addr (Just symTarget) i IndirectJump _ -> - -- We do not know the destination of indirect jumps, so we - -- can't tag them (or rewrite them later) - isaSymbolizeAddresses isa mem lookupSymAddr addr Nothing i + case getParsedJumpTableTarget symAddrMap addr pblock of + -- If the indirect jump is known to be a lookup + -- into a jump table, rewrite according to that. 
+ Just tableInfo -> + maybe + (error $ + "isaSymbolizeLookupJump failed to " + ++ "symbolize lookup jump at "++show addr) + id + (isaSymbolizeLookupJump isa tableInfo) + -- Otherwise we do not know the destination of + -- indirect jumps, so we can't tag them (or + -- rewrite them later) + Nothing -> isaSymbolizeAddresses isa mem lookupSymAddr addr Nothing i DirectCall _ offset -> let symTarget = lookupSymbolicAddress (addr `addressAddOffset` offset) in isaSymbolizeAddresses isa mem lookupSymAddr addr (Just symTarget) i @@ -107,6 +126,87 @@ symbolizeJumps isa mem symAddrMap (cb, symAddr) = lookupSymbolicAddress target = fromMaybe (StableAddress target) (M.lookup target symAddrMap) + +-- | Given a ConcreteAddress and a ParsedBlock from Macaw, +-- check if the given address is both the final instruction +-- in the block and corresponds to a ParsedLookupTable that +-- Macaw discovered, and if so return the reachable +-- addresses. +getParsedJumpTableTarget :: forall arch . + (InstructionConstraints arch, MM.MemWidth (MM.ArchAddrWidth arch)) + => M.Map (ConcreteAddress arch) (SymbolicAddress arch) + -> ConcreteAddress arch + -> Some (MC.ParsedBlock arch) + -> Maybe (SymbolicLookupTableInfo arch) +getParsedJumpTableTarget symAddrMap caddr (Some b) = + case (maybeLastStmtOffset, MC.pblockTermStmt b) of + (Nothing, _) -> + error $ + "Renovate could not locate the last instruction" + ++ " in the Macaw ParsedBlock associated with address " + ++show caddr + ( Just offset + , (MC.ParsedLookupTable + regState + idxAddr + tgtAddrs)) + | Just caddr == concretizeMacawStmtAddr addr0 offset -> + let tgtCAddrs = Vec.fromList + $ mapMaybe concretizeMacawAddr + $ Vec.toList tgtAddrs + in if Vec.length tgtCAddrs == Vec.length tgtAddrs + then Just $ SymbolicLookupTableInfo + { symbolicLookupRegs = regState + , symbolicLookupIdx = idxAddr + , symbolicLookupAddrs = fmap lookupSymAddr tgtCAddrs + } + else error $ + "Renovate could not calculate concrete addresses for all of the " + ++show 
(Vec.length tgtAddrs) + ++" jump table targets associated with " + ++show caddr + ++"\n Macaw Segment Offsets: " + ++show (Vec.toList tgtAddrs) + ++"\n Converted concrete addresses: " + ++show (map (concretizeMacawAddr @arch) $ Vec.toList tgtAddrs) + + (_,_) -> Nothing + where -- | Block starting address + addr0 = MC.pblockAddr b + -- | The last statement offset (if one was found) + maybeLastStmtOffset = listToMaybe + $ mapMaybe (\case + MM.InstructionStart a _ -> Just a + _ -> Nothing) + $ reverse $ MC.pblockStmts b + -- | Convert a ConcreteAddress to a SymbolicAddress (if possible) + lookupSymAddr tgt = fromMaybe (StableAddress tgt) (M.lookup tgt symAddrMap) + -- | Register containing index for + +concretizeMacawAddr :: forall arch . + (MM.MemWidth (MM.ArchAddrWidth arch)) + => MM.MemSegmentOff (MM.ArchAddrWidth arch) + -> Maybe (ConcreteAddress arch) +concretizeMacawAddr x = concreteFromAbsolute <$> mw + where mw :: Maybe (MM.MemWord (MM.ArchAddrWidth arch)) + mw = MM.segoffAsAbsoluteAddr x + + +concretizeMacawStmtAddr :: forall arch . + (MM.MemWidth (MM.ArchAddrWidth arch)) + => MM.MemSegmentOff (MM.ArchAddrWidth arch) + -- ^ The containing block's start address. + -> MM.MemWord (MM.ArchAddrWidth arch) + -- ^ The statement's offset from the block start. + -> Maybe (ConcreteAddress arch) +concretizeMacawStmtAddr y0 yOffset = concreteFromAbsolute <$> MM.asAbsoluteAddr y + where y :: MM.MemAddr (MM.ArchAddrWidth arch) + y = MM.segmentOffAddr segment newOffset + segment :: MM.MemSegment (MM.ArchAddrWidth arch) + segment = MM.segoffSegment y0 + newOffset :: MM.MemWord (MM.ArchAddrWidth arch) + newOffset = (MM.segoffOffset y0) + yOffset + {- Note [Jump Promotion] While we are making addresses symbolic, we also have to promote short jump