123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181 |
- #!/usr/local/bin/python3
- """
- Copyright (C) All Rights Reserved
- Written by Wazakindjes
- Website: https://gitgud.malvager.net/Wazakindjes/lg-lbf-extractor
- License: https://gitgud.malvager.net/Wazakindjes/lg-lbf-extractor/raw/master/LICENSE
- """
- import mmap
- import os
- import sys
# --- Debug knobs -----------------------------------------------------------
DEBUG = False  # If true, output additional messages
DEBUG_MAX = 10  # Stop after up to DEBUG_MAX files per archive type (0 to disable)
DEBUG_SKIP = 10  # Skip the first N files per archive type (0 to disable)

# --- Format constants (better not touch these) -----------------------------
MUHVERSION = 'v1.10 b20180805'
MAXZIP = 54  # Upper bound on the number of ZIP header searches performed
ZIPHEAD = b'PK\x03\x04\x14'  # \x50\x4B\x03\x04\x14
ZIPTAIL = b'PK\x05\x06\x00'  # \x50\x4B\x05\x06\x00
MUHEXT = '.tar'  # Extension given to every extracted file
USTARHEAD = b'\x75\x73\x74\x61\x72\x20\x20\x00'  # b'ustar  \x00'
# USTAR should always end with 1024 null bytes (two 512-byte blocks).
# Built in one step rather than by appending a byte at a time in a loop.
USTARTAIL = b'\x00' * 1024
def gibsize(byets):
    """Convert a byte count into a human-readable (value, suffix) pair.

    The value is divided by 1024 until it drops below 1024 or the largest
    known unit is reached, so the returned number always matches the
    returned suffix.

    Args:
        byets: Size in bytes (non-negative number).

    Returns:
        A tuple ``(size, suffix)``. ``size`` is the input unchanged when it
        is below 1024, otherwise a float; ``suffix`` is one of
        'B', 'kiB', 'MiB', 'GiB', 'TiB', 'PiB'.
    """
    units = ('B', 'kiB', 'MiB', 'GiB', 'TiB', 'PiB')
    sizelol = byets
    idx = 0
    # Stop dividing at the last unit; otherwise the value keeps shrinking
    # while the suffix stays put and the result is off by a factor of 1024.
    while sizelol >= 1024 and idx < len(units) - 1:
        sizelol /= 1024
        idx += 1
    return (sizelol, units[idx])
# Show usage when arguments are missing or the first one asks for help.
# The length check comes first so sys.argv[1] is only read when it exists.
if len(sys.argv) < 3 or sys.argv[1].lower() in ('h', 'halp', 'help', '-h', '--halp', '--help'):
    print(f"Usage: {sys.argv[0]} <LGBackup*.lbf file path> <extraction directory>")
    print("\tThis tool will extract concatenated ZIP/USTAR files from the specified .lbf file into <extraction dir>.")
    print(f"\tEvery ZIP/USTAR will be transformed into a zero-padded numbered {MUHEXT} file cuz I feel tar > unzip for broken archives. ;]")
    print(f"\tVersion: {MUHVERSION}")
    sys.exit(0)

lbfpath = sys.argv[1]  # Backup file to read
tdir = sys.argv[2]  # Directory the extracted archives are written to
print(f"** Gonna extract '{lbfpath}' into '{tdir}'")

# Make sure the target directory exists. exist_ok avoids the check-then-create
# race and raises right away if the path exists but is not a directory.
os.makedirs(tdir, exist_ok=True)

maxcount = {'ZIP': 0, 'USTAR': 0}  # Number of slices found, counted per type
maxwidth = 0  # Zero-pad width for output file names, shared by both types
filesize = 0  # Size of the backup file itself
muhpos = []  # [type, start, end] for every slice to extract
print("** But will first count the amount of special file headers, starting with some ZIPs")
# The backup is only ever read, so open it read-only and map it with
# ACCESS_READ (the default mapping is writable and would need "rb+").
# mmap is needed because plain file objects have no find(). Both the file and
# the mapping are closed when the with-block ends.
with open(lbfpath, "rb") as lbf, mmap.mmap(lbf.fileno(), 0, access=mmap.ACCESS_READ) as mm:
    dskip = {'ZIP': 0, 'USTAR': 0}  # Every file type has its own skip counter

    # --- ZIP: slices run from a ZIPHEAD to the next ZIPTAIL after it ---
    p1 = -1  # So the first search starts at offset 0
    for i in range(1, MAXZIP + 1):
        p1 = mm.find(ZIPHEAD, p1 + 1)  # Make sure the next search starts after the 'P'
        if p1 < 0:  # No more headers; don't bother scanning for a tail
            break
        p2 = mm.find(ZIPTAIL, p1 + 1)
        if p2 < 0:  # Header without a tail
            break

        if DEBUG and DEBUG_SKIP > 0 and dskip['ZIP'] < DEBUG_SKIP:  # Maybe we need to skip the first N?
            print(f"\t* DEBUG: Skipping #{i} due to DEBUG_SKIP ({DEBUG_SKIP})")
            dskip['ZIP'] += 1
            continue

        maxcount['ZIP'] += 1
        muhpos.append(['ZIP', p1, p2])  # Store this file slice

        if DEBUG and DEBUG_MAX > 0 and maxcount['ZIP'] >= DEBUG_MAX:  # Maybe we got enough already?
            print(f"\t* DEBUG: Skipping the rest due to DEBUG_MAX ({DEBUG_MAX})")
            break

    print(f"\t- Found {maxcount['ZIP']} (apparent) files")
    print("** USTAR is up next y0")

    # --- USTAR: slices run from one USTARHEAD to the next one ---
    # The end of one slice is the start of the next, so each header only has
    # to be located once. A header with no successor is not recorded.
    p1 = mm.find(USTARHEAD)
    while p1 >= 0:
        p2 = mm.find(USTARHEAD, p1 + 1)
        if p2 < 0:  # No more headers
            break
        start, p1 = p1, p2  # Carry the hit over as the next slice's start

        if DEBUG and DEBUG_SKIP > 0 and dskip['USTAR'] < DEBUG_SKIP:
            dskip['USTAR'] += 1
            print(f"\t* DEBUG: Skipping #{dskip['USTAR']} due to DEBUG_SKIP ({DEBUG_SKIP})")
            continue

        maxcount['USTAR'] += 1
        muhpos.append(['USTAR', start, p2])

        if DEBUG and DEBUG_MAX > 0 and maxcount['USTAR'] >= DEBUG_MAX:
            print(f"\t* DEBUG: Skipping the rest due to DEBUG_MAX ({DEBUG_MAX})")
            break

    print(f"\t- Found {maxcount['USTAR']} (apparent) files")

    # Total size of the backup file
    filesize = mm.size()

if maxcount['ZIP'] <= 0 and maxcount['USTAR'] <= 0:
    print("** No valid headers found at all, bailing out")
    sys.exit(1)
# Get total size in human readable format etc
(f_sizelol, f_suffix) = gibsize(filesize)
print(f"** Beginning extraction of {f_sizelol:.2f} {f_suffix} backup file ({filesize} B)")

# NOTE(review): numbering starts at DEBUG_SKIP + 1 even when DEBUG is False
# (nothing is skipped in that case). Kept as-is so existing output names do
# not change -- confirm whether that is intended.
maxwidth = len(str(DEBUG_SKIP + maxcount['ZIP'] + maxcount['USTAR']))
totalbyets = {'ZIP': 0, 'USTAR': 0, 'grand': 0}  # Bytes written per type and overall
count = DEBUG_SKIP

# Plain read-only handle; the slice offsets were already collected in muhpos,
# so no memory map is needed for the copy.
with open(lbfpath, "rb") as lbf:
    for ext, p1, p2 in muhpos:
        count += 1
        base = str(count).zfill(maxwidth)  # Zero-padded output number
        muhpath = os.path.join(tdir, f"{base}{MUHEXT}")
        slicelen = p2 - p1  # Number of bytes to copy from the backup
        byets = 0  # Total written for this file

        if ext == 'USTAR':
            # p1 points at the USTAR magic, which sits 257 bytes into the
            # header block, so back up to include the (possibly broken)
            # header. Clamp at 0 so a magic found near the start of the file
            # does not turn into a negative seek.
            lbf.seek(max(p1 - 257, 0))
            chunksize = 512  # Copy in tar-block sized chunks
        else:
            lbf.seek(p1)  # Otherwise just go to ZIPHEAD
            chunksize = slicelen  # Read everything at once

        # Gotta open the target file before reading the LBF slice cuz we write in chunks
        print(f"\t- Writing: {base}{MUHEXT}... ", end='', flush=True)
        with open(muhpath, "wb") as datarchive:
            while True:
                # Make sure a slice doesn't get bytes from the next one
                chunksize = min(chunksize, slicelen - byets)
                if chunksize <= 0:  # Everything copied
                    break

                data = lbf.read(chunksize)
                if not data:
                    print("UNEXPECTED ERROR: no data returned from lbf.read()")
                    if DEBUG:
                        print(f"\t\t* DEBUG: p1 = {p1}, p2 = {p2}, chunksize = {chunksize}, byets = {byets}")
                    break

                # Write the chunk to target file; count what was really read
                # so a short read cannot desynchronise the bookkeeping.
                byets += len(data)
                datarchive.write(data)

                # NOTE(review): chunks are at most 512 bytes while USTARTAIL
                # is 1024 bytes, so this end-of-archive check can never match.
                # Left untouched: stopping early on zero blocks could cut off
                # member data that legitimately contains zeros -- confirm.
                if ext == 'USTAR' and data == USTARTAIL:
                    chunksize = 0

            # Gotta make sure our USTAR follows the spec: the file has to end
            # on a 512-byte block boundary.
            if ext == 'USTAR':
                padding = -byets % 512  # Bytes missing up to the next boundary
                if padding > 0:
                    if DEBUG:
                        print(f" [inserting {padding} null bytes] ", end='', flush=True)
                    datarchive.write(b'\x00' * padding)
                    byets += padding

        # Print some size info for this slice if we got any
        if byets > 0:
            totalbyets[ext] += byets
            totalbyets['grand'] += byets
            (sizelol, suffix) = gibsize(byets)
            (t_sizelol, t_suffix) = gibsize(totalbyets[ext])
            print(f" {sizelol:.2f} {suffix}, total {ext} size read so far: {t_sizelol:.2f} {t_suffix}")
            if DEBUG:
                print(f"\t\t* DEBUG: p1 = {p1}, p2 = {p2}, byets = {byets}")

(gt_sizelol, gt_suffix) = gibsize(totalbyets['grand'])
print(f"** Aye we done writing {gt_sizelol:.2f} {gt_suffix} worth of data =]]]")
|