Add Support for Unpacking from Streams.

This commit is contained in:
Alexandre Normand
2014-11-07 22:33:33 -08:00
parent 120f91a729
commit fbd3af6890
3 changed files with 141 additions and 43 deletions

View File

@@ -11,3 +11,19 @@ This Go library allows you to easily unpack the following files:
No CGO is involved, and there are no hard dependencies of any kind.
## Usage
Unpack a file:
```
file, err := os.Open(test.filepath)
ok(t, err)
defer file.Close()
destPath, err := unzipit.Unpack(file, tempDir)
```
Unpack a stream (such as the body of an http.Response):
```
res, err := http.Get(url)
destPath, err := unzipit.UnpackStream(res.Body, tempDir)
```

View File

@@ -6,6 +6,7 @@ package unzipit
import (
"archive/tar"
"archive/zip"
"bufio"
"bytes"
"compress/bzip2"
"compress/gzip"
@@ -28,14 +29,19 @@ var (
// Check whether a file has the magic number for tar, gzip, bzip2 or zip files
//
// Note that this function does not advance the Reader.
//
// 50 4b 03 04 for pkzip format
// 1f 8b for .gz
// 42 5a for bzip
// 75 73 74 61 72 at offset 257 for tar files
func magicNumber(reader io.ReaderAt, offset int64) (string, error) {
magic := make([]byte, 5, 5)
func magicNumber(reader *bufio.Reader, offset int) (string, error) {
headerBytes, err := reader.Peek(offset + 5)
if err != nil {
return "", err
}
reader.ReadAt(magic, offset)
magic := headerBytes[offset : offset+5]
if bytes.Equal(magicTAR, magic) {
return "tar", nil
@@ -81,46 +87,50 @@ func Unpack(file *os.File, destPath string) (string, error) {
// Makes sure destPath exists
os.MkdirAll(destPath, 0740)
// Makes sure file cursor is at index 0
_, err = file.Seek(0, 0)
r := bufio.NewReader(file)
return UnpackStream(r, destPath)
}
// UnpackStream unpacks a compressed stream. Note that if the stream uses ZIP
// compression (and only in that case), it is buffered in its entirety in
// memory prior to decompression.
func UnpackStream(reader io.Reader, destPath string) (string, error) {
r := bufio.NewReader(reader)
// Reads magic number from the stream so we can better determine how to proceed
ftype, err := magicNumber(r, 0)
if err != nil {
return "", err
}
// Reads magic number from file so we can better determine how to proceed
ftype, err := magicNumber(file, 0)
if err != nil {
return "", err
}
data := bytes.NewBuffer(nil)
var decompressingReader *bufio.Reader
switch ftype {
case "gzip":
data, err = Gunzip(file)
decompressingReader, err = GunzipStream(r)
if err != nil {
return "", err
}
case "bzip":
data, err = Bunzip2(file)
decompressingReader, err = Bunzip2Stream(r)
if err != nil {
return "", err
}
case "zip":
// Like TAR, ZIP is also an archiving format, therefore we can just return
// after it finishes
return Unzip(file, destPath)
return UnzipStream(r, destPath)
default:
io.Copy(data, file)
decompressingReader = r
}
// Check the magic number at offset 257 to see if this is also a TAR file
ftype, err = magicNumber(bytes.NewReader(data.Bytes()), 257)
ftype, err = magicNumber(decompressingReader, 257)
if ftype == "tar" {
return Untar(data, destPath)
return Untar(decompressingReader, destPath)
}
// If it's not a TAR archive then save it to disk as is.
destRawFile := filepath.Join(destPath, sanitize(path.Base(file.Name())))
destRawFile := filepath.Join(destPath, sanitize(path.Base("tarstream")))
// Creates destination file
destFile, err := os.Create(destRawFile)
@@ -130,48 +140,86 @@ func Unpack(file *os.File, destPath string) (string, error) {
defer destFile.Close()
// Copies data to destination file
if _, err := io.Copy(destFile, data); err != nil {
if _, err := io.Copy(destFile, decompressingReader); err != nil {
return "", err
}
return destPath, nil
}
// Decompresses a bzip2 data stream and returns the decompressed stream
func Bunzip2(file *os.File) (*bytes.Buffer, error) {
data := bzip2.NewReader(file)
buffer := bytes.NewBuffer(nil)
io.Copy(buffer, data)
return buffer, nil
}
// Decompresses a gzip data stream and returns the decompressed stream
func Gunzip(file *os.File) (*bytes.Buffer, error) {
data, err := gzip.NewReader(file)
if err != nil && err != io.EOF {
// Decompresses a bzip2 file and returns the decompressed stream
func Bunzip2(file *os.File) (*bufio.Reader, error) {
freader := bufio.NewReader(file)
bzip2Reader, err := Bunzip2Stream(freader)
if err != nil {
return nil, err
}
buffer := bytes.NewBuffer(nil)
io.Copy(buffer, data)
return bufio.NewReader(bzip2Reader), nil
}
return buffer, nil
// Bunzip2Stream wraps reader in a bzip2 decompressor and returns a buffered
// reader over the decompressed bytes.
//
// Neither wrapper reads from the source at construction time, so malformed
// input is reported by a later read on the returned reader; the error
// returned here is always nil.
func Bunzip2Stream(reader io.Reader) (*bufio.Reader, error) {
	decompressed := bzip2.NewReader(reader)
	buffered := bufio.NewReader(decompressed)
	return buffered, nil
}
// Gunzip decompresses the gzip data read from file and returns a buffered
// reader over the decompressed bytes. It returns a nil reader and the error
// if GunzipStream rejects the input.
//
// The work is delegated to GunzipStream. The file is not wrapped in a
// bufio.Reader first because gzip.NewReader buffers any source that is not
// an io.ByteReader on its own, and the result is not wrapped again because
// GunzipStream already returns a *bufio.Reader.
func Gunzip(file *os.File) (*bufio.Reader, error) {
	return GunzipStream(file)
}
// GunzipStream wraps reader in a gzip decompressor and returns a buffered
// reader over the decompressed bytes.
//
// gzip.NewReader parses the gzip header immediately, so input that does not
// begin with a valid header (including an empty stream) is rejected here:
// the returned reader is nil and the error is passed through unchanged.
func GunzipStream(reader io.Reader) (*bufio.Reader, error) {
	decompressingReader, err := gzip.NewReader(reader)
	if err != nil {
		return nil, err
	}
	return bufio.NewReader(decompressingReader), nil
}
// Decompresses and unarchives a ZIP archive, returning the final path or an error
func Unzip(file *os.File, destPath string) (string, error) {
// Open a zip archive for reading.
r, err := zip.OpenReader(file.Name())
fstat, err := file.Stat()
if err != nil {
return "", err
}
defer r.Close()
zr, err := zip.NewReader(file, fstat.Size())
if err != nil {
return "", err
}
return unpackZip(zr, destPath)
}
// UnzipStream extracts a ZIP archive read from r into destPath.
//
// zip.NewReader needs random access to the archive and its total size,
// which a plain stream cannot provide, so the whole stream is drained into
// memory before the archive is opened. On success the result of unpackZip
// is returned; if reading the stream or opening the archive fails, the
// result is "" and that error.
func UnzipStream(r io.Reader, destPath string) (string, error) {
	var archive bytes.Buffer
	if _, err := io.Copy(&archive, r); err != nil {
		return "", err
	}

	raw := archive.Bytes()
	zr, err := zip.NewReader(bytes.NewReader(raw), int64(len(raw)))
	if err != nil {
		return "", err
	}

	return unpackZip(zr, destPath)
}
func unpackZip(zr *zip.Reader, destPath string) (string, error) {
// Iterate through the files in the archive,
// printing some of their contents.
for _, f := range r.File {
for _, f := range zr.File {
rc, err := f.Open()
if err != nil {
return "", err

View File

@@ -5,6 +5,7 @@ package unzipit
import (
"archive/tar"
"bufio"
"bytes"
"fmt"
"io/ioutil"
@@ -76,10 +77,43 @@ func TestUnpack(t *testing.T) {
}
}
// TestUnpackStream runs every fixture through UnpackStream as a buffered
// stream and checks the number of entries that end up in the destination
// directory.
func TestUnpackStream(t *testing.T) {
	var tests = []struct {
		filepath string
		files    int
	}{
		{"./fixtures/test.tar.bzip2", 2},
		{"./fixtures/test.tar.gz", 2},
		{"./fixtures/test.zip", 2},
		{"./fixtures/test.tar", 2},
		{"./fixtures/cfgdrv.iso", 1},
		{"./fixtures/test2.tar.gz", 4},
	}

	for _, test := range tests {
		// Each case runs in its own function so the deferred cleanup fires at
		// the end of the iteration instead of accumulating temp directories
		// and open file handles until the whole test returns.
		func(fixture string, want int) {
			tempDir, err := ioutil.TempDir(os.TempDir(), "unpackit-tests-"+path.Base(fixture)+"-")
			ok(t, err)
			defer os.RemoveAll(tempDir)

			file, err := os.Open(fixture)
			ok(t, err)
			defer file.Close()

			destPath, err := UnpackStream(bufio.NewReader(file), tempDir)
			ok(t, err)

			finfo, err := ioutil.ReadDir(destPath)
			ok(t, err)

			length := len(finfo)
			assert(t, length == want, fmt.Sprintf("%d != %d for %s", length, want, destPath))
		}(test.filepath, test.files)
	}
}
func TestMagicNumber(t *testing.T) {
var tests = []struct {
filepath string
offset int64
offset int
ftype string
}{
{"./fixtures/test.tar.bzip2", 0, "bzip"},
@@ -92,7 +126,7 @@ func TestMagicNumber(t *testing.T) {
file, err := os.Open(test.filepath)
ok(t, err)
ftype, err := magicNumber(file, test.offset)
ftype, err := magicNumber(bufio.NewReader(file), test.offset)
file.Close()
ok(t, err)