Files
lancet/fileutil/file.go
2024-01-24 11:54:42 +08:00

810 lines
17 KiB
Go

// Copyright 2021 dudaodong@gmail.com. All rights reserved.
// Use of this source code is governed by MIT license.
// Package fileutil implements some basic functions for file operations.
package fileutil
import (
"archive/zip"
"bufio"
"bytes"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"encoding/csv"
"errors"
"fmt"
"io"
"io/fs"
"net/http"
"os"
"path/filepath"
"runtime"
"sort"
"strings"
"github.com/duke-git/lancet/v2/validator"
)
// FileReader is a buffered file reader that supports offset seeking and
// reading one line at a time, which is especially useful for large files.
type FileReader struct {
// Reader is the embedded buffered reader layered over file; its methods are
// promoted onto FileReader.
*bufio.Reader
// file is the underlying handle opened by NewFileReader.
file *os.File
// offset is the byte position last set by Seek plus the bytes consumed by
// ReadLine since then.
offset int64
}
// NewFileReader opens the file at path for reading and returns a FileReader
// positioned at offset 0. The caller should call Close when done.
// It returns the error from os.Open if the file cannot be opened.
func NewFileReader(path string) (*FileReader, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}

	// offset is left at its zero value: reading starts at the beginning.
	reader := &FileReader{
		Reader: bufio.NewReader(file),
		file:   file,
	}
	return reader, nil
}
// ReadLine reads the next line and returns it without any trailing '\r' or
// '\n' characters. The offset advances by the number of raw bytes consumed,
// line terminator included.
//
// When the end of the file is reached the remaining text (possibly empty) is
// returned together with io.EOF. Any other read error yields an empty string.
func (f *FileReader) ReadLine() (string, error) {
	raw, err := f.Reader.ReadBytes('\n')
	f.offset += int64(len(raw))

	if err != nil && err != io.EOF {
		return "", err
	}

	// Drop every trailing carriage return / line feed.
	end := len(raw)
	for end > 0 && (raw[end-1] == '\n' || raw[end-1] == '\r') {
		end--
	}
	return string(raw[:end]), err
}
// Offset returns the reader's current byte offset: the position last set by
// Seek plus the number of bytes consumed by ReadLine since then.
func (f *FileReader) Offset() int64 {
return f.offset
}
// Seek moves the read position to offset, measured from the start of the
// file. On failure the reader is left unchanged and the error is returned.
func (f *FileReader) Seek(offset int64) error {
	if _, err := f.file.Seek(offset, 0); err != nil {
		return err
	}

	// Anything still buffered belongs to the old position, so start over with
	// a fresh buffered reader on the repositioned file.
	f.offset = offset
	f.Reader = bufio.NewReader(f.file)
	return nil
}
// Close closes the underlying file and returns the error, if any, reported
// by the file's Close method.
func (f *FileReader) Close() error {
return f.file.Close()
}
// IsExist reports whether a file or directory exists at path.
// It returns true only when os.Stat succeeds; a "not exist" error and any
// other stat failure (for example a permission error) both yield false.
// Play: https://go.dev/play/p/nKKXt8ZQbmh
func IsExist(path string) bool {
	switch _, err := os.Stat(path); {
	case err == nil:
		return true
	case errors.Is(err, os.ErrNotExist):
		return false
	default:
		// Existence could not be confirmed; report false as well.
		return false
	}
}
// CreateFile creates (or truncates) the file at path and reports whether
// the operation succeeded. The file handle is closed before returning.
// Play: https://go.dev/play/p/lDt8PEsTNKI
func CreateFile(path string) bool {
	created, err := os.Create(path)
	if err != nil {
		return false
	}
	created.Close()
	return true
}
// CreateDir creates the directory absPath, along with any missing parents,
// using os.ModePerm permissions. Param `absPath` is like /a/, /a/b/.
// It returns the error from os.MkdirAll, if any.
// Play: https://go.dev/play/p/qUuCe1OGQnM
func CreateDir(absPath string) error {
// The path itself is created (not its parent directory, as an earlier
// version did with path.Dir(absPath)).
return os.MkdirAll(absPath, os.ModePerm)
}
// IsDir reports whether path exists and is a directory.
// Any os.Stat failure (including a missing path) yields false.
// Play: https://go.dev/play/p/WkVwEKqtOWk
func IsDir(path string) bool {
	info, err := os.Stat(path)
	return err == nil && info.IsDir()
}
// RemoveFile removes the file (or empty directory) at path.
// It returns the error from os.Remove, if any.
// Play: https://go.dev/play/p/P2y0XW8a1SH
func RemoveFile(path string) error {
return os.Remove(path)
}
// CopyFile copies the content of the file at srcPath to dstPath. The
// destination is created if missing and truncated if it already exists.
//
// It returns an error if the source cannot be opened, the destination cannot
// be created, the copy fails, or the destination cannot be closed cleanly
// (a failed close can mean the data never reached the disk).
// Play: https://go.dev/play/p/Jg9AMJMLrJi
func CopyFile(srcPath string, dstPath string) error {
	src, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	defer src.Close()

	dst, err := os.Create(dstPath)
	if err != nil {
		return err
	}

	if _, err := io.Copy(dst, src); err != nil {
		dst.Close()
		return err
	}
	// Report the close error instead of dropping it in a defer.
	return dst.Close()
}
// ClearFile empties the file at path by opening it write-only with
// truncation. The file must already exist; it is not created.
// Play: https://go.dev/play/p/NRZ0ZT-G94H
func ClearFile(path string) error {
	file, openErr := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0777)
	if openErr != nil {
		return openErr
	}
	defer file.Close()

	// The truncation already happened on open; the empty write is kept so
	// that a write-side failure is still reported.
	if _, writeErr := file.WriteString(""); writeErr != nil {
		return writeErr
	}
	return nil
}
// ReadFileToString reads the whole file at path and returns its content as
// a string. On failure it returns an empty string and the read error.
// Play: https://go.dev/play/p/cmfwp_5SQTp
func ReadFileToString(path string) (string, error) {
	content, err := os.ReadFile(path)
	if err == nil {
		return string(content), nil
	}
	return "", err
}
// ReadFileByLine reads the file at path and returns its lines in order,
// without the line terminator ("\n" or "\r\n"). A final line that lacks a
// terminator is included; an empty file yields an empty, non-nil slice.
//
// Lines of any length are returned as a single element. A read error other
// than io.EOF aborts the read and is returned with a nil slice.
// Play: https://go.dev/play/p/svJP_7ZrBrD
func ReadFileByLine(path string) ([]string, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	result := make([]string, 0)
	reader := bufio.NewReader(f)

	for {
		// ReadString grows as needed, so long lines are never split the way
		// bufio.Reader.ReadLine splits them at the buffer size.
		line, err := reader.ReadString('\n')
		if len(line) > 0 {
			if strings.HasSuffix(line, "\n") {
				line = strings.TrimSuffix(line[:len(line)-1], "\r")
			}
			result = append(result, line)
		}
		if err == io.EOF {
			return result, nil
		}
		if err != nil {
			// Retrying a failed read can loop forever (e.g. path is a
			// directory), so surface the error instead.
			return nil, err
		}
	}
}
// ListFileNames returns the names of the non-directory entries directly
// inside path (it does not recurse). A path that does not exist yields an
// empty slice and no error; a directory read failure yields an empty slice
// and the error.
// Play: https://go.dev/play/p/Tjd7Y07rejl
func ListFileNames(path string) ([]string, error) {
	names := []string{}
	if !IsExist(path) {
		return names, nil
	}

	entries, err := os.ReadDir(path)
	if err != nil {
		return []string{}, err
	}

	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		names = append(names, entry.Name())
	}
	return names, nil
}
// IsZipFile reports whether the file starts with the zip local-file-header
// signature "PK\x03\x04". Files that cannot be opened or that are shorter
// than four bytes yield false.
// Play: https://go.dev/play/p/9M0g2j_uF_e
func IsZipFile(filepath string) bool {
	file, err := os.Open(filepath)
	if err != nil {
		return false
	}
	defer file.Close()

	var magic [4]byte
	n, err := file.Read(magic[:])
	if err != nil || n < len(magic) {
		return false
	}
	return bytes.Equal(magic[:], []byte("PK\x03\x04"))
}
// Zip create zip file, fpath could be a single file or a directory.
// Play: https://go.dev/play/p/j-3sWBp8ik_P
func Zip(path string, destPath string) error {
if IsDir(path) {
return zipFolder(path, destPath)
}
return zipFile(path, destPath)
}
// zipFile creates (or truncates) destPath and writes filePath into it as a
// zip archive via addFileToArchive1.
//
// It returns the first error from creating the destination, adding the
// content, or finalizing the archive. The finalization error matters: the
// zip central directory is only written when the writer is closed.
func zipFile(filePath string, destPath string) error {
	out, err := os.Create(destPath)
	if err != nil {
		return err
	}
	defer out.Close()

	archive := zip.NewWriter(out)
	if err := addFileToArchive1(filePath, archive); err != nil {
		// Best-effort finalization; the add error is the one worth reporting.
		archive.Close()
		return err
	}
	return archive.Close()
}
// zipFolder creates (or truncates) destPath and archives the content of
// folderPath into it via addFileToArchive2, with entry names relative to
// the folder itself. The archive is only finalized when adding succeeds.
func zipFolder(folderPath string, destPath string) error {
	out, err := os.Create(destPath)
	if err != nil {
		return err
	}
	defer out.Close()

	archive := zip.NewWriter(out)
	if err := addFileToArchive2(archive, folderPath, ""); err != nil {
		return err
	}
	return archive.Close()
}
// addFileToArchive1 walks fpath (a single file or a directory tree) and adds
// every non-directory it finds to archive using Deflate compression.
//
// Entry names are relative to the parent directory of fpath, so the
// top-level file or folder name is kept inside the archive, and always use
// forward slashes as the zip format requires. Directories get no entry of
// their own, so empty directories are not recorded.
//
// The first error from walking, opening or writing aborts the walk.
func addFileToArchive1(fpath string, archive *zip.Writer) error {
	baseDir := filepath.Dir(fpath)

	return filepath.Walk(fpath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}

		header, err := zip.FileInfoHeader(info)
		if err != nil {
			return err
		}

		// Compute the name with filepath.Rel instead of trimming a
		// hard-coded "/" prefix, which breaks on other separators and when
		// the parent directory is the filesystem root.
		rel, err := filepath.Rel(baseDir, path)
		if err != nil {
			return err
		}
		header.Name = filepath.ToSlash(rel)
		header.Method = zip.Deflate

		writer, err := archive.CreateHeader(header)
		if err != nil {
			return err
		}

		file, err := os.Open(path)
		if err != nil {
			return err
		}
		// The defer runs when this callback returns, i.e. once per file.
		defer file.Close()

		_, err = io.Copy(writer, file)
		return err
	})
}
// addFileToArchive2 recursively adds the content of the directory basePath
// to w. baseInZip is the slash-terminated prefix (or "") prepended to every
// entry name, so entries are named relative to the directory the recursion
// started from. Directories get no entry of their own.
//
// File content is streamed into the archive rather than loaded into memory.
// The first error from any level of the recursion is returned.
func addFileToArchive2(w *zip.Writer, basePath, baseInZip string) error {
	entries, err := os.ReadDir(basePath)
	if err != nil {
		return err
	}

	for _, entry := range entries {
		fsPath := filepath.Join(basePath, entry.Name())
		// Zip entry names always use forward slashes, whatever the OS.
		zipName := baseInZip + entry.Name()

		if entry.IsDir() {
			// Propagate failures from nested directories instead of
			// silently producing an incomplete archive.
			if err := addFileToArchive2(w, fsPath, zipName+"/"); err != nil {
				return err
			}
			continue
		}

		// Open the source first so an unreadable file leaves no empty entry.
		src, err := os.Open(fsPath)
		if err != nil {
			return err
		}
		dst, err := w.Create(zipName)
		if err != nil {
			src.Close()
			return err
		}
		_, err = io.Copy(dst, src)
		src.Close()
		if err != nil {
			return err
		}
	}
	return nil
}
// UnZip extracts the archive zipFile into destPath. Directory entries are
// created with os.ModePerm; file entries are created (or truncated) with the
// mode recorded in the archive, and missing parent directories are created.
//
// Entry names that would resolve outside destPath are rejected (ZipSlip,
// issue#62). Extraction stops at the first error, which is returned; entries
// extracted before the failure are left in place.
// Play: https://go.dev/play/p/g0w34kS7B8m
func UnZip(zipFile string, destPath string) error {
	zipReader, err := zip.OpenReader(zipFile)
	if err != nil {
		return err
	}
	defer zipReader.Close()

	// extract writes one file entry to target. Keeping it in its own
	// function closes both handles per entry, instead of holding every
	// extracted file open until UnZip returns.
	extract := func(f *zip.File, target string) error {
		if err := os.MkdirAll(filepath.Dir(target), os.ModePerm); err != nil {
			return err
		}

		in, err := f.Open()
		if err != nil {
			return err
		}
		defer in.Close()

		out, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
		if err != nil {
			return err
		}
		if _, err := io.Copy(out, in); err != nil {
			out.Close()
			return err
		}
		// A failed close can mean the data was not flushed; report it.
		return out.Close()
	}

	for _, f := range zipReader.File {
		target, err := safeFilepathJoin(destPath, f.Name)
		if err != nil {
			return err
		}

		if f.FileInfo().IsDir() {
			if err := os.MkdirAll(target, os.ModePerm); err != nil {
				return err
			}
			continue
		}

		if err := extract(f, target); err != nil {
			return err
		}
	}
	return nil
}
// ZipAppendEntry appends a single file or directory, given by fpath, to the
// existing zip file destPath.
//
// The archive is rebuilt in a temporary file: every existing entry is copied
// over, the new content is added with addFileToArchive1, and the result is
// copied back over destPath. The temporary file is removed on return, and
// all handles are released on every path, including errors.
// Play: https://go.dev/play/p/cxvaT8TRNQp
func ZipAppendEntry(fpath string, destPath string) error {
	tempFile, err := os.CreateTemp("", "temp.zip")
	if err != nil {
		return err
	}
	// Deferred calls run last-in first-out: close first, then remove. On the
	// success path the file is already closed and the second Close is a
	// harmless no-op whose error is ignored.
	defer os.Remove(tempFile.Name())
	defer tempFile.Close()

	zipReader, err := zip.OpenReader(destPath)
	if err != nil {
		return err
	}
	defer zipReader.Close()

	archive := zip.NewWriter(tempFile)

	// copyEntry re-adds one existing entry under its original name and
	// closes the entry reader before returning.
	copyEntry := func(item *zip.File) error {
		src, err := item.Open()
		if err != nil {
			return err
		}
		defer src.Close()

		header, err := zip.FileInfoHeader(item.FileInfo())
		if err != nil {
			return err
		}
		header.Name = item.Name

		dst, err := archive.CreateHeader(header)
		if err != nil {
			return err
		}
		_, err = io.Copy(dst, src)
		return err
	}

	for _, item := range zipReader.File {
		if err := copyEntry(item); err != nil {
			return err
		}
	}

	if err := addFileToArchive1(fpath, archive); err != nil {
		return err
	}

	// Everything must be closed and flushed before the rebuilt archive is
	// copied over the original.
	if err := zipReader.Close(); err != nil {
		return err
	}
	if err := archive.Close(); err != nil {
		return err
	}
	if err := tempFile.Close(); err != nil {
		return err
	}

	return CopyFile(tempFile.Name(), destPath)
}
// safeFilepathJoin joins path2 onto path1 while guaranteeing that the result
// stays inside path1. It guards archive extraction against ZipSlip.
//
// path2 is cleaned and made relative to "."; it is rejected with an error if
// that fails (for example an absolute path) or if the cleaned path climbs
// out through "..". Names that merely begin with two dots, such as
// "..hidden/file", are allowed. An empty path1 is treated as ".".
func safeFilepathJoin(path1, path2 string) (string, error) {
	relPath, err := filepath.Rel(".", path2)
	if err != nil {
		return "", fmt.Errorf("(zipslip) filepath is unsafe %q: %v", path2, err)
	}
	// After cleaning, ".." can only appear as leading elements, so checking
	// the first element is enough. Compare whole elements: a plain "..""
	// prefix test would also reject names like "..hidden".
	if relPath == ".." || strings.HasPrefix(relPath, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf("(zipslip) filepath is unsafe %q: path escapes the destination directory", path2)
	}

	if path1 == "" {
		path1 = "."
	}
	// Rooting relPath at "/" first strips any remaining upward references
	// before the result is attached to path1.
	return filepath.Join(path1, filepath.Join("/", relPath)), nil
}
// IsLink reports whether path itself is a symbolic link. The link is not
// followed. A path that cannot be inspected yields false.
// Play: https://go.dev/play/p/TL-b-Kzvf44
func IsLink(path string) bool {
	info, err := os.Lstat(path)
	return err == nil && info.Mode()&os.ModeSymlink != 0
}
// FileMode returns the mode and permission bits of path. Symbolic links are
// not followed, so a link reports its own mode. On failure it returns 0 and
// the error from os.Lstat.
// Play: https://go.dev/play/p/2l2hI42fA3p
func FileMode(path string) (fs.FileMode, error) {
	info, err := os.Lstat(path)
	if err == nil {
		return info.Mode(), nil
	}
	return 0, err
}
// MiMeType returns the MIME type of a file, detected from at most its first
// 512 bytes with http.DetectContentType.
//
// Param `file` should be a string (file path) or an *os.File. A file opened
// from a path is closed before returning; a caller-supplied *os.File is left
// open, with its read position advanced by the bytes sniffed.
//
// An empty string is returned for any other argument type, when the file
// cannot be opened, or when nothing can be read (including an empty file).
// Play: https://go.dev/play/p/bd5sevSUZNu
func MiMeType(file any) string {
	detect := func(f *os.File) string {
		buffer := make([]byte, 512)
		n, err := f.Read(buffer)
		if err != nil {
			return ""
		}
		// Sniff only what was read: the zero padding of a short file would
		// otherwise make text look like binary data.
		return http.DetectContentType(buffer[:n])
	}

	switch v := file.(type) {
	case string:
		f, err := os.Open(v)
		if err != nil {
			return ""
		}
		defer f.Close()
		return detect(f)
	case *os.File:
		return detect(v)
	default:
		return ""
	}
}
// CurrentPath returns the directory of the source file that calls it, as
// reported by runtime.Caller. It returns an empty string when the caller
// information is unavailable.
// Play: https://go.dev/play/p/s74a9iBGcSw
func CurrentPath() string {
	// Skip one frame so the path belongs to the caller, not to this function.
	_, callerFile, _, ok := runtime.Caller(1)
	if !ok {
		return ""
	}
	return filepath.Dir(callerFile)
}
// FileSize returns the size in bytes of the file at path, as reported by
// os.Stat. On failure it returns 0 and the stat error.
// Play: https://go.dev/play/p/H9Z05uD-Jjc
func FileSize(path string) (int64, error) {
	info, statErr := os.Stat(path)
	if statErr != nil {
		return 0, statErr
	}
	size := info.Size()
	return size, nil
}
// DirSize walks the folder at path recursively and returns the combined
// size in bytes of every non-directory entry. If the walk fails, the error
// is returned together with the size accumulated up to that point.
func DirSize(path string) (int64, error) {
	var total int64

	walkErr := filepath.WalkDir(path, func(_ string, entry os.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if entry.IsDir() {
			return nil
		}

		info, err := entry.Info()
		if err != nil {
			return err
		}
		total += info.Size()
		return nil
	})

	return total, walkErr
}
// MTime returns the modification time of the file as Unix seconds.
// On failure it returns 0 and the error from os.Stat.
// Play: https://go.dev/play/p/s_Tl7lZoAaY
func MTime(filepath string) (int64, error) {
	info, err := os.Stat(filepath)
	if err == nil {
		return info.ModTime().Unix(), nil
	}
	return 0, err
}
// Sha returns the hex-encoded SHA digest of the file's content.
// Param `shaType` selects the algorithm and should be 1, 256 or 512; SHA-1
// is used when it is omitted. Only the first value is considered.
//
// It returns an error if the file cannot be opened or read, or if shaType
// is not one of the supported values.
// Play: https://go.dev/play/p/VfEEcO2MJYf
func Sha(filepath string, shaType ...int) (string, error) {
	file, err := os.Open(filepath)
	if err != nil {
		return "", err
	}
	defer file.Close()

	hasher := sha1.New()
	if len(shaType) > 0 {
		switch shaType[0] {
		case 1:
			// SHA-1 is already selected.
		case 256:
			hasher = sha256.New()
		case 512:
			hasher = sha512.New()
		default:
			return "", errors.New("param `shaType` should be 1, 256 or 512")
		}
	}

	if _, err := io.Copy(hasher, file); err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", hasher.Sum(nil)), nil
}
// ReadCsvFile reads the whole csv file and returns its records, one slice of
// fields per row. An optional delimiter overrides the default comma; only
// the first value is used. On failure it returns nil and the error.
// Play: https://go.dev/play/p/OExTkhGEd3_u
func ReadCsvFile(filepath string, delimiter ...rune) ([][]string, error) {
	file, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	csvReader := csv.NewReader(file)
	if len(delimiter) != 0 {
		csvReader.Comma = delimiter[0]
	}

	rows, readErr := csvReader.ReadAll()
	if readErr != nil {
		return nil, readErr
	}
	return rows, nil
}
// WriteCsvFile writes records to the csv file at filepath, creating the
// file if it does not exist.
//
// append: when true the records are appended to the existing content;
// when false the file is truncated first, so no stale data remains.
// delimiter: optional csv delimiter (default comma); only the first value
// is used.
//
// Fields containing the delimiter, a double quote or a line break are
// quoted by the csv writer, so the file reads back to the same values with
// ReadCsvFile. The records passed in are not modified.
//
// It returns the first error from opening, writing or closing the file.
// Play: https://go.dev/play/p/dAXm58Q5U1o
func WriteCsvFile(filepath string, records [][]string, append bool, delimiter ...rune) error {
	flag := os.O_RDWR | os.O_CREATE
	if append {
		flag |= os.O_APPEND
	} else {
		// Without truncation, overwriting a longer file would leave the old
		// tail behind the new content.
		flag |= os.O_TRUNC
	}

	f, err := os.OpenFile(filepath, flag, 0644)
	if err != nil {
		return err
	}

	writer := csv.NewWriter(f)
	if len(delimiter) > 0 {
		writer.Comma = delimiter[0]
	}

	// csv.Writer quotes and escapes fields itself; escaping them beforehand
	// would double-escape the data. WriteAll also flushes the writer.
	if err := writer.WriteAll(records); err != nil {
		f.Close()
		return err
	}
	return f.Close()
}
// WriteStringToFile writes content to the file at filepath, creating it if
// needed. When append is true the content is added at the end of the file;
// otherwise the file is truncated first.
// Play: https://go.dev/play/p/GhLS6d8lH_g
func WriteStringToFile(filepath string, content string, append bool) error {
	mode := os.O_RDWR | os.O_CREATE
	if append {
		mode |= os.O_APPEND
	} else {
		mode |= os.O_TRUNC
	}

	file, err := os.OpenFile(filepath, mode, 0644)
	if err != nil {
		return err
	}
	defer file.Close()

	if _, err = file.WriteString(content); err != nil {
		return err
	}
	return nil
}
// WriteBytesToFile writes content to the file at filepath, creating the
// file if needed and replacing any previous content.
// Play: https://go.dev/play/p/s7QlDxMj3P8
func WriteBytesToFile(filepath string, content []byte) error {
	const flags = os.O_WRONLY | os.O_CREATE | os.O_TRUNC

	file, err := os.OpenFile(filepath, flags, 0644)
	if err != nil {
		return err
	}
	defer file.Close()

	if _, err = file.Write(content); err != nil {
		return err
	}
	return nil
}
// ReadFile returns a reader for the content behind path, which may be a url
// or a local file, together with a function that closes the reader.
//
// A path accepted by validator.IsUrl is fetched with http.Get and the
// response body is returned; otherwise an existing local path is opened.
// On failure the reader is nil, closeFn is a no-op that is safe to call,
// and err is set. A path that is neither reports "unknown file type".
// Play: https://go.dev/play/p/uNep3Tr8fqF
func ReadFile(path string) (reader io.ReadCloser, closeFn func(), err error) {
	noop := func() {}

	if validator.IsUrl(path) {
		resp, getErr := http.Get(path)
		if getErr != nil {
			return nil, noop, getErr
		}
		return resp.Body, func() { resp.Body.Close() }, nil
	}

	if IsExist(path) {
		file, openErr := os.Open(path)
		if openErr != nil {
			return nil, noop, openErr
		}
		return file, func() { file.Close() }, nil
	}

	return nil, noop, errors.New("unknown file type")
}
// escapeCSVField escapes a cell for csv output: every double quote is
// doubled, and the field is wrapped in double quotes when it contains the
// delimiter, a double quote or a line feed.
func escapeCSVField(field string, delimiter rune) string {
	// Double every embedded quote first.
	doubled := strings.ReplaceAll(field, "\"", "\"\"")

	// Characters that force the whole field to be quoted.
	special := string(delimiter) + "\"\n"
	if !strings.ContainsAny(doubled, special) {
		return doubled
	}
	return fmt.Sprintf("\"%s\"", doubled)
}
// WriteMapsToCsv writes a slice of maps to a csv file, one row per map.
// Play: todo
//
// filepath: path to the csv file.
// records: maps to be written; every value must be a basic type.
// appendToExistingFile: if true, the rows are appended to the file and no
// header row is written; if false, a header row is written first.
// delimiter: delimiter to use in the csv file.
// headers: optional column order (only the first slice is used); the names
// need to match the map keys. When omitted, the keys of the first record,
// sorted alphabetically, are used as columns.
//
// A key missing from a record is written as fmt's rendering of a nil value.
// With no records and no headers there is nothing to derive columns from,
// so the write is performed with zero rows.
//
// It returns an error if any value has an unsupported type, or if writing
// the file fails.
func WriteMapsToCsv(filepath string, records []map[string]any, appendToExistingFile bool, delimiter rune,
	headers ...[]string) error {
	for _, record := range records {
		for _, value := range record {
			if !isCsvSupportedType(value) {
				return errors.New("unsupported value type detected; only basic types are supported: \nbool, rune, string, int, int64, float32, float64, uint, byte, complex128, complex64, uintptr")
			}
		}
	}

	// Guard the records[0] access below: without explicit headers an empty
	// input has no columns at all.
	if len(headers) == 0 && len(records) == 0 {
		return WriteCsvFile(filepath, [][]string{}, appendToExistingFile, delimiter)
	}

	var columnHeaders []string
	if len(headers) > 0 {
		columnHeaders = headers[0]
	} else {
		columnHeaders = make([]string, 0, len(records[0]))
		for key := range records[0] {
			columnHeaders = append(columnHeaders, key)
		}
		// Map iteration order is random; sort keys for a stable column order.
		sort.Strings(columnHeaders)
	}

	datasToWrite := make([][]string, 0, len(records)+1)
	if !appendToExistingFile {
		datasToWrite = append(datasToWrite, columnHeaders)
	}

	for _, record := range records {
		row := make([]string, 0, len(columnHeaders))
		for _, h := range columnHeaders {
			row = append(row, fmt.Sprintf("%v", record[h]))
		}
		datasToWrite = append(datasToWrite, row)
	}

	return WriteCsvFile(filepath, datasToWrite, appendToExistingFile, delimiter)
}
// isCsvSupportedType reports whether v, a map value about to be written to
// csv, has one of the supported basic types: bool, string, int, int32
// (rune), int64, uint, uint8 (byte), uintptr, float32, float64, complex64
// or complex128. Every other type, including nil, is rejected.
func isCsvSupportedType(v interface{}) bool {
	switch v.(type) {
	case bool, string:
		return true
	case int, int32, int64, uint, uint8, uintptr:
		return true
	case float32, float64, complex64, complex128:
		return true
	}
	return false
}