PoshCode Archive  Artifact Content

Artifact b5c115ce51de7e65d41a697d390d2e5f18c4c3c2ff229cce7e51b10296a78256:

  • File Split-File.ps1 — part of check-in [440be607cf] at 2018-06-10 13:56:36 on branch trunk — Splits a file into many files based on a regular expression (user: Joel Bennett size: 3362)

# encoding: ascii
# api: powershell
# title: Split-File
# description: Splits a file into many files based on a regular expression
# version: 0.1
# author: Joel Bennett
# license: CC0
# x-poshcode-id: 5522
# x-archived: 2015-01-31T20:29:05
# x-published: 2015-10-17T19:36:00
#
#
#.Synopsis
#   Split a file into many files, based on a regular expression boundary
#.Example
#   Split-File huge.sql -Pattern '^GO$'
#
#   Splits a SQL file into many pieces, with "GO" as the last line in each resulting file
#.Example
#   Split-File -Path .\huge.sql -Pattern "^print 'Processed \d+ total records'$" -Encoding ([Text.Encoding]::Unicode) -Verbose
#
#   Splits the SQL file after each "Processed N total records" line, using Unicode encoding, and reports the number of lines written to each output file on the verbose stream.

[CmdletBinding()]
param(
    # The path to the text file to split
    [Parameter(Mandatory=$true, Position=0)]
    [string]$Path,

    # The encoding to use to read and write files (uses [Text.Encoding]::Default by default)
    # You may want to use [Text.Encoding]::Unicode
    [Text.Encoding]$Encoding = [Text.Encoding]::Default,

    # A Regular Expression pattern to split on. Each line that matches the pattern
    # becomes the last line of an output file.
    # Declared at Position 1 so it can follow the path positionally
    # (Split-File huge.sql '^GO$'); two parameters in the same parameter set must
    # not share a position, and -Path already owns Position 0.
    [Parameter(ParameterSetName="Pattern", Mandatory=$true, Position=1)]
    [string]$Pattern,

    # An optional header to add to every file
    [String]$Header

    # [Parameter(ParameterSetName="LineCount", Mandatory=$true)]
    # [int]$LineCount,
)
# Split the source file into numbered pieces (<name>.1<ext>, <name>.2<ext>, ...)
# written next to it. Every line that matches $Pattern (case-insensitive regex)
# is the LAST line of its piece; whatever follows the final match goes into one
# more piece. Each piece starts with $Header (when supplied) and every line is
# terminated with CRLF.
#
# The file is read exactly once, through a single reader that uses $Encoding, so
# boundary detection and copying always agree on decoding and line numbering.
# A piece is only created once there is a line to put in it, so a source that
# ends on a boundary line (or is empty) does not produce an empty trailing file.

# Resolve to an absolute path: the .NET file APIs below do not honour
# PowerShell's current location.
$Path = Convert-Path $Path
Write-Verbose "Opening Reader for $Path"
$Reader = New-Object IO.StreamReader $Path, $Encoding
$Extension = [IO.Path]::GetExtension($Path)

$FileCount = 0      # number of output files started so far
$LineIndex = 0      # number of source lines copied so far
$LastFile = 0       # value of $LineIndex when the current output file was started
$FileName = $null   # path of the current output file
$Writer = $null     # writer for the current output file; $null between pieces
try {
    if($Pattern) {
        # Compile the boundary expression once instead of once per line.
        $Boundary = New-Object Text.RegularExpressions.Regex $Pattern, ([Text.RegularExpressions.RegexOptions]::IgnoreCase)

        # ReadLine() returns $null only at end of stream; an empty line is "".
        while($null -ne ($Line = $Reader.ReadLine())) {
            if($null -eq $Writer) {
                # First line of a new piece: open the next numbered file.
                $FileCount += 1
                $FileName = [IO.Path]::ChangeExtension($Path, ".${FileCount}${Extension}")
                $Writer = New-Object IO.StreamWriter $FileName, $false, $Encoding
                $LastFile = $LineIndex
                if($Header) { $Writer.Write($Header + "`r`n") }
            }

            $Writer.Write($Line + "`r`n")
            $LineIndex++

            if($Boundary.IsMatch($Line)) {
                # The boundary line has been written; this piece is complete.
                Write-Debug "Closing Writer"
                $Writer.Close()
                $Writer = $null
                Write-Verbose "Wrote $($LineIndex - $LastFile) lines to $(Resolve-Path -LiteralPath $FileName -Relative)"
            }
        }
    }
} finally {
    try {
        # Reached with an open writer either for the tail piece (no boundary at
        # the end of the source) or because an error interrupted the copy.
        if($null -ne $Writer) {
            Write-Debug "Closing Writer"
            $Writer.Close()
            Write-Verbose "Wrote $($LineIndex - $LastFile) lines to $(Resolve-Path -LiteralPath $FileName -Relative)"
        }
    } finally {
        Write-Debug "Closing Reader"
        $Reader.Close()
    }
}