# encoding: ascii
# api: powershell
# title: Split-File
# description: Splits a file into many files based on a regular expression
# version: 0.1
# author: Joel Bennett
# license: CC0
# x-poshcode-id: 5522
# x-archived: 2015-01-31T20:29:05
# x-published: 2015-10-17T19:36:00
#
#
#.Synopsis
#   Split a file into many files, based on a regular expression boundary
#.Description
#   Every line matching -Pattern ends the current output file (the matching
#   line is the last line written to it). Output files are created next to the
#   input file and are named <name>.<n><extension>, with <n> counting from 1.
#   Lines after the last match go to one more file; that file is only created
#   when such lines exist. Lines are written with CRLF line endings.
#.Example
#   Split-File huge.sql -Pattern '^GO$'
#
#   Splits a SQL file into many pieces, with "GO" as the last line in each resulting file
#.Example
#   Split-File -Path .\huge.sql -Pattern "^print 'Processed \d+ total records'$" -Encoding ([Text.Encoding]::Unicode) -Verbose
#
#   Splits the sql file based on the "processed 100 total records" line, using unicode encoding, while streaming to verbose output the number of lines in each file.
[CmdletBinding()]
param(
    # The path to the text file to split
    [Parameter(Mandatory=$true, Position=0)]
    [string]$Path,

    # The encoding to use to read and write files (uses [Text.Encoding]::Default by default)
    # You may want to use [Text.Encoding]::Unicode
    [Text.Encoding]$Encoding = [Text.Encoding]::Default,

    # A Regular Expression pattern to split on.
    # Position 1 (not 0) so it does not collide with -Path when both are given positionally.
    [Parameter(ParameterSetName="Pattern", Mandatory=$true, Position=1)]
    [string]$Pattern,

    # An optional header to add to every file
    [String]$Header

    # Not implemented yet:
    # [Parameter(ParameterSetName="LineCount", Mandatory=$true)]
    # [int]$LineCount,
)

# Resolve to a full provider path: the .NET reader/writer below do not use
# PowerShell's current location.
$Path = Convert-Path $Path

Write-Verbose "Opening Reader for $Path"
$Reader = New-Object IO.StreamReader $Path, $Encoding
$Extension = [IO.Path]::GetExtension($Path)
$FileCount = 0   # number of output files created so far
$LineIndex = 0   # number of input lines consumed from $Reader so far

try {
    if($Pattern) {
        # Select-String reports the 1-based line number of every matching line.
        # The reader is advanced up to and including that line for each match.
        # ForEach-Object runs the block in this scope, so the counters persist.
        Select-String -Path $Path -Pattern $Pattern | ForEach-Object {
            $Match = $_
            $FileCount += 1
            $FileName = [IO.Path]::ChangeExtension($Path, ".${FileCount}${Extension}")
            $Writer = New-Object IO.StreamWriter $FileName, $false, $Encoding
            Write-Verbose "Writing $($Match.LineNumber - $LineIndex) lines to $(Resolve-Path $FileName -Relative)"
            try {
                if($Header) {
                    $Writer.Write($Header + "`r`n")
                }
                for(; $LineIndex -lt $Match.LineNumber; $LineIndex++) {
                    $Writer.Write($Reader.ReadLine() + "`r`n")
                }
            } finally {
                # Errors propagate on their own; just make sure the file is closed.
                Write-Debug "Closing Writer"
                $Writer.Close()
            }
        }

        # Catch the tail end of the file. Skip it when nothing is left, so a
        # match on the very last line does not produce an empty extra file.
        if(-not $Reader.EndOfStream) {
            $FileCount += 1
            $FileName = [IO.Path]::ChangeExtension($Path, ".${FileCount}${Extension}")
            $Writer = New-Object IO.StreamWriter $FileName, $false, $Encoding
            Write-Verbose "Writing the rest to $(Resolve-Path $FileName -Relative)"
            $LastFile = $LineIndex
            try {
                # The header goes into every file, including the tail.
                if($Header) {
                    $Writer.Write($Header + "`r`n")
                }
                while(-not $Reader.EndOfStream) {
                    $LineIndex++
                    $Writer.Write($Reader.ReadLine() + "`r`n")
                }
            } finally {
                Write-Verbose "Wrote $($LineIndex -$LastFile) lines to $(Resolve-Path $FileName -Relative)"
                Write-Debug "Closing Writer"
                $Writer.Close()
            }
        }
    }
} finally {
    Write-Debug "Closing Reader"
    $Reader.Close()
}