PoshCode Archive  Artifact [5b065982cc]

Artifact 5b065982cc3be25c44f0a5f7991b945be5b08fc9a6728da380ce4f3c1e36dd1c:

  • File Import-Delimited-2.ps1 — part of check-in [216326bcd0] at 2018-06-10 13:03:13 on branch trunk — Convert and import from delimited text files. (user: Joel Bennett size: 5543)

# encoding: utf-8
# api: powershell
# title: Import-Delimited 2
# description: Convert and import from delimited text files.
# version: 1.0
# type: function
# author: Joel Bennett
# license: CC0
# function: Convert-Delimiter
# x-poshcode-id: 195
# x-derived-from-id: 496
# x-archived: 2009-03-27T18:09:02
#
# Includes two functions: Convert-Delimiter and Import-Delimited
#
################################################################################
## Convert-Delimiter - A function to convert between different delimiters. 
## E.g.: commas to tabs, tabs to spaces, spaces to commas, etc.
################################################################################
## Written primarily as a way of enabling the use of Import-CSV when
## the source file was a columnar text file with data like services.txt:
##         ip              service         port
##         13.13.13.1      http            8000
##         13.13.13.2      https           8001
##         13.13.13.1      irc             6665-6669
## 
## Sample Use:  
##    Get-Content services.txt | Convert-Delimiter " +" "," | Set-Content services.csv
##         would convert the file above into something that could passed to:
##         Import-Csv services.csv
##
##    Get-Content Delimited.csv | Convert-Delimiter "," "`t" | Set-Content Delimited.tab
##         would convert a simple comma-separated-values file to a tab-delimited file
################################################################################
## Version History
## Version 1.0
##    First working version
## Version 2.0
##    Fixed bug in final call to Write-Output
################################################################################
Function Convert-Delimiter([regex]$from,[string]$to) 
{ 
   process
   {  
      ## replace the original delimiter with the new one, wrapping EVERY block in �
      ## if there's quotes around some text with a delimiter, assume it doesn't count
      ## if there are two quotes "" stuck together inside quotes, assume they're an 'escaped' quote
      $_ = $_ -replace "(?:`"((?:(?:[^`"]|`"`"))+)(?:`"$from|`"`$))|(?:((?:.(?!$from))*.)(?:$from|`$))","�`$1`$2�$to" 
      ## clean up the end where there might be duplicates
      $_ = $_ -replace "�(?:$to|�)?`$","�"
      ## normalize quotes so that they're all double "" quotes
      $_ = $_ -replace "`"`"","`"" -replace "`"","`"`"" 
      ## remove the � wrappers if there are no quotes inside them
      $_ = $_ -replace "�((?:[^�`"](?!$to))+)�($to|`$)","`$1`$2"
      ## replace the � with quotes, and explicitly emit the result
      write-output ($_ -replace "�","`"")
   }
}

################################################################################
## Import-Delimited - A replacement function for Import-Csv that can handle other 
## delimiters, and can import text (and collect it together) from the pipeline!!
## Dependends on the Convert-Delimiter function.
################################################################################
## NOTICE that this means you can use this to import multitple CSV files as one:
## Sample Use:
##        ls ..\*.txt | export-csv textfiles.csv
##        ls *.doc | export-csv docs.csv
##        ls C:\Windows\System32\*.hlp | export-csv helpfiles.csv
##
##       $files = ls *.csv | Import-Delimited
## OR
##     Import-Delimited " +" services1.txt 
## OR
##     gc *.txt | Import-Delimited "  +"
################################################################################
## Version History
## Version 1.0
##    First working version
## Version 2.0
##    Filter #TYPE lines
##    Remove dependency on Convert-Delimiter if the files are already CSV
##    Change to use my Template-Pipeline format (removing the nested Import-String function)
## Version 2.1
##    Fix a stupid bug ...
##    Add filtering for lines starting with "--", hopefully that's not a problem for other people...
##    Added Write-DEBUG output for filtered lines...

Function Import-Delimited([regex]$delimiter=",", [string]$PsPath="")
{
    BEGIN {
        if ($PsPath.Length -gt 0) { 
            write-output ($PsPath | &($MyInvocation.InvocationName) $delimiter); 
        } else {
            $script:tmp = [IO.Path]::GetTempFileName()
            write-debug "Using tempfile $($script:tmp)"
        }
    }
    PROCESS {
        if($_ -and $_.Length -gt 0 ) {
            if(Test-Path $_) {
                if($delimiter -eq ",") {
                    Get-Content $_ | Where-Object {if($_.StartsWith("#TYPE") -or $_.StartsWith("--")){ write-debug "SKIPPING: $_"; $false;} else { $true }} | Add-Content $script:tmp
                } else {
                    Get-Content $_ | Convert-Delimiter $delimiter "," | Where-Object { if( $_.StartsWith("--") ) { write-debug "SKIPPING: $_"; $false;} else { $true }} | Add-Content $script:tmp
                }
            }
            else {
                if($delimiter -eq ",") {
                    $_ | Where-Object {-not $_.StartsWith("#TYPE")} | Add-Content $script:tmp
                } else {
                    $_ | Convert-Delimiter $delimiter "," | Add-Content $script:tmp
                }
            }
        }
    }
    END {
        # Need to guard against running this twice when you pass PsPath
        if ($PsPath.Length -eq 0) {
            Write-Output (Import-Delimited $delimiter $script:tmp)
        }
    }
}