PoshCode Archive  Artifact [e8c8749afd]

Artifact e8c8749afd5cb4812d9c7ba786d157126fb91c3c02ea0021240849f942b32375:

  • File Import-Delimited.ps1 — part of check-in [c349628477] at 2018-06-10 12:58:12 on branch trunk — Convert and import from delimited text files. Includes two functions: Convert-Delimiter and Import-Delimited. (user: Joel Bennett size: 6346)

# encoding: utf-8
# api: powershell
# title: Import-Delimited
# description: Convert and import from delimited text files. Includes two functions: Convert-Delimiter and Import-Delimited.
# version: 2.4
# type: function
# author: Joel Bennett
# license: CC0
# function: Convert-Delimiter
# x-poshcode-id: 1528
# x-derived-from-id: 6063
# x-archived: 2016-10-18T10:42:45
# x-published: 2010-12-14T08:29:00
#
# This version doesn’t quote empty columns in Convert-Delimiter
#
################################################################################
## Convert-Delimiter - A function to convert between different delimiters. 
## E.g.: commas to tabs, tabs to spaces, spaces to commas, etc.
################################################################################
## Written primarily as a way of enabling the use of Import-CSV when
## the source file was a columnar text file with data like services.txt:
##         ip              service         port
##         13.13.13.1      http            8000
##         13.13.13.2      https           8001
##         13.13.13.1      irc             6665-6669
## 
## Sample Use:  
##    Get-Content services.txt | Convert-Delimiter " +" "," | Set-Content services.csv
##         would convert the file above into something that could passed to:
##         Import-Csv services.csv
##
##    Get-Content Delimited.csv | Convert-Delimiter "," "`t" | Set-Content Delimited.tab
##         would convert a simple comma-separated-values file to a tab-delimited file
################################################################################
## Version History
## Version 1.0
##	  First working version
## Version 2.0
##    Fixed the quoting so it adds quotes in case they're neeeded
## Version 2.1
##    Remove quotes which aren't needed
## Version 2.2
##    Trim spaces off the ends, they confuse things
## Version 2.3
##    Allow empty columns: as in: there,are,six,"comma, delimited",,columns
## Version 2.4
##    Allow QUOTED empty columns: as in: there,are,"",seven,"comma, delimited",,columns
##    Also, if not -QuoteEmpty, then fix empty columns so they aren't quoted
Function Convert-Delimiter([regex]$from,[string]$to,[switch]$quoteEmpty) 
{
   begin
   {
      ## The Magic Delimiter Þ (something nobody ever uses in text)
      $z = [char](222)
   }
   process
   {
      $_ = $_.Trim()
      ## We want to clean out the empty strings:
      $_ = $_ -replace "(^|$from)`"`"($from|`$)","`$1`$2"
      
      ## replace the original delimiter with the new one, wrapping EVERY block in Þ
      ## if there's quotes around some text with a delimiter, assume it doesn't count
      ## if there are two quotes "" stuck together inside quotes, assume they're an 'escaped' quote
      $_ = $_ -replace "(?:`"((?:(?:[^`"]|`"`"))+)(?:`"$from|`"`$))|(?:$from)|(?:((?:.(?!$from))*.)(?:$from|`$))","$z`$1`$2$z$to"
      ## clean up the end where there might be duplicates
      $_ = $_ -replace "$z(?:$to|$z)?`$","$z"
      ## normalize quotes so that they're all double "" quotes
      $_ = $_ -replace "`"`"","`"" -replace "`"","`"`"" 
      ## remove the Þ wrappers if there are no quotes inside them
      $_ = $_ -replace "$z((?:[^$z`"](?!$to))+)$z($to|`$)","`$1`$2"
      ## (un)quote the empty fields:
      if(!$quoteEmpty) {
         $_ = $_ -replace "$to$z$z$to","$to$to"
      }
      ## replace the Þ with quotes, and explicitly emit the result
      $_ = $_ -replace "$z","`"" -replace "$z","`""
      
      $_
   }
}

################################################################################
## Import-Delimited - A replacement function for Import-Csv that can handle other 
## delimiters, and can import text (and collect it together) from the pipeline!!
## Dependends on the Convert-Delimiter function.
################################################################################
## NOTICE that this means you can use this to import multitple CSV files as one:
## Sample Use:
##        ls ..\*.txt | export-csv textfiles.csv
##        ls *.doc | export-csv docs.csv
##        ls C:\Windows\System32\*.hlp | export-csv helpfiles.csv
##
##       $files = ls *.csv | Import-Delimited
## OR
##     Import-Delimited " +" services1.txt 
## OR
##     gc *.txt | Import-Delimited "  +"
################################################################################
## Version History
## Version 1.0
##    First working version
## Version 2.0
##    Filter #TYPE lines
##    Remove dependency on Convert-Delimiter if the files are already CSV
##    Change to use my Template-Pipeline format (removing the nested Import-String function)
## Version 2.1
##    Fix a stupid bug ...
##    Add filtering for lines starting with "--", hopefully that's not a problem for other people...
##    Added Write-DEBUG output for filtered lines...

Function Import-Delimited([regex]$delimiter=",", [string]$PsPath="")
{
    BEGIN {
        if ($PsPath.Length -gt 0) { 
            write-output ($PsPath | &($MyInvocation.InvocationName) $delimiter); 
        } else {
            $script:tmp = [IO.Path]::GetTempFileName()
            write-debug "Using tempfile $($script:tmp)"
        }
    }
    PROCESS {
        if($_ -and $_.Length -gt 0 ) {
            if(Test-Path $_) {
                if($delimiter -eq ",") {
                    Get-Content $_ | Where-Object {if($_.StartsWith("#TYPE") -or $_.StartsWith("--")){ write-debug "SKIPPING: $_"; $false;} else { $true }} | Add-Content $script:tmp
                } else {
                    Get-Content $_ | Convert-Delimiter $delimiter "," | Where-Object { if( $_.StartsWith("--") ) { write-debug "SKIPPING: $_"; $false;} else { $true }} | Add-Content $script:tmp
                }
            } 
            else {
                if($delimiter -eq ",") {
                    $_ | Where-Object {-not $_.StartsWith("#TYPE")} | Add-Content $script:tmp
                } else {
                    $_ | Convert-Delimiter $delimiter "," | Add-Content $script:tmp
                }
            }
        }
    }
    END {
        # Need to guard against running this twice when you pass PsPath
        if ($PsPath.Length -eq 0) {
            Import-Csv $script:tmp
        }
    }
}