PoshCode Archive  Artifact [4ead229444]

Artifact 4ead229444a5377dcf07617741bb997f8b754a675b5f5bed4d78e303cfac3a08:

  • File Get-FileEncoding.ps1 — part of check-in [ffb9b4efc9] at 2018-06-10 13:04:44 on branch trunk — Get-FileEncoding function determines encoding by looking at Byte Order Mark (BOM). (user: Chad Miller size: 2523)

# encoding: ascii
# api: powershell
# title: Get-FileEncoding
# description: Get-FileEncoding function determines encoding by looking at Byte Order Mark (BOM).
# version: 1.0
# type: function
# author: Chad Miller
# license: CC0
# function: Get-FileEncoding
# x-poshcode-id: 2075
# x-derived-from-id: 5724
# x-archived: 2017-05-17T23:55:09
# x-published: 2011-08-16T13:23:00
#
#
function Get-FileEncoding {
<#
.SYNOPSIS
Gets file encoding.
.DESCRIPTION
The Get-FileEncoding function determines encoding by looking at Byte Order Mark (BOM).
Only the first four bytes of the file are read. Files without a recognised BOM
(including empty files and files shorter than a BOM) are reported as ASCII.
Based on port of C# code from http://www.west-wind.com/Weblog/posts/197245.aspx
.PARAMETER Path
Path of the file to inspect. Accepts the PSPath property of pipeline objects
(for example the output of Get-ChildItem). If the value names an existing item
literally it is used as a literal path; otherwise it is passed to Get-Content
as a (possibly wildcarded) path.
.OUTPUTS
System.String. One of: ASCII, UTF8, UTF32, Unicode, UTF7.
Both byte orders of UTF-16 are reported as Unicode and both byte orders of
UTF-32 are reported as UTF32.
.EXAMPLE
Get-ChildItem  *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'}
This command gets ps1 files in current directory where encoding is not ASCII
.EXAMPLE
Get-ChildItem  *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'} | foreach {(get-content $_.FullName) | set-content $_.FullName -Encoding ASCII}
Same as previous example but fixes encoding using set-content
.NOTES
Version History
v1.0   - 2010/08/10, Chad Miller - Initial release
v1.1   - 2010/08/16, Jason Archer - Improved pipeline support and added detection of little endian BOMs.
#>
    [CmdletBinding()]
    param (
        [Alias("PSPath")]
        [Parameter(Mandatory = $True, ValueFromPipelineByPropertyName = $True)]
        [String]$Path
    )

    process {
        $Encoding = "ASCII"

        # Read at most the first four bytes; that is enough for every BOM tested below.
        $readArgs = @{ ReadCount = 4; TotalCount = 4 }

        # PowerShell 6+ removed '-Encoding Byte' in favour of the -AsByteStream switch.
        if ($PSVersionTable.PSVersion.Major -ge 6) {
            $readArgs['AsByteStream'] = $True
        } else {
            $readArgs['Encoding'] = 'Byte'
        }

        # A PSPath coming from the pipeline is a literal path, so names containing
        # wildcard characters such as [ ] must not be globbed. Fall back to -Path
        # only when the value does not name an existing item literally, which keeps
        # wildcard arguments working as before.
        if (Test-Path -LiteralPath $Path) {
            $readArgs['LiteralPath'] = $Path
        } else {
            $readArgs['Path'] = $Path
        }

        [Byte[]]$byte = Get-Content @readArgs

        # An empty (or unreadable) file yields no bytes; guard every index with the
        # number of bytes actually read so short files never index out of range.
        $count = 0
        if ($null -ne $byte) {
            $count = $byte.Length
        }

        if ($count -ge 3 -and $byte[0] -eq 0xEF -and $byte[1] -eq 0xBB -and $byte[2] -eq 0xBF) {
            $Encoding = "UTF8"
        } elseif ($count -ge 4 -and $byte[0] -eq 0 -and $byte[1] -eq 0 -and $byte[2] -eq 0xFE -and $byte[3] -eq 0xFF) {
            ## UTF-32 Big-Endian
            $Encoding = "UTF32"
        } elseif ($count -ge 4 -and $byte[0] -eq 0xFF -and $byte[1] -eq 0xFE -and $byte[2] -eq 0 -and $byte[3] -eq 0) {
            ## UTF-32 Little-Endian (must be tested before UTF-16 LE, which shares the FF FE prefix)
            $Encoding = "UTF32"
        } elseif ($count -ge 2 -and $byte[0] -eq 0xFE -and $byte[1] -eq 0xFF) {
            ## 1201 UTF-16 Big-Endian
            $Encoding = "Unicode"
        } elseif ($count -ge 2 -and $byte[0] -eq 0xFF -and $byte[1] -eq 0xFE) {
            ## 1200 UTF-16 Little-Endian
            $Encoding = "Unicode"
        } elseif ($count -ge 3 -and $byte[0] -eq 0x2B -and $byte[1] -eq 0x2F -and $byte[2] -eq 0x76) {
            $Encoding = "UTF7"
        }

        $Encoding
    }
}