PoshCode Archive  Artifact [248e0a4457]

Artifact 248e0a4457ddeab57d1ad0fedf335c937312c2ed8cc9f1ce5fe0aedc69985067:

  • File ConvertFrom-Html.ps1 — part of check-in [1a8d70a38a] at 2018-06-10 13:46:45 on branch trunk — A simplistic way to parse an HTML table into objects (user: Joel Bennett size: 1735)

# encoding: ascii
# api: powershell
# title: ConvertFrom-Html
# description: A simplistic way to parse an HTML table into objects
# version: 0.1
# type: function
# author: Joel Bennett
# license: CC0
# function: ConvertFrom-Html
# x-poshcode-id: 4850
# x-archived: 2017-05-22T02:32:22
# x-published: 2014-01-29T16:19:00
#
#
function ConvertFrom-Html {
   #.Synopsis
   #   Convert a table from an HTML document to a PSObject
   #.Description
   #   Concatenates all HTML text received (via -html or the pipeline), isolates
   #   a <table> element with a regular expression, parses it as XML and emits
   #   one object per data row. The first row supplies the property names: its
   #   <th> cells, or its <td> cells when it has no <th> cells.
   #   The table markup must be well-formed XML, and only <tr> elements that are
   #   direct children of <table> are read. If the text contains several tables,
   #   the greedy pattern keeps the last one.
   #.Example
   #   Get-ChildItem | Where { !$_.PSIsContainer } | ConvertTo-Html | ConvertFrom-Html -TypeName Deserialized.System.IO.FileInfo
   #   Demonstrates round-tripping files through HTML
   param(
      # The HTML content
      [Parameter(ValueFromPipeline=$true)]
      [string]$html,

      # A TypeName to inject to PSTypeNames 
      [string]$TypeName
   )
   # Start empty: process{} runs once even for a non-pipeline call, so seeding
   # the buffer with $html here would store the document twice.
   begin { $content = "" }
   process { $content += "$html" }
   end {
      # Strip everything outside the table and drop the attributes of the
      # <table> tag so that the remainder can be cast to XML.
      [xml]$table = $content -replace '(?s).*<table[^>]*>(.*)</table>.*','<table>$1</table>'

      # The XML adapter returns a scalar when there is a single <tr>; @() makes
      # it an array in every case (and there is no fixed cap on the row count).
      $rows = @($table.table.tr)
      if ($rows.Count -eq 0 -or $null -eq $rows[0]) {
         Write-Error "ConvertFrom-Html: no <table> with <tr> rows was found in the input."
         return
      }

      # Header detection is the same for every row, so do it once.
      $headerRow = $rows[0]
      $h = "th"
      if(!$headerRow.th) {
         $h = "td"
      }

      # Simple cells come back from the adapter as strings, cells with
      # attributes or child elements as XmlElement; normalize both to text.
      # @() keeps a single header cell from being indexed character by character.
      $names = @(
         foreach($cell in @($headerRow.$h)) {
            if($cell -is [string]) {
               $cell
            } elseif($null -ne $cell) {
               $cell.InnerText
            }
         }
      )

      for($r = 1; $r -lt $rows.Count; $r++) {
         $cells = @($rows[$r].td)

         # An ordered dictionary keeps the properties in column order.
         $item = New-Object System.Collections.Specialized.OrderedDictionary
         for($i = 0; $i -lt $names.Count; $i++) {
            # Rows shorter than the header get $null for the missing cells.
            $value = $null
            if($i -lt $cells.Count) {
               $value = $cells[$i]
            }
            $item[$names[$i]] = $value
         }
         Write-Verbose ($item | Out-String)
         $object = New-Object PSCustomObject -Property $item
         if($TypeName) {
            $object.PSTypeNames.Insert(0,$TypeName)
         }
         Write-Output $object
      }
   }
}