# encoding: ascii
# api: powershell
# title: HTML Parse Demo
# description: This is a simple demo showing how to fetch an HTML document and work with it in parsed form. The script gets an HTML table from www.apk.se (listing cheap alcohol in Sweden), parses it, converts each row to an object, and hands the objects off to the pipeline.
# version: 0.1
# type: function
# author: Daniel Srlv
# license: CC0
# function: ConvertFrom-HTMLTable
# x-poshcode-id: 6005
# x-archived: 2016-05-17T11:09:13
# x-published: 2016-09-08T18:12:00
#
#
Function ConvertFrom-HTMLTable {
<#
.SYNOPSIS
Convert HTML tables to PowerShell objects.
.DESCRIPTION
Downloads a web page, walks every <tr> element found in the document body and
emits one PSCustomObject per data row. The text of <th> cells is used as the
property names; the text of <td> cells becomes the property values.

Rows that contain at least one <th> cell are treated as header rows and are not
emitted. When a header row follows a data row (for example, a second table on
the same page) the previously collected headers are discarded, so each table's
rows are named after that table's own headers. Cells that have no usable
header get the fallback name 'Column<N>' (zero-based column index), and
duplicate header names are made unique by appending '_<N>'.
.PARAMETER Uri
Address of the web page to download and parse. Accepts pipeline input, both by
value and by property name.
.EXAMPLE
ConvertFrom-HTMLTable -Uri 'http://www.webpage.com'
.EXAMPLE
'http://www.webpage.com' | ConvertFrom-HTMLTable | Format-Table
.OUTPUTS
PSCustomObject. One object per table row that contains <td> cells.
.Notes
Based on code from 'Daniel Srlv'.
http://poshcode.org/3664

Relies on the ParsedHtml property of the Invoke-WebRequest response. If the
response does not expose a parsed document, an error is written and nothing
is emitted for that Uri.
#>
    [CmdletBinding()]
    [OutputType('System.PSCustomObject')]
    Param (
        [Parameter(Mandatory=$true,
                   Position=0,
                   ValueFromPipeline=$true,
                   ValueFromPipelineByPropertyName=$true)]
        [ValidateNotNullOrEmpty()]
        [uri]$Uri
    )
    Process {
        # Stop on download failure: otherwise $WebResponse would keep the value
        # from the previous pipeline item and that stale page would be parsed again.
        try {
            $WebResponse = Invoke-WebRequest -Uri $Uri -ErrorAction Stop
        }
        catch {
            Write-Error -Message "Failed to download '$Uri': $($_.Exception.Message)"
            return
        }

        $Html = $WebResponse.ParsedHtml
        if ($null -eq $Html -or $null -eq $Html.body) {
            Write-Error -Message "No parsed HTML document is available for '$Uri'."
            return
        }

        $Rows = $Html.body.getElementsByTagName('tr')
        $Headers = New-Object -TypeName 'System.Collections.Generic.List[string]'
        $PreviousRowWasHeader = $false

        foreach ($Row in $Rows) {
            $ColumnID = 0
            $HeaderRow = $false
            $Properties = [ordered]@{}

            foreach ($Cell in $Row.children) {
                if ($Cell.tagName -eq 'th') {
                    # First <th> of a header row that follows a data row:
                    # a new header block starts, so drop the old column names.
                    # Consecutive header rows still accumulate.
                    if (-not $HeaderRow -and -not $PreviousRowWasHeader) {
                        $Headers.Clear()
                    }
                    $Headers.Add([string]$Cell.outerText)
                    $HeaderRow = $true
                }
                elseif ($Cell.tagName -eq 'td') {
                    $Name = $null
                    if ($ColumnID -lt $Headers.Count) {
                        $Name = $Headers[$ColumnID]
                    }
                    # No header for this column (or an empty one): a property
                    # cannot have an empty name, so synthesize one.
                    if ([string]::IsNullOrWhiteSpace($Name)) {
                        $Name = "Column$ColumnID"
                    }
                    # Property names must be unique within one object.
                    $BaseName = $Name
                    while ($Properties.Contains($Name)) {
                        $Name = "${BaseName}_$ColumnID"
                        $BaseName = $Name
                    }
                    $Properties[$Name] = $Cell.outerText
                }
                $ColumnID++
            }

            # Emit data rows only; skip header rows and rows without any cells.
            if (-not $HeaderRow -and $Properties.Count -gt 0) {
                Write-Output -InputObject ([PSCustomObject]$Properties)
            }
            $PreviousRowWasHeader = $HeaderRow
        }
    }
}