PoshCode Archive  Artifact [49bde099f5]

Artifact 49bde099f585f861d85a6f283507d9ed91a1cf9ede70a535c374e8d5de6003fc:

  • File HTML-Parse-Demo.ps1 — part of check-in [c501d971bf] at 2018-06-10 14:05:56 on branch trunk — This is a small demo showing how to fetch an HTML document and work with it in parsed form. The script gets an HTML table from www.apk.se (listing cheap alcohol in Sweden), parses it, converts each row to an object and hands that off to the pipeline. (user: Daniel Srlv size: 2264)

# encoding: ascii
# api: powershell
# title: HTML Parse Demo
# description: This is a small demo showing how to fetch an HTML document and work with it in parsed form. The script gets an HTML table from www.apk.se (listing cheap alcohol in Sweden), parses it, converts each row to an object and hands that off to the pipeline.
# version: 0.1
# type: function
# author: Daniel Srlv
# license: CC0
# function: ConvertFrom-HTMLTable
# x-poshcode-id: 6005
# x-archived: 2016-05-17T11:09:13
# x-published: 2016-09-08T18:12:00
#
#
Function ConvertFrom-HTMLTable {
    <#
        .SYNOPSIS
            Convert HTML tables to PowerShell objects

        .DESCRIPTION
            Downloads the page at the given URI, walks every <tr> element found in the
            parsed document body and emits one PSCustomObject per data row.

            A row containing at least one <th> cell is treated as a header row: it
            replaces the current set of column names and is not emitted itself. Each
            <td> cell of a following row becomes a property named after the header in
            the same column position. Columns without a usable header are named
            'Column<N>' (1-based), and repeated header names get a numeric suffix so
            that every cell of the row is kept.

            The function relies on the ParsedHtml property of the Invoke-WebRequest
            response. If that property is not populated, an error is written and
            nothing is emitted for that URI.

        .PARAMETER Uri
            Address of the web page whose table rows should be converted. Accepts
            pipeline input, both by value and by property name.

        .EXAMPLE
            ConvertFrom-HTMLTable -Uri 'www.webpage.com'

        .EXAMPLE
            'www.webpage.com', 'www.otherpage.com' | ConvertFrom-HTMLTable

        .OUTPUTS
            PSCustomObject - one per table row that contains <td> cells and no <th> cells.

        .Notes
            Based on code from 'Daniel Srlv'.
            http://poshcode.org/3664
    #>

    [CmdletBinding()]
    [OutputType([System.Management.Automation.PSCustomObject])]
    Param (
        [Parameter(Mandatory=$true,
                   Position=0,
                   ValueFromPipeline=$true,
                   ValueFromPipelineByPropertyName=$true)]
        [ValidateNotNullOrEmpty()]
        [uri]$Uri
    )

    Process {
        # Clear the previous iteration's response first: function variables survive
        # between pipeline items, so a failed request must not leave the last page
        # behind to be parsed (and emitted) a second time.
        $WebResponse = $null
        $WebResponse = Invoke-WebRequest -Uri $Uri
        if ($null -eq $WebResponse) {
            # Invoke-WebRequest has already reported why the request failed.
            return
        }

        $Html = $WebResponse.ParsedHtml
        if ($null -eq $Html -or $null -eq $Html.body) {
            Write-Error -Message "No parsed HTML document is available for '$Uri'; the ParsedHtml property of the web response is empty."
            return
        }

        # Column index -> header text for the most recent header row.
        $Headers = @{}

        foreach ($Element in $Html.body.getElementsByTagName('tr')) {
            $ColumnID = 0
            $HeaderRow = $false
            # Ordered so the emitted properties keep the column order of the table.
            $Cells = [ordered]@{}

            foreach ($Child in $Element.children) {
                if ($Child.tagName -eq 'th') {
                    if (-not $HeaderRow) {
                        # First header cell of a new header row: drop the names of any
                        # earlier table so they cannot leak into this one.
                        $Headers = @{}
                        $HeaderRow = $true
                    }
                    # Interpolation turns a missing outerText into an empty string.
                    $Headers[$ColumnID] = "$($Child.outerText)".Trim()
                }
                elseif ($Child.tagName -eq 'td') {
                    $Name = $Headers[$ColumnID]
                    if ([string]::IsNullOrEmpty($Name)) {
                        # No header for this position (headerless table or extra cell).
                        $Name = "Column$($ColumnID + 1)"
                    }

                    # Property names must be unique within one object.
                    $UniqueName = $Name
                    $Suffix = 2
                    while ($Cells.Contains($UniqueName)) {
                        $UniqueName = "$Name$Suffix"
                        $Suffix++
                    }

                    $Cells[$UniqueName] = $Child.outerText
                }
                $ColumnID++
            }

            # Header rows only define column names; rows without any cell carry no data.
            if (-not $HeaderRow -and $Cells.Count -gt 0) {
                Write-Output -InputObject ([PSCustomObject]$Cells)
            }
        }
    }
}