PoshCode Archive  Artifact [1b5cce6442]

Artifact 1b5cce6442fbed540d210c153601e353c392e76c2f70bdd6f71c840835957ec8:

  • File HTML-Parse.ps1 — part of check-in [03dbed6faf] at 2018-06-10 14:13:26 on branch trunk — This is a fairly meaningless demo showing how to get and work with an HTML document in parsed form. The actual script gets an HTML table from www.apk.se (listing cheap alcohol in Sweden), parses and converts each row to an object, and hands that off to the pipeline. (user: Daniel Srlv size: 1046)

# encoding: ascii
# api: powershell
# title: HTML Parse
# description: This is a fairly meaningless demo showing how to get and work with an HTML document in parsed form. The actual script gets an HTML table from www.apk.se (listing cheap alcohol in Sweden), parses and converts each row to an object, and hands that off to the pipeline.
# version: 0.1
# author: Daniel Srlv
# license: CC0
# x-poshcode-id: 6337
# x-archived: 2016-05-19T19:59:19
# x-published: 2016-05-10T04:17:00
#
#
# Fetch the page and walk every <tr> element in its parsed DOM.
# Rows made up only of <th> cells define the column names; every later row of
# <td> cells is written to the pipeline as one object with a NoteProperty per
# cell, named after the header at the same position.
$page = Invoke-WebRequest "http://www.apk.se"
$html = $page.parsedHTML
if ($null -eq $html -or $null -eq $html.body)
{
	# Without a parsed document there is nothing to walk. Fail with a clear
	# message instead of a "method on a null-valued expression" error below.
	# (ParsedHtml is a Windows PowerShell feature; PowerShell 6+ does not provide it.)
	throw "No parsed HTML document is available for http://www.apk.se (ParsedHtml requires Windows PowerShell)."
}

$products = $html.body.getElementsByTagName("TR")
$headers = @()
foreach($product in $products)
{
	$colID = 0
	$rowHeaders = @()	# header texts found in this row, in cell order
	$cellCount = 0		# number of <td> cells found in this row
	$returnObject = New-Object Object
	foreach($child in $product.children)
	{
		if ($child.tagName -eq "TH")
		{
			$rowHeaders += @($child.outerText)
		}
		elseif ($child.tagName -eq "TD")
		{
			# Look up the header for this column. Fall back to a generated name
			# when the header is missing, empty, or already used on this object,
			# because Add-Member would otherwise fail and the value would be lost.
			$name = $null
			if ($colID -lt $headers.Count) { $name = $headers[$colID] }
			if ([string]::IsNullOrEmpty($name) -or $null -ne $returnObject.PSObject.Properties[$name])
			{
				$name = "Column$colID"
				while ($null -ne $returnObject.PSObject.Properties[$name]) { $name += "_" }
			}

			$returnObject | Add-Member NoteProperty $name $child.outerText
			$cellCount++
		}
		$colID++
	}

	if ($rowHeaders.Count -gt 0)
	{
		# A pure header row starts a new header set, so a second table on the
		# page is not labelled with the first table's column names.
		# Rows that mix <th> and <td> are skipped, as before, without touching
		# the current headers.
		if ($cellCount -eq 0) { $headers = $rowHeaders }
		continue
	}

	# Skip rows that contain no data cells rather than emitting empty objects.
	if ($cellCount -gt 0) { $returnObject }
}