' Table that receives the scraped rows.
Dim DataTable_Scraping As DataTable = New DataTable()
' Bind the grid to the table so that rows added later are displayed.
DataGridView1.DataSource = DataTable_Scraping
''' <summary>
''' Runs each time WebView21 finishes a navigation: copies the text of every td cell of the
''' target table into DataTable_Scraping, then clicks the link that moves to the next page
''' when such a link exists.
''' </summary>
''' <param name="sender">The control that raised the event (not used).</param>
''' <param name="e">Navigation result; IsSuccess is checked before any script is run.</param>
Private Async Sub WebView2_NavigationCompleted(sender As Object, e As CoreWebView2NavigationCompletedEventArgs) Handles WebView21.NavigationCompleted
    ' Check the load result first: a failed navigation has nothing to scrape.
    If Not e.IsSuccess Then
        Console.WriteLine(e.WebErrorStatus)
        Return
    End If

    ' A bare top-level "return" is a JavaScript syntax error, so the collection logic is
    ' wrapped in an immediately-invoked function whose return value becomes the script result.
    Dim js As New System.Text.StringBuilder
    js.AppendLine("(() => {")
    js.AppendLine("  const hyou = [];")
    js.AppendLine("  const holder = document.getElementsByClassName('Tableが含まれるクラス名')[0];")
    js.AppendLine("  if (!holder) { return hyou; }")
    js.AppendLine("  Array.prototype.forEach.call(holder.getElementsByTagName('tr'), function(TrElem) {")
    js.AppendLine("    const cells = [];")
    js.AppendLine("    Array.prototype.forEach.call(TrElem.getElementsByTagName('td'), function(TdElem) {")
    js.AppendLine("      cells.push(TdElem.textContent);")
    js.AppendLine("    });")
    js.AppendLine("    hyou.push(cells);")
    js.AppendLine("  });")
    js.AppendLine("  return hyou;")
    js.AppendLine("})();")

    ' ExecuteScriptAsync returns the script result as JSON text, not as a DataTable,
    ' so it has to be deserialized and copied into the table row by row.
    Dim json As String = Await WebView21.ExecuteScriptAsync(js.ToString())
    Dim rows = Newtonsoft.Json.JsonConvert.DeserializeObject(Of Object()())(json)
    If rows IsNot Nothing Then
        For Each cells As Object() In rows
            ' Rows without td cells (for example header rows built from th) carry no data.
            If cells Is Nothing OrElse cells.Length = 0 Then Continue For

            ' The table starts without columns; grow it to fit the widest row seen so far.
            While DataTable_Scraping.Columns.Count < cells.Length
                DataTable_Scraping.Columns.Add()
            End While

            DataTable_Scraping.Rows.Add(cells)
        Next
    End If

    ' Click the element that moves to the next page. When getElementsByClassName('****')
    ' yields only one element this is the last page, so nothing is clicked.
    Await WebView21.ExecuteScriptAsync(
        "(() => {" &
        " const pagers = document.getElementsByClassName('****');" &
        " if (pagers.length < 2) { return; }" &
        " const link = pagers[1].getElementsByTagName('a')[0];" &
        " if (link) { link.click(); }" &
        " })();")
End Sub
■No35535に返信(Wanさんの記事)
> スクレ―ピング
「スクレーピング」が
「スクレ―ピング」になっていて
似非日本語感を微妙に覚えるなど(
> WebView2を使って、スクレ―ピングを考えています。
RSS をクロールするだけで要件を満たせるなら、
XDocument だけで簡単に済むのですけれどね…。
ひとまず、ここの掲示板の RSS を拾ってみた例。
Imports System.Xml.Linq
''' <summary>
''' Sample form that downloads the board's RSS 2.0 feed and lists its items in a grid.
''' </summary>
Public Class Form1
    ' Grid showing the feed items; it is created in code when the form loads.
    Private WithEvents dgv As DataGridView

    ''' <summary>
    ''' Creates the grid, loads the RSS document and binds one row per item element.
    ''' </summary>
    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
        ' Read-only grid that fills the whole form.
        dgv = New DataGridView()
        dgv.Dock = DockStyle.Fill
        dgv.ReadOnly = True
        dgv.AllowUserToAddRows = False
        Controls.Add(dgv)

        Dim feed = XDocument.Load("https://dobon.net/cgi-bin/vbbbs/rss.cgi?ver=2.0")

        ' Project every <item> descendant to title / link / pubDate / description.
        Dim entries = From entry In feed...<item>
                      Select entry.<title>.Value,
                             entry.<link>.Value,
                             pubDate = Date.Parse(entry.<pubDate>.Value),
                             entry.<description>.Value

        ' Materialize the query into an array before handing it to the grid.
        Dim rows = entries.ToArray()
        dgv.DataSource = rows
    End Sub
End Class
こちらは、Web ページからスクレイピングする場合。
Imports Microsoft.Web.WebView2.Core
Imports Microsoft.Web.WebView2.WinForms
''' <summary>
''' Sample form that loads the board's topic list in a hidden WebView2, extracts the
''' rows of the topic tables with JavaScript and shows them in a DataGridView.
''' </summary>
Public Class Form1
    ' Hidden browser used only to load the page and run the extraction script.
    Private WithEvents wv As WebView2
    ' DataSet owning the table; its EnforceConstraints flag is toggled while rows are added.
    Private WithEvents ds As DataSet
    ' Table bound to the grid; one row per topic.
    Private WithEvents tbl As DataTable
    ' Grid that displays the scraped topics.
    Private WithEvents dgv As DataGridView

    ''' <summary>
    ''' Builds the table schema and the controls, initializes WebView2 and starts
    ''' navigating to the topic list page.
    ''' </summary>
    Private Async Sub Form1_Load(sender As Object, e As EventArgs) Handles Me.Load
        ds = New DataSet()
        tbl = ds.Tables.Add("dobon")
        wv = New WebView2 With {.Visible = False}
        dgv = New DataGridView() With {.Dock = DockStyle.Fill, .ReadOnly = True, .AllowUserToAddRows = False}
        dgv.DataSource = tbl

        ' Column order matters: the script returns each row as an array whose
        ' elements are assigned to these columns by position.
        tbl.PrimaryKey = New DataColumn() {tbl.Columns.Add("Id", GetType(Integer))}
        tbl.Columns.Add("Solved", GetType(Boolean)).DefaultValue = False
        tbl.Columns.Add("Title")
        tbl.Columns.Add("Category")
        tbl.Columns.Add("FirstAuthor")
        tbl.Columns.Add("FirstPostAt")
        tbl.Columns.Add("LastAuthor")
        tbl.Columns.Add("LastPostAt")

        Controls.AddRange(New Control() {dgv, wv})

        ' CoreWebView2 is only available after initialization has completed.
        Await wv.EnsureCoreWebView2Async()
        'wv.CoreWebView2.Navigate("https://dobon.net/cgi-bin/vbbbs/rss.cgi?ver=2.0")
        wv.CoreWebView2.Navigate("https://dobon.net/cgi-bin/vbbbs/cbbs.cgi?H=F&no=0")
    End Sub

    ''' <summary>
    ''' Runs the extraction script once the page has loaded and appends the rows it
    ''' returns to the table.
    ''' </summary>
    ''' <param name="sender">The control that raised the event (not used).</param>
    ''' <param name="e">Navigation result; nothing is scraped unless IsSuccess is True.</param>
    Private Async Sub wv_NavigationCompleted(sender As Object, e As CoreWebView2NavigationCompletedEventArgs) Handles wv.NavigationCompleted
        ' A failed navigation has no topic list to read.
        If Not e.IsSuccess Then
            Console.WriteLine(e.WebErrorStatus)
            Return
        End If

        ' The script skips the first row of every TABLE.topiclist and returns an array of
        ' arrays; elements 0 to 7 line up with the columns Id, Solved, Title, Category,
        ' FirstAuthor, FirstPostAt, LastAuthor and LastPostAt.
        Dim js = "(()=>{
const table=[];
document.querySelectorAll('TABLE.topiclist').forEach(t=>{
[...t.rows].slice(1).forEach(tr=>{
const cols=[...tr.cells];
const d=[];
d[0]=cols[1].querySelector('small>font').innerText.substr(1)*1;
d[1]=cols[6].innerText.includes('済');
d[2]=cols[1].firstChild.innerText;
d[3]=cols[0].innerText;
d[4]=cols[3].innerText;
d[5]=cols[1].querySelector('small').lastChild.substringData(5,16);
d[6]=cols[4].innerText
d[7]=cols[5].innerText;
table.push(d);
});
});
return table;
})();"

        ' The script result arrives as JSON text.
        Dim result = Await wv.CoreWebView2.ExecuteScriptAsync(js)
        Dim ary = Newtonsoft.Json.JsonConvert.DeserializeObject(Of Object()())(result)

        ' A JSON null (for example when the script failed on an unexpected page layout)
        ' deserializes to Nothing, which Array.ForEach would reject with an exception.
        If ary Is Nothing Then
            Return
        End If

        ' Constraint checking is switched off while the rows are added and switched
        ' back on afterwards.
        ds.EnforceConstraints = False
        Array.ForEach(ary, AddressOf tbl.Rows.Add)
        ds.EnforceConstraints = True
    End Sub
End Class