■No35535に返信(Wanさんの記事)
> スクレ―ピング
「スクレーピング」が
「スクレ―ピング」になっていて
似非日本語感を微妙に覚えるなど(
> WebView2を使って、スクレ―ピングを考えています。
RSS をクロールするだけで要件を満たせるなら、
XDocument だけで簡単に済むのですけれどね…。
ひとまず、ここの掲示板の RSS を拾ってみた例。
Imports System.Xml.Linq
''' <summary>
''' Sample form that loads the bulletin board's RSS 2.0 feed with
''' <c>XDocument</c> and shows one grid row per feed item.
''' </summary>
Public Class Form1
    ' Read-only grid that displays the feed items; created in Form1_Load.
    Private WithEvents dgv As DataGridView

    ''' <summary>
    ''' Creates the grid, downloads the feed and binds the projected items
    ''' (title, link, pubDate, description) to the grid.
    ''' </summary>
    ''' <param name="sender">Event source (the form).</param>
    ''' <param name="e">Unused event data.</param>
    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
        dgv = New DataGridView() With {.Dock = DockStyle.Fill, .ReadOnly = True, .AllowUserToAddRows = False}
        Controls.Add(dgv)

        ' XDocument.Load(uri) fetches and parses the document synchronously.
        Dim doc = XDocument.Load("https://dobon.net/cgi-bin/vbbbs/rss.cgi?ver=2.0")

        ' pubDate is machine-readable text (RSS 2.0 specifies RFC 822 dates), so it is
        ' parsed with the invariant culture rather than the current UI culture.
        ' The other members get their names inferred from the XML element names.
        Dim items = From item In doc...<item>
                    Select item.<title>.Value,
                           item.<link>.Value,
                           pubDate = Date.Parse(item.<pubDate>.Value, System.Globalization.CultureInfo.InvariantCulture),
                           item.<description>.Value

        ' Materialize the query once; the grid binds to the resulting array.
        dgv.DataSource = items.ToArray()
    End Sub
End Class
こちらは、Web ページからスクレイピングする場合。
Imports Microsoft.Web.WebView2.Core
Imports Microsoft.Web.WebView2.WinForms
''' <summary>
''' Sample form that loads the bulletin board's topic list page in a hidden
''' WebView2 control, extracts the topic rows with an injected script and
''' shows them in a grid bound to a <c>DataTable</c>.
''' </summary>
Public Class Form1
    ' Hidden browser control used only to load the page and run the script.
    Private WithEvents wv As WebView2
    ' DataSet that owns tbl; its EnforceConstraints flag is toggled while loading rows.
    Private WithEvents ds As DataSet
    ' Table "dobon" holding one row per scraped topic; keyed by Id.
    Private WithEvents tbl As DataTable
    ' Read-only grid bound to tbl.
    Private WithEvents dgv As DataGridView

    ''' <summary>
    ''' Builds the table schema and the controls, initializes WebView2 and
    ''' starts navigation to the topic list page.
    ''' </summary>
    ''' <param name="sender">Event source (the form).</param>
    ''' <param name="e">Unused event data.</param>
    Private Async Sub Form1_Load(sender As Object, e As EventArgs) Handles Me.Load
        ds = New DataSet()
        tbl = ds.Tables.Add("dobon")
        wv = New WebView2 With {.Visible = False}
        dgv = New DataGridView() With {.Dock = DockStyle.Fill, .ReadOnly = True, .AllowUserToAddRows = False}
        dgv.DataSource = tbl

        ' Column order must match the index order of the arrays produced by the
        ' script in wv_NavigationCompleted (d[0] .. d[7]).
        tbl.PrimaryKey = New DataColumn() {tbl.Columns.Add("Id", GetType(Integer))}
        tbl.Columns.Add("Solved", GetType(Boolean)).DefaultValue = False
        tbl.Columns.Add("Title")
        tbl.Columns.Add("Category")
        tbl.Columns.Add("FirstAuthor")
        tbl.Columns.Add("FirstPostAt")
        tbl.Columns.Add("LastAuthor")
        tbl.Columns.Add("LastPostAt")

        Controls.AddRange(New Control() {dgv, wv})

        ' CoreWebView2 is only available after initialization has completed.
        Await wv.EnsureCoreWebView2Async()
        'wv.CoreWebView2.Navigate("https://dobon.net/cgi-bin/vbbbs/rss.cgi?ver=2.0")
        wv.CoreWebView2.Navigate("https://dobon.net/cgi-bin/vbbbs/cbbs.cgi?H=F&no=0")
    End Sub

    ''' <summary>
    ''' Runs the extraction script against the loaded page and appends the
    ''' returned rows to the table.
    ''' </summary>
    ''' <param name="sender">Event source (the WebView2 control).</param>
    ''' <param name="e">Navigation result; nothing is scraped when it reports failure.</param>
    Private Async Sub wv_NavigationCompleted(sender As Object, e As CoreWebView2NavigationCompletedEventArgs) Handles wv.NavigationCompleted
        ' A failed navigation leaves an error page in the control; there is nothing to scrape.
        If Not e.IsSuccess Then Return

        ' The script returns an array of 8-element arrays, one per data row of every
        ' TABLE.topiclist (the first row of each table is skipped by slice(1)).
        ' The literal's continuation lines are left unindented so the script text is unchanged.
        Dim js = "(()=>{
const table=[];
document.querySelectorAll('TABLE.topiclist').forEach(t=>{
[...t.rows].slice(1).forEach(tr=>{
const cols=[...tr.cells];
const d=[];
d[0]=cols[1].querySelector('small>font').innerText.substr(1)*1;
d[1]=cols[6].innerText.includes('済');
d[2]=cols[1].firstChild.innerText;
d[3]=cols[0].innerText;
d[4]=cols[3].innerText;
d[5]=cols[1].querySelector('small').lastChild.substringData(5,16);
d[6]=cols[4].innerText
d[7]=cols[5].innerText;
table.push(d);
});
});
return table;
})();"

        ' ExecuteScriptAsync returns the script's result encoded as JSON text.
        Dim result = Await wv.CoreWebView2.ExecuteScriptAsync(js)
        Dim ary = Newtonsoft.Json.JsonConvert.DeserializeObject(Of Object()())(result)

        ' The JSON text is "null" when the script threw or produced no value (for
        ' example when the page layout does not match the selectors); deserializing
        ' that yields Nothing, so bail out instead of failing on a null array.
        If ary Is Nothing Then Return

        ' Constraints are switched off while rows are added and re-validated afterwards.
        ds.EnforceConstraints = False
        For Each rowValues In ary
            tbl.Rows.Add(rowValues)
        Next
        ds.EnforceConstraints = True
    End Sub
End Class