• 쇼핑몰
  • 커뮤니티
  • 북마크

vb6.0/vba




[vb6.0/vba] [vb6.0/vba] 서울교차로(newspaper.seoulkcr.com)구인 크롤링

하나를하더라도최선을
2020.07.16 18:26 185 0

본문





서울교차로(newspaper.seoulkcr.com)구인 크롤링을 해보자

지식인 답변내용입니다.


Private Sub program1472_com()
    Dim elem As Object
    Dim oHTML As Object
    Set oHTML = CreateObject("htmlfile")
    Dim URL As String, Cookie As String, T As String, P As Integer
    Cells.Clear
    Do
        P = P + 1
        With CreateObject("WinHttp.WinHttpRequest.5.1")
            .Open "GET", URL
            .SetRequestHeader "Accept""text/html, application/xhtml+xml, */*"
            .SetRequestHeader "Accept-Language""ko-KR"
            .SetRequestHeader "User-Agent""Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
            .SetRequestHeader "Host""newspaper.seoulkcr.com"
            .SetRequestHeader "Connection""Keep-Alive"
            If Len(Cookie) Then .SetRequestHeader "Cookie", Cookie
            .Send
            .WaitForResponse: DoEvents
            T = .ResponseText
            'T = StrConv(.ResponseBody, vbUnicode)
        End With
        oHTML.body.innerHTML = T
        Dim O As Object, i As Integer
        Set O = oHTML.getElementById("container")
        Set elem = getXPathElement("/div/div/ul", O)
        Dim V(2As Variant
        For i = 1 To 20
            Set O = getXPathElement("/li[" & i & "]", elem)
            If O Is Nothing Then GoTo ExitSub
            V(0= (P - 1* 20 + i
            V(1= getXPathElement("/ul/li[1]", O).innerTEXT
            V(2= getXPathElement("/ul/li[2]", O).innerTEXT
            Cells(Rows.Count, "A").End(3)(2).Resize(, 3).Value = V
        Next
    Loop
ExitSub:
    MsgBox "완료!!"
End Sub
 
Public Function getXPathElement(sXPath As String, objElement As ObjectAs Object
    On Error GoTo ErrPass
    Dim sXPathArray() As String
    Dim sNodeName As String, sNodeNameIndex As String
    Dim sRestOfXPath As String
    Dim lNodeIndex As Long, lCount As Long
    sXPathArray = Split(sXPath, "/")
    sNodeNameIndex = sXPathArray(1)
    If Not InStr(sNodeNameIndex, "["> 0 Then
        sNodeName = sNodeNameIndex
        lNodeIndex = 1
    Else
        sXPathArray = Split(sNodeNameIndex, "[")
        sNodeName = sXPathArray(0)
        lNodeIndex = CLng(Left(sXPathArray(1), Len(sXPathArray(1)) - 1))
    End If
    sRestOfXPath = Right(sXPath, Len(sXPath) - (Len(sNodeNameIndex) + 1))
    Set getXPathElement = Nothing
    For lCount = 0 To objElement.ChildNodes().Length - 1
        If UCase(objElement.ChildNodes().Item(lCount).nodeName) = UCase(sNodeName) Then
            If lNodeIndex = 1 Then
                If sRestOfXPath = "" Then
                    Set getXPathElement = objElement.ChildNodes().Item(lCount)
                Else
                    Set getXPathElement = getXPathElement(sRestOfXPath, objElement.ChildNodes().Item(lCount))
                End If
            End If
            lNodeIndex = lNodeIndex - 1
        End If
    Next lCount
ErrPass:
End Function
 


아래와 같은 결과를 뽑아냈다.


image.png


 

무엇보다 크롤링에는 많은 경험과 노하우가 필요한 분야중 하나라고 생각한다.

이와 같은 작업에 앞서 많은 사람들은 간단히 작업이 되는 줄 안다.

아무튼 도움이 되었길 바라면서....

 



댓글목록 0

등록된 댓글이 없습니다.