Jump to content
UBot Underground

Having trouble threading (linkedin)


Recommended Posts

Please could somebody help with this, I am trying to scrape linkedin profile data into a table, the way I have threaded this is not right because, the data doesn't match the profile scraped on some rows (profile list attached):

 

ui drop down("Threads""5,10,20,30,40,50"#INPUT Threads)
ui stat monitor("Threads (Active/Total):""<b>{#THREADS Active}/{#THREADS Max}</b>")
ui stat monitor("COUNT:"#COUNT)
ui stat monitor("Records:"$table total rows(&li database))
plugin command("SocketCommands.dll""socket container") {
    set(#FOLDER Root$special folder("Application"), "Global")
    clear table(&li database)
    set table cell(&li database, 0, 0, "Company Name")
    set table cell(&li database, 0, 1, "First Name")
    set table cell(&li database, 0, 2, "Last Name")
    set table cell(&li database, 0, 3, "Current Role")
    set table cell(&li database, 0, 4, "Industry")
    set table cell(&li database, 0, 5, "Employees")
    set table cell(&li database, 0, 6, "Founded")
    set table cell(&li database, 0, 7, "Website")
    set table cell(&li database, 0, 8, "Company Profile")
    set table cell(&li database, 0, 9, "Personal Profile")
    set(#THREADS Max#INPUT Threads"Global")
    set(#RESULTS ROW, 1, "Global")
    clear list(%PROFILES)
    add list to list(%PROFILES$list from file("{#FOLDER Root}\\profiles.txt"), "Delete""Global")
    set(#PROFILES ROW, 0, "Global")
    set(#COUNT$list total(%PROFILES), "Global")
    if($comparison(#THREADS Max">"#COUNT)) {
        then {
            alert("You need to have more profiles than threads.")
            stop script
        }
        else {
        }
    }
    comment("Reset counter to 0.")
    set(#THREADS Active$plugin function("Threads Counter.dll""threads counter""reset"), "Global")
    comment("Main Threading Loop")
    loop while($comparison(#COUNT">", 0)) {
        if($comparison($plugin function("Threads Counter.dll""threads counter""read"), "<"#THREADS Max)) {
            then {
                comment("Increment Number Of Threads And Store Value to UBot Variable.")
                set(#THREADS Active$plugin function("Threads Counter.dll""threads counter""increment"), "Global")
                if($comparison(#PROFILES ROW">="$list total(%PROFILES))) {
                    then {
                        set(#PROFILES ROW, 0, "Global")
                    }
                    else {
                    }
                }
                THREAD START(#PROFILES ROW#RESULTS ROW)
                increment(#PROFILES ROW)
                increment(#RESULTS ROW)
                decrement(#COUNT)
            }
            else {
                wait(0.2)
            }
        }
    }
    comment("Thread Command")
define THREAD START(#PROFILES ROW#RESULTS ROW) {
        thread {
            SCRAPE LINKEDIN(#PROFILES ROW#RESULTS ROW#PROXY)
            set(#THREADS Active$plugin function("Threads Counter.dll""threads counter""decrement"), "Global")
        }
    }
}
comment("Wait For Threads To Close")
set(#THREADS Active$plugin function("Threads Counter.dll""threads counter""read"), "Global")
loop while($comparison(#THREADS Active">", 0)) {
    set(#THREADS Active$plugin function("Threads Counter.dll""threads counter""read"), "Global")
    wait(2)
}
save to file("{#FOLDER Root}\\results.csv"&li database)
stop script
define SCRAPE LINKEDIN(#PROFILES ROW#RESULTS ROW#PROXY) {
    set(#li profile$plugin function("HTTP post.dll""$http get"$list item(%PROFILES#PROFILES ROW), "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0"""""""), "Global")
    set(#li profile$replace regular expression($text from list($list from text(#li profile$new line), " "), "\\s+"" "), "Global")
    set(#company_name$find regular expression(#li profile"(?<=<span class=\"org summary\">).*?(?=\\<\\/span>.*<dt id=\"overview-summary-past-title\")|(?<=<span class=\"at\">at <\\/span>).*?(?=<.*dt id=\"overview-summary-past-title\")"), "Global")
    set(#first_name$find regular expression(#li profile"(?<=class=\"given-name\">).*?(?=<)"), "Global")
    set(#last_name$find regular expression(#li profile"(?<=class=\"family-name\">).*?(?=<)"), "Global")
    set(#current_role$trim($replace regular expression($trim($find regular expression(#li profile"(?<=<ul class=\"current\">).*?(?=<span)")), "<li>"$nothing)), "Global")
    set(#check$find regular expression(#li profile"(?<=class=\"company-profile-public\" href=\").*?(?=\"><span class=\"org summary\">.*<dt id=\"overview-summary-past-title\")"), "Global")
    if($comparison(#check"="$nothing)) {
        then {
            set(#company_profile"-""Global")
            clear list(%website)
            add list to list(%website$list from text($find regular expression(#liC profile"(?<=\\/redir\\/redirect\\?url=)http%3A%2F%2F(www%2(E|F)|).*?(?=(&url|%2F|&))"), $new line), "Delete""Global")
            if($comparison($list total(%website), "=", 0)) {
                then {
                    set(#website"-""Global")
                }
                else {
                    set(#website$list item(%website, 0), "Global")
                }
            }
        }
        else {
            set(#company_profile"http://www.linkedin.com{$find regular expression(#li profile"(?<=class=\"company-profile-public\" href=\").*?(?=\"><span class=\"org summary\">.*<dt id=\"overview-summary-past-title\")")}""Global")
            plugin command("HTTP post.dll""http auto redirect""Yes")
            plugin command("HTTP post.dll""http max redirects", 2)
            plugin command("HTTP post.dll""http set headers""azccept""text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
            set(#company_profile$replace(#company_profile"amp;"$nothing), "Global")
            set(#liC profile$plugin function("HTTP post.dll""$http get"#company_profile"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0"$list item(%PROFILES#PROFILES ROW), """"), "Global")
            set(#liC profile$replace regular expression($text from list($list from text(#liC profile$new line), " "), "\\s+"" "), "Global")
            set(#website$find regular expression(#liC profile"(?<=nofollow\">).*?(?=</a>.*(Industry|Branche)<)"), "Global")
            set(#industry$trim($replace regular expression($trim($find regular expression(#liC profile"(?<=<dt>Industry</dt>).*?(?=<\\/dd>)|(?<=<dt>Branche</dt>).*?(?=<\\/dd>)")), "<dd>"$nothing)), "Global")
            set(#employees$trim($find regular expression(#liC profile"(?<=Company Size</dt> <dd>).*?(?=\\s*.employee)|(?<=Unternehmensgröße</dt> <dd>).*?(?=\\s*.Mitarbeiter)")), "Global")
            set(#founded$trim($find regular expression(#liC profile"(?<=Founded</dt> <dd>).*?(?=<\\/dd>)|(?<=Gründungsdatum</dt> <dd>).*?(?=<\\/dd>)")), "Global")
        }
    }
set(#company_name$replace(#company_name"amp;"$nothing), "Global")
set(#first_name$replace(#first_name"amp;"$nothing), "Global")
set(#last_name$replace(#last_name"amp;"$nothing), "Global")
set(#current_role$replace(#current_role"amp;"$nothing), "Global")
set(#industry$replace(#industry"amp;"$nothing), "Global")
set(#employees$replace(#employees"amp;"$nothing), "Global")
set(#founded$replace(#founded"amp;"$nothing), "Global")
set table cell(&li database#RESULTS ROW, 0, #company_name)
set table cell(&li database#RESULTS ROW, 1, #first_name)
set table cell(&li database#RESULTS ROW, 2, #last_name)
set table cell(&li database#RESULTS ROW, 3, #current_role)
set table cell(&li database#RESULTS ROW, 4, #industry)
set table cell(&li database#RESULTS ROW, 5, #employees)
set table cell(&li database#RESULTS ROW, 6, #founded)
set table cell(&li database#RESULTS ROW, 7, #website)
set table cell(&li database#RESULTS ROW, 8, #company_profile)
set table cell(&li database#RESULTS ROW, 9, $list item(%PROFILES#PROFILES ROW))
}

Link to post
Share on other sites

Please could somebody help with this, I am trying to scrape linkedin profile data into a table, the way I have threaded this is not right because, the data doesn't match the profile scraped on some rows (profile list attached):

 

ui drop down("Threads""5,10,20,30,40,50"#INPUT Threads)

ui stat monitor("Threads (Active/Total):""<b>{#THREADS Active}/{#THREADS Max}</b>")

ui stat monitor("COUNT:"#COUNT)

ui stat monitor("Records:"$table total rows(&li database))

plugin command("SocketCommands.dll""socket container") {

    set(#FOLDER Root$special folder("Application"), "Global")

    clear table(&li database)

    set table cell(&li database, 0, 0, "Company Name")

    set table cell(&li database, 0, 1, "First Name")

    set table cell(&li database, 0, 2, "Last Name")

    set table cell(&li database, 0, 3, "Current Role")

    set table cell(&li database, 0, 4, "Industry")

    set table cell(&li database, 0, 5, "Employees")

    set table cell(&li database, 0, 6, "Founded")

    set table cell(&li database, 0, 7, "Website")

    set table cell(&li database, 0, 8, "Company Profile")

    set table cell(&li database, 0, 9, "Personal Profile")

    set(#THREADS Max#INPUT Threads"Global")

    set(#RESULTS ROW, 1, "Global")

    clear list(%PROFILES)

    add list to list(%PROFILES$list from file("{#FOLDER Root}\\profiles.txt"), "Delete""Global")

    set(#PROFILES ROW, 0, "Global")

    set(#COUNT$list total(%PROFILES), "Global")

    if($comparison(#THREADS Max">"#COUNT)) {

        then {

            alert("You need to have more profiles than threads.")

            stop script

        }

        else {

        }

    }

    comment("Reset counter to 0.")

    set(#THREADS Active$plugin function("Threads Counter.dll""threads counter""reset"), "Global")

    comment("Main Threading Loop")

    loop while($comparison(#COUNT">", 0)) {

        if($comparison($plugin function("Threads Counter.dll""threads counter""read"), "<"#THREADS Max)) {

            then {

                comment("Increment Number Of Threads And Store Value to UBot Variable.")

                set(#THREADS Active$plugin function("Threads Counter.dll""threads counter""increment"), "Global")

                if($comparison(#PROFILES ROW">="$list total(%PROFILES))) {

                    then {

                        set(#PROFILES ROW, 0, "Global")

                    }

                    else {

                    }

                }

                THREAD START(#PROFILES ROW#RESULTS ROW)

                increment(#PROFILES ROW)

                increment(#RESULTS ROW)

                decrement(#COUNT)

            }

            else {

                wait(0.2)

            }

        }

    }

    comment("Thread Command")

define THREAD START(#PROFILES ROW#RESULTS ROW) {

        thread {

            SCRAPE LINKEDIN(#PROFILES ROW#RESULTS ROW#PROXY)

            set(#THREADS Active$plugin function("Threads Counter.dll""threads counter""decrement"), "Global")

        }

    }

}

comment("Wait For Threads To Close")

set(#THREADS Active$plugin function("Threads Counter.dll""threads counter""read"), "Global")

loop while($comparison(#THREADS Active">", 0)) {

    set(#THREADS Active$plugin function("Threads Counter.dll""threads counter""read"), "Global")

    wait(2)

}

save to file("{#FOLDER Root}\\results.csv"&li database)

stop script

define SCRAPE LINKEDIN(#PROFILES ROW#RESULTS ROW#PROXY) {

    set(#li profile$plugin function("HTTP post.dll""$http get"$list item(%PROFILES#PROFILES ROW), "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0"""""""), "Global")

    set(#li profile$replace regular expression($text from list($list from text(#li profile$new line), " "), "\\s+"" "), "Global")

    set(#company_name$find regular expression(#li profile"(?<=<span class=\"org summary\">).*?(?=\\<\\/span>.*<dt id=\"overview-summary-past-title\")|(?<=<span class=\"at\">at <\\/span>).*?(?=<.*dt id=\"overview-summary-past-title\")"), "Global")

    set(#first_name$find regular expression(#li profile"(?<=class=\"given-name\">).*?(?=<)"), "Global")

    set(#last_name$find regular expression(#li profile"(?<=class=\"family-name\">).*?(?=<)"), "Global")

    set(#current_role$trim($replace regular expression($trim($find regular expression(#li profile"(?<=<ul class=\"current\">).*?(?=<span)")), "<li>"$nothing)), "Global")

    set(#check$find regular expression(#li profile"(?<=class=\"company-profile-public\" href=\").*?(?=\"><span class=\"org summary\">.*<dt id=\"overview-summary-past-title\")"), "Global")

    if($comparison(#check"="$nothing)) {

        then {

            set(#company_profile"-""Global")

            clear list(%website)

            add list to list(%website$list from text($find regular expression(#liC profile"(?<=\\/redir\\/redirect\\?url=)http%3A%2F%2F(www%2(E|F)|).*?(?=(&url|%2F|&))"), $new line), "Delete""Global")

            if($comparison($list total(%website), "=", 0)) {

                then {

                    set(#website"-""Global")

                }

                else {

                    set(#website$list item(%website, 0), "Global")

                }

            }

        }

        else {

            set(#company_profile"http://www.linkedin.com{$find regular expression(#li profile"(?<=class=\"company-profile-public\" href=\").*?(?=\"><span class=\"org summary\">.*<dt id=\"overview-summary-past-title\")")}""Global")

            plugin command("HTTP post.dll""http auto redirect""Yes")

            plugin command("HTTP post.dll""http max redirects", 2)

            plugin command("HTTP post.dll""http set headers""azccept""text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")

            set(#company_profile$replace(#company_profile"amp;"$nothing), "Global")

            set(#liC profile$plugin function("HTTP post.dll""$http get"#company_profile"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0"$list item(%PROFILES#PROFILES ROW), """"), "Global")

            set(#liC profile$replace regular expression($text from list($list from text(#liC profile$new line), " "), "\\s+"" "), "Global")

            set(#website$find regular expression(#liC profile"(?<=nofollow\">).*?(?=</a>.*(Industry|Branche)<)"), "Global")

            set(#industry$trim($replace regular expression($trim($find regular expression(#liC profile"(?<=<dt>Industry</dt>).*?(?=<\\/dd>)|(?<=<dt>Branche</dt>).*?(?=<\\/dd>)")), "<dd>"$nothing)), "Global")

            set(#employees$trim($find regular expression(#liC profile"(?<=Company Size</dt> <dd>).*?(?=\\s*.employee)|(?<=Unternehmensgröße</dt> <dd>).*?(?=\\s*.Mitarbeiter)")), "Global")

            set(#founded$trim($find regular expression(#liC profile"(?<=Founded</dt> <dd>).*?(?=<\\/dd>)|(?<=Gründungsdatum</dt> <dd>).*?(?=<\\/dd>)")), "Global")

        }

    }

set(#company_name$replace(#company_name"amp;"$nothing), "Global")

set(#first_name$replace(#first_name"amp;"$nothing), "Global")

set(#last_name$replace(#last_name"amp;"$nothing), "Global")

set(#current_role$replace(#current_role"amp;"$nothing), "Global")

set(#industry$replace(#industry"amp;"$nothing), "Global")

set(#employees$replace(#employees"amp;"$nothing), "Global")

set(#founded$replace(#founded"amp;"$nothing), "Global")

set table cell(&li database#RESULTS ROW, 0, #company_name)

set table cell(&li database#RESULTS ROW, 1, #first_name)

set table cell(&li database#RESULTS ROW, 2, #last_name)

set table cell(&li database#RESULTS ROW, 3, #current_role)

set table cell(&li database#RESULTS ROW, 4, #industry)

set table cell(&li database#RESULTS ROW, 5, #employees)

set table cell(&li database#RESULTS ROW, 6, #founded)

set table cell(&li database#RESULTS ROW, 7, #website)

set table cell(&li database#RESULTS ROW, 8, #company_profile)

set table cell(&li database#RESULTS ROW, 9, $list item(%PROFILES#PROFILES ROW))

}

 

Hi Troubleshooting the whole thing is probably a little bit time consuming. Could you please break down your question a little bit?

What exactly is not working as expected?

 

Do you get the wrong results from a regex command? Or which part is failing?

 

Dan

Link to post
Share on other sites

I have just had a 10 second scan over your code, at first glance

1.your using the socket container and the http post together these are separate plugins one the internal and the other aymens http plugin. You need to use http container instead of socket.

2.you do not need to put everything in the http container just the http commands. 

3.I see your not storing any cookies this could be causing issues but from the looks of your code your only scraping so not sure.

4.use the local dictionary plugin and follow example for http and local variables sometimes ubot shares data between variables its a issue.

 

if your still having issues post code back broken down.

 

many thanks

kev123

Link to post
Share on other sites
  • 1 month later...

Thanks you both, the dictionary plugin really helped and it is scraping great now. The only thing I can't get to work now is reading/writing with a local list, it never seems to work no matter what I do. Anyone have an example using the dictionary plugin?

Link to post
Share on other sites

note linkedin may end up showing a captcha after simultaneously viewing numerous pages together from the same ip address/cookies/cache.  It has been this way for 6+ months within there site when hitting pages to fast, and simultaneously I can only assume will cause it to trigger faster.

Link to post
Share on other sites
  • 2 months later...

Thank you LoWrIdErTJ - BotGuru, I have been running multiple instances my compiled bot because I can't get local lists to work. I have been running the bots 24/7 with no captchas or proxies :) The bot creates 5 possible email addresses from the info found then runs them through a verifier.

 

Does anyone have a local list threading example because I am still stuck on this.

Link to post
Share on other sites

can you post your code/define only not the multithread part and what goes wrong it will be easier for us all to check.

 

many thanks

kev

Link to post
Share on other sites

Thank you LoWrIdErTJ - BotGuru, I have been running multiple instances my compiled bot because I can't get local lists to work. I have been running the bots 24/7 with no captchas or proxies :) The bot creates 5 possible email addresses from the info found then runs them through a verifier.

 

Does anyone have a local list threading example because I am still stuck on this.

 

Not sure exactly sure what you are looking for, but here is a very quick and simple example:

 

plugin command("LocalDictionary.dll""clear global dictionary")

loop(5) {

    thread {

        plugin command("LocalDictionary.dll""init local list")

        plugin command("LocalDictionary.dll""local list from text""xx"$random text(5), "")

        alert($plugin function("LocalDictionary.dll""$local list item""xx", 0))

    }

}

 

Does that help?

 

One Alternative would be the Large Table Plugin from Kev:

loop(5) {

    thread {

        plugin command("Bigtable.dll""Clear large list"$plugin function("Bigtable.dll""Thread Id"))

        plugin command("Bigtable.dll""Add item to large list"$plugin function("Bigtable.dll""Thread Id"), $random text(5))

        alert($plugin function("Bigtable.dll""Large list return"$plugin function("Bigtable.dll""Thread Id")))

    }

}

 

 

Dan

  • Like 1
Link to post
Share on other sites

Cheers Dan, I will test that out. Just trying to get my head round threading with local lists.

Link to post
Share on other sites
Guest
This topic is now closed to further replies.
×
×
  • Create New...