Jump to content
UBot Underground

Scraping issue (If,else...tables and stuff) Please help!


Recommended Posts

So this is the relevant code. I am trying to scrape something from Yelp, but issues begin when UBot starts mixing up tables and entries.

 

This is what i need:

we have 7 variables

 

title

company

address1

address2

address3

address4

Phone

 

Now sometimes these appear on the site and SOMETIMES NOT (VERY IMPORTANT)

What i need to do is this:

If Variable managed to find some text to scrape

Then Save it inside the variable

Or Else Just put in $nothing  (I tried to use normal text like "not available")

So when script tries to place item to list it will be either Real scraped Info-----or "not available" or $nothing whichever you prefer

And then when we add list to list again variable will be "not available" if nothing was scraped

Ultimately in the end when its time to add these lists to Table it will show something like this

 

TITLE1, COMPANY1, ADDRESS1, ADDRESS2, ADDRESS3, ADDRESS4, PHONE

 

Sometimes the scraped info will be there and sometimes not. What I need is for the script to add a "not available" text if a variable is empty, i.e. when nothing was scraped.

 

Before this, my information was jumbled: all the wrong things were in the columns because UBot populated empty spaces with information.

 

I understand there is an issue with my logic somewhere; I just can't see it.

 

Please help!

 

 

 

 

comment("Searches Yelp for #keyword, collects business links from up to #dropdown result pages, then visits each link and scrapes phone, title, address parts and category into lists and a table.")
ui text box("Keyword",#keyword)
ui drop down("How Many Pages to Scrape?","2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40",#dropdown)
set user agent("Internet Explorer 6")
navigate("http://www.yelp.com/","Wait")
wait(4)
type text(<name="find_desc">,#keyword,"Standard")
click(<id="header-search-submit">,"Left Click","No")
wait(5)
comment("NOTE(review): #row is set to 0 here and never incremented anywhere below, so every table write in the second loop targets row 0.")
set(#row,0,"Global")
comment("Pagination loop: one pass per requested result page.")
loop(#dropdown) {
    comment("The scraped href values go into #searchresults, then into %SEARCHRESULTS and, split on new lines, into %SEARCHRESULTS1. Only %SEARCHRESULTS1 is read later in this script.")
    set(#searchresults,$scrape attribute(<outerhtml=w"<a class=\"biz-name\" href=\"/biz/*\">*</a>">,"href"),"Global")
    add item to list(%SEARCHRESULTS,#searchresults,"Delete","Global")
    add list to list(%SEARCHRESULTS1,$list from text(#searchresults,$new line),"Delete","Global")
    comment("NOTE(review): when no next-page link exists this calls stop script - presumably that ends the whole run, so the scrape loop below would never execute. Confirm.")
    if($exists(<class="page-option prev-next">)) {
        then {
            click(<class="page-option prev-next">,"Left Click","No")
        }
        else {
            stop script
        }
    }
    wait(5)
}
comment("Detail loop: visit each collected link once.")
loop($list total(%SEARCHRESULTS1)) {
    navigate("http://www.yelp.com/{$next list item(%SEARCHRESULTS1)}","Wait")
    wait(5)
    comment("For each field: store the element innertext when the element exists, otherwise store $nothing.")
    if($exists(<class="biz-phone">)) {
        then {
            set(#Phone,$scrape attribute(<class="biz-phone">,"innertext"),"Global")
        }
        else {
            set(#Phone,$nothing,"Global")
        }
    }
    if($exists(<itemprop="name">)) {
        then {
            set(#title,$scrape attribute(<itemprop="name">,"innertext"),"Global")
        }
        else {
            set(#title,$nothing,"Global")
        }
    }
    if($exists(<itemprop="postalCode">)) {
        then {
            set(#address4,$scrape attribute(<itemprop="postalCode">,"innertext"),"Global")
        }
        else {
            set(#address4,$nothing,"Global")
        }
    }
    if($exists(<itemprop="addressRegion">)) {
        then {
            set(#address3,$scrape attribute(<itemprop="addressRegion">,"innertext"),"Global")
        }
        else {
            set(#address3,$nothing,"Global")
        }
    }
    if($exists(<class="category-str-list">)) {
        then {
            set(#category,$scrape attribute(<class="category-str-list">,"innertext"),"Global")
        }
        else {
            set(#category,$nothing,"Global")
        }
    }
    if($exists(<itemprop="addressLocality">)) {
        then {
            set(#address2,$scrape attribute(<itemprop="addressLocality">,"innertext"),"Global")
        }
        else {
            set(#address2,$nothing,"Global")
        }
    }
    if($exists(<itemprop="streetAddress">)) {
        then {
            set(#address1,$scrape attribute(<itemprop="streetAddress">,"innertext"),"Global")
        }
        else {
            set(#address1,$nothing,"Global")
        }
    }
    comment("The scraped values are added to %TITLE, %CATEGORY, %ADDRESS1-4 and %PHONE. None of these lists except %PHONE is touched again, and none of them is written to the table below.")
    comment("NOTE(review): with the Delete option a repeated or empty value is presumably not added, which would leave these lists with different lengths - confirm.")
    add item to list(%TITLE,#title,"Delete","Global")
    add item to list(%CATEGORY,#category,"Delete","Global")
    add item to list(%ADDRESS1,#address1,"Delete","Global")
    add item to list(%ADDRESS2,#address2,"Delete","Global")
    add item to list(%ADDRESS3,#address3,"Delete","Global")
    add item to list(%ADDRESS4,#address4,"Delete","Global")
    add item to list(%PHONE,#Phone,"Delete","Global")
    comment("NOTE(review): the checks below add the not available text to a second set of lists (%TITLE1, %CATEGORY1, ...) only when a field is empty. The else branches are empty, so a value that WAS scraped never reaches those lists.")
    if($comparison(#title,"=",$nothing)) {
        then {
            add list to list(%TITLE1,$list from text("not available",$new line),"Delete","Global")
        }
        else {
        }
    }
    if($comparison(#category,"=",$nothing)) {
        then {
            add list to list(%CATEGORY1,$list from text("not available",$new line),"Delete","Global")
        }
        else {
        }
    }
    if($comparison(#address1,"=",$nothing)) {
        then {
            add list to list(%ADDRESS11,$list from text("not available",$new line),"Delete","Global")
        }
        else {
        }
    }
    if($comparison(#address2,"=",$nothing)) {
        then {
            add list to list(%ADDRESS22,$list from text("not available",$new line),"Delete","Global")
        }
        else {
        }
    }
    if($comparison(#address3,"=",$nothing)) {
        then {
            add list to list(%ADDRESS33,$list from text("not available",$new line),"Delete","Global")
        }
        else {
        }
    }
    if($comparison(#address4,"=",$nothing)) {
        then {
            add list to list(%ADDRESS44,$list from text("not available",$new line),"Delete","Global")
        }
        else {
        }
    }
    comment("NOTE(review): the phone fallback is added to %PHONE, but the table column below reads %PHONE1, which nothing in this script writes to.")
    if($comparison(#Phone,"=",$nothing)) {
        then {
            add list to list(%PHONE,$list from text("not available",$new line),"Delete","Global")
        }
        else {
        }
    }
    comment("Writes the second set of lists as columns 0-6 starting at row #row. This runs on every pass of the loop, always with the same #row value.")
    add list to table as column(&YELPINFO,#row,0,%TITLE1)
    add list to table as column(&YELPINFO,#row,1,%CATEGORY1)
    add list to table as column(&YELPINFO,#row,2,%ADDRESS11)
    add list to table as column(&YELPINFO,#row,3,%ADDRESS22)
    add list to table as column(&YELPINFO,#row,4,%ADDRESS33)
    add list to table as column(&YELPINFO,#row,5,%ADDRESS44)
    add list to table as column(&YELPINFO,#row,6,%PHONE1)
}

 

 

Link to post
Share on other sites

Here you go mate:

comment("Yelp scraper. Step 1: collect business links from the search result pages. Step 2: visit each link and write one row per business into &Results. Step 3: fill every empty cell with the not available text.")
ui text box("Keyword", #keyword)
ui drop down("How Many Pages to Scrape?", "2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40", #dropdown)
set user agent("Internet Explorer 6")
clear table(&Results)
navigate("http://www.yelp.com/", "Wait")
wait for element(<name="find_desc">, "", "Appear")
type text(<name="find_desc">, #keyword, "Standard")
click(<id="header-search-submit">, "Left Click", "No")
wait for element(<class="pagination-results-window">, "", "Appear")
clear list(%SEARCHRESULTS1)
comment("#MorePages turns false once no next-page link is found. The remaining passes of the loop then do nothing, so the script carries on to the detail scrape instead of stopping with an empty table when the search has fewer pages than requested.")
set(#MorePages, "true", "Global")
loop(#dropdown) {
    if($comparison(#MorePages, "=", "true")) {
        then {
            add list to list(%SEARCHRESULTS1, $scrape attribute(<outerhtml=w"<a class=\"biz-name\" href=\"/biz/*\">*</a>">, "href"), "Delete", "Global")
            if($exists(<class="page-option prev-next">)) {
                then {
                    click(<class="page-option prev-next">, "Left Click", "No")
                    wait(5)
                }
                else {
                    set(#MorePages, "false", "Global")
                }
            }
        }
        else {
        }
    }
}
divider
comment("Row 0 holds the column headers, so data rows start at 1.")
set(#row, 1, "Global")
set table cell(&Results, 0, 0, "Title")
set table cell(&Results, 0, 1, "Company")
set table cell(&Results, 0, 2, "Address 1")
set table cell(&Results, 0, 3, "Address 2")
set table cell(&Results, 0, 4, "Address 3")
set table cell(&Results, 0, 5, "Address 4")
set table cell(&Results, 0, 6, "Phone")
set table cell(&Results, 0, 7, "Category")
divider
set list position(%SEARCHRESULTS1, 0)
loop($list total(%SEARCHRESULTS1)) {
    navigate("http://www.yelp.com/{$next list item(%SEARCHRESULTS1)}", "Wait")
    wait for element(<class="mapbox">, "", "Appear")
    comment("Each value is written straight into its own cell, so a missing field leaves that cell empty and cannot shift the other columns.")
    set table cell(&Results, #row, 0, $scrape attribute(<itemprop="name">, "innertext"))
    comment("NOTE(review): Company uses the same selector as Title, so both columns receive the same text. Swap in the right selector if the page exposes a separate company field.")
    set table cell(&Results, #row, 1, $scrape attribute(<itemprop="name">, "innertext"))
    set table cell(&Results, #row, 2, $scrape attribute(<itemprop="streetAddress">, "innertext"))
    set table cell(&Results, #row, 3, $scrape attribute(<itemprop="addressLocality">, "innertext"))
    set table cell(&Results, #row, 4, $scrape attribute(<itemprop="addressRegion">, "innertext"))
    set table cell(&Results, #row, 5, $scrape attribute(<itemprop="postalCode">, "innertext"))
    set table cell(&Results, #row, 6, $scrape attribute(<class="biz-phone">, "innertext"))
    set table cell(&Results, #row, 7, $scrape attribute(<class="category-str-list">, "innertext"))
    increment(#row)
}
comment("Clean Up Table")
comment("Walks every row and column of &Results and replaces each empty cell with the not available text.")
set(#Row, 0, "Global")
loop($table total rows(&Results)) {
    set(#Col, 0, "Global")
    loop($table total columns(&Results)) {
        if($comparison($table cell(&Results, #Row, #Col), "=", "")) {
            then {
                set table cell(&Results, #Row, #Col, "not available")
            }
            else {
            }
        }
        increment(#Col)
    }
    increment(#Row)
}

Try not to mix &tables and %lists; use one or the other for whatever you are trying to build. Also, the problem you were probably having is that the lists have "Delete Duplicates" set to "Delete", so if the scraped data is not there it will not add an empty line to the lists, which will mess up your table at the end. 

 

Look through the code and learn from it! Also, use your debugger to find problems. 

 

Carl  ;)

Link to post
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

Loading...
×
×
  • Create New...