Portfolio for Aaron Dale

Projects - Feedback - Code

Web Parser Example

An example of a web parser

Once the data has been parsed from the web page, a CSV file is generated and sent to the user.
//Scrape a range of program listing pages and echo one tab-delimited row per listing.
//
//Expects, from the surrounding script (not visible here):
//  $listing_start - first program id to fetch (inclusive)
//  $listing_stop  - program id to stop at (exclusive)
//  clean_entry()  - helper applied to every extracted field (presumably strips
//                   markup/whitespace; verify against its definition)
//
//Parsing is cursor based: $stop always holds the offset where the previous
//field ended, and each search for the next field starts from there. The order
//of the extraction steps therefore matters and must match the page layout.

//iterate through all the listings
for ($i = $listing_start; $i < $listing_stop; $i++) {
    $listing = file_get_contents("http://www.asha.org/proserv/viewonelisting.asp?programid=$i");

    //file_get_contents returns false on failure; skip the id rather than parse a non-string
    if ($listing === false) {
        continue;
    }

    $start_search_string = "";
    $stop_search_string = "";
    $start = 0;
    $stop = 0;

    //check breaks on page, this helps with determining what content is available
    $listing_part = substr($listing, 0, strpos($listing, "Email"));
    $break_count = substr_count($listing_part, "<br>");

    //if there are no breaks, the listing has been deleted
    if ($break_count == 0) {
        continue;
    }

    //find name
    $start_search_string = "<strong>";
    $stop_search_string = "</strong>";
    $start = strpos($listing, $start_search_string, $stop) + strlen($start_search_string);
    $stop = strpos($listing, $stop_search_string, $start);
    $name = clean_entry(substr($listing, $start, $stop - $start));

    //find address
    $start_search_string = "<br>";
    $stop_search_string = "<br>";
    $start = strpos($listing, $start_search_string, $stop) + strlen($start_search_string);
    $stop = strpos($listing, $stop_search_string, $start);
    $address = clean_entry(substr($listing, $start, $stop - $start));

    //find address2 (only present when the block before "Email" has six breaks)
    if ($break_count == 6) {
        $start_search_string = "<br>";
        $stop_search_string = "<br>";
        $start = strpos($listing, $start_search_string, $stop) + strlen($start_search_string);
        $stop = strpos($listing, $stop_search_string, $start);
        $address2 = clean_entry(substr($listing, $start, $stop - $start));
    } else {
        $address2 = "";
    }

    //find city_state_zip
    $start_search_string = "<br>";
    $stop_search_string = "<br>";
    $start = strpos($listing, $start_search_string, $stop) + strlen($start_search_string);
    $stop = strpos($listing, $stop_search_string, $start);
    $city_state_zip = clean_entry(substr($listing, $start, $stop - $start));

    //split on the first comma: city before it, two characters of state right
    //after it, and the zip starting four characters past it
    //NOTE(review): assumes clean_entry leaves the text as "City,ST 12345" - confirm
    $comma = strpos($city_state_zip, ",");
    $city = substr($city_state_zip, 0, $comma);
    $state = substr($city_state_zip, $comma + 1, 2);
    $zip = substr($city_state_zip, $comma + 4);

    //find map url
    //not written to the output row, but parsing it moves the cursor past this segment
    $start_search_string = "<br>";
    $stop_search_string = "<br>";
    $start = strpos($listing, $start_search_string, $stop) + strlen($start_search_string);
    $stop = strpos($listing, $stop_search_string, $start);
    $map = clean_entry(substr($listing, $start, $stop - $start));

    //find phone
    $start_search_string = "<br>";
    $stop_search_string = "<br>";
    $start = strpos($listing, $start_search_string, $stop) + strlen($start_search_string);
    $stop = strpos($listing, $stop_search_string, $start);
    $phone = clean_entry(substr($listing, $start, $stop - $start));

    //find email
    //the extra 11 characters skip whatever follows the label
    //(presumably fixed markup between label and value - verify against the page)
    $start_search_string = "Email";
    $stop_search_string = "<br>";
    $start = strpos($listing, $start_search_string, $stop) + strlen($start_search_string) + 11;
    $stop = strpos($listing, $stop_search_string, $start);
    $email = clean_entry(substr($listing, $start, $stop - $start));

    //find director (same label + 11 character skip as email)
    $start_search_string = "Director";
    $stop_search_string = "<br>";
    $start = strpos($listing, $start_search_string, $stop) + strlen($start_search_string) + 11;
    $stop = strpos($listing, $stop_search_string, $start);
    $director = clean_entry(substr($listing, $start, $stop - $start));

    //find areas of practice
    //skips 14 characters after the label and drops the 9 characters before "</td>"
    //(presumably surrounding markup - verify against the page)
    $start_search_string = "Areas of Practice";
    $stop_search_string = "</td>";
    $start = strpos($listing, $start_search_string, $stop) + strlen($start_search_string) + 14;
    $stop = strpos($listing, $stop_search_string, $start) - 9;
    $areas_of_practice = clean_entry(substr($listing, $start, $stop - $start));

    //does this listing handle Autism
    //strpos returns false when absent and may return 0 for a match at the very
    //start, so compare against false rather than testing "> 0"
    $autism = (strpos($listing, "Autism") !== false) ? "YES" : "NO";

    //does this listing handle language
    //search the page text; the previous code searched $language itself and so always produced "NO"
    $language = (strpos($listing, "Language") !== false) ? "YES" : "NO";

    //create the output row: every field is followed by a tab, then CRLF ends the row
    $fields = array(
        $name,
        $address,
        $address2,
        $city,
        $state,
        $zip,
        $phone,
        $email,
        $director,
        $autism,
        $language,
        $areas_of_practice,
    );
    $datarow = implode("\t", $fields) . "\t" . "\r\n";

    echo $datarow;
}

Portfolio - Feedback - Code