// C++ Code:
#include <curl/curl.h>
#include <boost/algorithm/string.hpp>
#include <boost/regex.hpp>

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif

using namespace std;

//Variables..
// Body of the most recently downloaded page; written by GrabInfo(), read by
// main() and CheckURLs().
string DataHolding;

//Function-Prototypes..
string HtmlDecode(const string &str);
void removeBadURLs(vector<string> &vec);
void GrabInfo(const string &url, short &result);
void removeDuplicates(vector<string> &vec);
void CheckURLs(vector<string> &urllist, size_t arraysize);
void preg_match_all(const string &Source, const boost::regex &expression, string &ID);
static size_t WriteBuffer(void *contents, size_t size, size_t nmemb, void *userp);
static size_t strpos(const string &Data, const string &Regex, int pos, int SizeOf_Regex, int additional);

//Create a struct to hold the data..
// Growable byte buffer filled by WriteBuffer(): `memory` is a malloc/realloc'd,
// NUL-terminated array and `size` is the number of payload bytes in it.
struct MemoryStruct
{
    char *memory;
    size_t size;
};

// Sets the console text attribute. The values passed by this program (2, 3, 7,
// 8, 12, 15) are Windows console attribute codes; on other platforms the call
// is a no-op so the program still builds and runs without colours.
static void SetConsoleColour(int attribute)
{
    cout.flush(); // make sure already-queued text is printed in the previous colour
#ifdef _WIN32
    SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), static_cast<WORD>(attribute));
#else
    (void)attribute;
#endif
}

// Blocks the calling thread for roughly `milliseconds` ms.
static void SleepMs(unsigned int milliseconds)
{
#ifdef _WIN32
    Sleep(milliseconds);
#else
    // usleep() is only required to accept values below one second, so split.
    sleep(milliseconds / 1000);
    usleep((milliseconds % 1000) * 1000);
#endif
}

// Returns text[first, last). Throws std::out_of_range when either offset is
// string::npos (a marker was not found) or the range is inverted, so callers
// get an explicit failure instead of a silently wrong substring.
static string Slice(const string &text, size_t first, size_t last)
{
    if (first == string::npos || last == string::npos || last < first || first > text.size())
        throw out_of_range("expected marker was not found in the downloaded page");
    return text.substr(first, last - first);
}

// Downloads every URL listed in Stock.ini in an endless loop and prints, for
// each page, the stock name and its last value scraped from the HTML.
int main()
{
    vector<string> urls; //Create Vector to hold all Links..

    {
        ifstream file;
        file.open("Stock.ini");

        boost::regex hashcomment("((^|(\\s*))|(^(\\s*)))#(.*)$");            //Comment using #..
        boost::regex scomment("((^|(\\s*))|(^(\\s*)))//(.*)$");               //Comment using //..
        boost::regex mcomment("((^|(\\s*))|(^(\\s*)))(/\\*)(.*)(\\*/)$");     //Comment using /* */..

        string line;
        while (getline(file, line))
        {
            // Blank / whitespace-only lines are never URLs.
            if (line.find_first_not_of(" \t\r\n") == string::npos)
                continue;

            // Fresh match strings per line: preg_match_all() leaves its output
            // untouched when nothing matches, so reusing them would compare
            // this line against a match from an earlier line.
            string hsmatch, scmatch, mmatch;
            preg_match_all(line, hashcomment, hsmatch);
            preg_match_all(line, scomment, scmatch);
            preg_match_all(line, mcomment, mmatch);

            // A line is a comment only when one of the patterns matched the
            // whole line; everything else is kept as a URL.
            if ((line != hsmatch) && (line != scmatch) && (line != mmatch))
                urls.push_back(line); //Add Url To Vector..
        }
        file.close();
    }

    if (urls.size() == 0)
    {
        cout << "The File Is Empty! Please populate it with valid URLs.\n\n";
        cout << "This program will now Terminate in 5 seconds..\n\n\n";
        SleepMs(5000);
        return 0;
    }

    removeDuplicates(urls);
    //CheckURLs(urls, urls.size()); //Validate URLs..

    // The scraping patterns never change, so build them once.
    /** Get Stock Names **/
    boost::regex SnExpression("<[a-z]+ class=\"wsod_smallSubHeading\"", boost::regex::icase);
    boost::regex SxExpression("<h1 class=\"wsod_fLeft(.*)\" style=\"margin-top:6px;\">", boost::regex::icase);
    /** Get Stock Values **/
    boost::regex SvExpression("<span stream=\"last_[0-9]+\" streamFormat=\"ToHundredth\" streamFeed=\"[A-Z]+\">", boost::regex::icase);

    while (1)
    {
        SleepMs(2000);
#ifdef _WIN32
        std::system("CLS");
#else
        std::system("clear");
#endif
        for (size_t i = 0; i < urls.size(); i++)
        {
            short result = 0;
            GrabInfo(urls[i], result);
            DataHolding = HtmlDecode(DataHolding); //Strip HTML Special Chars..

            try
            {
                /** Get Stock Names **/
                string StockID, StockX;
                preg_match_all(DataHolding, SnExpression, StockID);
                preg_match_all(DataHolding, SxExpression, StockX);

                // Name = text after the <h1 ...> tag up to one character
                // before the following "wsod_smallSubHeading" element.
                size_t Start = strpos(DataHolding, StockX, 0,
                                      static_cast<int>(StockX.size()), static_cast<int>(StockX.size()));
                size_t End = strpos(DataHolding, StockID, static_cast<int>(Start),
                                    static_cast<int>(StockID.size()), -1);
                //From the Start Pos, Copy Everything Until the End Pos to a string..
                const string urlname = Slice(DataHolding, Start, End) + ": ";

                /** Get Stock Values **/
                string ValueTag; // separate from StockID so a failed match is not masked by the old value
                preg_match_all(DataHolding, SvExpression, ValueTag);
                Start = strpos(DataHolding, ValueTag, 0,
                               static_cast<int>(ValueTag.size()), static_cast<int>(ValueTag.size()));
                End = strpos(DataHolding, "</span>", static_cast<int>(Start), 7, 0);
                const string Final = Slice(DataHolding, Start, End);

                SetConsoleColour(3); //Console Colours..
                cout << urlname;
                SetConsoleColour(7);
                cout << Final << "\n\n";
            }
            catch (const exception &e)
            {
                SetConsoleColour(15);
                cout << "\n\n\n--------------------------------------------------------------------------------";
                SetConsoleColour(12);
                cout << "\n\nException.. Html File is empty -- substring Out of Range! Details:\n\n";
                cout << e.what();
                SetConsoleColour(15);
                cout << "\n\n\n";
            }
            SetConsoleColour(8);
        }
    }
    return 0;
}

// Decodes the HTML character references for ", ', &, < and > in `str` and
// returns the decoded copy.
//
// The text is scanned once, left to right, and decoded output is never
// rescanned. That keeps "&amp;lt;" as the literal text "&lt;" (no double
// decoding) and guarantees termination. Besides the standard forms
// ("&#34;", "&quot;", ...) the non-standard spellings the original table
// listed ("& #34;", "&34;", ...) are still accepted.
string HtmlDecode(const string &str)
{
    struct Entity
    {
        const char *encoded;
        char decoded;
    };
    static const Entity entities[] = {
        {"&#34;", '"'},  {"& #34;", '"'},  {"&quot;", '"'}, {"&34;", '"'},
        {"&#39;", '\''}, {"& #39;", '\''}, {"&apos;", '\''}, {"&39;", '\''},
        {"&#38;", '&'},  {"& #38;", '&'},  {"&amp;", '&'},  {"&38;", '&'},
        {"&#60;", '<'},  {"& #60;", '<'},  {"&lt;", '<'},   {"&60;", '<'},
        {"&#62;", '>'},  {"& #62;", '>'},  {"&gt;", '>'},   {"&62;", '>'}
    };
    static const size_t entityCount = sizeof(entities) / sizeof(entities[0]);

    string decoded;
    decoded.reserve(str.size());

    size_t i = 0;
    while (i < str.size())
    {
        bool replaced = false;
        if (str[i] == '&') // every table entry starts with '&'
        {
            for (size_t e = 0; e < entityCount; ++e)
            {
                const size_t len = strlen(entities[e].encoded);
                if (str.compare(i, len, entities[e].encoded) == 0)
                {
                    decoded += entities[e].decoded;
                    i += len;
                    replaced = true;
                    break;
                }
            }
        }
        if (!replaced)
            decoded += str[i++];
    }
    return decoded;
}

// libcurl CURLOPT_WRITEFUNCTION callback: appends the `size * nmemb` bytes at
// `contents` to the MemoryStruct pointed to by `userp`, keeping the buffer
// NUL-terminated. Returns the number of bytes consumed. Terminates the
// process if the buffer cannot be grown.
static size_t WriteBuffer(void *contents, size_t size, size_t nmemb, void *userp)
{
    const size_t realsize = size * nmemb;
    MemoryStruct *mem = static_cast<MemoryStruct *>(userp);

    // Keep the old pointer until realloc is known to have succeeded.
    char *grown = static_cast<char *>(realloc(mem->memory, mem->size + realsize + 1));
    if (grown == NULL)
    {
        printf("Cannot Allocated Enough Memory (ReAlloc is NULL).\n");
        exit(EXIT_FAILURE);
    }
    mem->memory = grown;

    memcpy(&(mem->memory[mem->size]), contents, realsize);
    mem->size += realsize;
    mem->memory[mem->size] = 0;
    return realsize;
}

// Downloads `url` with libcurl and stores the response body in the global
// DataHolding.
//
// `result` is set to 0 on success, otherwise to the CURLcode describing the
// failure (CURLE_FAILED_INIT if no easy handle could be created,
// CURLE_OUT_OF_MEMORY if the receive buffer could not be allocated). On
// failure DataHolding holds whatever was received before the error, possibly
// nothing.
void GrabInfo(const string &url, short &result)
{
    result = 0;

    MemoryStruct data;
    data.size = 0;
    data.memory = static_cast<char *>(malloc(1));
    if (data.memory == NULL)
    {
        result = static_cast<short>(CURLE_OUT_OF_MEMORY);
        DataHolding.clear();
        return;
    }
    data.memory[0] = '\0'; // valid empty string even if nothing is ever written

    curl_global_init(CURL_GLOBAL_ALL);
    CURL *curl_handle = curl_easy_init();
    if (curl_handle)
    {
        curl_easy_setopt(curl_handle, CURLOPT_URL, url.c_str());              //URL To Grab..
        curl_easy_setopt(curl_handle, CURLOPT_FAILONERROR, 1L);               //Incase of 400+ error, Don't return the page..
        curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteBuffer);    //Send Data to the Function..
        curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, static_cast<void *>(&data)); //Pass Struct Chunk to the Function..
        curl_easy_setopt(curl_handle, CURLOPT_USERAGENT,
                         "Mozilla/5.0 (Windows NT 5.1; rv:2.0) Gecko/20100101 Firefox/4.0"); //Use a UserAgent..

        const CURLcode res = curl_easy_perform(curl_handle); //Perform-Execute..
        if (res)
        {
            result = static_cast<short>(res);
        }
        curl_easy_cleanup(curl_handle);
    }
    else
    {
        result = static_cast<short>(CURLE_FAILED_INIT);
    }

    //cout<<"Size Of WebPage: "<< ((float)data.size/1000) <<" kb.\n\n"; //Print the SizeOf webpage in bytes..
    if (data.memory)
    {
        DataHolding.assign(data.memory, data.size); //Write Data to a String..
        free(data.memory);
        data.memory = NULL;
    }
    curl_global_cleanup();
}

// Searches `Data`, starting at offset `pos`, for the first `SizeOf_Regex`
// characters of `Regex` (a literal string, despite the name) and returns the
// offset of the hit shifted by `additional` (may be negative).
//
// Returns string::npos when the needle is empty, `pos` is negative, the needle
// is not found, or the shifted offset would fall before the start of `Data`.
static size_t strpos(const string &Data, const string &Regex, int pos, int SizeOf_Regex, int additional)
{
    if (pos < 0 || SizeOf_Regex <= 0 || Regex.empty())
        return string::npos;

    // Never read past the end of the needle.
    size_t count = static_cast<size_t>(SizeOf_Regex);
    if (count > Regex.size())
        count = Regex.size();

    const size_t hit = Data.find(Regex.c_str(), static_cast<size_t>(pos), count);
    if (hit == string::npos)
        return string::npos; // do not let `additional` turn "not found" into a real-looking offset

    if (additional < 0)
    {
        // Computed this way so that INT_MIN cannot overflow on negation.
        const size_t back = static_cast<size_t>(-(additional + 1)) + 1;
        return (back > hit) ? string::npos : hit - back;
    }
    return hit + static_cast<size_t>(additional);
}

// Sorts `vec` and removes duplicate entries (the original order is not kept).
void removeDuplicates(vector<string> &vec)
{
    std::sort(vec.begin(), vec.end());
    vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
}

// Asks the user whether the bad URLs should be removed permanently and, on
// "y"/"Y", overwrites Stock.ini with the entries of `vec`, one per line.
// Re-prompts until the first non-blank character of the answer is y, Y, n or
// N; end of input is treated as "no".
void removeBadURLs(vector<string> &vec)
{
    char response = 'n';
    for (;;)
    {
        cout << "Would you like to remove the bad URL's permanently? (y / n): ";

        string answer;
        if (!getline(cin, answer))
            return; // EOF / broken input stream: nothing more to ask, keep the file

        const size_t first = answer.find_first_not_of(" \t\r\n");
        if (first != string::npos && strchr("yYnN", answer[first]) != NULL)
        {
            response = answer[first];
            break;
        }
        cout << "Invalid choice.. Please Try Again.\n\n";
    }

    if (response == 'y' || response == 'Y')
    {
        ofstream file("Stock.ini", ios::out | ios::trunc);
        if (!file.is_open())
        {
            cout << "Unable to open Stock.ini for writing.\n\n";
            return;
        }
        for (size_t i = 0; i < vec.size(); i++)
            file << vec[i] << '\n';
        file.close();
    }
}

// Runs `expression` repeatedly over `Source` and stores the text of the LAST
// match in `ID`. `ID` is left untouched when there is no match. Exceptions
// thrown by the regex engine are reported on stdout and swallowed.
void preg_match_all(const string &Source, const boost::regex &expression, string &ID)
{
    try
    {
        std::string::const_iterator start = Source.begin();
        const std::string::const_iterator end = Source.end();
        boost::smatch what;
        boost::match_flag_type flags = boost::match_default;

        while (boost::regex_search(start, end, what, expression, flags))
        {
            //Destination = boost::regex_replace(Source, expression, "");
            ID = what[0];

            const bool emptyMatch = (what[0].first == what[0].second);
            start = what[0].second;
            if (emptyMatch)
            {
                // A zero-length match does not advance the cursor by itself;
                // step over one character or the loop would never end.
                if (start == end)
                    break;
                ++start;
            }
        }
    }
    catch (const exception &)
    {
        cout << "Exception Caught.. Function: preg_match_all.\n\n";
    }
    return;
}

// Validates every entry of `urllist`: entries that do not match one of the
// accepted URL shapes, or whose download fails / returns a known error page,
// are reported and erased from the vector. If anything was erased the user is
// offered to persist the cleaned list via removeBadURLs().
//
// `arraysize` is unused (the vector carries its own size) and kept only so
// existing callers keep compiling.
void CheckURLs(vector<string> &urllist, size_t arraysize)
{
    (void)arraysize;

    boost::regex MarketUrl("(http|https)://([a-z]+.)*(/[a-z]+/)*/markets/[a-z]+/", boost::regex::icase);
    boost::regex MarketUrlEx("(http|https)://([a-z]+.)*(/[a-z]+/)*/markets/[a-z]+/(\\?([a-z]+)=H_MKT_Data)", boost::regex::icase);
    boost::regex QuoteUrl("(http|https)://([a-z]+.)*(/[a-z]+/)*/quote.([a-z]+)(\\?([a-z]+)=[a-z]+)", boost::regex::icase);

    // Markers of the error pages a syntactically valid URL can still lead to.
    boost::regex LostStock("<title>Symbol not found Stock quote - CNNMoney.com</title>", boost::regex::icase);
    boost::regex BadLink("<span class=\"breadcrumbmain\">404 Page Not Found</span>", boost::regex::icase);
    boost::regex ServerLost("<h1 id=\"errorTitleText\">Server not found</h1>", boost::regex::icase);

    bool badurls = false;
    cout << "Checking for invalid URLs. Please wait..\n\n";

    size_t i = 0;
    while (i < urllist.size())
    {
        bool removeEntry = false;
        try
        {
            const bool wellFormed = boost::regex_match(urllist[i], MarketUrl)
                                 || boost::regex_match(urllist[i], QuoteUrl)
                                 || boost::regex_match(urllist[i], MarketUrlEx);
            if (!wellFormed)
            {
                SetConsoleColour(12);
                cout << "Bad Url Format Found.. \n";
                SetConsoleColour(15);
                cout << "\tBad-URL: " << urllist[i];
                SetConsoleColour(2);
                cout << " ---------- URL Temporarily Removed!\n\n";
                removeEntry = true;
            }
            else
            {
                short result = 0;
                GrabInfo(urllist[i], result);

                string QuoteLost, LostLink, LostServer;
                preg_match_all(DataHolding, LostStock, QuoteLost);
                preg_match_all(DataHolding, BadLink, LostLink);
                preg_match_all(DataHolding, ServerLost, LostServer);

                if ((LostServer.length() != 0) || (LostLink.length() != 0) || (QuoteLost.length() != 0) || (result != 0))
                {
                    SetConsoleColour(12);
                    cout << "Bad Url Found.. \n";
                    SetConsoleColour(15);
                    cout << "\tBad-URL: " << urllist[i] << "\n";
                    SetConsoleColour(2);
                    removeEntry = true;
                }
            }
        }
        catch (const exception &)
        {
            cout << "Cannot Match URL Complexity Of Regex for Matching Exceeds it's limits.\n\n";
        }

        if (removeEntry)
        {
            badurls = true;
            // After erase() the next candidate slides into slot i, so the index
            // must stay put; entries before i have already been validated.
            urllist.erase(urllist.begin() + static_cast<vector<string>::difference_type>(i));
        }
        else
        {
            ++i;
        }
    }

    if (badurls == true)
        removeBadURLs(urllist);
}