Недавно нашел интересную статейку и решил поделиться с вами:
Программа перехватывает на веб-сайт Google, поиск по ключевому слову, и получает первые 100 сайтов.
Листинг:
unit FindTh;
interface
uses
Classes, IdComponent, SysUtils, IdHTTP;
type
TFindWebThread = class(TThread)
protected
Addr, Text, Status: string;
procedure Execute; override;
procedure AddToList;
procedure ShowStatus;
procedure GrabHtml;
procedure HtmlToList;
procedure HttpWork (Sender: TObject; AWorkMode: TWorkMode;
const AWorkCount: Integer);
public
strUrl: string;
strRead: string;
end;
implementation
{ TFindWebThread }
uses
WebFindF;
procedure TFindWebThread.AddToList;
begin
if Form1.ListBox1.Items.IndexOf (Addr) < 0 then
begin
Form1.ListBox1.Items.Add (Addr);
Form1.DetailsList.Add (Text);
end;
end;
procedure TFindWebThread.Execute;
begin
GrabHtml;
HtmlToList;
Status := 'Done with ' + StrUrl;
Synchronize (ShowStatus);
end;
procedure TFindWebThread.GrabHtml;
var
Http1: TIdHTTP;
begin
Status := 'Sending query: ' + StrUrl;
Synchronize (ShowStatus);
Http1 := TIdHTTP.Create (nil);
try
Http1.Request.UserAgent := 'User-Agent: NULL';
Http1.OnWork := HttpWork;
strRead := Http1.Get (StrUrl);
finally
Http1.Free;
end;
end;
procedure TFindWebThread.HtmlToList;
var
strAddr, strText: string;
nText: integer;
nBegin, nEnd: Integer;
begin
Status := 'Extracting data for: ' + StrUrl;
Synchronize (ShowStatus);
strRead := LowerCase (strRead);
repeat
// find the initial part HTTP reference
nBegin := Pos ('href=http', strRead);
if nBegin <> 0 then
begin
// get the remaining part of the string, starting with 'http'
strRead := Copy (strRead, nBegin + 5, 1000000);
// find the end of the HTTP reference
nEnd := Pos ('>', strRead);
strAddr := Copy (strRead, 1, nEnd - 1);
// move on
strRead := Copy (strRead, nEnd + 1, 1000000);
// add the URL if 'google' is not in it
if Pos ('google', strAddr) = 0 then
begin
nText := Pos ('</a>', strRead);
strText := copy (strRead, 1, nText - 1);
// remove cached references and duplicates
if (Pos ('cached', strText) = 0) then
begin
Addr := strAddr;
Text := strText;
AddToList;
end;
end;
end;
until nBegin = 0;
end;
procedure TFindWebThread.HttpWork(Sender: TObject; AWorkMode: TWorkMode;
const AWorkCount: Integer);
begin
Status := 'Received ' + IntToStr (AWorkCount) + ' for ' + strUrl;
Synchronize (ShowStatus);
end;
procedure TFindWebThread.ShowStatus;
begin
Form1.StatusBar1.SimpleText := Status;
end;
end.