Retrieve all image links from an HTML document

uses mshtml, ActiveX, COMObj, IdHTTP, idURI;

{ .... }

procedure GetImageLinks(AURL: string; AList: TStrings);
var
IDoc: IHTMLDocument2;
strHTML: string;
v: Variant;
x: Integer;
ovLinks: OleVariant;
DocURL: string;
URI: TidURI;
ImgURL: string;
idHTTP: TidHTTP;
begin
AList.Clear;
URI := TidURI.Create(AURL);
try
DocURL := 'http://' + URI.Host;
if URI.Path <> '/' then
DocURL := DocURL + URI.Path;
finally
URI.Free;
end;
Idoc := CreateComObject(Class_HTMLDocument) as IHTMLDocument2;
try
IDoc.designMode := 'on';
while IDoc.readyState <> 'complete' do
Application.ProcessMessages;
v := VarArrayCreate([0, 0], VarVariant);
idHTTP := TidHTTP.Create(nil);
try
strHTML := idHTTP.Get(AURL);
finally
idHTTP.Free;
end;
v[0] := strHTML;
IDoc.Write(PSafeArray(System.TVarData(v).VArray));
IDoc.designMode := 'off';
while IDoc.readyState <> 'complete' do
Application.ProcessMessages;
ovLinks := IDoc.all.tags('IMG');
if ovLinks.Length > 0 then
begin
for x := 0 to ovLinks.Length - 1 do
begin
ImgURL := ovLinks.Item(x).src;
// The stuff below will probably need a little tweaking
// Deteriming and turning realtive URLs into absolute URLs
// is not that difficult but this is all I could come up with
// in such a short notice.
if (ImgURL[1] = '/') then
begin
// more than likely a relative URL so
// append the DocURL
ImgURL := DocURL + ImgUrl;
end
else
begin
if (Copy(ImgURL, 1, 11) = 'about:blank') then
begin
ImgURL := DocURL + Copy(ImgUrl, 12, Length(ImgURL));
end;
end;
AList.Add(ImgURL);
end;
end;
finally
IDoc := nil;
end;
end;


// Beispiel:
// Example:
procedure TForm1.Button1Click(Sender: TObject);
begin
GetImageLinks('http://delphimagic.blogspot.com', Memo1.Lines);
end;

No hay comentarios:

Publicar un comentario

Simulación del movimiento de los electrones en un campo electrico

Espectacular simulación realizada con OpenGL del movimiento de los electrones cuando atraviesan un campo eléctrico. Como muestra la image...