123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
// Downloads the document at AUrl through WinInet and returns its text,
// decoded with the code page that corresponds to ACharset.
//
// @param AUrl     : URL of the page to fetch
// @param ACharset : character-set name of the page (e.g. 'UTF-8', 'GB2312');
//                   matched case-insensitively, unknown names decode as UTF-8
// @param AState   : set to True on success, False on any failure
//
// @returns the decoded page text on success; on failure the string
//          '[Error]' + AUrl + '|' + <exception message>
function GetHtmlContent(const AUrl, ACharset: string; var AState: Boolean): string;
const
  cBufferSize = 65536;

  // Maps a charset name to the code page used for decoding.
  // Names are compared exactly (after Trim/UpperCase) rather than by substring,
  // so fragments such as 'UTF' no longer select an unrelated code page.
  function CodePageFor(const ACharsetName: string): Integer;
  const
    cDefaultCodePage = 65001; // UTF-8
    cIsoPrefix = 'ISO-8859-';
  var
    vName: string;
    vIsoPart: Integer;
  begin
    vName := UpperCase(Trim(ACharsetName));
    if (vName = 'UTF-8') or (vName = 'UTF8') then
      Result := 65001
    else if (vName = 'UTF-7') or (vName = 'UTF7') then
      Result := 65000
    else if (vName = 'GB2312') or (vName = 'GB-2312') then
      Result := 20936 // same code page the original implementation chose
    else if vName = 'ASCII' then
      Result := 20127
    else if vName = 'KOI8-R' then
      Result := 20866
    else if vName = 'KOI8-U' then
      Result := 21866
    else if vName = 'EUC-KR' then
      Result := 51949
    else if vName = 'EUC-JP' then
      Result := 20932
    else if vName = 'MACROMAN' then
      Result := 10000
    else if vName = 'MACCROATIAN' then
      Result := 10082
    else if Copy(vName, 1, Length(cIsoPrefix)) = cIsoPrefix then
    begin
      // ISO-8859-n maps to 28590 + n; part 12 does not exist.
      vIsoPart := StrToIntDef(Copy(vName, Length(cIsoPrefix) + 1, MaxInt), 0);
      if (vIsoPart >= 1) and (vIsoPart <= 16) and (vIsoPart <> 12) then
        Result := 28590 + vIsoPart
      else
        Result := cDefaultCodePage;
    end
    else
      // Everything else (including an empty name and UTF-32LE/BE, which the
      // original also decoded as 65001) falls back to UTF-8.
      Result := cDefaultCodePage;
  end;

var
  vStrStream: TStringStream; // accumulates the raw bytes and decodes them
  vNetHandle, vURLHandle: HINTERNET;
  vBuffer: PByte;
  vReadSize: Cardinal;
begin
  AState := False;
  Result := '';
  try
    vStrStream := TStringStream.Create('', CodePageFor(ACharset));
    try
      vNetHandle := InternetOpen('Delphi', INTERNET_OPEN_TYPE_PRECONFIG, nil, nil, 0);
      if vNetHandle = nil then
        RaiseLastOSError;
      try
        vURLHandle := InternetOpenUrl(vNetHandle, PChar(AUrl), nil, 0,
          INTERNET_FLAG_RELOAD, 0);
        if vURLHandle = nil then
          RaiseLastOSError;
        try
          GetMem(vBuffer, cBufferSize);
          try
            repeat
              // Reset every pass so a failed call can never leave a stale,
              // non-zero size behind and spin the loop forever.
              vReadSize := 0;
              if not InternetReadFile(vURLHandle, vBuffer, cBufferSize, vReadSize) then
                RaiseLastOSError;
              if vReadSize <> 0 then
                vStrStream.Write(vBuffer^, vReadSize);
            until vReadSize = 0; // a successful zero-byte read marks end of data
          finally
            FreeMem(vBuffer);
          end;
        finally
          InternetCloseHandle(vURLHandle);
        end;
      finally
        InternetCloseHandle(vNetHandle);
      end;

      Result := vStrStream.DataString;
      AState := True;
    finally
      vStrStream.Free;
    end;
  except
    on e: Exception do
    begin
      AState := False;
      Result := '[Error]' + AUrl + '|' + e.Message;
    end;
  end;
end;
1234567891011121314151617181920212223242526272829303132
// ...

// Reports the maximum number of bytes per character for a known code page.
//
// @param CodePage : numeric code page identifier
// @param lpCPInfo : receives MaxCharSize when the code page is recognised;
//                   left untouched otherwise
//
// @returns True when CodePage is in the table below, False otherwise
function GetCPInfo(CodePage: Cardinal; var lpCPInfo: TCPInfo): Boolean;
var
  vMaxSize: Cardinal; // 0 means "code page not recognised"
begin
  case CodePage of
    // Single-byte code pages: the numeric identifiers, the MacRoman ..
    // MacCroatian range, ASCII, KOI8-R, KOI8-U and ISO-8859-1..ISO-8859-16.
    154, 367, 437, 737, 775, 819, 850, 852,
    853, 855..858, 860..866, 869, 874, 922,
    1046, 1124, 1125, 1129, 1133, 1161, 1162, 1163, 1250..1258,
    10000, 10004..10007, 10010, 10017, 10021,
    10029, 10079, 10081, 10082,
    20127, 20866,
    21866, 28591..28601, 28603..28606:
      vMaxSize := 1;

    // Double-byte code pages (the original listing tags 20936, 51936 and
    // 51949 as GB2312 / EUC-KR).
    932, 936, 943, 949, 950,
    1361,
    20936,
    51936, 51949:
      vMaxSize := 2;

    // EUC-JP
    20932, 51932:
      vMaxSize := 3;

    // UTF-32LE, UTF-32BE, EUC-TW, HZ-GB-2312, GB18030, UTF-8
    12000, 12001,
    51950, 52936, 54936,
    65001:
      vMaxSize := 4;

    65000: vMaxSize := 6; // UTF-7
    50225: vMaxSize := 7; // ISO-2022-KR
    50227: vMaxSize := 8; // ISO-2022-CN
    50221: vMaxSize := 9; // ISO-2022-JP
  else
    vMaxSize := 0;
  end;

  Result := vMaxSize <> 0;
  if Result then
    lpCPInfo.MaxCharSize := vMaxSize;
end;

// ...

