// {$I DICopyright.inc }

{ Contains classes to create MIME messages with inline attachments from HTML documents. }
unit DIHtmlMimeMessage;

{$I DICompilers.inc}

interface

uses
  DISystemCompat,
  {$IFDEF HAS_UNITSCOPE}System.Classes{$ELSE}Classes{$ENDIF},
  DIHtmlParser, // Download DIHtmlParser from http://www.yunqa.de
  DIHtmlCharSetPlugin, DIHtmlLinksPlugin, DIHtmlWriterPlugin,
  DIUnicode, DIUnicodeString2Vector, DIHtmlMisc;

type
  //------------------------------------------------------------------------------
  // TDIAbstractHtmlMimeMessage
  //------------------------------------------------------------------------------

  { }
  TDIAbstractHtmlMimeMessage = class(TComponent)
  private
    FHtmlParser: TDIHtmlParser;
    FCharSetPlugin: TDIHtmlCharSetPlugin;
    FLinksPlugin: TDIHtmlLinksPlugin;
    FWriterPlugin: TDIHtmlWriterPlugin;

    FAttatchedFiles: TDIUnicodeString2Vector;
    FDocUri: UnicodeString;
    FStylesInline: Boolean;
    FWriteTag: Boolean;

    function GetNormalizeWhiteSpace: Boolean;
    procedure OnHtmlCharSetPluginCharSetChange(
      const Sender: TDIHtmlCharSetPlugin;
      const CharSet: UnicodeString;
      var ReadMethods: TDIUnicodeReadMethods;
      var AllowChange: Boolean);
    procedure OnHtmlLinksPluginLink(
      const Sender: TDIHtmlLinksPlugin;
      const TagID: TDITagID;
      const AttribId: TDIAttribID;
      var Link: UnicodeString;
      var Accept, Show: Boolean);
    procedure SetNormalizeWhiteSpace(const Value: Boolean);
  protected
    function DoAttachment(
      const FullFileName, FileExt: UnicodeString;
      var ContentID: UnicodeString;
      const TagID: TDITagID;
      const AttribId: TDIAttribID): Boolean; virtual; abstract;
    procedure DoBegin; virtual; abstract;
    procedure DoEnd; virtual; abstract;
  public
    constructor Create(AOwner: TComponent); override;
    destructor Destroy; override;
    procedure FillFromFile(HtmlFileName: string);
    procedure FillFromStream(const HtmlStream: TStream; const ADocumentUri: UnicodeString);
  published
    property NormalizeWhiteSpace: Boolean read GetNormalizeWhiteSpace write SetNormalizeWhiteSpace;
    property StylesInline: Boolean read FStylesInline write FStylesInline;
  end;

  //------------------------------------------------------------------------------
  // TDIHtmlMimeMessage
  //------------------------------------------------------------------------------

  { }
  TDIHtmlMimeMessage = class;

  { }
  TDIOnHtmlMimeMessageAttachmentEvent = procedure(
    const Sender: TDIHtmlMimeMessage;
    const AFullFileName, AFileExt: UnicodeString;
    var AContentID: UnicodeString;
    const TagID: TDITagID;
    const AttribId: TDIAttribID;
    var Attach: Boolean) of object;

  { }
  TDIOnHtmlMimeMessageBeginEvent = procedure of object;

  { }
  TDIOnHtmlMimeMessageEndEvent = procedure(
    const AHtmlBuffer: Pointer;
    const AHtmlBufferSize: Cardinal) of object;

  { }
  TDIHtmlMimeMessage = class(TDIAbstractHtmlMimeMessage)
  private
    FOnAttachment: TDIOnHtmlMimeMessageAttachmentEvent;
    FOnBegin: TDIOnHtmlMimeMessageBeginEvent;
    FOnEnd: TDIOnHtmlMimeMessageEndEvent;
  protected
    function DoAttachment(
      const FullFileName, FileExt: UnicodeString;
      var ContentID: UnicodeString;
      const TagID: TDITagID;
      const AttribId: TDIAttribID): Boolean; override;
    procedure DoBegin; override;
    procedure DoEnd; override;
  published
    property OnAttachment: TDIOnHtmlMimeMessageAttachmentEvent read FOnAttachment write FOnAttachment;
    property OnBegin: TDIOnHtmlMimeMessageBeginEvent read FOnBegin write FOnBegin;
    property OnEnd: TDIOnHtmlMimeMessageEndEvent read FOnEnd write FOnEnd;
  end;

implementation

uses
  {$IFDEF HAS_UNITSCOPE}System.SysUtils{$ELSE}SysUtils{$ENDIF},
  DIUri_3986, DIUtils;

//------------------------------------------------------------------------------
// TDIAbstractHtmlMimeMessage
//------------------------------------------------------------------------------

constructor TDIAbstractHtmlMimeMessage.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);

  FHtmlParser := TDIHtmlParser.Create(Self);
  FHtmlParser.FilterHtmlTags.StartTags := fiShow;

  { Set up the CharSet Plugin. }
  FCharSetPlugin := TDIHtmlCharSetPlugin.Create(Self);
  FCharSetPlugin.OnCharSetChange := OnHtmlCharSetPluginCharSetChange;
  FCharSetPlugin.HtmlParser := FHtmlParser;

  { Set up the Links Plugin. }
  FLinksPlugin := TDIHtmlLinksPlugin.Create(Self);
  FLinksPlugin.BASE_href := True;
  FLinksPlugin.Link_BODY_background := True;
  FLinksPlugin.Link_IMAGE_src := True;
  FLinksPlugin.Link_IMG_lowsrc := True;
  FLinksPlugin.Link_IMG_src := True;
  FLinksPlugin.Link_IMG_usemap := True;
  FLinksPlugin.Link_LINK_href := True;
  FLinksPlugin.Link_SCRIPT_src := True;
  FLinksPlugin.Link_TABLE_background := True;
  FLinksPlugin.Link_TD_background := True;
  FLinksPlugin.Link_TH_background := True;
  FLinksPlugin.Link_TR_background := True;
  FLinksPlugin.OnLink := OnHtmlLinksPluginLink;
  FLinksPlugin.HtmlParser := FHtmlParser;

  { Set up the Writer Plugin. }
  FWriterPlugin := TDIHtmlWriterPlugin.Create(Self);
  { We must quote attribute values! Why?
    * Eudora's internal HTML parser does not recognize unquoted attribute
      values which contain ':' or '_'. I don't know exactly which character
      is responsible, but it causes inline images not to be displayed. }
  FWriterPlugin.QuoteHtmlTags := qtAlways;
  FWriterPlugin.FilterDTDs := fiShow;
  FWriterPlugin.FilterHtmlPIs := fiShow;
  FWriterPlugin.FilterHtmlTags.EndTags := fiShow;
  FWriterPlugin.FilterScripts := fiShow;
  FWriterPlugin.FilterStyles := fiShow;
  FWriterPlugin.FilterText := fiShow;
  FWriterPlugin.FilterTitles := fiShow;
  FWriterPlugin.FilterXmlPIs := fiShow;
  FWriterPlugin.HtmlParser := FHtmlParser;

  FAttatchedFiles := NewDIUnicodeString2Vector;
end;

//------------------------------------------------------------------------------

destructor TDIAbstractHtmlMimeMessage.Destroy;
begin
  FAttatchedFiles.Free;
  FWriterPlugin.Free;
  FLinksPlugin.Free;
  FCharSetPlugin.Free;
  FHtmlParser.Free;
  inherited Destroy;
end;

//------------------------------------------------------------------------------

procedure TDIAbstractHtmlMimeMessage.FillFromFile(HtmlFileName: string);
var
  HtmlStream: TFileStream;
  DocUri: UnicodeString;
begin
  HtmlFileName := ExpandFileName(HtmlFileName);
  HtmlStream := TFileStream.Create(HtmlFileName, fmOpenRead or fmShareDenyWrite);
  try
    DocUri := FileNameToUriW(HtmlFileName);
    FillFromStream(HtmlStream, DocUri);
  finally
    HtmlStream.Free;
  end;
end;

//------------------------------------------------------------------------------

procedure TDIAbstractHtmlMimeMessage.FillFromStream(const HtmlStream: TStream; const ADocumentUri: UnicodeString);
begin
  FAttatchedFiles.Clear;

  // Prepare Parser
  FHtmlParser.Reset;
  FHtmlParser.SourceStream := HtmlStream;

  // Store Document URI and use as Base URI to resolve links
  FDocUri := ADocumentUri;
  StrTrimUriFragmentW(FDocUri);
  FLinksPlugin.BaseUri := FDocUri;

  // Prepare Writer Plugin
  FWriterPlugin.Clear;

  DoBegin;

  FWriteTag := True;
  while FHtmlParser.ParseNextPiece do
    if FWriteTag then
      FWriterPlugin.WriteCurrentHtmlTagStart
    else
      FWriteTag := True;

  DoEnd;

  FAttatchedFiles.Clear;
end;

//------------------------------------------------------------------------------

function TDIAbstractHtmlMimeMessage.GetNormalizeWhiteSpace: Boolean;
begin
  Result := FHtmlParser.NormalizeWhiteSpace;
end;

//------------------------------------------------------------------------------

procedure TDIAbstractHtmlMimeMessage.OnHtmlCharSetPluginCharSetChange(
  const Sender: TDIHtmlCharSetPlugin;
  const CharSet: UnicodeString;
  var ReadMethods: TDIUnicodeReadMethods;
  var AllowChange: Boolean);
begin
  Sender.HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_CONTENT_ID] := 'text/html;charset=iso-8859-1';
end;

//------------------------------------------------------------------------------

procedure TDIAbstractHtmlMimeMessage.OnHtmlLinksPluginLink(
  const Sender: TDIHtmlLinksPlugin;
  const TagID: TDITagID;
  const AttribId: TDIAttribID;
  var Link: UnicodeString;
  var Accept, Show: Boolean);
label
  ModifyLink;
var
  URI, UriNoFrag, UriFrag: UnicodeString;
  FullFileName: UnicodeString;
  FileName: UnicodeString;
  FileExt: UnicodeString;
  s: UnicodeString;
  i: Integer;
  StyleTag: TDIHtmlTag;
  RM: TDIUnicodeReadMethods;
begin
  if not ResolveRelativeUriW(Sender.BaseUri, Link, URI) then
    URI := Link;

  // Split URI into no fraction part and fraction part.
  i := StrPosCharW(URI, UCP_NUMBER_SIGN);
  if i > 0 then
    begin
      UriNoFrag := Copy(URI, 1, i);
      UriFrag := Copy(URI, i, MaxInt);
    end
  else
    begin
      UriNoFrag := URI;
      UriFrag := '';
    end;

  // Test if link is a reference to the document itself.
  if not StrSameW(UriNoFrag, FDocUri) then
    begin
      if UriToFileNameW(UriNoFrag, FullFileName) then
        begin

          { Is this is a StyleSheet Link? }
          if FStylesInline and (TagID = TAG_LINK_ID) then
            begin
              i := Sender.HtmlParser.HtmlTag.IndexOfNumber(ATTRIB_REL_ID);
              if (i >= 0) and
                StrContainsIW('stylesheet', Sender.HtmlParser.HtmlTag.ValueAt[i]) and
                FileExistsW(FullFileName) then
                begin

                  StyleTag := NewDIHtmlTag;
                  try
                    StyleTag.TagName := TAG_STYLE;
                    StyleTag.CopyAttrib(Sender.HtmlParser.HtmlTag, ATTRIB_MEDIA_ID);
                    StyleTag.CopyAttrib(Sender.HtmlParser.HtmlTag, ATTRIB_TITLE_ID);
                    StyleTag.CopyAttrib(Sender.HtmlParser.HtmlTag, ATTRIB_TYPE_ID);
                    FWriterPlugin.WriteHtmlTag(StyleTag);

                    FWriterPlugin.Writer.WriteBufW('<!--', 4);

                    { Determine character set for style sheet file. }
                    RM := CharSetReadMethods(Sender.HtmlParser.HtmlTag.ValueOfNumber[ATTRIB_CHARSET_ID]);
                    FWriterPlugin.Writer.WriteFile(FullFileName, RM);

                    FWriterPlugin.Writer.WriteBufW('-->', 3);

                    StyleTag.TagType := ttEndTag;
                    FWriterPlugin.WriteHtmlTag(StyleTag);

                    FWriteTag := False; // We replaced this tag, do not write it!
                  finally
                    StyleTag.Free;
                  end;
                  Exit;
                end;
            end;

          { Is the file already attached? }
          i := FAttatchedFiles.IndexOfNameCI(FullFileName);
          if i < 0 then
            begin

              if {$IFNDEF DIH_Test}FileExistsW(FullFileName){$ELSE}True{$ENDIF} then
                begin
                  FileName := ExtractFileNameW(FullFileName);
                  FileExt := ExtractFileExtW(FileName);

                  // Check for duplicate file name / Content ID.
                  if FAttatchedFiles.ExistsValueCI(FileName) then
                    begin
                      i := 0;
                      s := ChangeFileExtW(FileName, '');
                      repeat // Loop until unique file name / Content ID found.
                        Inc(i);
                        FileName := s + '_' + IntToStrW(i) + FileExt;
                      until not FAttatchedFiles.ExistsValueCI(FileName);
                    end;

                  if DoAttachment(FullFileName, FileExt, FileName, TagID, AttribId) then
                    begin
                      FAttatchedFiles.InsertNameValueLast(FullFileName, FileName);
                      goto ModifyLink;
                    end;
                end;

            end
          else // if i < 0 then -- File is already attatched, modify link only.
            begin
              FileName := FAttatchedFiles.ValueAt[i];
              // Modify Link in HTML document.
              ModifyLink:
              Link := 'cid:' + FileName;
              if Pointer(UriFrag) <> nil then
                Link := Link + '#' + UriFrag;
            end;

        end;
    end
  else
    begin
      Link := UriFrag; // Adjust link reference to document itself.
    end;
end;

//------------------------------------------------------------------------------

procedure TDIAbstractHtmlMimeMessage.SetNormalizeWhiteSpace(const Value: Boolean);
begin
  FHtmlParser.NormalizeWhiteSpace := Value;
end;

//------------------------------------------------------------------------------
// TDIHtmlMimeMessage
//------------------------------------------------------------------------------

function TDIHtmlMimeMessage.DoAttachment(
  const FullFileName, FileExt: UnicodeString;
  var ContentID: UnicodeString;
  const TagID: TDITagID;
  const AttribId: TDIAttribID): Boolean;
begin
  Result := True;
  if Assigned(FOnAttachment) then
    FOnAttachment(Self, FullFileName, FileExt, ContentID, TagID, AttribId, Result);
end;

//------------------------------------------------------------------------------

procedure TDIHtmlMimeMessage.DoBegin;
begin
  if Assigned(FOnBegin) then
    FOnBegin;
end;

//------------------------------------------------------------------------------

procedure TDIHtmlMimeMessage.DoEnd;
begin
  if Assigned(FOnEnd) then
    FOnEnd(FWriterPlugin.Writer.Data, FWriterPlugin.Writer.DataSize);
end;

end.
