Home >Backend Development >C++ >How to Eliminate White Space When Merging PDFs?

How to Eliminate White Space When Merging PDFs?

Mary-Kate Olsen
Mary-Kate Olsen (Original)
2024-12-25 22:43:09 · 417 views

How to Eliminate White Space When Merging PDFs?

How to eliminate white space when merging PDFs

Question:

I have some code that takes 3 different PDF byte arrays and merges them. This code runs fine. The problem some people have is that each PDF is treated as a full page (if printed), even though there is only about 4 inches of content on it, leaving 7 inches of vertical white space. Then the middle document is inserted, and it may or may not have vertical white space at the end. The footer is then also placed on its own page.

Here is the code:

// Render the report as PDF; the resulting bytes are the main letter that the merge sets are appended to.
byte[] Bytes = rv.LocalReport.Render("PDF", null, out MimeType, out Encoding, out Extension, out StreamIDs, out Warnings);
List<byte[]> MergeSets = // populated before this code

// Append any additional PDFs to the main letter.
if (MergeSets.Count > 0)
{
    // Process the main letter first, then every merge set, in their original order.
    List<byte[]> pdfSources = new List<byte[]>(MergeSets.Count + 1);
    pdfSources.Add(Bytes);
    pdfSources.AddRange(MergeSets);

    using (MemoryStream ms = new MemoryStream())
    {
        Document document = new Document();
        PdfCopy copy = new PdfCopy(document, ms);
        document.Open();

        foreach (byte[] pdfBytes in pdfSources)
        {
            PdfReader reader = new PdfReader(pdfBytes);
            try
            {
                // PDF page numbers are 1-based.
                int pages = reader.NumberOfPages;
                for (int pageNumber = 1; pageNumber <= pages; pageNumber++)
                {
                    copy.AddPage(copy.GetImportedPage(reader, pageNumber));
                } // loop through all pages of the current PDF

                // Let the copier flush and release everything it still holds for this reader.
                copy.FreeReader(reader);
            }
            finally
            {
                reader.Close();
            }
        } // loop through the main letter and all merge sets

        document.Close();

        // ToArray() returns exactly the bytes written. GetBuffer() would return the whole
        // backing buffer, including unused trailing bytes after the end of the PDF.
        ServerSaved = SaveGeneratedLetter(ms.ToArray(), DateTime.Now.Year, hl.LetterName, SaveName);
    }
} // if there is anything to merge

Question:

Is there a way to crop or remove the vertical white space at the end of each PDF as I merge the pages, so that the result appears as one seamless document?

Update:

The following are sample .pdf files that I'm trying to merge.

Title, body, footer

Update 2: Using answer:

I have converted @mkl's code to C#, as shown below.

The utility class:

/// <summary>
/// Merges PDF pages into one output document while dropping vertical white space:
/// each source page is analyzed with a <c>PageVerticalAnalyzer</c>, and only the
/// vertical sections it reports as used are stamped onto the output, one below the
/// other, separated by a configurable gap.
/// </summary>
public class PdfVeryDenseMergeTool
{
    // Output page geometry; fixed at construction time.
    private readonly Rectangle PageSize;
    private readonly float TopMargin;
    private readonly float BottomMargin;
    private readonly float Gap;

    // State of the output document currently being written (null while none is open).
    private Document Document = null;
    private PdfWriter Writer = null;

    // Y coordinate on the current output page at which the next section will be placed.
    private float YPosition = 0;

    /// <summary>Creates a merge tool for the given output page layout.</summary>
    /// <param name="size">Size of the output pages.</param>
    /// <param name="top">Top margin of the output pages.</param>
    /// <param name="bottom">Bottom margin of the output pages.</param>
    /// <param name="gap">Vertical distance left between two stamped sections.</param>
    public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap)
    {
        this.PageSize = size;
        this.TopMargin = top;
        this.BottomMargin = bottom;
        this.Gap = gap;
    } // PdfVeryDenseMergeTool

    /// <summary>
    /// Opens a new output document on <paramref name="outputStream"/>, merges every
    /// reader in <paramref name="inputs"/> into it in order, and closes the document.
    /// </summary>
    /// <param name="outputStream">Stream that receives the merged PDF.</param>
    /// <param name="inputs">Source PDFs, merged in list order.</param>
    public void Merge(MemoryStream outputStream, List<PdfReader> inputs)
    {
        try
        {
            this.OpenDocument(outputStream);

            foreach (PdfReader reader in inputs)
            {
                this.Merge(reader);
            } // loop through the PDFs to merge
        }
        finally
        {
            // Always reset the tool's state, even when merging fails part-way.
            this.CloseDocument();
        } // try-finally
    } // Merge

    /// <summary>
    /// Creates the output document (left/right margins are fixed at 36) together
    /// with its writer, opens it and starts the first page.
    /// </summary>
    /// <param name="outputStream">Stream that receives the merged PDF.</param>
    public void OpenDocument(MemoryStream outputStream)
    {
        this.Document = new Document(PageSize, 36, 36, this.TopMargin, this.BottomMargin);
        this.Writer = PdfWriter.GetInstance(Document, outputStream);

        this.Document.Open();
        this.NewPage();
    } // OpenDocument

    /// <summary>
    /// Closes the output document, if one is open, and resets the tool's state.
    /// Does nothing when no document has been created, so that a failure inside
    /// <see cref="OpenDocument"/> is not masked by a NullReferenceException here.
    /// </summary>
    public void CloseDocument()
    {
        try
        {
            if (this.Document != null)
            {
                this.Document.Close();
            }
        }
        finally
        {
            this.Document = null;
            this.Writer = null;
            this.YPosition = 0;
        } // try-finally
    } // CloseDocument

    /// <summary>
    /// Starts a new output page and moves the insertion position to just below
    /// the top margin.
    /// </summary>
    public void NewPage()
    {
        this.Document.NewPage();
        this.YPosition = PageSize.GetTop(this.TopMargin);
    } // NewPage

    /// <summary>Merges every page of <paramref name="reader"/> into the open output document.</summary>
    /// <param name="reader">Source PDF.</param>
    public void Merge(PdfReader reader)
    {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);

        // PDF page numbers are 1-based.
        for (int pageIndex = 1; pageIndex <= reader.NumberOfPages; pageIndex++)
        {
            this.Merge(reader, parser, pageIndex);
        } // loop through all pages of the current PDF
    } // Merge

    /// <summary>
    /// Merges the used vertical sections of a single source page into the open
    /// output document, starting new output pages whenever the remaining free
    /// space is too small for the next section.
    /// </summary>
    /// <param name="reader">Source PDF.</param>
    /// <param name="parser">Content parser created for <paramref name="reader"/>.</param>
    /// <param name="pageIndex">1-based number of the page to merge.</param>
    /// <exception cref="ArgumentException">
    /// A section of the page does not fit even on a freshly started output page.
    /// </exception>
    public void Merge(PdfReader reader, PdfReaderContentParser parser, int pageIndex)
    {
        PdfImportedPage importedPage = Writer.GetImportedPage(reader, pageIndex);
        PdfContentByte directContent = Writer.DirectContent;

        PageVerticalAnalyzer finder = parser.ProcessContent(pageIndex, new PageVerticalAnalyzer());

        // Fewer than two flips means there is not even one complete section: nothing to copy.
        if (finder.VerticalFlips.Count < 2)
        {
            return;
        }

        Rectangle pageSizeToImport = reader.GetPageSize(pageIndex);

        // Flips are consumed from the end of the list towards the start;
        // startFlip is the upper edge of the next section to place.
        int startFlip = finder.VerticalFlips.Count - 1;
        bool first = true;

        while (startFlip > 0)
        {
            // A repeated iteration means the previous attempt did not consume the
            // whole page, so continue on a fresh output page.
            if (!first)
            {
                this.NewPage();
            }

            float freeSpace = this.YPosition - PageSize.GetBottom(BottomMargin);
            int endFlip = startFlip + 1;

            // Extend the range by whole sections (two flips each) for as long as
            // the combined height still fits into the remaining free space.
            while ((endFlip > 1) && (finder.VerticalFlips[startFlip] - finder.VerticalFlips[endFlip - 2] < freeSpace))
            {
                endFlip -= 2;
            }

            if (endFlip < startFlip)
            {
                float height = finder.VerticalFlips[startFlip] - finder.VerticalFlips[endFlip];

                // Clip to the target band so that only the selected sections of the
                // imported page become visible.
                directContent.SaveState();
                directContent.Rectangle(0, this.YPosition - height, pageSizeToImport.Width, height);
                directContent.Clip();
                directContent.NewPath();

                // Shift the imported page vertically so that the upper edge of the
                // selected range lands on the current insertion position.
                this.Writer.DirectContent.AddTemplate(importedPage, 0, this.YPosition - (finder.VerticalFlips[startFlip] - pageSizeToImport.Bottom));

                directContent.RestoreState();
                this.YPosition -= height + this.Gap;
                startFlip = endFlip - 1;
            }
            else if (!first)
            {
                // Nothing fitted even on a freshly started page.
                throw new ArgumentException(string.Format("Page {0} content too large", pageIndex));
            } // if

            first = false;
        } // while
    } // Merge
} // PdfVeryDenseMergeTool

The render listener class:

Update 3: Fixed one line of code and it now works fine; see the comment in the code.

public class PageVerticalAnalyzer : IRenderListener
{

    /// <summary>Creates an analyzer; the constructor itself performs no work.</summary>
    public PageVerticalAnalyzer()
    { }

    /// <summary>
    /// Y coordinates at which the recorded vertical usage changes, as maintained by
    /// <c>AddVerticalUseSection</c>: entries are kept in ascending order, with each
    /// even index holding the lower edge of a used section and the following odd
    /// index holding its upper edge.
    /// </summary>
    public List<float> VerticalFlips = new List<float>();

    /// <summary>
    /// Records the vertical range between <paramref name="from"/> and
    /// <paramref name="to"/> as used, merging it with any already recorded
    /// sections it touches or overlaps. The two bounds may be given in either order.
    /// </summary>
    /// <param name="from">One vertical bound of the used range.</param>
    /// <param name="to">The other vertical bound of the used range.</param>
    public void AddVerticalUseSection(float from, float to)
    {
        // Normalize so that from is the lower bound and to the upper bound.
        if (from > to)
        {
            float swapped = from;
            from = to;
            to = swapped;
        }

        int flipCount = VerticalFlips.Count;

        // Index of the first flip that is not below the lower bound.
        int lowerIndex = 0;
        while (lowerIndex < flipCount && VerticalFlips[lowerIndex] < from)
        {
            lowerIndex++;
        }

        // Index of the first flip, at or after lowerIndex, that is not below the
        // upper bound. It stays 0 when every flip lies below the lower bound.
        int upperIndex = 0;
        if (lowerIndex < flipCount)
        {
            upperIndex = lowerIndex;
            while (upperIndex < flipCount && VerticalFlips[upperIndex] < to)
            {
                upperIndex++;
            }
        }

        // An even index means the bound falls outside every recorded section.
        bool lowerStartsSection = (lowerIndex % 2) == 0;
        bool upperEndsSection = (upperIndex % 2) == 0;

        // Drop all flips swallowed by the new range (indices lowerIndex .. upperIndex - 1).
        // Note from the original author: this removal was the previously faulty line,
        // which had used .Remove(j) instead of removing by index.
        if (upperIndex > lowerIndex)
        {
            VerticalFlips.RemoveRange(lowerIndex, upperIndex - lowerIndex);
        }

        // Insert the upper bound first so that the lower bound ends up in front of it.
        if (upperEndsSection)
        {
            VerticalFlips.Insert(lowerIndex, to);
        }
        if (lowerStartsSection)
        {
            VerticalFlips.Insert(lowerIndex, from);
        }
    } // AddVerticalUseSection

    /// <summary>Intentionally empty: the start of a text block is not used by this analyzer.</summary>
    public void BeginTextBlock()
    { /* Do nothing */ }

    /// <summary>Intentionally empty: the end of a text block is not used by this analyzer.</summary>
    public void EndTextBlock()
    { /* Do nothing */ }

    public void RenderImage(ImageRenderInfo renderInfo)
    {
        Matrix ctm = renderInfo.GetImageCTM();
        List<float> YCoords = new List<float>(4) { 0, 0, 0, 0 };

        for (int x = 0; x < 2; x++)
        {
            for (int y = 0; y < 2; y++)
            {
                Vector corner = new Vector(x, y, 1).Cross(ctm);
                YCoords[2 * x + y] = corner[Vector.I2];
            }
        }

        YCoords.Sort();

The above is the detailed content of How to Eliminate White Space When Merging PDFs?. For more information, please follow other related articles on the PHP Chinese website!

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn