package extraction;

import model.TextElement;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * A {@link PDFTextStripper} that, in addition to the normal text output, records one
 * {@link TextElement} per non-blank string handed to {@link #writeString(String, List)}.
 *
 * <p>Each element carries the trimmed text, the page number, the position and size derived
 * from the first and last {@link TextPosition} of the string, the font size, and the
 * dimensions of the page's media box.
 *
 * <p>Elements are accumulated for the lifetime of this instance; the list is never cleared
 * here, so reusing one instance for several extraction runs appends to the same list.
 */
public class PositionedTextStripper extends PDFTextStripper {

    /** Elements collected so far, in the order the strings were written. */
    private final List<TextElement> elements = new ArrayList<>();

    /** Page number of the page currently being processed (as reported by the base class). */
    private int currentPage = 0;

    /** Media-box width of the page currently being processed. */
    private float currentPageWidth = 0;

    /** Media-box height of the page currently being processed. */
    private float currentPageHeight = 0;

    /**
     * Captures the page number and media-box dimensions of the page about to be processed,
     * then delegates to the base implementation.
     *
     * @param page the page whose processing is starting
     * @throws IOException if the base implementation fails
     */
    @Override
    protected void startPage(PDPage page) throws IOException {
        // Take the page number from the base class instead of counting startPage() calls:
        // a private counter is never reset between runs and goes out of step with the real
        // page number when a start page is configured or a page is skipped.
        currentPage = getCurrentPageNo();

        // NOTE(review): dimensions come from the media box; confirm this is the box the
        // consumers of TextElement expect the x/y coordinates to be relative to.
        PDRectangle mediaBox = page.getMediaBox();
        currentPageWidth = mediaBox.getWidth();
        currentPageHeight = mediaBox.getHeight();

        super.startPage(page);
    }

    /**
     * Records a {@link TextElement} for the given string (unless it is blank after trimming
     * or has no positions) and then delegates to the base implementation so the regular
     * text output is unaffected.
     *
     * @param text the string being written
     * @param textPositions the positions of the individual characters of {@code text};
     *     when {@code null} or empty no element is recorded
     * @throws IOException if the base implementation fails
     */
    @Override
    protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
        if (textPositions == null || textPositions.isEmpty()) {
            super.writeString(text, textPositions);
            return;
        }

        TextPosition first = textPositions.get(0);
        TextPosition last = textPositions.get(textPositions.size() - 1);

        TextElement elem = new TextElement();
        elem.setText(text.trim());
        elem.setPageNum(currentPage);
        elem.setX(first.getX());
        elem.setY(first.getY());
        // Span from the start of the first glyph to the end of the last one; abs() keeps the
        // width non-negative when the last glyph lies before the first.
        elem.setWidth(Math.abs((last.getX() + last.getWidth()) - first.getX()));
        // Clamp to at least 1 so an element never ends up with a zero height.
        elem.setHeight(Math.max(1, Math.abs(first.getHeight())));
        elem.setFontSize(first.getFontSizeInPt());
        elem.setPageWidth(currentPageWidth);
        elem.setPageHeight(currentPageHeight);

        // Whitespace-only strings carry no content worth recording.
        if (!elem.getText().isEmpty()) {
            elements.add(elem);
        }

        super.writeString(text, textPositions);
    }

    /**
     * Returns the elements recorded so far.
     *
     * @return the live internal list (not a copy), in recording order
     */
    public List<TextElement> getElements() {
        return elements;
    }
}