Saçma sapan dönüştürme siteleriyle uğraşmak yerine şu betiği kullanmanız yeterli olacaktır.
Seçim işlemi için Ubuntu'nun varsayılan dosya yöneticisini kullandım.
def _unique_column_names(names):
    """Return *names* with repeated entries renamed so every name is unique.

    The first occurrence of a name is kept unchanged. Each later occurrence
    becomes ``f"{name}_{position}"`` (``position`` is its index in *names*);
    if that candidate is already in use, the numeric suffix is increased
    until a free name is found.
    """
    # Start from every original name so a generated name can never shadow a
    # header that legitimately appears elsewhere in the row.
    taken = set(names)
    seen = set()
    unique = []
    for position, name in enumerate(names):
        if name in seen:
            suffix = position
            candidate = f"{name}_{suffix}"
            while candidate in taken:
                suffix += 1
                candidate = f"{name}_{suffix}"
            taken.add(candidate)
            name = candidate
        else:
            seen.add(name)
        unique.append(name)
    return unique


def pdf_to_excel(pdf_path, excel_path):
    """Extract every table from a PDF and write them to one Excel sheet.

    Each page of the PDF is scanned with ``page.extract_tables()``. For every
    table that has more than one row, the first row is used as the header and
    the remaining rows as data. Duplicate header names are made unique before
    the tables are concatenated with ``pd.concat(..., ignore_index=True)`` and
    written with ``DataFrame.to_excel(..., index=False)``.

    Args:
        pdf_path: Path of the PDF, passed to ``pdfplumber.open``.
        excel_path: Destination passed to ``DataFrame.to_excel``.

    Returns:
        None. If no usable table is found, the message
        "No tables found in the PDF." is printed and nothing is written.
    """
    frames = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            for table in page.extract_tables():
                # A table needs a header row plus at least one data row.
                if len(table) <= 1:
                    continue
                header, *rows = table
                frames.append(
                    pd.DataFrame(rows, columns=_unique_column_names(header))
                )

    # The PDF is closed at this point; only the extracted data is needed.
    if not frames:
        print("No tables found in the PDF.")
        return

    combined_df = pd.concat(frames, ignore_index=True)
    combined_df.to_excel(excel_path, index=False)