Worksheet
Contents
- 1 Import CSV with Auto Settings
- 2 Import CSV with Selected Rows
- 3 Import Excel Sheet
- 4 Import HTML Table from the Web
- 5 Import MATLAB
- 6 Import Multiple Files using Pandas into One Worksheet
- 7 Import Two Excel Sheets
- 8 Import Using a Filter
- 9 Import by Setting File Header Info
- 10 Manipulate Columns and Make Group Plot
- 11 Split Dataset to Training and Testing Data
This folder contains examples of data import, worksheet manipulation, calculation and graphing with originpro package.
For worksheet related functions in originpro, see worksheet.
For project related functions, see project.
Import CSV with Auto Settings
''' This sample shows how to use from_file function to import text data to worksheet ''' import originpro as op f = op.path('e')+r'Samples\Curve Fitting\Enzyme.dat' #assume active worksheet wks = op.find_sheet() #By default, CSV connector is used wks.from_file(f) print(wks.shape)
Import CSV with Selected Rows
''' This example shows how to do partial import with CSV data connector ''' import originpro as op f = op.path('e')+r'Samples\Import and Export\S15-125-03.dat' wks = op.new_sheet() #Use CSV connector which is the default #and connector will be removed after import dc = op.Connector(wks) #Specify the rows to import ss = dc.settings() partial = ss['partial'] partial[op.attrib_key('Use')]='1' partial['row']='10:20' #import from rwo 10 to row 20 dc.imp(f)
Import Excel Sheet
''' This example shows how to Customize the Excel data connector. 1. Parse headerlines to column labels. 2. Import partial cols and rows. 3. Specify the spreadsheet to import. ''' import originpro as op f = op.path('e')+r'Samples\Import and Export\Partial Import.xlsx' wks = op.new_sheet() #Create data connector object dc = op.Connector(wks, dctype='Excel', keep_DC=True) ss = dc.settings() #Headerlines to column label labels = ss['labels'] labels[op.attrib_key('Use')] = '1' labels['longname'] = 1 labels['unit'] = 2 #Setting for partial import partial = ss['partial'] partial[op.attrib_key('Use')] = '1' partial['col'] = '1:3' partial['row'] = '1:99' #Import the second spreadsheet 'expt2' dc.imp(f, sel='expt2')
Import HTML Table from the Web
''' Despite the name, from_file can actually import from the web This example shows using the HTML connector to import a specific table from a wikipedia web page ''' import originpro as op fn = 'https://en.wikipedia.org/wiki/World_population' wks=op.new_sheet() wks.from_file(fn, True, 'HTML', 'Tables/_6')
Import MATLAB
''' This sample shows how to import MATLAB file. You can try different selection by using GUI ''' import originpro as op fn = op.path('e')+r'Samples\Import and Export\GaussianData.mat' wks=op.new_sheet() wks.from_file(fn, True, 'MATLAB', 'MATLAB/Data')
Import Multiple Files using Pandas into One Worksheet
""" This shows import text files using DataFrame instead of Origin's internal import """ import originpro as op import pandas as pd import os fd = op.path('e') + r'Samples\Batch2' wks = op.new_sheet() col = 0 for file in os.listdir(fd): df = pd.read_table(os.path.join(fd, file), header=[0,1,2]) wks.from_df(df, col) wks.cols_axis('xy', col) wks.set_labels(df.columns.get_level_values(0), 'L', col) wks.set_labels(df.columns.get_level_values(1), 'U', col) #the sample data file has no comment for X column, so we need to remove the 'Unnamed: 0_level_2' comments = df.columns.get_level_values(2).tolist() comments[0] = "" wks.set_labels(comments, 'C', col) col = wks.cols
Import Two Excel Sheets
''' This example shows how to import a second sheet from an Exel file with multiple sheets. ''' import originpro as op f = op.path('e')+r'Samples\Import and Export\United States Energy (1980-2013).xls' wks = op.new_sheet() #Create data connector object dc = op.Connector(wks, dctype='Excel', keep_DC=False) ss = dc.settings() #1st two rows are header, followed by column name and units ss['mainheader']=2; labels = ss['labels'] #labels branch use an attrtibute to turn on, so from a dict, #special node is needed to set internal tree attribute labels[op.attrib_key('Use')] = '1' labels['longname'] = 1 labels['unit'] = 2 #Import the first sheet, which is "oil" dc.imp(f, sel='Oil') #Import a new Excel sheet as a new Origin sheet dc.new_sheet('Natural Gas')
Import Using a Filter
''' This example shows how to use a filter created from Import Wizard ''' import originpro as op fn = op.path('e')+r'Samples\Import and Export\S15-125-03.dat' wks=op.new_sheet() #use a filter in the same folder as the data file wks.from_file(fn, False, 'Import Filter','.\VarsFromFileNameAndHeader.oif')
Import by Setting File Header Info
''' This example shows how to control file header. Typically the CSV connector can automatically detect main header and column header info, but sometimes the automatic detection does not work for certain file so you need to specify it yourself. ''' import originpro as op wks = op.new_sheet() dc = op.Connector(wks) dc.source = op.path('e')+r'Samples\Import and Export\F1.dat' ss = dc.settings() ss['mainheader']=5 ss['heading']=1 ss['unit']=2 #must not pass in filepath if you are controlling file header #if file is passed in, the auto detection code will kick in to wipe out the settings above dc.imp()
Manipulate Columns and Make Group Plot
''' This sample shows how to prepare worksheet for plotting. ''' import originpro as op #Import Data File f = op.path('e')+r'Samples\Signal Processing\Sunspot.dat' wks = op.new_sheet() wks.from_file(f, keep_DC=False) #Set worksheet property wks.cols=5 wks.move_cols(1,2,1) #Insert a date column wks.cols_axis('xyy',2,4) #Set column designation #Set "Date" column property ncol=2 wks.set_formula(ncol,'date(B$ + "/1/" + A$)') wks.set_label(ncol, 'Date') wks.as_date(ncol,"yyyy/MM") #Set "Averaged Sunspot Number" column property ncol=4 wks.set_formula(ncol,'movavg(D,20,20)') wks.set_label(ncol, 'Averaged Sunspot Number') #Plot "Sunspot Number" vs "Date" gp = op.new_graph() gp[0].add_plot(f'{wks.lt_range()}!(3,4:5)') # Plot using data range gp[0].group() gp[0].rescale()
Split Dataset to Training and Testing Data
''' This example splits a dataset with multiple columns to two datasets named Train and Test, using the package sklearn. To check for and install if needed, open the Script Window (Shift+Alt+3), type the following and press Enter. pip -chk pandas sklearn ''' import numpy as np import pandas as pd import originpro as op from sklearn.model_selection import train_test_split # Import data and get the independent and dependent data to X and y respectively ws=op.new_sheet() ws.from_file(fname=op.path('e')+r"Samples\Statistics\Fisher's Iris Data.dat", keep_DC=False) # Get first 4 columns as X X = ws.to_df(c1=0, numcols=4) # Get the last column as y y = ws.to_df(c1=4, numcols=1) # Split the dataset into train and test datasets # train dataset contains 70% samples, and test dataset contains 30% samples # shuffle the data before splitting X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=1) # Create worksheet for the splitted datasets wks = op.new_sheet('w', 'Split') Num = X_train.columns.shape[0] wks.from_list(Num, y_train.iloc[:, 0].tolist(), comments='Train', lname=y_train.columns[0]) wks.from_list(Num*2+1, y_test.iloc[:, 0].tolist(), comments='Test', lname=y_train.columns[0]) for idx in range(Num): wks.from_list(idx, X_train.iloc[:, idx].tolist(), comments='Train', lname=X_train.columns[idx]) wks.from_list(Num+1+idx, X_test.iloc[:, idx].tolist(), comments='Test', lname=X_train.columns[idx])